Refine partial_program for new run_program OP #40355

Merged: 5 commits, Mar 14, 2022
1 change: 1 addition & 0 deletions paddle/fluid/eager/to_static/run_program_op_func.h
@@ -57,6 +57,7 @@ inline void run_program_dygraph_function(
auto grad_node = std::make_shared<GradNodeRunProgram>(1, 2);

grad_node->SetFwdOutNames(out_names);
grad_node->SetOut(out);
// Set Attributes
grad_node->SetAttrMap(attrs);
// Set TensorWrappers
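Note: the `SetOut(out)` call added above stores pointers to the forward outputs on the grad node, so the backward pass can later write gradients directly into each output's autograd slot. A minimal, self-contained Python sketch of that bookkeeping pattern (stand-in classes only, not Paddle's API):

```python
# Sketch of the "grad node keeps its forward outputs" pattern; all names here
# are illustrative stand-ins, not Paddle types.
class Tensor:
    def __init__(self, name, value=None):
        self.name = name
        self.value = value
        self.grad = None  # filled in by the grad node during backward


class GradNodeRunProgramSketch:
    def __init__(self):
        self.fwd_out_names = []   # analogous to SetFwdOutNames(out_names)
        self.out = []             # analogous to the new SetOut(out)

    def set_fwd_out_names(self, names):
        self.fwd_out_names = names

    def set_out(self, out):
        self.out = out

    def backward(self, out_grads):
        # Write each incoming gradient into the matching forward output's grad
        # slot and give it the conventional "@GRAD" suffix.
        for fwd_t, name, g in zip(self.out, self.fwd_out_names, out_grads):
            g.name = name + "@GRAD"
            fwd_t.grad = g


# Usage sketch
outs = [Tensor("y", 3.0)]
node = GradNodeRunProgramSketch()
node.set_fwd_out_names([t.name for t in outs])
node.set_out(outs)
node.backward([Tensor("", 1.0)])
print(outs[0].grad.name)  # -> y@GRAD
```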
23 changes: 18 additions & 5 deletions paddle/fluid/eager/to_static/run_program_op_node.h
@@ -260,9 +260,9 @@ inline void RunProgramAPI(
}
VLOG(2) << "The number of sub scopes after forward: "
<< out_scope_vec->front()->kids().size();
// #ifdef PADDLE_WITH_MKLDNN
// if (FLAGS_use_mkldnn) paddle::platform::DontClearMKLDNNCache(place);
// #endif
#ifdef PADDLE_WITH_MKLDNN
if (FLAGS_use_mkldnn) paddle::platform::DontClearMKLDNNCache(place);
#endif
}

inline void RunProgramGradAPI(
@@ -357,7 +357,7 @@ inline void RunProgramGradAPI(
details::ShareTensorsFromScope(params_grad, *global_block, &scope);

// Step5. drop current scope
// global_inner_scope->DeleteScope(&scope);
global_inner_scope->DeleteScope(&scope);
VLOG(2) << "The number of sub scopes after backward: "
<< global_inner_scope->kids().size();
}
@@ -400,6 +400,10 @@ class GradNodeRunProgram : public egr::GradNodeBase {
paddle::platform::errors::InvalidArgument(
"The grads[0].size() and fwd_out_names_.size() should be equal."));
for (size_t i = 0; i < fwd_out_names_.size(); ++i) {
auto &out_grad = egr::EagerUtils::unsafe_autograd_meta(*out_[i])->Grad();
const_cast<paddle::experimental::Tensor &>(out_grad).set_impl(
grads[0][i].impl());

const_cast<paddle::experimental::Tensor &>(grads[0][i])
.set_name(fwd_out_names_[i] + "@GRAD");
}
@@ -432,6 +436,10 @@ class GradNodeRunProgram : public egr::GradNodeBase {
fwd_out_names_ = out_names;
}

void SetOut(const std::vector<paddle::experimental::Tensor *> &out) {
out_ = out;
}

protected:
void ConstructGradTensors(
const std::vector<paddle::experimental::Tensor> &fwd_tensors,
@@ -440,7 +448,11 @@
// such as: name, tensor type(DenseTensor or SelectedRows).
VLOG(3) << "fwd_tensors.size(): " << fwd_tensors.size();
for (auto &fwd_t : fwd_tensors) {
grad_tensors->emplace_back(fwd_t.impl());
if (phi::DenseTensor::classof(fwd_t.impl().get())) {
grad_tensors->emplace_back(std::make_shared<phi::DenseTensor>());
} else if (phi::SelectedRows::classof(fwd_t.impl().get())) {
grad_tensors->emplace_back(std::make_shared<phi::SelectedRows>());
}
auto &grad_t = grad_tensors->back();
grad_t.set_name(fwd_t.name() + "@GRAD");
}
@@ -462,6 +474,7 @@ class GradNodeRunProgram : public egr::GradNodeBase {
std::vector<paddle::framework::Scope *> step_scope_;

std::vector<std::string> fwd_out_names_;
std::vector<paddle::experimental::Tensor *> out_;

// Attribute Map
paddle::framework::AttributeMap attrs_;
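In the `ConstructGradTensors` hunk above, the gradient holders are now created empty with a concrete type that mirrors the forward tensor (`DenseTensor` vs. `SelectedRows`) rather than reusing the forward tensor's impl. A hedged Python analogue with stand-in classes:

```python
# Illustrative analogue of the ConstructGradTensors change above. DenseTensor
# and SelectedRows here are stand-in classes, not Paddle's phi types.
class DenseTensor:
    def __init__(self):
        self.name = ""


class SelectedRows:
    def __init__(self):
        self.name = ""
        self.rows = []


def construct_grad_tensors(fwd_tensors):
    grad_tensors = []
    for fwd_t in fwd_tensors:
        # Create an *empty* gradient holder of the same kind as the forward
        # tensor instead of aliasing the forward tensor's storage.
        if isinstance(fwd_t, DenseTensor):
            grad_t = DenseTensor()
        elif isinstance(fwd_t, SelectedRows):
            grad_t = SelectedRows()
        else:
            raise TypeError("unsupported tensor kind: %r" % type(fwd_t))
        grad_t.name = fwd_t.name + "@GRAD"
        grad_tensors.append(grad_t)
    return grad_tensors


x, emb = DenseTensor(), SelectedRows()
x.name, emb.name = "x", "emb"
print([g.name for g in construct_grad_tensors([x, emb])])  # ['x@GRAD', 'emb@GRAD']
```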
1 change: 0 additions & 1 deletion paddle/fluid/pybind/custom_handwrite_op_funcs.h
@@ -31,7 +31,6 @@ static PyObject *eager_api_run_program(PyObject *self, PyObject *args,

tstate = PyEval_SaveThread();
run_program_dygraph_function(X, Params, Out, OutScope, DOut, attrs);
std::cout << "end run_program_dygraph_function" << std::endl;
PyEval_RestoreThread(tstate);
tstate = nullptr;
} catch (...) {
17 changes: 17 additions & 0 deletions paddle/fluid/pybind/eager_method.cc
@@ -688,6 +688,21 @@ static PyObject* tensor_register_reduce_hook(TensorObject* self, PyObject* args,
EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* set_grad_type(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
auto var_type = pybind::CastPyArg2ProtoType(PyTuple_GET_ITEM(args, 0), 0);
auto grad_tensor =
egr::EagerUtils::unsafe_autograd_meta(self->tensor)->Grad();
if (var_type == framework::proto::VarType::LOD_TENSOR) {
grad_tensor.set_impl(std::make_shared<phi::DenseTensor>());
} else if (var_type == framework::proto::VarType::SELECTED_ROWS) {
grad_tensor.set_impl(std::make_shared<phi::SelectedRows>());
}
return Py_None;
EAGER_CATCH_AND_THROW_RETURN_NULL
}

PyMethodDef variable_methods[] = {
{"numpy", (PyCFunction)(void (*)(void))tensor_method_numpy,
METH_VARARGS | METH_KEYWORDS, NULL},
@@ -734,6 +749,8 @@ PyMethodDef variable_methods[] = {
{"_register_backward_hook",
(PyCFunction)(void (*)(void))tensor_register_reduce_hook,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_set_grad_type", (PyCFunction)(void (*)(void))set_grad_type,
METH_VARARGS | METH_KEYWORDS, NULL},
{NULL, NULL, 0, NULL}};

} // namespace pybind
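The `_set_grad_type` method registered above lets Python code pre-create an eager tensor's gradient impl as either a dense tensor or selected rows before `run_program` executes. A hedged usage sketch (assumes an eager-mode build of Paddle that includes this binding; the real call site inside dygraph-to-static may differ):

```python
# Usage sketch only; requires a Paddle build with the eager-mode binding above.
import paddle
from paddle.fluid import core

t = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)

# Pre-allocate the gradient holder as a dense (LoD) tensor ...
t._set_grad_type(core.VarDesc.VarType.LOD_TENSOR)

# ... or, for parameters that receive sparse gradients (e.g. embeddings),
# as a SELECTED_ROWS gradient:
# t._set_grad_type(core.VarDesc.VarType.SELECTED_ROWS)
```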
@@ -104,7 +104,7 @@ def _replace_value_with_input_spec(self, args):
if isinstance(input_var, np.ndarray):
input_var = paddle.static.InputSpec.from_numpy(input_var)
_set_spec_stop_gradient(input_var, True)
elif isinstance(input_var, core.VarBase):
elif isinstance(input_var, (core.VarBase, core.eager.Tensor)):
stop_gradient = input_var.stop_gradient
input_var = paddle.static.InputSpec.from_tensor(input_var)
_set_spec_stop_gradient(input_var, stop_gradient)
35 changes: 18 additions & 17 deletions python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
@@ -148,10 +148,7 @@ def __init__(self, main_program, inputs, outputs, parameters=None,

self._origin_main_program = self._verify_program(main_program)
self._tmp_scope_vec = self._create_scope_vec()
# A fake_var to handle empty input or output
self.__fake_vars = _create_fake_var()
# Set default mode to train
self._double_grads = self._get_double_grads(self._origin_main_program)
self.training = True

custom_white_list, custom_black_list = None, None
@@ -163,6 +160,14 @@
custom_white_list=custom_white_list,
custom_black_list=custom_black_list)

@LazyInitialized
def __fake_vars(self):
return _create_fake_var()

@LazyInitialized
def _double_grads(self):
return self._get_double_grads(self._origin_main_program)

@LazyInitialized
def _infer_program(self):
"""
@@ -356,8 +361,10 @@ def _cast_fp16_if_pure_fp16(self, in_vars):

def drop_scope_if_no_grad(self):
tracer = framework._dygraph_tracer()
scope = self._tmp_scope_vec.value().get_scope() if isinstance(
self._tmp_scope_vec, (core.VarBase)) else self._tmp_scope_vec[0]
if self.training and not tracer._has_grad:
self._tmp_scope_vec.value().get_scope().drop_kids()
scope.drop_kids()

@property
def program(self):
@@ -449,18 +456,14 @@ def create_out(var_id):
def _create_scope_vec(self):
# Hold forward variables
tmp_scope_vec = None
inner_scope = core.Scope()
if not core._in_eager_mode():
tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
"program_out_scope",
core.VarDesc.VarType.STEP_SCOPES, True)
# TODO(jiabin): Support this later.
# else:
# tmp_scope_vec = core.eager.Tensor(core.VarDesc.VarType.FP32, [],
# "program_out_scope",
# core.VarDesc.VarType.STEP_SCOPES, True)

inner_scope = core.Scope()
tmp_scope_vec.value().set_scope(inner_scope)
else:
tmp_scope_vec = [inner_scope]
return tmp_scope_vec

def _restore_out(self, out_vars):
@@ -598,12 +601,10 @@ def _create_fake_var():
core.VarDesc.VarType.RAW, False)
]
else:
return []
# TODO(jiabin): Support this later
# return [
# core.eager.Tensor(core.VarDesc.VarType.FP32, [], "Fake_var",
# core.VarDesc.VarType.RAW, False)
# ]
return [
core.eager.Tensor(core.VarDesc.VarType.FP32, [], "Fake_var",
core.VarDesc.VarType.RAW, False)
]


def partial_program_from(concrete_program):
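The `partial_program.py` changes above turn `__fake_vars` and `_double_grads` into `@LazyInitialized` properties, so they are only computed on first access instead of eagerly in `__init__`. A minimal sketch of such a compute-once descriptor (a simplified assumption about how Paddle's `LazyInitialized` helper behaves, not its exact implementation):

```python
# Minimal compute-once (cached) descriptor sketch; Paddle's LazyInitialized
# helper may differ in detail.
class LazyInitialized:
    def __init__(self, function):
        self.function = function
        self.name = function.__name__

    def __get__(self, instance, owner):
        if instance is None:
            return self
        value = self.function(instance)       # run the wrapped method once ...
        instance.__dict__[self.name] = value  # ... then cache it on the instance
        return value


class PartialProgramLayerSketch:
    def __init__(self, main_program):
        self._origin_main_program = main_program

    @LazyInitialized
    def _double_grads(self):
        print("building double grads")        # runs only on first access
        return []                             # placeholder for the real work


layer = PartialProgramLayerSketch(main_program=object())
layer._double_grads   # prints "building double grads" and caches the result
layer._double_grads   # served from the instance cache, no recomputation
```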