Skip to content

Commit

Permalink
fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiqiu committed Mar 29, 2021
1 parent b5d92f9 commit 0911405
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 17 deletions.
5 changes: 4 additions & 1 deletion paddle/fluid/operators/expand_op_npu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,15 @@ class ExpandNPUKernel : public framework::OpKernel<T> {
expand_times.size(), static_cast<size_t>(in_dims.size())));
auto* out0 = context.Output<framework::LoDTensor>("Out");
framework::DDim out_dims(in_dims);

for (size_t i = 0; i < expand_times.size(); ++i) {
out_dims[i] *= expand_times[i];
}

out0->Resize(out_dims);
out0->mutable_data<T>(context.device_context().GetPlace());
auto runner = NpuOpRunner("TileD", {*in0}, {*out0}, {{"multiples", expand_times}});
auto runner =
NpuOpRunner("TileD", {*in0}, {*out0}, {{"multiples", expand_times}});
auto stream =
context.template device_context<paddle::platform::NPUDeviceContext>()
.stream();
Expand Down
40 changes: 32 additions & 8 deletions paddle/fluid/platform/npu_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,52 @@ limitations under the License. */
namespace paddle {
namespace platform {

void NPUProfilerInit(std::string output_path, std::string output_mode,
std::string config_file) {
// AI Core metric selectors (aclprofAicoreMetrics values):
// ACL_AICORE_ARITHMETIC_UTILIZATION = 0, record arithmetic stats
// ACL_AICORE_PIPE_UTILIZATION = 1, record pipe
// ACL_AICORE_MEMORY_BANDWIDTH = 2, record memory
// ACL_AICORE_L0B_AND_WIDTH = 3, record internal io
// ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, record conflict ratio
//
// Default AI Core metric; used as the default `metrics` argument of
// NPUProfilerCreateConfig.
constexpr aclprofAicoreMetrics default_metrics =
ACL_AICORE_ARITHMETIC_UTILIZATION;

// ACL_PROF_ACL_API, record ACL API stats
// ACL_PROF_TASK_TIME, record AI core stats
// ACL_PROF_AICORE_METRICS, must include
// ACL_PROF_AICPU_TRACE, recore AICPU, not supported yet
constexpr dataTypeConfig default_type =
ACL_PROF_ACL_API | ACL_PROF_AICORE_METRICS | ACL_PROF_TASK_TIME;

// Initializes ACL profiling.
//
// output_path: forwarded to aclprofInit together with its length.
//
// The aclprofInit status is checked through PADDLE_ENFORCE_NPU_SUCCESS.
//
// Declared `inline` because the definition lives in a header; without it,
// including this header from more than one translation unit produces
// multiple definitions at link time.
inline void NPUProfilerInit(std::string output_path) {
  PADDLE_ENFORCE_NPU_SUCCESS(
      aclprofInit(output_path.c_str(), output_path.size()));
}

void NPUProfilerStart(const aclprofConfig *config)) {
if (config == nullptr) {
// NOTE(zhiqiu): support single device by default.
int device_id = GetCurrentNPUDeviceId();
std::vector<uint32_t> devices = {static_cast<uint32_t>(device_id)};
config = NPUProfilerCreateConfig(devices, metrics, c);
}
PADDLE_ENFORCE_NPU_SUCCESS(aclprofStart(config));
}

void NPUProfilerStop(const aclprofConfig *config)) {
PADDLE_ENFORCE_NPU_SUCCESS(aclprofStop(config));
NPUProfilerDestroyConfig(config);
}

// Finalizes ACL profiling by calling aclprofFinalize; the returned status is
// checked through PADDLE_ENFORCE_NPU_SUCCESS.
//
// Declared `inline` because the definition lives in a header.
inline void NPUProfilerFinalize() {
  PADDLE_ENFORCE_NPU_SUCCESS(aclprofFinalize());
}

void NPUProfilerCreateConfig(std::vector<int32_t> devices,
aclprofAicoreMetrics metrics,
dataTypeConfig config,
p aclprofAicoreEvents *events = nullptr) {
PADDLE_ENFORCE_NPU_SUCCESS(aclprofCreateConfig(devices.data(), devices.size(),
metrics, events, config));
// Creates an ACL profiling configuration.
//
// devices: ids of the devices to profile.
// metrics: AI Core metric to collect (defaults to default_metrics).
// c:       bit mask of profiling data categories (defaults to default_type).
// events:  optional AI Core events, forwarded unchanged (defaults to nullptr).
//
// Returns the configuration produced by aclprofCreateConfig. A null result is
// rejected through PADDLE_ENFORCE_NOT_NULL with an External error.
//
// Declared `inline` because the definition lives in a header.
inline aclprofConfig *NPUProfilerCreateConfig(
    std::vector<int32_t> devices,
    aclprofAicoreMetrics metrics = default_metrics,
    uint64_t c = default_type, aclprofAicoreEvents *events = nullptr) {
  // aclprofCreateConfig expects a mutable uint32_t device list and a uint32_t
  // count (NOTE(review): confirm against the installed acl_prof.h), so copy
  // the signed ids into an unsigned buffer instead of passing int32_t*.
  std::vector<uint32_t> device_ids(devices.begin(), devices.end());
  aclprofConfig *config = aclprofCreateConfig(
      device_ids.data(), static_cast<uint32_t>(device_ids.size()), metrics,
      events, c);
  PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::External(
                                      "Failed to create prof config for NPU"));
  return config;
}

void NPUProfilerDestroyConfig(const aclprofConfig *config) {
Expand Down
14 changes: 6 additions & 8 deletions python/paddle/fluid/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,28 +142,26 @@ def npu_profiler(output_file, config=None):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output_file = 'cuda_profiler.txt'
with profiler.cuda_profiler(output_file, 'csv') as nvprof:
output_file = 'npu.txt'
with profiler.npu_profiler(output_file) as npu_prof:
for i in range(epoc):
input = np.random.random(dshape).astype('float32')
exe.run(fluid.default_main_program(), feed={'data': input})
# then use NVIDIA Visual Profiler (nvvp) to load this output file
# then use NPU profiler tools to load this output file
# to visualize results.
"""
# TODO: support config in python.

if not config:
config = core.npu_prof_create_config()

core.npu_prof_init(output_file, output_mode, config_file)
# Enables profiler collection by the active CUDA profiling tool.
core.npu_prof_start()
core.npu_prof_init(output_file)
# Enables profiler collection by the active NPU profiling tool.
core.npu_prof_start(config)
try:
yield
# Disables profiler collection.
finally:
core.npu_prof_stop(config)
os.remove(config_file)


def reset_profiler():
Expand Down

0 comments on commit 0911405

Please sign in to comment.