fix bugs

PaddlePaddle · Mar 29, 2021 · 0911405 · 0911405
1 parent b5d92f9
commit 0911405
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 17 deletions.
diff --git a/paddle/fluid/operators/expand_op_npu.cc b/paddle/fluid/operators/expand_op_npu.cc
@@ -58,12 +58,15 @@ class ExpandNPUKernel : public framework::OpKernel<T> {
             expand_times.size(), static_cast<size_t>(in_dims.size())));
     auto* out0 = context.Output<framework::LoDTensor>("Out");
     framework::DDim out_dims(in_dims);
+
     for (size_t i = 0; i < expand_times.size(); ++i) {
       out_dims[i] *= expand_times[i];
     }
+
     out0->Resize(out_dims);
     out0->mutable_data<T>(context.device_context().GetPlace());
-    auto runner = NpuOpRunner("TileD", {*in0}, {*out0}, {{"multiples", expand_times}});
+    auto runner =
+        NpuOpRunner("TileD", {*in0}, {*out0}, {{"multiples", expand_times}});
     auto stream =
         context.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();

diff --git a/paddle/fluid/platform/npu_profiler.h b/paddle/fluid/platform/npu_profiler.h
@@ -23,28 +23,52 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
 
-void NPUProfilerInit(std::string output_path, std::string output_mode,
-                     std::string config_file) {
+// ACL_AICORE_ARITHMETIC_UTILIZATION = 0, record arithmetic stats
+// ACL_AICORE_PIPE_UTILIZATION = 1, record pipe
+// ACL_AICORE_MEMORY_BANDWIDTH = 2, record memory
+// ACL_AICORE_L0B_AND_WIDTH = 3, record internal io
+// ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, record conflict ratio
+constexpr aclprofAicoreMetrics default_metrics =
+    ACL_AICORE_ARITHMETIC_UTILIZATION;  // default for NPUProfilerCreateConfig
+
+// ACL_PROF_ACL_API, record ACL API stats
+// ACL_PROF_TASK_TIME, record AI core stats
+// ACL_PROF_AICORE_METRICS, must include
+// ACL_PROF_AICPU_TRACE, record AICPU, not supported yet
+//
+// Default bit mask of data categories handed to aclprofCreateConfig by
+// NPUProfilerCreateConfig. It is stored as uint64_t: `dataTypeConfig` is the
+// name of aclprofCreateConfig's last parameter, not a type.
+constexpr uint64_t default_type =
+    ACL_PROF_ACL_API | ACL_PROF_AICORE_METRICS | ACL_PROF_TASK_TIME;
+
+// Initializes the ACL profiler by passing `output_path` and its length to
+// aclprofInit; the returned status is checked by PADDLE_ENFORCE_NPU_SUCCESS.
+//
+// Marked inline because the definition lives in a header: without it, every
+// translation unit including this file would emit its own external definition.
+inline void NPUProfilerInit(std::string output_path) {
   PADDLE_ENFORCE_NPU_SUCCESS(
       aclprofInit(output_path.c_str(), output_path.size()));
 }
 
 void NPUProfilerStart(const aclprofConfig *config)) {
+  if (config == nullptr) {
+    // NOTE(zhiqiu): support single device by default.
+    int device_id = GetCurrentNPUDeviceId();
+    std::vector<uint32_t> devices = {static_cast<uint32_t>(device_id)};
+    config = NPUProfilerCreateConfig(devices, metrics, c);
+  }
   PADDLE_ENFORCE_NPU_SUCCESS(aclprofStart(config));
 }
 
 void NPUProfilerStop(const aclprofConfig *config)) {
   PADDLE_ENFORCE_NPU_SUCCESS(aclprofStop(config));
+  NPUProfilerDestroyConfig(config);
 }
 
 void NPUProfilerFinalize() { PADDLE_ENFORCE_NPU_SUCCESS(aclprofFinalize()); }
 
-void NPUProfilerCreateConfig(std::vector<int32_t> devices,
-                             aclprofAicoreMetrics metrics,
-                             dataTypeConfig config,
-                             p aclprofAicoreEvents *events = nullptr) {
-  PADDLE_ENFORCE_NPU_SUCCESS(aclprofCreateConfig(devices.data(), devices.size(),
-                                                 metrics, events, config));
+// Creates an ACL profiling configuration.
+//
+// devices: ids of the devices to profile.
+// metrics: AI Core metric group to collect (defaults to default_metrics).
+// c:       bit mask of ACL_PROF_* data categories (defaults to default_type).
+// events:  forwarded unchanged to aclprofCreateConfig (defaults to nullptr).
+//
+// Returns the configuration produced by aclprofCreateConfig. A null result is
+// rejected by PADDLE_ENFORCE_NOT_NULL with an External error.
+inline aclprofConfig *NPUProfilerCreateConfig(
+    std::vector<int32_t> devices,
+    aclprofAicoreMetrics metrics = default_metrics,
+    uint64_t c = default_type, aclprofAicoreEvents *events = nullptr) {
+  // aclprofCreateConfig expects a mutable uint32_t id list, so the signed ids
+  // are copied into a buffer of that type rather than passing an int32_t
+  // pointer.
+  std::vector<uint32_t> device_ids(devices.begin(), devices.end());
+  aclprofConfig *config = aclprofCreateConfig(
+      device_ids.data(), static_cast<uint32_t>(device_ids.size()), metrics,
+      events, c);
+  PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::External(
+                                      "Failed to create prof config for NPU"));
+  return config;
+}
 
 void NPUProfilerDestroyConfig(const aclprofConfig *config) {

diff --git a/python/paddle/fluid/profiler.py b/python/paddle/fluid/profiler.py
@@ -142,28 +142,26 @@ def npu_profiler(output_file, config=None):
             exe = fluid.Executor(place)
             exe.run(fluid.default_startup_program())
 
-            output_file = 'cuda_profiler.txt'
-            with profiler.cuda_profiler(output_file, 'csv') as nvprof:
+            output_file = 'npu.txt'
+            with profiler.npu_profiler(output_file) as npu_prof:
                 for i in range(epoc):
                     input = np.random.random(dshape).astype('float32')
                     exe.run(fluid.default_main_program(), feed={'data': input})
-            # then use  NVIDIA Visual Profiler (nvvp) to load this output file
+            # then use  NPU profiler tools to load this output file
             # to visualize results.
     """
     # TODO: support config in python.
-
     if not config:
         config = core.npu_prof_create_config()
 
-    core.npu_prof_init(output_file, output_mode, config_file)
-    # Enables profiler collection by the active CUDA profiling tool.
-    core.npu_prof_start()
+    core.npu_prof_init(output_file)
+    # Enables profiler collection by the active NPU profiling tool.
+    core.npu_prof_start(config)
     try:
         yield
     # Disables profiler collection.
     finally:
         core.npu_prof_stop(config)
-        os.remove(config_file)
 
 
 def reset_profiler():