Skip to content

Commit

Permalink
[Feature] Add getJobLimitCapability interface and use it in nms (#2337)
Browse files Browse the repository at this point in the history
Co-authored-by: budefei <budefei@cambricon.com>
  • Loading branch information
defei-coder and budefei committed Oct 18, 2022
1 parent bd1da5a commit be684ee
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
10 changes: 10 additions & 0 deletions mmcv/ops/csrc/common/pytorch_mlu_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@

#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y))

inline int32_t getJobLimitCapability() {
CNcontext drv_ctx;
CNctxConfigParam ctx_conf_param;
TORCH_CHECK(
CN_SUCCESS == cnGetCtxConfigParam(drv_ctx, CN_CTX_CONFIG_UNION_LIMIT,
&ctx_conf_param),
"cnGetCtxConfigParam fails.");
return (int32_t)ctx_conf_param.unionLimit;
}

#endif // MMCV_WITH_MLU

#endif // PYTORCH_MLU_HELPER_HPP_
3 changes: 1 addition & 2 deletions mmcv/ops/csrc/pytorch/mlu/nms_mlu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ static cnnlStatus_t policyFunc(cnrtDim3_t *k_dim, cnrtFunctionType_t *k_type,
int &core_num_per_class,
const int input_box_num) {
uint32_t core_dim = torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster);
uint32_t cluster_number = torch_mlu::getDeviceAttr(cnrtAttrClusterCount);
uint32_t job_limit = cluster_number * core_dim;
uint32_t job_limit = getJobLimitCapability();
uint32_t core_number = job_limit;

int box_num_per_core = (input_box_num + core_number - 1) / core_number;
Expand Down

0 comments on commit be684ee

Please sign in to comment.