Integrate MediaTek APU Support (#440)
* Integrate APU

* fix code style issue
yungchienhsu authored and llhe committed May 15, 2019
1 parent 9fbb9e1 commit 2d650b6
Showing 25 changed files with 1,175 additions and 14 deletions.
12 changes: 12 additions & 0 deletions mace/BUILD.bazel
@@ -74,6 +74,18 @@ config_setting(
visibility = ["//visibility:public"],
)

config_setting(
name = "apu_enabled",
define_values = {
"apu": "true",
},
values = {
"crosstool_top": "//external:android/crosstool",
"cpu": "arm64-v8a",
},
visibility = ["//visibility:public"],
)

config_setting(
name = "hexagon_enabled",
define_values = {
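This `config_setting` is matched when a build passes `--define apu=true` together with the Android arm64-v8a crosstool; `if_apu_enabled` in mace.bzl can then switch sources, defines, and deps on it. A minimal sketch of how the resulting `MACE_ENABLE_APU` define (added to copts in mace/core/BUILD.bazel below) is typically consumed on the C++ side; the helper function here is hypothetical, not part of this commit:

// Hypothetical helper showing compile-time gating on MACE_ENABLE_APU,
// the define injected when building with --define apu=true.
#ifdef MACE_ENABLE_APU
#include "mace/core/runtime/apu/apu_wrapper.h"
#endif

bool IsApuRuntimeCompiledIn() {
#ifdef MACE_ENABLE_APU
  return true;   // runtime/apu sources and libapu-frontend are linked in
#else
  return false;  // APU code paths are compiled out entirely
#endif
}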
12 changes: 11 additions & 1 deletion mace/core/BUILD.bazel
@@ -11,10 +11,12 @@ load(
"//mace:mace.bzl",
"if_android",
"if_android_armv7",
"if_apu_enabled",
"if_hexagon_enabled",
"if_hexagon_or_hta_enabled",
"if_hta_enabled",
"if_neon_enabled",
"if_not_apu_enabled",
"if_not_hexagon_enabled",
"if_opencl_enabled",
"if_openmp_enabled",
@@ -39,7 +41,9 @@ cc_library(
"runtime/hexagon/hexagon_dsp_wrapper.cc",
]) + if_hta_enabled([
"runtime/hexagon/hexagon_hta_wrapper.cc",
]),
]) + if_apu_enabled(glob([
"runtime/apu/*.cc",
])),
hdrs = glob([
"*.h",
"runtime/cpu/*.h",
@@ -52,6 +56,8 @@ cc_library(
"runtime/hexagon/*dsp*.h",
])) + if_hta_enabled(glob([
"runtime/hexagon/*hta*.h",
])) + if_apu_enabled(glob([
"runtime/apu/*.h"
])),
copts = [
"-Werror",
@@ -68,6 +74,8 @@ cc_library(
"-DMACE_ENABLE_HEXAGON",
]) + if_hta_enabled([
"-DMACE_ENABLE_HTA",
]) + if_apu_enabled([
"-DMACE_ENABLE_APU",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
@@ -90,6 +98,8 @@ cc_library(
"//third_party/nnlib:libhexagon",
]) + if_hta_enabled([
"//third_party/hta",
]) + if_apu_enabled([
"//third_party/apu:libapu-frontend",
]),
)

33 changes: 33 additions & 0 deletions mace/core/runtime/apu/apu_device.h
@@ -0,0 +1,33 @@
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MACE_CORE_RUNTIME_APU_APU_DEVICE_H_
#define MACE_CORE_RUNTIME_APU_APU_DEVICE_H_

#include "mace/core/device.h"

namespace mace {

class ApuDevice : public CPUDevice {
public:
explicit ApuDevice(utils::ThreadPool *thread_pool)
: CPUDevice(0, AFFINITY_NONE, thread_pool) {}

DeviceType device_type() const override {
return DeviceType::APU;
}
};

} // namespace mace
#endif // MACE_CORE_RUNTIME_APU_APU_DEVICE_H_
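`ApuDevice` reuses the CPU device for everything except its reported type, so framework code can branch on `device_type()` without needing a separate APU code path elsewhere. A minimal usage sketch (the thread-pool setup is elided; `pool` is assumed to be a valid `utils::ThreadPool`):

// Sketch: constructing the device and dispatching on its reported type.
#include "mace/core/runtime/apu/apu_device.h"

void DispatchExample(mace::utils::ThreadPool *pool) {
  mace::ApuDevice device(pool);
  if (device.device_type() == mace::DeviceType::APU) {
    // route the graph to the APU wrapper instead of the CPU engine
  }
}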
295 changes: 295 additions & 0 deletions mace/core/runtime/apu/apu_wrapper.cc
@@ -0,0 +1,295 @@
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mace/core/runtime/apu/apu_wrapper.h"

#include <algorithm>

#include "mace/core/quantize.h"

namespace mace {

ApuWrapper::ApuWrapper(Device *device)
: quantize_util_(&device->cpu_runtime()->thread_pool()) {
}

apu_data_type ApuWrapper::MapToApuDataType(DataType mace_type) {
switch (mace_type) {
case DT_FLOAT:
return APU_DATA_TYPE_FLOAT;
case DT_INT32:
return APU_DATA_TYPE_INT32;
case DT_HALF:
return APU_DATA_TYPE_HALF;
case DT_UINT8:
return APU_DATA_TYPE_UINT8;
default:
MACE_CHECK(false, "unsupported mace data type");
break;
}
return APU_DATA_TYPE_UNDEFINED;
}

apu_pooling_mode ApuWrapper::MapToApuPoolingMode(int mace_mode) {
switch (mace_mode) {
case 1:
return APU_POOLING_AVG;
case 2:
return APU_POOLING_MAX;
default:
MACE_CHECK(false, "unsupported mace pooling mode");
break;
}
return APU_POOLING_UNDEFINED;
}

apu_eltwise_mode ApuWrapper::MapToApuEltwiseMode(int mace_mode) {
switch (mace_mode) {
case 0:
return APU_ELTWISE_ADD;
case 1:
return APU_ELTWISE_SUB;
case 2:
return APU_ELTWISE_MUL;
case 4:
return APU_ELTWISE_MIN;
case 5:
return APU_ELTWISE_MAX;
default:
MACE_CHECK(false, "unsupported mace eltwise mode");
break;
}
return APU_ELTWISE_UNDEFINED;
}

bool ApuWrapper::Init(const NetDef &net_def,
unsigned const char *model_data) {
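// ApuFrontend comes from //third_party/apu:libapu-frontend and owns the
// on-device graph; it is presumably torn down again in Uninit().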
frontend = new ApuFrontend();

// parse model argument
int const_data_num = 0;
for (auto arg : net_def.arg()) {
if (arg.name().compare("const_data_num") == 0) {
const_data_num = arg.i();
}
}

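// Tensors are handed to the frontend in three groups below: constant
// tensors (weights plus constant arguments, split by const_data_num),
// model inputs, and model outputs.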
// const tensors
std::vector<apu_tensor> const_tensors;
for (auto const_tensor : net_def.tensors()) {
apu_tensor tensor;
tensor.tensor_id = const_tensor.node_id();
tensor.tensor_type = (tensor.tensor_id < const_data_num) ?
APU_TENSOR_CONST_DATA :
APU_TENSOR_CONST_ARGUMENT;
tensor.data_type = MapToApuDataType(const_tensor.data_type());
tensor.scale = const_tensor.has_scale() ? const_tensor.scale() : 0.0f;
tensor.zero_point = const_tensor.has_zero_point() ?
const_tensor.zero_point() : 0;
tensor.dim_size = const_tensor.dims_size();
MACE_CHECK(tensor.dim_size <= APU_TENSOR_MAX_DIMS,
"tensor dimension size not supported");
for (auto i = 0 ; i < tensor.dim_size ; i++) {
tensor.dims[i] = const_tensor.dims(i);
}
tensor.data_buf =
const_cast<unsigned char*>(model_data + const_tensor.offset());
const_tensors.push_back(tensor);
}

// input tensors
std::vector<apu_tensor> input_tensors;
for (auto input_info : net_def.input_info()) {
apu_tensor tensor;
tensor.tensor_id = input_info.node_id();
tensor.tensor_type = APU_TENSOR_MODEL_INPUT;
tensor.data_type = APU_DATA_TYPE_UINT8;  // inputs are quantized in Run()
tensor.scale = input_info.has_scale() ? input_info.scale() : 0.0f;
tensor.zero_point = input_info.has_zero_point() ?
input_info.zero_point() : 0;
tensor.dim_size = input_info.dims_size();
MACE_CHECK(tensor.dim_size <= APU_TENSOR_MAX_DIMS,
"tensor dimension size not supported");
tensor_info info;
info.name = input_info.name();
info.size = 1;
for (auto i = 0 ; i < tensor.dim_size ; i++) {
tensor.dims[i] = input_info.dims(i);
info.size *= input_info.dims(i);
info.shape.push_back(input_info.dims(i));
}
info.buf = std::shared_ptr<uint8_t>(new uint8_t[info.size],
std::default_delete<uint8_t[]>());
info.scale = tensor.scale;
info.zero_point = tensor.zero_point;
input_infos.push_back(info);
tensor.data_buf = info.buf.get();
input_tensors.push_back(tensor);
}

// output tensors
std::vector<int> output_tensor_ids;
std::vector<void*> output_buffers;
for (auto output_info : net_def.output_info()) {
output_tensor_ids.push_back(output_info.node_id());
tensor_info info;
info.name = output_info.name();
info.size = 1;
for (auto i = 0 ; i < output_info.dims_size() ; i++) {
info.size *= output_info.dims(i);
info.shape.push_back(output_info.dims(i));
}
info.buf = std::shared_ptr<uint8_t>(new uint8_t[info.size],
std::default_delete<uint8_t[]>());
for (auto op_def : net_def.op()) {
if (output_info.name() == op_def.output(0)) {
info.scale = op_def.quantize_info(0).scale();
info.zero_point = op_def.quantize_info(0).zero_point();
}
}
output_infos.push_back(info);
output_buffers.push_back(info.buf.get());
}

// operators
std::vector<apu_operator> ops;
std::vector<std::vector<int>> cached_op_inputs;
for (auto op_def : net_def.op()) {
apu_operator op;
strncpy(op.type, op_def.type().c_str(), APU_OP_TYPE_MAX_SIZE);
op.type[APU_OP_TYPE_MAX_SIZE - 1] = '\0';  // strncpy does not null-terminate on truncation (assumes op.type holds APU_OP_TYPE_MAX_SIZE bytes)
op.input_size = op_def.node_input_size();
std::vector<int> input_ids;
for (auto i = 0 ; i < op.input_size ; i++) {
input_ids.push_back(op_def.node_input(i).node_id());
}
cached_op_inputs.push_back(input_ids);
op.input_ids = cached_op_inputs.back().data();
op.output.tensor_id = op_def.node_id();
op.output.tensor_type = APU_TENSOR_OP_OUTPUT;
op.output.data_type = MapToApuDataType(op_def.output_type(0));
if (op.output.data_type == APU_DATA_TYPE_UINT8) {
op.output.scale = op_def.quantize_info(0).scale();
op.output.zero_point = op_def.quantize_info(0).zero_point();
} else {
op.output.scale = 0.0f;
op.output.zero_point = 0;
}
op.output.dim_size = op_def.output_shape(0).dims_size();
MACE_CHECK(op.output.dim_size <= APU_TENSOR_MAX_DIMS,
"tensor dimension size not supported");
for (auto i = 0 ; i < op.output.dim_size ; i++) {
op.output.dims[i] = op_def.output_shape(0).dims(i);
}
op.output.data_buf = nullptr;
// get op mode and activation mode
bool is_pooling = (strcmp(op.type, "Pooling") == 0);
bool is_eltwise = (strcmp(op.type, "Eltwise") == 0);
std::string activation;
float max_limit = 0.0f;
for (auto arg : op_def.arg()) {
if (arg.name().compare("activation") == 0) {
activation = arg.s();
}
if (arg.name().compare("max_limit") == 0) {
max_limit = arg.f();
}
if (is_pooling && arg.name().compare("pooling_type") == 0) {
op.op_mode = static_cast<int>(MapToApuPoolingMode(arg.i()));
}
if (is_eltwise && arg.name().compare("type") == 0) {
op.op_mode = static_cast<int>(MapToApuEltwiseMode(arg.i()));
}
}
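// Map MACE's fused activation onto what the APU frontend supports:
// plain RELU, or RELU6 when the RELUX clamp is exactly 6.0; any other
// activation setting falls back to no fused activation.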
if (activation.compare("RELU") == 0) {
op.act_mode = APU_ACT_RELU;
} else if (activation.compare("RELUX") == 0 && max_limit == 6.0) {
op.act_mode = APU_ACT_RELU6;
} else {
op.act_mode = APU_ACT_NONE;
}
ops.push_back(op);
}

bool print_model = false;
bool ret = frontend->InitGraph(
const_tensors.size(), const_tensors.data(),
input_tensors.size(), input_tensors.data(),
output_tensor_ids.size(), output_tensor_ids.data(),
output_buffers.data(),
ops.size(), ops.data(),
print_model);
cached_op_inputs.clear();
MACE_CHECK(ret == true, "apu init graph failed");

return ret;
}

bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
std::map<std::string, Tensor *> *output_tensors) {
MACE_ASSERT(input_tensors.size() == input_infos.size(), "Wrong inputs num");
MACE_ASSERT(output_tensors.size() == output_infos.size(),
"Wrong outputs num");

// prepare input
for (int i = 0 ; i < static_cast<int>(input_tensors.size()) ; i++) {
Tensor* tensor = input_tensors.at(input_infos[i].name);

// check size
int size = input_infos[i].size;
MACE_ASSERT(size == static_cast<int>(tensor->size()), "Wrong input size");

// quantize
quantize_util_.QuantizeWithScaleAndZeropoint(
(const float*)tensor->raw_data(),
size,
input_infos[i].scale,
input_infos[i].zero_point,
input_infos[i].buf.get());
}
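// The quantization above follows the usual affine scheme (a sketch of the
// expected semantics, not necessarily QuantizeUtil's exact code):
//   q = clamp(round(x / scale) + zero_point, 0, 255)
// and the Dequantize call below inverts it as x = scale * (q - zero_point).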

// run model
bool ret = frontend->RunGraph();
MACE_CHECK(ret == true, "neuron run model failed");

// process output
for (int i = 0 ; i < static_cast<int>(output_tensors->size()) ; i++) {
Tensor* tensor = output_tensors->at(output_infos[i].name);

// prepare out buffer
tensor->SetDtype(DT_FLOAT);
tensor->Resize(output_infos[i].shape);
int size = output_infos[i].size;
MACE_ASSERT(size == static_cast<int>(tensor->size()), "Wrong output size");

// dequantize
quantize_util_.Dequantize(
output_infos[i].buf.get(),
size,
output_infos[i].scale,
output_infos[i].zero_point,
reinterpret_cast<float*>(tensor->raw_mutable_data()));
}

return true;
}

bool ApuWrapper::Uninit() {
bool ret = frontend->UninitGraph();
delete frontend;  // Init() allocated the frontend with new; avoid leaking it
frontend = nullptr;
input_infos.clear();
output_infos.clear();
return ret;
}

} // namespace mace
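The matching header, mace/core/runtime/apu/apu_wrapper.h, is not shown in the visible hunks. From the way the wrapper is used above, a plausible reconstruction looks like the following; the ApuFrontend include path, the tensor_info field types, and the QuantizeUtil template parameters are assumptions inferred from the .cc, not confirmed by the diff:

// Sketch of apu_wrapper.h inferred from apu_wrapper.cc; names flagged in
// comments below are assumptions.
#ifndef MACE_CORE_RUNTIME_APU_APU_WRAPPER_H_
#define MACE_CORE_RUNTIME_APU_APU_WRAPPER_H_

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "mace/core/device.h"
#include "mace/core/quantize.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"           // for NetDef
#include "third_party/apu/ApuFrontend.h"  // assumed include path

namespace mace {

class ApuWrapper {
 public:
  explicit ApuWrapper(Device *device);
  bool Init(const NetDef &net_def, unsigned const char *model_data);
  bool Run(const std::map<std::string, Tensor *> &input_tensors,
           std::map<std::string, Tensor *> *output_tensors);
  bool Uninit();

 private:
  // Host-side bookkeeping for one model input or output.
  struct tensor_info {
    std::string name;
    std::shared_ptr<uint8_t> buf;  // uint8 staging buffer shared with the APU
    float scale;
    int zero_point;
    int size;
    std::vector<index_t> shape;  // element type assumed
  };

  apu_data_type MapToApuDataType(DataType mace_type);
  apu_pooling_mode MapToApuPoolingMode(int mace_mode);
  apu_eltwise_mode MapToApuEltwiseMode(int mace_mode);

  ApuFrontend *frontend;
  std::vector<tensor_info> input_infos;
  std::vector<tensor_info> output_infos;
  QuantizeUtil<uint8_t> quantize_util_;  // template parameters assumed
};

}  // namespace mace
#endif  // MACE_CORE_RUNTIME_APU_APU_WRAPPER_H_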