Update prebuilt-sdk to 6.4.16

Type: Code Improvement

Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>
VeriSilicon · Nov 1, 2023 · 44ba712 · 44ba712
1 parent 1e484f7
commit 44ba712
Show file tree

Hide file tree

Showing 17 changed files with 487 additions and 12 deletions.
diff --git a/prebuilt-sdk/x86_64_linux/VERSION b/prebuilt-sdk/x86_64_linux/VERSION
@@ -1 +1 @@
-6.4.15_CL690884A_D690855_A690484_R690194_T690259_O688896
+6.4.16_CL733538A_D733484_A732742_R733271_T733538_O82cb0ee
diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h
@@ -515,27 +515,35 @@ enum vx_kernel_e {
 
     VX_KERNEL_NN_LOGICAL_OPS_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x39,
 
-    VX_KERNEL_NN_LOGICAL_NOT_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x40,
+    VX_KERNEL_NN_LOGICAL_NOT_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x3A,
 
-    VX_KERNEL_NN_RELATIONAL_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x41,
+    VX_KERNEL_NN_RELATIONAL_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x3B,
 
-    VX_KERNEL_NN_TENSOR_REDUCE_MAX = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x42,
+    VX_KERNEL_NN_TENSOR_REDUCE_MAX = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x3C,
 
-    VX_KERNEL_NN_MAXIMUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x43,
+    VX_KERNEL_NN_MAXIMUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x3D,
 
-    VX_KERNEL_NN_MINIMUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x44,
+    VX_KERNEL_NN_MINIMUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x3E,
 
-    VX_KERNEL_NN_TENSOR_SELECT_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x45,
+    VX_KERNEL_NN_TENSOR_SELECT_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x3F,
 
-    VX_KERNEL_NN_REDUCE_SUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x46,
+    VX_KERNEL_NN_REDUCE_SUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x40,
 
-    VX_KERNEL_NN_GRU_CELL_ACTIVATION_Z_H_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x47,
+    VX_KERNEL_NN_GRU_CELL_ACTIVATION_Z_H_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x41,
 
-    VX_KERNEL_NN_GRU_CELL_H_TIMES_ACTIVATION_R_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x48,
+    VX_KERNEL_NN_GRU_CELL_H_TIMES_ACTIVATION_R_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x42,
 
-    VX_KERNEL_NN_GRU_CELL_RESET_AFTER_ACTIVATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x49,
+    VX_KERNEL_NN_GRU_CELL_RESET_AFTER_ACTIVATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x43,
 
-    VX_KERNEL_NN_LSTM_ACTIVATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x50,
+    VX_KERNEL_NN_LSTM_ACTIVATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x44,
+
+    VX_KERNEL_NN_BATCH_GEMM_RELU_POOLING_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x45,
+
+    VX_KERNEL_NN_TENSOR_POW = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x51,
+
+    VX_KERNEL_NN_TENSOR_GATHER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x52,
+
+    VX_KERNEL_NN_TENSOR_TILE = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x53,
 
     VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */
 };

diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h
@@ -247,6 +247,36 @@ VX_STREAM_PROCESSOR_SUPPORT is used to declare that vsi openvx driver can suppor
 #define VX_INVALIDATE_HANDLE_SUPPORT 1
 #endif
 
+/*
+ VX_GRAPH_V500_DMA_SUPPOPRT is used to declare driver support V500 DMA.
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_GRAPH_V500_DMA_SUPPOPRT
+#define VX_GRAPH_V500_DMA_SUPPOPRT 1
+#endif
+
+/*
+ VX_GRAPH_V500_RGB2RAW_REFOUTPUTCROP_SUPPOPRT is used to declare driver support V500 RGB2Raw & RefOutCropping feature.
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_GRAPH_V500_RGB2RAW_REFOUTPUTCROP_SUPPOPRT
+#define VX_GRAPH_V500_RGB2RAW_REFOUTPUTCROP_SUPPOPRT 1
+#endif
+
+/*
+ VX_GRAPH_V500_EXTRACTOR_FILLER_CROP_SUPPORT is used to declare driver support V500 DMA extractor/filler cropping setting.
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_GRAPH_V500_EXTRACTOR_FILLER_CROP_SUPPORT
+#define VX_GRAPH_V500_EXTRACTOR_FILLER_CROP_SUPPORT 1
+#endif
+
 /*
  VX_ACTIVATION_EXT2_SUPPORT is used to declare that ACTIVATION can support sign, hard_sigmoid, neg, clip, exp, sin, cos,
  log, mish, gelu, hgelu, elu, selu, celu, rcp, softsign, atan, atanh, acosh, inverse sigmoid, round and erf.
@@ -398,4 +428,75 @@ VX_DEPTH2SPACE_CRD_MODE_SUPPORT is used to declare that SPACE2DEPTH can support
 #define VX_LSTM_ACTIVATION_SUPPORT 1
 #endif
 
+#ifndef VX_GRAPH_TRANSFORM_OPTION_SUPPORT
+#define VX_GRAPH_TRANSFORM_OPTION_SUPPORT 1
+#endif
+
+/*
+ VX_GRAPH_V500_NEW_DPP_SUPPORT is used to declare driver support V500 new DPP parameter setting.
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_GRAPH_V500_NEW_DPP_SUPPORT
+#define VX_GRAPH_V500_NEW_DPP_SUPPORT 1
+#endif
+
+/*
+ VX_GRAPH_V500_UNALIGNED_HEIGHT_SUPPORT is used to declare driver support unaligned dma in/out height.
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_GRAPH_V500_UNALIGNED_HEIGHT_SUPPORT
+#define VX_GRAPH_V500_UNALIGNED_HEIGHT_SUPPORT 1
+#endif
+
+/*
+ VX_GRAPH_V500_STRIPES_MEMORY_ALIGNMENT_SUPPORT is used to declare driver support stripes memory alignment requirement.
+*/
+#ifndef VX_GRAPH_V500_STRIPES_MEMORY_ALIGNMENT_SUPPORT
+#define VX_GRAPH_V500_STRIPES_MEMORY_ALIGNMENT_SUPPORT 1
+#endif /* VX_GRAPH_V500_STRIPES_MEMORY_ALIGNMENT_SUPPORT */
+
+/*
+VX_TENSOR_POW_API_SUPPORT is used to declare that vsi openvx driver can support vxTensorPowNode API
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_TENSOR_POW_API_SUPPORT
+#define VX_TENSOR_POW_API_SUPPORT 1
+#endif
+
+/*
+VX_TENSOR_GATHER_API_SUPPORT is used to declare that vsi openvx driver can support vxTensorGatherNode API
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_TENSOR_GATHER_API_SUPPORT
+#define VX_TENSOR_GATHER_API_SUPPORT 1
+#endif
+
+/*
+VX_TENSOR_TILE_API_SUPPORT is used to declare that vsi openvx driver can support vxTensorTileNode API
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_TENSOR_TILE_API_SUPPORT
+#define VX_TENSOR_TILE_API_SUPPORT 1
+#endif
+
+/*
+VX_RELATIONAL_OPS_VX_SUPPORT_EXT is used to declare that vsi openvx driver can support vxRelationalLayer API
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_RELATIONAL_OPS_VX_SUPPORT_EXT
+#define VX_RELATIONAL_OPS_VX_SUPPORT_EXT 1
+#endif
+
 #endif /* __VX_KHR_COMPATIBLE_H__ */
diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn.h
@@ -65,6 +65,8 @@ enum vx_graph_attribute_internal_type_e
     VX_GRAPH_PROCESS_FPS                          = VX_ATTRIBUTE_BASE(VX_ID_VIVANTE, VX_TYPE_GRAPH) + 0x9,
     /*This parameter.come from customer, not used by unify driver but lite driver*/
     VX_GRAPH_CUSTOMER_PARAMETER_FOR_NBG           = VX_ATTRIBUTE_BASE(VX_ID_VIVANTE, VX_TYPE_GRAPH) + 0xA,
+    VX_GRAPH_V500_DMA_CONFIG_PARAMETER            = VX_ATTRIBUTE_BASE(VX_ID_VIVANTE, VX_TYPE_GRAPH) + 0xB,
+    VX_GRAPH_VSI_TRANSFORM_OPTIONS                = VX_ATTRIBUTE_BASE(VX_ID_VIVANTE, VX_TYPE_GRAPH) + 0xC,
 };
 
 /*! \brief Size Alignment of User Memory
@@ -456,6 +458,15 @@ typedef struct _vx_nn_deconvolution_3d_params_t
     vx_enum down_scale_size_rounding;       /*!< \brief Rounding method for calculating output dimensions. See <tt>\ref vx_nn_rounding_type_e</tt> */
 }vx_nn_deconvolution_3d_params_t;
 
+typedef enum _vx_nn_relation_ops_type_t { /* Relational operator kinds; presumably the comparison selector for vxRelationalLayer - TODO confirm. */
+    VX_RELATION_OPS_GREAT = 0, /*!< \brief Value 0. NOTE(review): lacks the doubled "_OPS_OPS_" infix the enumerators below carry; spelling kept for API compatibility. */
+    VX_RELATION_OPS_OPS_GREAT_EQUAL, /*!< \brief Value 1. */
+    VX_RELATION_OPS_OPS_LESS, /*!< \brief Value 2. */
+    VX_RELATION_OPS_OPS_LESS_EQUAL, /*!< \brief Value 3. */
+    VX_RELATION_OPS_OPS_NOT_EQUAL, /*!< \brief Value 4. */
+    VX_RELATION_OPS_OPS_EQUAL, /*!< \brief Value 5. */
+} vx_nn_relation_op_type_t; /* NOTE(review): the typedef name says "op" while the enum tag says "ops". */
+
 /*==============================================================================
     TENSOR DATA FUNCTIONS
 =============================================================================*/

diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h
@@ -268,6 +268,12 @@ typedef struct _vx_nn_fused_sp_params_t
             vx_scalar clip_a, clip_b;
         } clip;
         struct
+        {
+            vx_scalar eps_a;
+            vx_float32 eps;
+            vx_int32 axis;
+        } rms_norm;
+        struct
         {
             vx_scalar scalar_a, scalar_b, scalar_c, scalar_d;
         } params;
@@ -1146,6 +1152,15 @@ typedef struct _vx_nn_gemm_relu_pooling_params_t
     vx_float32 const_multiplier;                            /*!< \brief  const multiplier */
 } vx_nn_gemm_relu_pooling_params_t, * vx_nn_gemm_relu_pooling_params;
 
+typedef struct _vx_nn_gemm_relu_pooling_params_ext_t /* Extended parameter block: the base gemm/relu/pooling params come first, followed by stream-processor fields. */
+{
+    vx_nn_gemm_relu_pooling_params_t base; /*!< \brief gemm relu pooling params <tt>\ref vx_nn_gemm_relu_pooling_params_t</tt> */
+    vx_object_array inputs_list;           /*!< \brief  streamProcessor input array */
+    vx_object_array outputs_list;          /*!< \brief  streamProcessor output array */
+    vx_nn_fused_sp_params_t  sp_param;            /*!< \brief  streamProcessor instruction object */
+} vx_nn_gemm_relu_pooling_params_ext_t, * vx_nn_gemm_relu_pooling_params_ext;
+
+
 /*! \brief Create a batch gemm node, the calcution formula is output = matrix_a * matrix_b + matrix_c.
  * \param [in] graph The reference to the graph.
  * \param [in] matrix_a The first input tensor.
@@ -1171,6 +1186,61 @@ VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmReluPoolingLayer(vx_graph graph,
                                                              const vx_nn_gemm_relu_pooling_params merge_param,
                                                              vx_tensor output);
 
+/*! \brief Create a batch gemm node with an explicitly sized merge_param, the calculation formula is output = matrix_a * matrix_b + matrix_c.
+ * \param [in] graph The reference to the graph.
+ * \param [in] matrix_a The first input tensor.
+ * \param [in] matrix_b The second input tensor. Must be in the same data type and batch count as first input tensor.
+ * \param [in] matrix_c The third input tensor. Must be in the same data type and batch count as first input tensor. [optional]
+ * \param [in] trans_a If true, the matrix_a has been transposed before calculation.
+ * \param [in] trans_b If true, the matrix_b has been transposed before calculation.
+ * \param [in] trans_c If true, the matrix_c has been transposed before calculation. [optional]
+ * \param [in] merge_param The parameters for gemm + op merging, refer to vx_nn_gemm_relu_pooling_params_t and vx_nn_gemm_relu_pooling_params_ext_t.
+ * \param [in] size_of_nn_gemm_relu_pooling_params Size of merge_param.
+ * \param [out] output The output tensor. Output dimension must agree with the formula in the description.
+ * \return <tt>\ref vx_node</tt>.
+ * \retval vx_node A node reference. Any possible errors preventing a successful creation
+ * should be checked using <tt>\ref vxGetStatus</tt>
+ * \ingroup group_vision_function_gemm
+ */
+VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmReluPoolingLayer2(vx_graph graph,
+                                                              vx_tensor matrix_a,
+                                                              vx_tensor matrix_b,
+                                                              vx_tensor matrix_c,
+                                                              vx_scalar trans_a,
+                                                              vx_scalar trans_b,
+                                                              vx_scalar trans_c,
+                                                              const vx_nn_gemm_relu_pooling_params merge_param,
+                                                              vx_size size_of_nn_gemm_relu_pooling_params,
+                                                              vx_tensor output);
+
+/*! \brief Create a batch gemm node, the calculation formula is output = matrix_a * matrix_b + matrix_c.
+ * \param [in] graph The reference to the graph.
+ * \param [in] matrix_a The first input tensor.
+ * \param [in] matrix_b The second input tensor. Must be in the same data type and batch count as first input tensor.
+ * \param [in] matrix_c The third input tensor. Must be in the same data type and batch count as first input tensor. [optional]
+ * \param [in] trans_a If true, the matrix_a has been transposed before calculation.
+ * \param [in] trans_b If true, the matrix_b has been transposed before calculation.
+ * \param [in] trans_c If true, the matrix_c has been transposed before calculation. [optional]
+ * \param [in] merge_param The parameters for gemm + op merging.
+ * \param [in] size_of_gemm_relu_pooling_params [static] Size in bytes of merge_param.
+ * \param [out] output The output tensor. Output dimension must agree with the formula in the description.
+ * \return <tt>\ref vx_node</tt>.
+ * \retval vx_node A node reference. Any possible errors preventing a successful creation
+ * should be checked using <tt>\ref vxGetStatus</tt>
+ * \ingroup group_vision_function_gemm
+ */
+VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmReluPoolingSpLayer(vx_graph graph,
+                                                             vx_tensor matrix_a,
+                                                             vx_tensor matrix_b,
+                                                             vx_tensor matrix_c,
+                                                             vx_scalar trans_a,
+                                                             vx_scalar trans_b,
+                                                             vx_scalar trans_c,
+                                                             const vx_nn_gemm_relu_pooling_params merge_param,
+                                                             vx_size size_of_gemm_relu_pooling_params,
+                                                             vx_tensor output);
+
+
 /*! \brief  Create a fuse stream process node.
  * \param [in] graph The handle to the graph.
  * \param [in] input_list input tensor list.

diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_nodes.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_nodes.h
@@ -997,6 +997,55 @@ VX_API_ENTRY vx_node VX_API_CALL vxStreamProcessorNode(
     vx_lut_params               lut_params
     );
 
+/*! \brief [Graph] Creates a tensor pow node to perform input^y.
+ * \param [in] graph The handle to the graph.
+ * \param [in] input The input tensor.
+ * \param [in] y The power param, passed as a tensor.
+ * \param [out] out The output tensor data.
+ * \return <tt> vx_node</tt>.
+ * \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
+ * successful creation should be checked using <tt>\ref vxGetStatus</tt>.
+ */
+VX_API_ENTRY vx_node VX_API_CALL vxTensorPowNode(
+    vx_graph graph,
+    vx_tensor input,
+    vx_tensor y,
+    vx_tensor out);
+
+/*! \brief [Graph] Creates a tensor gather node.
+ * \param [in] graph The handle to the graph.
+ * \param [in] input The input tensor.
+ * \param [in] indices The indices param, passed as a tensor.
+ * \param [in] axis The axis param.
+ * \param [in] batch_dims The batch_dims param.
+ * \param [out] out The output tensor data.
+ * \return <tt> vx_node</tt>.
+ * \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
+ * successful creation should be checked using <tt>\ref vxGetStatus</tt>.
+ */
+VX_API_ENTRY vx_node VX_API_CALL vxTensorGatherNode(
+    vx_graph graph,
+    vx_tensor input,
+    vx_tensor indices,
+    vx_int32 axis,
+    vx_int32 batch_dims,
+    vx_tensor out);
+
+/*! \brief [Graph] Creates a tensor tile node.
+ * \param [in] graph The handle to the graph.
+ * \param [in] input The input tensor.
+ * \param [in] multiples The multiples param, passed as a tensor.
+ * \param [out] out The output tensor data.
+ * \return <tt> vx_node</tt>.
+ * \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
+ * successful creation should be checked using <tt>\ref vxGetStatus</tt>.
+ */
+VX_API_ENTRY vx_node VX_API_CALL vxTensorTileNode(
+    vx_graph graph,
+    vx_tensor input,
+    vx_tensor multiples,
+    vx_tensor out);
+
 #ifdef __cplusplus
 }
 #endif