hz
This commit is contained in:
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
@@ -0,0 +1,76 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_dotprod_platform.h"
|
||||
#if (dsps_dotprod_s16_aes3_enabled == 1)
|
||||
#include "dsp_err_codes.h"
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_dp_s8_aes3
|
||||
.type dsps_dp_s8_aes3,@function
|
||||
|
||||
|
||||
// esp_err_t dsps_dp_s8_aes3(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
//
// Dot product of two signed 8-bit vectors (Xtensa windowed ABI, GNU as syntax,
// ESP32-S3 vector extension).
//
// In:   a2 = src1, a3 = src2, a4 = dest, a5 = len
// Out:  a2 = 0 on success,
//            ESP_ERR_DSP_INVALID_LENGTH when len < 4,
//            the status returned by dsps_dp_s8_ansi on the fallback path
//       *dest = value read from ACCX_0 after accumulation (vector path)
// Uses: a8, a14, q0, q1, ACCX; a10-a13 for the fallback call
dsps_dp_s8_aes3:
    entry   a1, 48

    // Check minimum length
    movi    a8, 4
    blt     a5, a8, .dsps_dp_s8_aes3_error

    // len must be a multiple of 16 for the vectorized path
    extui   a8, a5, 0, 4
    bnez.n  a8, .dsps_dp_s8_aes3_via_ansi

    // ee.vld.128.ip ignores the low 4 address bits, so an unaligned pointer
    // would silently load the wrong bytes. Fall back to the scalar routine
    // unless both inputs are 16-byte aligned.
    or      a8, a2, a3
    extui   a8, a8, 0, 4
    bnez    a8, .dsps_dp_s8_aes3_via_ansi

    // NOTE(review): the original carried a "DEBUG: always use ANSI; remove
    // before release" marker here, but no forced jump to the ANSI path exists.

    // Clear the accumulator
    movi.n  a14, 0
    wur.accx_0 a14
    wur.accx_1 a14

    // Preload the first 16 bytes of each input
    ee.vld.128.ip q0, a2, 16
    ee.vld.128.ip q1, a3, 16

    // a5 = number of 16-byte blocks
    srli    a5, a5, 4

    // Each iteration accumulates the current q0*q1 block and loads the next
    // block of both inputs. Both loads must be inside the loop body, so the
    // loop-end label goes AFTER the q0 reload.
    // NOTE(review): the last iteration still loads 16 bytes past the end of
    // each array (the data is unused) - confirm callers tolerate the over-read.
    loopnez a5, .loop_dsps_dp_s8_aes3
        ee.vmulas.s8.accx.ld.ip q1, a3, 16, q0, q1
        ee.vld.128.ip q0, a2, 16
.loop_dsps_dp_s8_aes3:

    // Store the result
    rur.accx_0 a14
    s32i    a14, a4, 0

    movi.n  a2, 0
    retw.n

.dsps_dp_s8_aes3_error:
    movi.n  a2, ESP_ERR_DSP_INVALID_LENGTH
    retw.n

.dsps_dp_s8_aes3_via_ansi:
    // Forward the unchanged arguments to the portable implementation
    mov.n   a10, a2
    mov.n   a11, a3
    mov.n   a12, a4
    mov.n   a13, a5
    .type   dsps_dp_s8_ansi, @function
    call8   dsps_dp_s8_ansi
    // Propagate the callee's status instead of forcing success
    mov.n   a2, a10
    retw.n
|
||||
|
||||
#endif // dsps_dotprod_s16_aes3_enabled
|
||||
@@ -0,0 +1,11 @@
|
||||
#include "dsps_dotprod.h"
|
||||
|
||||
esp_err_t dsps_dp_s8_ansi(const int8_t *src1, const int8_t *src2, int32_t *dest, int len)
|
||||
{
|
||||
int32_t acc = 0;
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
acc += (int32_t)src1[i] * (int32_t)src2[i];
|
||||
}
|
||||
*dest = acc;
|
||||
return ESP_OK;
|
||||
}
|
||||
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
+1
-1
@@ -13,7 +13,7 @@
|
||||
#endif
|
||||
#endif // __XTENSA__
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32P4
|
||||
#if CONFIG_IDF_TARGET_ESP32P4 || CONFIG_IDF_TARGET_ESP32S31
|
||||
#ifdef CONFIG_DSP_OPTIMIZED
|
||||
#define dspi_dotprod_arp4_enabled 1
|
||||
#else
|
||||
|
||||
Executable → Regular
+32
-3
@@ -26,6 +26,28 @@ extern "C"
|
||||
#endif
|
||||
// These functions calculate the dot product of two vectors.
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief dot product of two 8 bit vectors
|
||||
* Dot product calculation for two signed 8 bit arrays: *dest = sum(src1[i] * src2[i]); i = [0..len)
|
||||
* The result is stored in a 32 bit integer without any shift.
|
||||
* This function could be used as a primitive for other dot product functions.
|
||||
* The extension (_ansi) uses ANSI C and can be compiled and run on any platform.
|
||||
* The extension (_aes3) is optimized for ESP32S3 chip.
|
||||
*
|
||||
* @param[in] src1 source array 1
|
||||
* @param[in] src2 source array 2
|
||||
* @param dest destination pointer
|
||||
* @param[in] len length of input arrays
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_dp_s8_ansi(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
|
||||
esp_err_t dsps_dp_s8_aes3(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief dot product of two 16 bit vectors
|
||||
@@ -97,12 +119,18 @@ esp_err_t dsps_dotprode_f32_arp4(const float *src1, const float *src2, float *de
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#if (dsps_dotprod_s16_ae32_enabled == 1)
|
||||
#if (dsps_dotprod_s16_aes3_enabled == 1)
|
||||
#define dsps_dotprod_s16 dsps_dotprod_s16_ae32
|
||||
#define dsps_dp_s8 dsps_dp_s8_aes3
|
||||
#elif (dsps_dotprod_s16_ae32_enabled == 1)
|
||||
#define dsps_dotprod_s16 dsps_dotprod_s16_ae32
|
||||
#define dsps_dp_s8 dsps_dp_s8_ansi
|
||||
#elif (dsps_dotprod_s16_arp4_enabled == 1)
|
||||
#define dsps_dotprod_s16 dsps_dotprod_s16_arp4
|
||||
#define dsps_dp_s8 dsps_dp_s8_ansi
|
||||
#else
|
||||
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
|
||||
#define dsps_dp_s8 dsps_dp_s8_ansi
|
||||
#endif // dsps_dotprod_s16_ae32_enabled
|
||||
|
||||
#if (dsps_dotprod_f32_aes3_enabled == 1)
|
||||
@@ -120,9 +148,10 @@ esp_err_t dsps_dotprode_f32_arp4(const float *src1, const float *src2, float *de
|
||||
#endif // dsps_dotprod_f32_ae32_enabled
|
||||
|
||||
#else // CONFIG_DSP_OPTIMIZED
|
||||
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
|
||||
#define dsps_dotprod_f32 dsps_dotprod_f32_ansi
|
||||
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
|
||||
#define dsps_dotprod_f32 dsps_dotprod_f32_ansi
|
||||
#define dsps_dotprode_f32 dsps_dotprode_f32_ansi
|
||||
#define dsps_dp_s8 dsps_dp_s8_ansi
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#endif // _DSPI_DOTPROD_H_
|
||||
|
||||
Executable → Regular
+1
-2
@@ -28,7 +28,7 @@
|
||||
#define dsps_dotprod_f32_aes3_enabled 1
|
||||
#endif
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32P4
|
||||
#if CONFIG_IDF_TARGET_ESP32P4 || CONFIG_IDF_TARGET_ESP32S31
|
||||
#ifdef CONFIG_DSP_OPTIMIZED
|
||||
#define dsps_dotprod_s16_arp4_enabled 1
|
||||
#define dsps_dotprod_f32_arp4_enabled 1
|
||||
@@ -38,5 +38,4 @@
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
#endif
|
||||
|
||||
|
||||
#endif // _dsps_dotprod_platform_H_
|
||||
|
||||
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Executable → Regular
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user