This commit is contained in:
2026-05-06 20:17:26 +07:00
parent 801dac9328
commit d6a8619e70
589 changed files with 3132 additions and 2405 deletions
View File
View File
View File
View File
View File
View File
View File
@@ -0,0 +1,76 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod_platform.h"
#if (dsps_dotprod_s16_aes3_enabled == 1)
#include "dsp_err_codes.h"
.text
.align 4
.global dsps_dp_s8_aes3
.type dsps_dp_s8_aes3,@function
//esp_err_t dsps_dp_s8_aes3(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
dsps_dp_s8_aes3:
// esp_err_t dsps_dp_s8_aes3(const int8_t *src1, const int8_t *src2, int32_t *dest, int len)
//
// Dot product of two signed 8-bit vectors: *dest = sum(src1[i] * src2[i]), i = [0..len).
// Uses the ESP32-S3 vector unit, 16 elements per step; inputs that do not fit the
// vector path (len not a multiple of 16, or a source pointer that is not 16-byte
// aligned) are forwarded to dsps_dp_s8_ansi.
//
// Register use (Xtensa windowed ABI):
//   a2      - src1 on entry, return code on exit
//   a3      - src2
//   a4      - dest
//   a5      - len in elements; later the number of 16-element blocks
//   a8      - scratch for the argument checks
//   a14     - zero used to clear ACCX; later the value read back from ACCX_0
//   a10-a13 - outgoing arguments / incoming return value for the call8 fallback
//   q0, q1  - current 16-element block of src1 / src2
//
// Returns ESP_ERR_DSP_INVALID_LENGTH when len < 4, otherwise 0 from the vector
// path or whatever dsps_dp_s8_ansi returns from the fallback path.
//
// Note: the loop pre-loads the next block while multiplying the current one, so
// the vector path reads one 16-byte block past the end of each source array.

    entry   a1, 48

    // Reject lengths below the minimum of 4 elements.
    movi    a8, 4
    blt     a5, a8, .dsps_dp_s8_aes3_error

    // The vector path consumes exactly 16 elements per iteration;
    // any length with a non-zero remainder is handled by the ANSI version.
    extui   a8, a5, 0, 4
    bnez    a8, .dsps_dp_s8_aes3_via_ansi

    // ee.vld.128.ip does not honour the low four address bits (see the ESP32-S3
    // TRM), so a source pointer that is not 16-byte aligned cannot be processed
    // here without reading the wrong bytes. Fall back to the ANSI version.
    or      a8, a2, a3
    extui   a8, a8, 0, 4
    bnez    a8, .dsps_dp_s8_aes3_via_ansi

    // Clear both halves of the ACCX accumulator.
    movi.n  a14, 0
    wur.accx_0 a14
    wur.accx_1 a14

    // Prime the pipeline with the first block of each input.
    ee.vld.128.ip q0, a2, 16
    ee.vld.128.ip q1, a3, 16

    // a5 = number of 16-element blocks (at least 1 at this point).
    srli    a5, a5, 4
    loopnez a5, .loop_dsps_dp_s8_aes3
        // ACCX += dot(q0, q1); at the same time fetch the next src2 block into q1.
        ee.vmulas.s8.accx.ld.ip q1, a3, 16, q0, q1
        // Fetch the next src1 block. This load must stay inside the loop body:
        // the hardware loop ends at the label below, and without the reload every
        // iteration would reuse the first src1 block.
        ee.vld.128.ip q0, a2, 16
.loop_dsps_dp_s8_aes3:

    // Store the ACCX_0 part of the accumulator as the 32-bit result.
    rur.accx_0 a14
    s32i    a14, a4, 0
    movi.n  a2, 0
    retw.n

.dsps_dp_s8_aes3_error:
    movi.n  a2, ESP_ERR_DSP_INVALID_LENGTH
    retw.n

.dsps_dp_s8_aes3_via_ansi:
    // Forward the unchanged arguments; with call8 the callee sees a10..a13 as a2..a5.
    mov.n   a10, a2
    mov.n   a11, a3
    mov.n   a12, a4
    mov.n   a13, a5
    .type   dsps_dp_s8_ansi, @function
    call8   dsps_dp_s8_ansi
    // The callee's return value arrives in a10; hand it back to our caller
    // instead of assuming success.
    mov.n   a2, a10
    retw.n
#endif // dsps_dotprod_s16_aes3_enabled
@@ -0,0 +1,11 @@
#include "dsps_dotprod.h"
esp_err_t dsps_dp_s8_ansi(const int8_t *src1, const int8_t *src2, int32_t *dest, int len)
{
    /* Portable reference implementation: sum of element-wise products,
     * accumulated in 32 bits and written to *dest (previous content is overwritten). */
    int32_t sum = 0;
    int idx = 0;

    while (idx < len) {
        /* Widen both operands before multiplying so the product is formed in 32 bits. */
        const int32_t lhs = (int32_t)src1[idx];
        const int32_t rhs = (int32_t)src2[idx];
        sum += lhs * rhs;
        idx++;
    }

    *dest = sum;
    return ESP_OK;
}
@@ -13,7 +13,7 @@
#endif
#endif // __XTENSA__
#if CONFIG_IDF_TARGET_ESP32P4
#if CONFIG_IDF_TARGET_ESP32P4 || CONFIG_IDF_TARGET_ESP32S31
#ifdef CONFIG_DSP_OPTIMIZED
#define dspi_dotprod_arp4_enabled 1
#else
+32 -3
View File
@@ -26,6 +26,28 @@ extern "C"
#endif
// These functions calculate the dot product of two vectors.
/**@{*/
/**
* @brief dot product of two 8 bit vectors
* Dot product calculation for two signed 8 bit arrays: *dest = sum(src1[i] * src2[i]); i = [0..len)
* The result is stored in a 32 bit integer without any shift; the previous value of *dest is overwritten.
* This function could be used as a primitive for other dot product functions.
* The extension (_ansi) uses ANSI C and can be compiled and run on any platform.
* The extension (_aes3) is optimized for the ESP32S3 chip.
*
* @param[in] src1 source array 1
* @param[in] src2 source array 2
* @param dest destination pointer
* @param[in] len length of input arrays
* @return
* - ESP_OK on success
* - One of the error codes from DSP library
*/
esp_err_t dsps_dp_s8_ansi(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
esp_err_t dsps_dp_s8_aes3(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
/**@}*/
/**@{*/
/**
* @brief dot product of two 16 bit vectors
@@ -97,12 +119,18 @@ esp_err_t dsps_dotprode_f32_arp4(const float *src1, const float *src2, float *de
#if CONFIG_DSP_OPTIMIZED
#if (dsps_dotprod_s16_ae32_enabled == 1)
#if (dsps_dotprod_s16_aes3_enabled == 1)
#define dsps_dotprod_s16 dsps_dotprod_s16_ae32
#define dsps_dp_s8 dsps_dp_s8_aes3
#elif (dsps_dotprod_s16_ae32_enabled == 1)
#define dsps_dotprod_s16 dsps_dotprod_s16_ae32
#define dsps_dp_s8 dsps_dp_s8_ansi
#elif (dsps_dotprod_s16_arp4_enabled == 1)
#define dsps_dotprod_s16 dsps_dotprod_s16_arp4
#define dsps_dp_s8 dsps_dp_s8_ansi
#else
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
#define dsps_dp_s8 dsps_dp_s8_ansi
#endif // dsps_dotprod_s16_ae32_enabled
#if (dsps_dotprod_f32_aes3_enabled == 1)
@@ -120,9 +148,10 @@ esp_err_t dsps_dotprode_f32_arp4(const float *src1, const float *src2, float *de
#endif // dsps_dotprod_f32_ae32_enabled
#else // CONFIG_DSP_OPTIMIZED
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
#define dsps_dotprod_f32 dsps_dotprod_f32_ansi
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
#define dsps_dotprod_f32 dsps_dotprod_f32_ansi
#define dsps_dotprode_f32 dsps_dotprode_f32_ansi
#define dsps_dp_s8 dsps_dp_s8_ansi
#endif // CONFIG_DSP_OPTIMIZED
#endif // _DSPI_DOTPROD_H_
@@ -28,7 +28,7 @@
#define dsps_dotprod_f32_aes3_enabled 1
#endif
#if CONFIG_IDF_TARGET_ESP32P4
#if CONFIG_IDF_TARGET_ESP32P4 || CONFIG_IDF_TARGET_ESP32S31
#ifdef CONFIG_DSP_OPTIMIZED
#define dsps_dotprod_s16_arp4_enabled 1
#define dsps_dotprod_f32_arp4_enabled 1
@@ -38,5 +38,4 @@
#endif // CONFIG_DSP_OPTIMIZED
#endif
#endif // _dsps_dotprod_platform_H_

Some files were not shown because too many files have changed in this diff Show More