This commit is contained in:
2026-05-06 20:15:17 +07:00
parent d429ce6d2c
commit 801dac9328
590 changed files with 2406 additions and 3133 deletions
@@ -1,76 +0,0 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dsps_dotprod_platform.h"
#if (dsps_dotprod_s16_aes3_enabled == 1)
#include "dsp_err_codes.h"

    .text
    .align 4
    .global dsps_dp_s8_aes3
    .type dsps_dp_s8_aes3,@function

// esp_err_t dsps_dp_s8_aes3(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
//
// Dot product of two signed 8-bit arrays: *dest = sum(src1[i] * src2[i]), i = [0..len).
//
// The vector path handles 16 elements per iteration using 128-bit loads. Inputs
// that do not fit that path (len not a multiple of 16, or a source pointer that
// is not 16-byte aligned) are forwarded to dsps_dp_s8_ansi.
//
// Returns:
//   ESP_ERR_DSP_INVALID_LENGTH  if len < 4
//   0                           on success of the vector path
//   status of dsps_dp_s8_ansi   when the fallback is used
dsps_dp_s8_aes3:
// src1 - a2
// src2 - a3
// dest - a4
// len  - a5
    entry   a1, 48

    // Check minimum length
    movi    a8, 4
    blt     a5, a8, .dsps_dp_s8_aes3_error

    // len must be a multiple of 16 for the vectorized path
    extui   a8, a5, 0, 4
    bnez    a8, .dsps_dp_s8_aes3_via_ansi

    // Both sources must be 16-byte aligned: the 128-bit loads below access
    // aligned addresses only, so an unaligned pointer would silently read the
    // wrong bytes. Fall back to the C implementation in that case.
    or      a8, a2, a3
    extui   a8, a8, 0, 4
    bnez    a8, .dsps_dp_s8_aes3_via_ansi

    // Clear the ACCX accumulator (both halves)
    movi.n  a14, 0
    wur.accx_0 a14
    wur.accx_1 a14

    // Preload the first vector of each source; both pointers post-increment by 16
    ee.vld.128.ip q0,a2,16
    ee.vld.128.ip q1,a3,16

    // a5 = number of 16-element vectors (>= 1 here)
    srli    a5, a5, 4

    // Zero-overhead loop. The loop body ends at the label, so the label must
    // come AFTER the q0 reload; otherwise q0 would never advance and every
    // iteration would multiply the first 16 bytes of src1.
    //
    // NOTE: each iteration preloads the next vector, so the final iteration
    // reads 16 bytes past the end of src1 and src2 (the data is not used).
    loopnez a5, .loop_dsps_dp_s8_aes3
    ee.vmulas.s8.accx.ld.ip q1,a3,16, q0,q1   // ACCX += q0 . q1, then load next src2 vector
    ee.vld.128.ip q0,a2,16                    // load next src1 vector
.loop_dsps_dp_s8_aes3:

    // Store the low 32 bits of the accumulator to *dest
    rur.accx_0 a14
    s32i    a14, a4, 0
    movi.n  a2, 0
    retw.n

.dsps_dp_s8_aes3_error:
    movi.n  a2, ESP_ERR_DSP_INVALID_LENGTH
    retw.n

.dsps_dp_s8_aes3_via_ansi:
    // Windowed call8: outgoing arguments go in a10..a13
    mov.n   a10, a2
    mov.n   a11, a3
    mov.n   a12, a4
    mov.n   a13, a5
    .type dsps_dp_s8_ansi, @function
    call8   dsps_dp_s8_ansi
    // Propagate the callee's status (returned in a10 after call8)
    mov.n   a2, a10
    retw.n

#endif // dsps_dotprod_s16_aes3_enabled
@@ -1,11 +0,0 @@
#include "dsps_dotprod.h"
/**
 * Portable C dot product of two signed 8-bit arrays.
 *
 * Each pair of elements is widened to 32 bits before multiplication and the
 * products are summed into a 32-bit accumulator that starts at zero. The sum
 * overwrites *dest.
 *
 * @param[in]  src1 first input array, at least len elements
 * @param[in]  src2 second input array, at least len elements
 * @param[out] dest receives the accumulated sum
 * @param[in]  len  number of element pairs; the loop does not run for len <= 0
 *
 * @return always ESP_OK
 */
esp_err_t dsps_dp_s8_ansi(const int8_t *src1, const int8_t *src2, int32_t *dest, int len)
{
    int32_t sum = 0;
    int idx = 0;

    while (idx < len) {
        const int32_t lhs = (int32_t)src1[idx];
        const int32_t rhs = (int32_t)src2[idx];
        sum += lhs * rhs;
        idx++;
    }

    *dest = sum;
    return ESP_OK;
}
@@ -13,7 +13,7 @@
#endif
#endif // __XTENSA__
#if CONFIG_IDF_TARGET_ESP32P4 || CONFIG_IDF_TARGET_ESP32S31
#if CONFIG_IDF_TARGET_ESP32P4
#ifdef CONFIG_DSP_OPTIMIZED
#define dspi_dotprod_arp4_enabled 1
#else
+3 -32
View File
@@ -26,28 +26,6 @@ extern "C"
#endif
// These functions calculate the dot product of two vectors.
/**@{*/
/**
* @brief dot product of two 8 bit vectors
* Dot product calculation for two signed 8 bit arrays: *dest = sum(src1[i] * src2[i]); i= [0..len)
* The accumulator starts at zero, so any previous value of *dest is overwritten.
* The result is stored in a 32 bit integer without any shift.
* This function could be used as a primitive for other dot product functions.
* The extension (_ansi) uses ANSI C and can be compiled and run on any platform.
* The extension (_aes3) is optimized for the ESP32S3 chip. It rejects len < 4 and
* forwards lengths that are not a multiple of 16 to the _ansi implementation.
*
* @param[in] src1 source array 1
* @param[in] src2 source array 2
* @param[out] dest destination pointer; receives the 32 bit sum
* @param[in] len length of input arrays (number of elements)
* @return
* - ESP_OK on success
* - One of the error codes from DSP library
*   (_aes3 returns ESP_ERR_DSP_INVALID_LENGTH when len < 4)
*/
esp_err_t dsps_dp_s8_ansi(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
esp_err_t dsps_dp_s8_aes3(const int8_t *src1, const int8_t *src2, int32_t *dest, int len);
/**@}*/
/**@{*/
/**
* @brief dot product of two 16 bit vectors
@@ -119,18 +97,12 @@ esp_err_t dsps_dotprode_f32_arp4(const float *src1, const float *src2, float *de
#if CONFIG_DSP_OPTIMIZED
#if (dsps_dotprod_s16_aes3_enabled == 1)
#if (dsps_dotprod_s16_ae32_enabled == 1)
#define dsps_dotprod_s16 dsps_dotprod_s16_ae32
#define dsps_dp_s8 dsps_dp_s8_aes3
#elif (dsps_dotprod_s16_ae32_enabled == 1)
#define dsps_dotprod_s16 dsps_dotprod_s16_ae32
#define dsps_dp_s8 dsps_dp_s8_ansi
#elif (dsps_dotprod_s16_arp4_enabled == 1)
#define dsps_dotprod_s16 dsps_dotprod_s16_arp4
#define dsps_dp_s8 dsps_dp_s8_ansi
#else
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
#define dsps_dp_s8 dsps_dp_s8_ansi
#endif // dsps_dotprod_s16_ae32_enabled
#if (dsps_dotprod_f32_aes3_enabled == 1)
@@ -148,10 +120,9 @@ esp_err_t dsps_dotprode_f32_arp4(const float *src1, const float *src2, float *de
#endif // dsps_dotprod_f32_ae32_enabled
#else // CONFIG_DSP_OPTIMIZED
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
#define dsps_dotprod_f32 dsps_dotprod_f32_ansi
#define dsps_dotprod_s16 dsps_dotprod_s16_ansi
#define dsps_dotprod_f32 dsps_dotprod_f32_ansi
#define dsps_dotprode_f32 dsps_dotprode_f32_ansi
#define dsps_dp_s8 dsps_dp_s8_ansi
#endif // CONFIG_DSP_OPTIMIZED
#endif // _DSPI_DOTPROD_H_
@@ -28,7 +28,7 @@
#define dsps_dotprod_f32_aes3_enabled 1
#endif
#if CONFIG_IDF_TARGET_ESP32P4 || CONFIG_IDF_TARGET_ESP32S31
#if CONFIG_IDF_TARGET_ESP32P4
#ifdef CONFIG_DSP_OPTIMIZED
#define dsps_dotprod_s16_arp4_enabled 1
#define dsps_dotprod_f32_arp4_enabled 1
@@ -38,4 +38,5 @@
#endif // CONFIG_DSP_OPTIMIZED
#endif
#endif // _dsps_dotprod_platform_H_
@@ -1,82 +0,0 @@
// Copyright 2026 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include <malloc.h>
#include "dsps_dotprod.h"
#include "dsp_tests.h"
static const char *TAG = "dsps_dp_s8_aeXX";
// Functional test for dsps_dp_s8 (the platform-selected implementation).
//
// The inputs are deliberately non-constant, with periods (15 and 11) that do
// not divide the 16-element vector width, so an implementation that reuses or
// skips a block of either array produces a wrong sum. The expected value is a
// running sum maintained independently of the function under test.
TEST_CASE("dsps_dp_s8_aeXX functionality", "[dsps]")
{
    const int max_N = 1024;
    const int min_len = 4; // dsps_dp_s8_aes3 returns an error for shorter inputs

    int8_t *x = (int8_t *)memalign(16, max_N * sizeof(int8_t));
    int8_t *y = (int8_t *)memalign(16, max_N * sizeof(int8_t));
    if (x == NULL || y == NULL) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    for (int i = 0 ; i < max_N ; i++) {
        x[i] = (int8_t)((i % 15) - 7);
        y[i] = (int8_t)((i % 11) - 5);
    }

    // Check every length in [min_len, max_N) against the reference sum.
    // The upper bound is exclusive so that a vectorized implementation which
    // preloads one vector ahead stays inside the allocated buffers.
    int32_t check_value = 0;
    for (int i = 1; i < max_N; i++) {
        check_value += (int32_t)x[i - 1] * (int32_t)y[i - 1];
        if (i < min_len) {
            continue;
        }

        int32_t result = 0x5A5A5A5A; // sentinel: proves the function wrote the output
        esp_err_t status = dsps_dp_s8(x, y, &result, i);
        TEST_ASSERT_EQUAL(ESP_OK, status);
        ESP_LOGD(TAG, "check_value for i = %d: %d, z[1]: %d", (int)i, (int)check_value, (int)result);
        TEST_ASSERT_EQUAL(check_value, result);
    }

    free(x);
    free(y);
}
// Benchmark for dsps_dp_s8 (the platform-selected implementation).
// Reports the average cycle count per call over repeat_count calls for several
// lengths, including lengths that are and are not multiples of 16.
TEST_CASE("dsps_dp_s8_aeXX benchmark", "[dsps]")
{
    const int max_N = 1024;
    const int repeat_count = 128;

    int8_t *x = (int8_t *)memalign(16, max_N * sizeof(int8_t));
    int8_t *y = (int8_t *)memalign(16, max_N * sizeof(int8_t));
    if (x == NULL || y == NULL) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    for (int i = 0 ; i < max_N ; i++) {
        x[i] = 1;
        y[i] = 10;
    }

    const int check_N_max[] = {127, 128, 255, 256, 511, 512, 1023, 1024};
    const size_t check_count = sizeof(check_N_max) / sizeof(check_N_max[0]);
    int32_t result = 0;

    for (size_t i = 0 ; i < check_count ; i++) {
        int N = check_N_max[i];

        unsigned int start_b = dsp_get_cpu_cycle_count();
        for (int j = 0 ; j < repeat_count ; j++) {
            dsps_dp_s8(x, y, &result, N);
        }
        unsigned int end_b = dsp_get_cpu_cycle_count();

        // Unsigned subtraction stays correct across a counter wrap-around.
        float cycles = end_b - start_b;
        ESP_LOGI(TAG, "dsps_dp_s8_aeXX - %f cycles for %d samples", cycles / repeat_count, N);
    }

    free(x);
    free(y);
}
@@ -1,79 +0,0 @@
// Copyright 2026 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include <malloc.h>
#include "dsps_dotprod.h"
#include "dsp_tests.h"
static const char *TAG = "dsps_dp_s8_ansi";
// Functional test for dsps_dp_s8_ansi.
//
// The inputs are non-constant so that an indexing mistake changes the sum. The
// expected value is a running sum maintained independently of the function
// under test.
TEST_CASE("dsps_dp_s8_ansi functionality", "[dsps]")
{
    const int max_N = 1024;

    int8_t *x = (int8_t *)memalign(16, max_N * sizeof(int8_t));
    int8_t *y = (int8_t *)memalign(16, max_N * sizeof(int8_t));
    if (x == NULL || y == NULL) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    for (int i = 0 ; i < max_N ; i++) {
        x[i] = (int8_t)((i % 15) - 7);
        y[i] = (int8_t)((i % 11) - 5);
    }

    // Check every length in [1, max_N) against the reference sum.
    int32_t check_value = 0;
    for (int i = 1; i < max_N; i++) {
        check_value += (int32_t)x[i - 1] * (int32_t)y[i - 1];

        int32_t result = 0x5A5A5A5A; // sentinel: proves the function wrote the output
        esp_err_t status = dsps_dp_s8_ansi(x, y, &result, i);
        TEST_ASSERT_EQUAL(ESP_OK, status);
        ESP_LOGD(TAG, "check_value for i = %d: %d, z[1]: %d", (int)i, (int)check_value, (int)result);
        TEST_ASSERT_EQUAL(check_value, result);
    }

    free(x);
    free(y);
}
// Benchmark for dsps_dp_s8_ansi.
// Reports the cycle count of a single call for several input lengths.
TEST_CASE("dsps_dp_s8_ansi benchmark", "[dsps]")
{
    const int max_N = 1024;

    int8_t *x = (int8_t *)memalign(16, max_N * sizeof(int8_t));
    int8_t *y = (int8_t *)memalign(16, max_N * sizeof(int8_t));
    if (x == NULL || y == NULL) {
        free(x);
        free(y);
        TEST_FAIL_MESSAGE("memalign failed");
    }

    for (int i = 0 ; i < max_N ; i++) {
        x[i] = 1;
        y[i] = 10;
    }

    const int check_N_max[] = {127, 128, 255, 256, 511, 512, 1023, 1024};
    const size_t check_count = sizeof(check_N_max) / sizeof(check_N_max[0]);
    int32_t result = 0;

    for (size_t i = 0 ; i < check_count ; i++) {
        int N = check_N_max[i];

        unsigned int start_b = dsp_get_cpu_cycle_count();
        dsps_dp_s8_ansi(x, y, &result, N);
        unsigned int end_b = dsp_get_cpu_cycle_count();

        // Unsigned subtraction stays correct across a counter wrap-around.
        float cycles = end_b - start_b;
        ESP_LOGI(TAG, "dsps_dp_s8_ansi - %f cycles for %d samples", cycles, N);
    }

    free(x);
    free(y);
}