This commit is contained in:
2026-05-06 20:17:26 +07:00
parent 801dac9328
commit d6a8619e70
589 changed files with 3132 additions and 2405 deletions
View File
View File
@@ -0,0 +1,18 @@
/*
* SPDX-FileCopyrightText: 2026 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "dspm_mult.h"
#include "dsps_dotprod.h"
/*
 * Multiply the int8 matrix A (M rows, N columns, rows stored back to back)
 * by the N-element int8 vector B, writing one int32 result per row into C.
 *
 * Each row result is produced by a single dsps_dp_s8() call over that row
 * and B. Whatever dsps_dp_s8() may report is not inspected here; the
 * function always returns ESP_OK.
 */
esp_err_t dspm_mult_mxn_1xm_int8_ansi(const int8_t *A, const int8_t *B, int32_t *C, int M, int N)
{
    const int8_t *row_ptr = A;

    for (int row = 0; row < M; ++row) {
        // Start from zero for every row before handing the accumulator over
        int32_t dot = 0;

        dsps_dp_s8(row_ptr, B, &dot, N);
        C[row] = dot;

        // Advance to the first element of the next row
        row_ptr += N;
    }

    return ESP_OK;
}
+19
View File
@@ -63,6 +63,23 @@ esp_err_t dspm_mult_f32_arp4(const float *A, const float *B, float *C, int m, in
*/
esp_err_t dspm_mult_3x3x1_f32_ae32(const float *A, const float *B, float *C);
/**
 * @brief   Matrix multiplication A[MxN]xB[1xN] int8
 *
 * Matrix multiplication to vector: C[1][M] = A[M][N] * B[1][N]
 * Each output element C[i] is the dot product of row i of A with the vector B,
 * stored as a 32-bit signed integer.
 * The implementation uses ANSI C and can be compiled and run on any platform.
 *
 * @param[in] A  input matrix A[M][N], stored row by row (M * N int8 elements)
 * @param[in] B  input matrix/vector B[1][N] (N int8 elements)
 * @param[out] C  result matrix/vector C[1][M] (M int32 elements)
 * @param[in] M  matrix rows dimension (length of the result vector)
 * @param[in] N  matrix columns dimension (vector length)
 * @return
 *      - ESP_OK on success (the ANSI implementation always returns ESP_OK)
 *      - One of the error codes from DSP library
 */
esp_err_t dspm_mult_mxn_1xm_int8_ansi(const int8_t *A, const int8_t *B, int32_t *C, int M, int N);
/**
* @brief Matrix multiplication A[3x3]xB[3x3]
*
@@ -134,6 +151,7 @@ esp_err_t dspm_mult_s16_aes3(const int16_t *A, const int16_t *B, int16_t *C, int
esp_err_t dspm_mult_s16_arp4(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift);
/**@}*/
/**@{*/
/**
* @brief Matrix subset multiplication
@@ -229,5 +247,6 @@ esp_err_t dspm_mult_ex_f32_arp4(const float *A, const float *B, float *C, int m,
#define dspm_mult_4x4x1_f32(A,B,C) dspm_mult_f32(A,B,C, 4, 4, 1)
#endif // CONFIG_DSP_OPTIMIZED
// Generic name for the int8 matrix-by-vector multiplication. It is defined
// after the CONFIG_DSP_OPTIMIZED block, so it resolves to the ANSI C
// implementation regardless of that option.
#define dspm_mult_mxn_1xm_int8 dspm_mult_mxn_1xm_int8_ansi
#endif // _dspm_mult_H_
@@ -30,7 +30,7 @@
#define dspm_mult_s16_aes3_enabled 1
#endif
#if CONFIG_IDF_TARGET_ESP32P4
#if CONFIG_IDF_TARGET_ESP32P4 || CONFIG_IDF_TARGET_ESP32S31
#ifdef CONFIG_DSP_OPTIMIZED
#define dspm_mult_f32_arp4_enabled 1
#define dspm_mult_s16_arp4_enabled 1
@@ -0,0 +1,90 @@
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "unity.h"
#include "esp_dsp.h"
#include "dsp_platform.h"
#include "esp_log.h"
#include <malloc.h>
#include "dspm_mult.h"
#include "esp_attr.h"
#include "dsp_tests.h"
// Log tag passed to the ESP_LOGx macros by the test cases in this file
static const char *TAG = "dspm_mult_mxn_1xm_int8_ansi";
// Test dspm_mult_mxn_1xm_int8_ansi functionality
//
// Every element of row i of A is set to i and B is all ones, so the expected
// result for row i is i * n.
TEST_CASE("dspm_mult_mxn_1xm_int8_ansi functionality", "[dspm]")
{
    int m = 32;
    int n = 32;

    int8_t *A = (int8_t *)memalign(16, m * n * sizeof(int8_t));
    int8_t *B = (int8_t *)memalign(16, n * sizeof(int8_t));
    int32_t *C = (int32_t *)memalign(16, m * sizeof(int32_t));

    // Fail with a clear message instead of dereferencing NULL on out-of-memory
    TEST_ASSERT_NOT_NULL(A);
    TEST_ASSERT_NOT_NULL(B);
    TEST_ASSERT_NOT_NULL(C);

    for (int i = 0 ; i < m; i++) {
        for (int j = 0 ; j < n; j++) {
            A[i * n + j] = (int8_t)i;
        }
    }
    for (int i = 0 ; i < n; i++) {
        B[i] = 1;
    }

    // Pre-fill the output with a non-zero pattern so that an element the
    // function fails to write cannot accidentally match its expected value
    memset(C, 0xA5, m * sizeof(int32_t));

    TEST_ASSERT_EQUAL(ESP_OK, dspm_mult_mxn_1xm_int8_ansi(A, B, C, m, n));

    for (int i = 0 ; i < m ; i++) {
        ESP_LOGD(TAG, "C[%i] calc=%i, expected =%i", i, (int)C[i], i * n);
        TEST_ASSERT_EQUAL(i * n, C[i]);
    }

    free(A);
    free(B);
    free(C);
}
// Benchmark dspm_mult_mxn_1xm_int8_ansi
//
// Buffers are allocated once for the largest size (128 x 128) and reused for
// every square size in the list. For each size the function is called
// repeat_count times and the average cycle count per matrix row is logged.
TEST_CASE("dspm_mult_mxn_1xm_int8_ansi benchmark", "[dspm]")
{
    int m = 128;
    int n = 128;

    int8_t *A = (int8_t *)memalign(16, m * n * sizeof(int8_t));
    int8_t *B = (int8_t *)memalign(16, n * sizeof(int8_t));
    int32_t *C = (int32_t *)memalign(16, m * sizeof(int32_t));

    // Fail with a clear message instead of dereferencing NULL on out-of-memory
    TEST_ASSERT_NOT_NULL(A);
    TEST_ASSERT_NOT_NULL(B);
    TEST_ASSERT_NOT_NULL(C);

    for (int i = 0 ; i < m; i++) {
        for (int j = 0 ; j < n; j++) {
            A[i * n + j] = (int8_t)i;
        }
    }
    for (int i = 0 ; i < n; i++) {
        B[i] = 1;
    }

    // Square sizes to measure; none exceeds the allocated 128 x 128
    int check_m_max[] = {16, 32, 64, 128};
    const size_t check_count = sizeof(check_m_max) / sizeof(check_m_max[0]);

    for (size_t idx = 0 ; idx < check_count ; idx++) {
        int rows = check_m_max[idx];
        int cols = check_m_max[idx];
        int repeat_count = 128;

        // The return status is not checked inside the timed loop so that only
        // the calls themselves are measured
        unsigned int start_b = dsp_get_cpu_cycle_count();
        for (int rep = 0 ; rep < repeat_count ; rep++) {
            dspm_mult_mxn_1xm_int8_ansi(A, B, C, rows, cols);
        }
        unsigned int end_b = dsp_get_cpu_cycle_count();

        float cycles = end_b - start_b;
        ESP_LOGI(TAG, "dspm_mult_mxn_1xm_int8_ansi - %f cycles per row for %d columns", cycles / repeat_count / rows, cols);
    }

    free(A);
    free(B);
    free(C);
}