hz
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
.macro fir_s16_ae32_mul x1, x2, count, ID
|
||||
// This macro calculates fixed point dot product for ((count + 1)*4) int16 samples
|
||||
// x1 - input array1 register (samples)
|
||||
// x2 - input array2 register (coefficients) the array is inverted and is being decremented
|
||||
// count - counter register (for example a7)
|
||||
// count - (samples_count / 4) - 1
|
||||
// acc += x1[i + 0]*x2[N - i - 1] + x1[i + 1]*x2[N - i - 2] + x1[i + 2]*x2[N - i - 3] + x1[i + 3]*x2[N - i - 4]; i: 0..count
|
||||
// acchi, and acclo have to be initialized before
|
||||
// Result - acchi || acclo
|
||||
// Modifies:
|
||||
// m0, m1, m2, m3
|
||||
// acchi || acclo - must be loaded before (for example 0x3fff to acclo).
|
||||
|
||||
/*
|
||||
* Data schedule. Each line represents instruction and columns represent
|
||||
* register contents. Last column (MUL) shows the multiplication which
|
||||
* takes place. Values loaded in the given cycle are shown in square brackets.
|
||||
*
|
||||
* m0 m1 m2 m3 MUL
|
||||
* ----------------- pre-load --------------------------
|
||||
*[x0 x1] (no MULs in the first 3 instructions)
|
||||
* x0 x1 [y(N-1) y(N-2)]
|
||||
* x0 x1 [x2 x3] y(N-1) y(N-2)
|
||||
* x0 x1 x2 x3 y(N-1) y(N-2) [y(N-3) y(N-4)] x0*y(N-1)
|
||||
* -------------------- loop ------------------------ (the following 4 instructions are
|
||||
*[x4 x5] x2 x3 y(N-1) y(N-2) y(N-3) y(N-4) x1*y(N-2) repeated as much as needed)
|
||||
* x4 x5 x2 x3 [y(N-5) y(M-6)] y(N-3) y(N-4) x2*y(N-3)
|
||||
* x4 x5 [x6 x7] y(N-5) y(M-6) y(N-3) y(N-4) x3*y(N-4)
|
||||
* x4 x5 x6 x7 y(N-5) y(M-6) [y(N-7) y(M-8)] x4*y(N-5)
|
||||
* ------------------- finalize ----------------------
|
||||
* x4 x5 x6 x7 y(N-5) y(M-6) y(N-7) y(M-8) x5*y(N-6) (nothing is load)
|
||||
* x4 x5 x6 x7 y(N-5) y(M-6) y(N-7) y(M-8) x6*y(N-7)
|
||||
* x4 x5 x6 x7 y(N-5) y(M-6) y(N-7) y(M-8) x7*y(N-8)
|
||||
*/
|
||||
|
||||
ldinc m0, \x1
|
||||
lddec m2, \x2
|
||||
ldinc m1, \x1
|
||||
|
||||
mula.dd.lh.lddec m3, \x2, m0, m2
|
||||
loopnez \count, .loop_end_\ID
|
||||
.loop_\ID:
|
||||
mula.dd.hl.ldinc m0, \x1, m0, m2
|
||||
mula.dd.lh.lddec m2, \x2, m1, m3
|
||||
mula.dd.hl.ldinc m1, \x1, m1, m3
|
||||
mula.dd.lh.lddec m3, \x2, m0, m2
|
||||
.loop_end_\ID:
|
||||
|
||||
mula.dd.hl m0, m2
|
||||
mula.dd.lh m1, m3
|
||||
mula.dd.hl m1, m3
|
||||
|
||||
.endm // fir_s16_ae32_mul
|
||||
|
||||
.macro fir_s16_ae32_full x1, x2, count, full_count, ID
|
||||
// This macro calculates fixed point dot product for ((count + 1)*4) int16 samples
|
||||
// x1 - input array1 register (for example a2)
|
||||
// x2 - input array2 register (for example a3)
|
||||
// count - counter register (for example a7)
|
||||
// count - samples_count / 4 - 1
|
||||
// full_count - samples_count
|
||||
// acc += x1[i + 0]*x2[N - i - 1] + x1[i + 1]*x2[N - i - 2] + x1[i + 2]*x2[N - i - 3] + x1[i + 3]*x2[N - i - 4]; i: 0..count
|
||||
// acchi, and acclo have to be initialized before
|
||||
// Result - acchi || acclo
|
||||
// Modifies:
|
||||
// m0, m1, m2, m3
|
||||
// acchi || acclo - must be loaded before (for example 0x3fff to acclo).
|
||||
|
||||
// the main mac16 multiplication loop is skipped for cases with less than 4 samples
|
||||
blti \full_count, 4, .less_than_4_operands_\ID
|
||||
fir_s16_ae32_mul \x1, \x2, \count, \ID
|
||||
|
||||
.less_than_4_operands_\ID:
|
||||
|
||||
bbci \full_count, 1, .mod2chk_\ID
|
||||
ldinc m0, \x1
|
||||
lddec m2, \x2
|
||||
mula.dd.hl m0, m2
|
||||
mula.dd.lh m0, m2
|
||||
.mod2chk_\ID:
|
||||
|
||||
bbci \full_count, 0, .mod1chk_\ID
|
||||
ldinc m0, \x1
|
||||
lddec m2, \x2
|
||||
mula.dd.lh m0, m2
|
||||
.mod1chk_\ID:
|
||||
|
||||
.endm // fir_s16_ae32_full
|
||||
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "malloc.h"
|
||||
#include <string.h>
|
||||
#include "dsp_tests.h"
|
||||
|
||||
#define ROUNDING_VALUE 0x7fff
|
||||
|
||||
esp_err_t dsps_fird_init_s16(fir_s16_t *fir, int16_t *coeffs, int16_t *delay, int16_t coeffs_len, int16_t decim, int16_t start_pos, int16_t shift)
|
||||
{
|
||||
fir->coeffs = coeffs;
|
||||
fir->delay = delay;
|
||||
fir->coeffs_len = coeffs_len;
|
||||
fir->pos = 0;
|
||||
fir->decim = decim;
|
||||
fir->d_pos = start_pos;
|
||||
fir->shift = shift;
|
||||
fir->rounding_val = (int16_t)(ROUNDING_VALUE);
|
||||
fir->free_status = 0;
|
||||
|
||||
if (fir->coeffs_len < 2) { // number of coefficients must be higher than 1
|
||||
return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
}
|
||||
|
||||
if ((fir->shift > 40) || (fir->shift < -40)) { // shift amount must be within a range from -40 to 40
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
if (fir->d_pos >= fir->decim) { // start position must be lower than decimation
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
|
||||
// Rounding value buffer primary for a purpose of ee.ld.accx.ip, but used for both the esp32 and esp32s3
|
||||
// dsps_fird_s16_aexx_free() must be called to free the memory after the FIR function is finished
|
||||
int32_t *aexx_rounding_buff = (int32_t *)memalign(16, 2 * sizeof(int32_t));
|
||||
|
||||
long long rounding = (long long)(fir->rounding_val);
|
||||
|
||||
if (fir->shift >= 0) {
|
||||
rounding = (rounding >> fir->shift);
|
||||
} else {
|
||||
rounding = (rounding << (-fir->shift));
|
||||
}
|
||||
#if dsps_fird_s16_arp4_enabled
|
||||
fir->pos = start_pos;
|
||||
|
||||
int16_t *new_delay_buff = (int16_t *)memalign(16, (coeffs_len + 8 * 2) * sizeof(int16_t));
|
||||
for (int i = 0 ; i < (coeffs_len + 8 * 2) ; i++) {
|
||||
new_delay_buff[i] = 0;
|
||||
}
|
||||
fir->delay = &new_delay_buff[8];
|
||||
fir->free_status |= 0x0001;
|
||||
|
||||
#endif // dsps_fird_s16_arp4_enabled
|
||||
|
||||
|
||||
aexx_rounding_buff[0] = (int32_t)(rounding); // 32 lower bits (acclo) type reassignment to 32-bit
|
||||
aexx_rounding_buff[1] = (int32_t)((rounding >> 32) & 0xFF); // 8 higher bits (acchi) shift by 32 and apply the mask
|
||||
fir->rounding_buff = aexx_rounding_buff;
|
||||
fir->free_status |= 0x0004;
|
||||
|
||||
#if dsps_fird_s16_aes3_enabled
|
||||
|
||||
if (fir->delay == NULL) { // New delay buffer is allocated if the current delay line is NULL
|
||||
int16_t *new_delay_buff = (int16_t *)memalign(16, coeffs_len * sizeof(int16_t));
|
||||
fir->delay = new_delay_buff;
|
||||
fir->free_status |= 0x0001;
|
||||
} else {
|
||||
if ((int)fir->delay & 0xf) { // Delay line array must be aligned
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
}
|
||||
|
||||
if ((int)fir->coeffs & 0xf) { // Coefficients array must be aligned
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
|
||||
// If the number of coefficients is not divisible by 8, a new delay line a new coefficients arrays are allocated
|
||||
// the newly allocated arrays are divisible by 8. Coefficients are copied from the original fir structure to
|
||||
// the new coeffs array and the remaining space is filled with zeroes
|
||||
// dsps_fird_s16_free_coeffs_delay must be called to free the memory after the FIR function is finished
|
||||
if (fir->coeffs_len % 8) { // Number of coefficients must be divisible by 8
|
||||
int16_t zero_coeffs = (8 - (fir->coeffs_len % 8));
|
||||
int16_t new_coeffs_len = fir->coeffs_len + zero_coeffs;
|
||||
int16_t *aes3_delay_buff = (int16_t *)memalign(16, new_coeffs_len * sizeof(int16_t));
|
||||
int16_t *aes3_coeffs_buff = (int16_t *)memalign(16, new_coeffs_len * sizeof(int16_t));
|
||||
|
||||
for (int i = 0; i < fir->coeffs_len; i++) { // copy fir->coeffs to aes3_coeffs_buff
|
||||
aes3_coeffs_buff[i] = fir->coeffs[i];
|
||||
}
|
||||
|
||||
for (int i = fir->coeffs_len; i < new_coeffs_len; i++) { // add zeroes to the end
|
||||
aes3_coeffs_buff[i] = 0;
|
||||
}
|
||||
|
||||
fir->delay = aes3_delay_buff;
|
||||
fir->coeffs = aes3_coeffs_buff;
|
||||
fir->coeffs_len = new_coeffs_len;
|
||||
fir->free_status |= 0x0002;
|
||||
}
|
||||
|
||||
#endif // dsps_fird_s16_aes3_enabled
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
for (int i = 0; i < fir->coeffs_len; i++) { // Initialize the delay line to zero
|
||||
fir->delay[i] = 0;
|
||||
}
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dsps_fird_s16_aexx_free(fir_s16_t *fir)
|
||||
{
|
||||
|
||||
if (fir->free_status == 0) {
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
if (fir->free_status & 0x0003) {
|
||||
|
||||
if (fir->free_status & 0x0002) {
|
||||
free(fir->coeffs);
|
||||
}
|
||||
#if dsps_fird_s16_arp4_enabled
|
||||
fir->delay = &fir->delay[-8];
|
||||
#endif
|
||||
free(fir->delay);
|
||||
}
|
||||
|
||||
if (fir->free_status & 0x0004) {
|
||||
free(fir->rounding_buff);
|
||||
}
|
||||
fir->free_status = 0;
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
|
||||
esp_err_t dsps_16_array_rev(int16_t *arr, int16_t len)
|
||||
{
|
||||
|
||||
int16_t temp;
|
||||
|
||||
for (int i = 0; i < (int)(len / 2); i++) {
|
||||
temp = arr[i];
|
||||
arr[i] = arr[len - 1 - i];
|
||||
arr[len - 1 - i] = temp;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,181 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_s16_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_fir_s16_m_ae32.S"
|
||||
|
||||
// This is FIR filter for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_s16_ae32
|
||||
.type dsps_fird_s16_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//int32_t dsps_fird_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len)
|
||||
|
||||
|
||||
dsps_fird_s16_ae32:
|
||||
// Input params Variables
|
||||
//
|
||||
// fir - a2 N - a6
|
||||
// input - a3 pos - a7
|
||||
// output - a4 rounding_lo - a8
|
||||
// len - a5 d_pos - a9
|
||||
// &coeffs[N] - a10
|
||||
// delay - a11
|
||||
// decim - a12
|
||||
// rounding_hi - a13
|
||||
// final_shift - a14 (shift)
|
||||
|
||||
entry a1, 32
|
||||
|
||||
l16si a7, a2, 10 // a7 - pos
|
||||
l16si a6, a2, 8 // a6 - N
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
addx2 a10, a6, a10 // a10 - coeffs[N+1]
|
||||
addi a10, a10, -4 // a10 - coeffs[N]
|
||||
s32i a10, a1, 0 // save pointer to a1
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
l16si a12, a2, 12 // a12 - decimation
|
||||
l16si a9, a2, 14 // a9 - d_pos
|
||||
l16si a14, a2, 16 // a14 - shift
|
||||
|
||||
// prepare rounding value
|
||||
l32i a15, a2, 20 // get address of rounding array to a15
|
||||
l32i a8, a15, 0 // a8 = lower 32 bits of the rounding value (acclo)
|
||||
l32i a13, a15, 4 // a13 = higher 8 bits of the rounding value (acchi), offset 4 (32 bits)
|
||||
|
||||
// prepare final_shift value
|
||||
addi a14, a14, -15 // shift - 15
|
||||
abs a15, a14
|
||||
blti a15, 32, _shift_lower_than_32_init // check if lower than 32
|
||||
|
||||
// greater than 32 could only be negative shift ((-40 to +40) - 15) -> -55 to +25
|
||||
addi a14, a14, 32 // if greater than 32, add 32 (SRC is not defined for SAR greater than 32)
|
||||
_shift_lower_than_32_init:
|
||||
|
||||
bltz a14, _shift_negative_init // branch if lower than zero (not including zero)
|
||||
beqz a14, _shift_negative_init // branch if equal to zero (add zero to the previous statement)
|
||||
ssl a14 // if positive, set SAR register to left shift value (SAR = 32 - shift)
|
||||
|
||||
j _end_of_shift_init
|
||||
|
||||
_shift_negative_init: // negative shift
|
||||
abs a14, a14 // absolute value
|
||||
ssr a14 // SAR = -shift
|
||||
// final_shift is saved to SAR register, SAR is not being changed during the execution
|
||||
|
||||
_end_of_shift_init:
|
||||
l16si a14, a2, 16 // a14 - load shift value
|
||||
addi a14, a14, -15 // shift - 15
|
||||
|
||||
s32i a5, a1, 4 // save len to a1, used as the return value
|
||||
|
||||
|
||||
// first delay line load (decim - d_pos) when d_pos is not 0
|
||||
beqz a9, _fird_loop_len
|
||||
sub a15, a12, a9 // a15 = decim - d_pos
|
||||
|
||||
loopnez a15, ._loop_d_pos
|
||||
|
||||
blt a7, a6, reset_fir_pos_d_pos // branch if fir->pos >= fir->N
|
||||
movi.n a7, 0 // fir->pos = 0
|
||||
l32i a11, a2, 4 // reset delay line to the beginning
|
||||
reset_fir_pos_d_pos:
|
||||
|
||||
l16si a15, a3, 0 // load 16 bits from input (a3) to a15
|
||||
addi a7, a7, 1 // fir->pos++
|
||||
s16i a15, a11, 0 // save 16 bits from a15 to delay line (a11)
|
||||
addi a3, a3, 2 // increment input pointer
|
||||
addi a11, a11, 2 // increment delay line pointer
|
||||
._loop_d_pos:
|
||||
|
||||
j .fill_delay_line // skip the first iteration of the delay line filling routine
|
||||
|
||||
// outer loop
|
||||
_fird_loop_len:
|
||||
|
||||
loopnez a12, .fill_delay_line
|
||||
|
||||
blt a7, a6, reset_fir_pos // branch if fir->pos >= fir->N
|
||||
movi.n a7, 0 // fir->pos = 0
|
||||
l32i a11, a2, 4 // reset delay line to the beginning
|
||||
reset_fir_pos:
|
||||
|
||||
l16si a15, a3, 0 // load 16 bits from input (a3) to a15
|
||||
addi a7, a7, 1 // fir->pos++
|
||||
s16i a15, a11, 0 // save 16 bits from a15 to delay line (a11)
|
||||
addi a3, a3, 2 // increment input pointer
|
||||
addi a11, a11, 2 // increment delay line pointer
|
||||
.fill_delay_line:
|
||||
|
||||
// prepare MAC unit
|
||||
wsr a8, acclo // acclo = a8
|
||||
wsr a13, acchi // acchi = a13
|
||||
|
||||
addi a11, a11, -4 // preset delay line pointer, samples (array is being incremented)
|
||||
sub a9, a6, a7 // a9 = full_count = fir->N - fir->pos
|
||||
|
||||
// (Count / 4) - 1
|
||||
srli a15, a9, 2 // a15 = count = full_count /4
|
||||
addi a10, a10, 4 // preset coeffs pointer, samples (array is being decremented)
|
||||
addi a15, a15, -1 // count - 1
|
||||
|
||||
// x1, x2, count, full_count, ID
|
||||
fir_s16_ae32_full a11, a10, a15, a9, __LINE__
|
||||
|
||||
l32i a10, a2, 0 // load coeffs
|
||||
l32i a11, a2, 4 // reset delay line to the beginning
|
||||
addx2 a10, a7, a10 // move coeffs pointer to the end
|
||||
|
||||
srli a15, a7, 2 // a15 = count = full_count (fir->pos) / 4
|
||||
addi a11, a11, -4 // preset delay line pointer, samples (array is being incremented)
|
||||
addi a15, a15, -1 // count - 1
|
||||
|
||||
// x1, x2, count, full_count, ID
|
||||
fir_s16_ae32_full a11, a10, a15, a7, __LINE__
|
||||
|
||||
// SAR already set from the beginning to final_shift value
|
||||
abs a15, a14 // absolute value of shift
|
||||
l32i a10, a1, 0 // reset coefficient pointer
|
||||
blti a15, 32, _shift_lower_than_32
|
||||
rsr a9, acchi // get only higher 8 bits of the acc register
|
||||
movi.n a15, 0xFF // higher 8 bits mask
|
||||
and a9, a9, a15 // apply mask
|
||||
srl a15, a9
|
||||
j _shift_set
|
||||
|
||||
_shift_lower_than_32:
|
||||
rsr a9, acchi // get higher 8 bits of the acc register
|
||||
movi.n a11, 0xFF // higher 8 bits mask
|
||||
rsr a15, acclo // get lower 32 bits of the acc register
|
||||
and a9, a9, a11 // apply mask
|
||||
|
||||
|
||||
bltz a14, _shift_negative // branch if lower than zero (if negative)
|
||||
beqz a14, _shift_negative
|
||||
src a15, a15, a9 // funnel shift left
|
||||
j _shift_set
|
||||
|
||||
_shift_negative: // negative shift
|
||||
src a15, a9, a15 // funnel shift right
|
||||
|
||||
_shift_set:
|
||||
|
||||
l32i a11, a2, 4 // Load initial position of the delay line
|
||||
s16i a15, a4, 0 // save the shifted value to the output array (a4)
|
||||
addi a5, a5, -1 // len--
|
||||
addi a4, a4, 2 // increase pointer of the output array
|
||||
addx2 a11, a7, a11 // p_delay[fir->pos] - (two times the fir->pos)
|
||||
|
||||
// counter
|
||||
bnez a5, _fird_loop_len // break if a5 == 0
|
||||
|
||||
l32i.n a2, a1, 4 // load return value to a2
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fird_s16_ae32_enabled
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
int32_t dsps_fird_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len)
|
||||
{
|
||||
int32_t result = 0;
|
||||
int32_t input_pos = 0;
|
||||
long long rounding = 0;
|
||||
const int32_t final_shift = fir->shift - 15;
|
||||
|
||||
rounding = (long long)(fir->rounding_val);
|
||||
|
||||
if (fir->shift >= 0) {
|
||||
rounding = (rounding >> fir->shift) & 0xFFFFFFFFFF; // 40-bit mask
|
||||
} else {
|
||||
rounding = (rounding << (-fir->shift)) & 0xFFFFFFFFFF; // 40-bit mask
|
||||
}
|
||||
|
||||
// len is already a length of the *output array, calculated as (length of the input array / decimation)
|
||||
for (int i = 0; i < len; i++) {
|
||||
|
||||
for (int j = 0; j < fir->decim - fir->d_pos; j++) {
|
||||
|
||||
if (fir->pos >= fir->coeffs_len) {
|
||||
fir->pos = 0;
|
||||
}
|
||||
fir->delay[fir->pos++] = input[input_pos++];
|
||||
}
|
||||
fir->d_pos = 0;
|
||||
|
||||
long long acc = rounding;
|
||||
int16_t coeff_pos = fir->coeffs_len - 1;
|
||||
|
||||
for (int n = fir->pos; n < fir->coeffs_len ; n++) {
|
||||
acc += (int32_t)fir->coeffs[coeff_pos--] * (int32_t)fir->delay[n];
|
||||
}
|
||||
for (int n = 0; n < fir->pos ; n++) {
|
||||
acc += (int32_t)fir->coeffs[coeff_pos--] * (int32_t)fir->delay[n];
|
||||
}
|
||||
|
||||
if (final_shift > 0) {
|
||||
output[result++] = (int16_t)(acc << final_shift);
|
||||
} else {
|
||||
output[result++] = (int16_t)(acc >> (-final_shift));
|
||||
}
|
||||
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_s16_arp4_enabled == 1)
|
||||
|
||||
// This is FIR filter for esp32p4 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_s16_arp4
|
||||
.global dsps_fird_s16_ansi
|
||||
.type dsps_fird_s16_arp4,@function
|
||||
// The function implements the following C code:
|
||||
// int32_t dsps_fird_s16_arp4(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
|
||||
dsps_fird_s16_arp4:
|
||||
// In case of filter length different then 8*K
|
||||
lh t2, 8(a0) // t2 - coeffs_len
|
||||
andi t2, t2, 7
|
||||
beqz t2, .dsps_fird_s16_arp4_body
|
||||
j dsps_fird_s16_ansi
|
||||
|
||||
.dsps_fird_s16_arp4_body:
|
||||
add sp,sp,-48
|
||||
sw s0, 0(sp)
|
||||
sw s1, 4(sp)
|
||||
sw s2, 8(sp)
|
||||
sw s3, 12(sp)
|
||||
sw s4, 16(sp)
|
||||
sw s5, 20(sp)
|
||||
sw s6, 24(sp)
|
||||
sw s7, 28(sp)
|
||||
sw s8, 32(sp)
|
||||
sw s9, 36(sp)
|
||||
sw s10, 40(sp)
|
||||
sw s11, 44(sp)
|
||||
|
||||
// Enable analigned data access
|
||||
esp.movx.r.cfg t6
|
||||
or t6, t6, 2
|
||||
esp.movx.w.cfg t6
|
||||
|
||||
lw t1, 4(a0) // t1 - delay_line
|
||||
lh t2, 8(a0) // t2 - coeffs_len
|
||||
lh t3, 10(a0) // t3 - pos
|
||||
lh t6, 16(a0) // t6 - shift
|
||||
add t6, t6, -15
|
||||
neg t6, t6
|
||||
lw t5, 20(a0) // t5 - rounding_buff
|
||||
lw s2, 4(a0) // s2 - delay_line* current position
|
||||
add s2, s2, t3 // s2 = delay_line + pos*2
|
||||
add s2, s2, t3 //
|
||||
add s4, t2, t2 // s4 = coeff_len*2
|
||||
add s0, t1, s4 // s0 - &delay[coeffs_len]
|
||||
|
||||
lh a4, 0(t1)
|
||||
.loop_len:
|
||||
lh t4, 12(a0) // t4 - decim
|
||||
.loop_decim_copy:
|
||||
lh s1, 0(a1) // load input data
|
||||
add a1, a1, 2
|
||||
|
||||
sh s1, 0(s2)
|
||||
add s2, s2, 2 // preincrement of delay line
|
||||
bgt s0, s2, .skeep_reset
|
||||
lw s2, 4(a0) // s2 - delay_line
|
||||
.skeep_reset:
|
||||
add t4, t4, -1
|
||||
bgtz t4, .loop_decim_copy
|
||||
|
||||
// s5 - count1 = length - pos
|
||||
// s6 = count1 >> 3 :
|
||||
sub t3, s2, t1
|
||||
srli t3, t3, 1 // t3 = (pos*2)>>1
|
||||
sub s5, t2, t3
|
||||
srli s6, s5, 3 // s6 = (coeff_len - pos)>>3
|
||||
|
||||
srli s7, t3, 3 // s7 = pos>>3
|
||||
and s8, t3, 0x07 // s8 = pos&0x07
|
||||
|
||||
esp.ld.xacc.ip t5, 0 // load rounding value to accx
|
||||
|
||||
lw s10, 0(a0) // s10 - coeffs
|
||||
esp.vld.128.ip q0, s10, 16 //q0 - coeffs
|
||||
mv s9, s2 // s9 - pointer to delay line
|
||||
esp.vld.128.ip q1, s9, 16 // q1 - delay line data
|
||||
|
||||
beqz s6, .skip_main_loop1
|
||||
esp.lp.setup 0, s6, .main_loop1
|
||||
esp.vmulas.s16.xacc.ld.ip q0, s10, 16, q0, q1 // q0 - coeffs, q1 - data
|
||||
.main_loop1: esp.vld.128.ip q1, s9, 16 // Load delay line
|
||||
.skip_main_loop1: nop
|
||||
|
||||
|
||||
add s9, s9, -16
|
||||
sub s9, s9, s4
|
||||
beqz s8, .skip_rest_add
|
||||
esp.vld.128.ip q2, s9, 16
|
||||
esp.vadd.s16 q1, q2, q1
|
||||
esp.vmulas.s16.xacc.ld.ip q0, s10, 16, q0, q1 // q0 - coeffs, q1 - data
|
||||
.skip_rest_add:
|
||||
esp.vld.128.ip q1, s9, 16
|
||||
|
||||
beqz s7, .skip_main_loop3
|
||||
esp.lp.setup 1, s7, .main_loop3
|
||||
esp.vmulas.s16.xacc.ld.ip q0, s10, 16, q0, q1 // q0 - coeffs, q1 - data
|
||||
esp.vld.128.ip q1, s9, 16
|
||||
.main_loop3: nop
|
||||
.skip_main_loop3: nop
|
||||
|
||||
// Shift and Store result
|
||||
esp.srs.s.xacc s11, t6 // shift accx register by final_shift amount (a6), save the lower 32bits to a15
|
||||
sh s11, 0(a2) // store result to output buffer
|
||||
add a2, a2, 2
|
||||
|
||||
add a3, a3, -1
|
||||
bgtz a3, .loop_len
|
||||
sh t3, 10(a0)
|
||||
|
||||
.fast_exit:
|
||||
mv a0, a6
|
||||
|
||||
lw s0, 0(sp)
|
||||
lw s1, 4(sp)
|
||||
lw s2, 8(sp)
|
||||
lw s3, 12(sp)
|
||||
lw s4, 16(sp)
|
||||
lw s5, 20(sp)
|
||||
lw s6, 24(sp)
|
||||
lw s7, 28(sp)
|
||||
lw s8, 32(sp)
|
||||
lw s9, 36(sp)
|
||||
lw s10, 40(sp)
|
||||
lw s11, 44(sp)
|
||||
|
||||
add sp,sp,48
|
||||
ret
|
||||
|
||||
#endif //
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "malloc.h"
|
||||
#include <string.h>
|
||||
#include "dsp_tests.h"
|
||||
#include "dsp_common.h"
|
||||
|
||||
#define ROUNDING_VALUE 0x7fff
|
||||
|
||||
esp_err_t dsps_firmr_init_s16(fir_s16_t *fir, int16_t *coeffs, int16_t *delay, int16_t coeffs_len, int16_t interp, int16_t decim, int16_t start_pos, int16_t shift)
|
||||
{
|
||||
fir->coeffs = coeffs;
|
||||
fir->delay = delay;
|
||||
|
||||
|
||||
fir->coeffs_len = coeffs_len;
|
||||
fir->pos = 0;
|
||||
fir->decim = decim;
|
||||
fir->d_pos = start_pos;
|
||||
fir->shift = shift;
|
||||
fir->rounding_val = (int16_t)(ROUNDING_VALUE);
|
||||
fir->free_status = 0;
|
||||
|
||||
if (delay == NULL) {
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
fir->delay = (int16_t *)memalign(16, (fir->delay_size + 4) * sizeof(int16_t));
|
||||
#else
|
||||
fir->delay = (int16_t *)malloc((fir->delay_size + 4) * sizeof(int16_t));
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
fir->free_status = 1;
|
||||
} else {
|
||||
fir->free_status = 0;
|
||||
}
|
||||
|
||||
|
||||
fir->interp = interp;
|
||||
fir->interp_pos = 0;
|
||||
fir->start_pos = start_pos;
|
||||
fir->delay_size = coeffs_len / interp;
|
||||
|
||||
|
||||
if (fir->coeffs_len < 2) { // number of coefficients must be higher than 1
|
||||
return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
}
|
||||
|
||||
if ((fir->shift > 40) || (fir->shift < -40)) { // shift amount must be within a range from -40 to 40
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
if (fir->d_pos >= fir->decim) { // start position must be lower than decimation
|
||||
return ESP_ERR_DSP_PARAM_OUTOFRANGE;
|
||||
}
|
||||
|
||||
for (int i = 0; i < fir->delay_size; i++) { // Initialize the delay line to zero
|
||||
fir->delay[i] = 0;
|
||||
}
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_common.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
int32_t dsps_firmr_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t input_len)
|
||||
{
|
||||
int32_t result = 0;
|
||||
long long rounding = 0;
|
||||
const int32_t final_shift = fir->shift - 15;
|
||||
rounding = (long long)(fir->rounding_val);
|
||||
|
||||
if (fir->shift >= 0) {
|
||||
rounding = (rounding >> fir->shift) & 0xFFFFFFFFFF; // 40-bit mask
|
||||
} else {
|
||||
rounding = (rounding << (-fir->shift)) & 0xFFFFFFFFFF; // 40-bit mask
|
||||
}
|
||||
|
||||
int32_t m = fir->start_pos;
|
||||
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
fir->delay[fir->pos] = input[i];
|
||||
|
||||
for (m = fir->start_pos; m < fir->interp; m += fir->decim) {
|
||||
long long acc = rounding;
|
||||
int coeff_pos = 0;
|
||||
for (int n = fir->pos; n < fir->delay_size; n++) {
|
||||
acc += (int32_t)fir->delay[n] * (int32_t)fir->coeffs[coeff_pos++ * fir->interp + m];
|
||||
}
|
||||
for (int n = 0; n < fir->pos; n++) {
|
||||
acc += (int32_t)fir->delay[n] * (int32_t)fir->coeffs[coeff_pos++ * fir->interp + m];
|
||||
}
|
||||
|
||||
if (final_shift > 0) {
|
||||
output[result++] = (int16_t)(acc << final_shift);
|
||||
} else {
|
||||
output[result++] = (int16_t)(acc >> (-final_shift));
|
||||
}
|
||||
}
|
||||
fir->start_pos = m - fir->interp;
|
||||
|
||||
fir->pos--;
|
||||
if (fir->pos < 0) {
|
||||
fir->pos = fir->delay_size - 1;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,95 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fir_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_f32_m_ae32.S"
|
||||
|
||||
// This is FIR filter for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fir_f32_ae32
|
||||
.type dsps_fir_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fir_f32_ae32(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fir_f32_ae32:
|
||||
// fir - a2
|
||||
// input - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
l32i a7, a2, 12 // a7 - pos
|
||||
movi a10, 4
|
||||
mull a13, a7, a10// a13 - a7*4
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
mull a6, a6, a10// a6 = a6*4
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
|
||||
movi.n a9, 0
|
||||
movi.n a8, 4
|
||||
movi.n a12, 4
|
||||
|
||||
// a13 - delay index
|
||||
fir_loop_len:
|
||||
// Store to delay line
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
lsi f0, a3, 0 // f0 = x[i]
|
||||
addi a3, a3, 4 // x++
|
||||
ssx f0, a11, a13 // delay[a13] = f0;
|
||||
addi a13, a13, 4 // a13++
|
||||
addi a7, a7, 1 // a7++
|
||||
// verify deley line
|
||||
blt a7, a6, do_not_reset_a13
|
||||
movi a13, 0
|
||||
movi a7, 0
|
||||
do_not_reset_a13:
|
||||
// Calc amount for delay line before end
|
||||
mov a15, a10 // a15 - coeffs
|
||||
wfr f2, a9 // f2 = 0;
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
|
||||
// a11 = &delay[pos]
|
||||
add a11, a11, a13
|
||||
|
||||
loopnez a14, first_fir_loop // pos...N-1
|
||||
lsxp f1, a15, a8 // f1 = *(coeffs--)
|
||||
lsxp f0, a11, a12 // load delay f0 = *(delay++)
|
||||
madd.s f2, f0, f1 // f2 += f0*f1
|
||||
first_fir_loop:
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
loopnez a7, second_fir_loop // 0..pos
|
||||
lsxp f1, a15, a8 // f1 = *(coeffs--)
|
||||
lsxp f0, a11, a12 // load delay f0 = *(delay++)
|
||||
madd.s f2, f0, f1 // f2 += f0*f1
|
||||
second_fir_loop:
|
||||
|
||||
// and after end
|
||||
// Store result
|
||||
ssi f2, a4, 0
|
||||
addi a4, a4, 4 // y++ - increment output pointer
|
||||
// Check loop
|
||||
addi a5, a5, -1
|
||||
bnez a5, fir_loop_len
|
||||
// store state
|
||||
|
||||
s32i a7, a2, 12 // pos = a7
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fir_f32_ae32_enabled
|
||||
@@ -0,0 +1,233 @@
|
||||
// Copyright 2018-2023 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fir_f32_aes3_enabled == 1)
|
||||
|
||||
// This is FIR filter for Esp32s3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fir_f32_aes3
|
||||
.type dsps_fir_f32_aes3,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fir_f32_aes3(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fir_f32_aes3:
|
||||
// fir - a2
|
||||
// input - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
|
||||
// a2 - fir structure
|
||||
// a3 - input
|
||||
// a4 - output
|
||||
// a5 - length
|
||||
|
||||
// a6 - fir length
|
||||
// a7 - position in delay line
|
||||
// a8 - temp
|
||||
// a9 - const 0
|
||||
// a10 - coeffs ptr
|
||||
// a11 - delay line ptr
|
||||
// a12 - const
|
||||
// a13 -
|
||||
// a14 - temp for loops
|
||||
// a15 - delay line rounded to 16
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
l32i a7, a2, 12 // a7 - pos
|
||||
|
||||
l32i a6, a2, 8 // a6 - N - amount of coefficients
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
addx4 a11, a7, a11 // a11 = a11 + a7*4
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
|
||||
movi.n a9, 0
|
||||
movi.n a12, 3
|
||||
|
||||
movi.n a12, -16
|
||||
movi.n a13, 15
|
||||
// Main loop for input samples
|
||||
.fir_loop_len:
|
||||
// Store to delay line
|
||||
lsip f15, a3, 4 // a3 += 4, f15 = input[n]
|
||||
ssip f15, a11, 4 // a11 += 4, *a11 = f15
|
||||
addi a7, a7, 1 // a7++ - position in delay line
|
||||
|
||||
//
|
||||
blt a7, a6, .do_not_reset_a11
|
||||
l32i a11, a2, 4 // Load delay line
|
||||
movi a7, 0
|
||||
.do_not_reset_a11:
|
||||
// Load rounded delay line address
|
||||
and a15, a11, a12
|
||||
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
|
||||
// Clear f4, f5 for multiplications
|
||||
const.s f4, 0
|
||||
const.s f5, 0
|
||||
const.s f6, 0
|
||||
const.s f7, 0
|
||||
|
||||
and a8, a11, a13 // a8 = a11 & 15
|
||||
beqz a8, .offset_0
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_1
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_2
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_3
|
||||
|
||||
// a10 - coeffs
|
||||
// a11 - delay line
|
||||
.offset_0:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
srli a14, a14, 2
|
||||
loopnez a14, .first_fir_loop_0 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f0, f8
|
||||
madd.s f5, f1, f9
|
||||
madd.s f6, f2, f10
|
||||
madd.s f7, f3, f11
|
||||
.first_fir_loop_0:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_0 // 0..pos
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f0, f8
|
||||
madd.s f5, f1, f9
|
||||
madd.s f6, f2, f10
|
||||
madd.s f7, f3, f11
|
||||
.second_fir_loop_0:
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_1:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f10, f9, f12, a15, 16 // Load data from delay line
|
||||
// f12 - delay[N-1], store for the last operation
|
||||
// f9..f11 - delay[0..2]
|
||||
loopnez a14, .first_fir_loop_1 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f9
|
||||
madd.s f5, f1, f10
|
||||
madd.s f6, f2, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f7, f3, f8
|
||||
.first_fir_loop_1:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_1 // 0..pos
|
||||
madd.s f4, f3, f8
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f5, f0, f9
|
||||
madd.s f6, f1, f10
|
||||
madd.s f7, f2, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_1:
|
||||
|
||||
madd.s f4, f3, f12
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_2:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f10, f13, f12, a15, 16 // Load data from delay line
|
||||
// f12, f13 - delay[N-1], delay[N-2], store for the last operation
|
||||
// f10..f11 - delay[0..1]
|
||||
loopnez a14, .first_fir_loop_2 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f10
|
||||
madd.s f5, f1, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f6, f2, f8
|
||||
madd.s f7, f3, f9
|
||||
.first_fir_loop_2:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_2 // 0..pos
|
||||
madd.s f4, f2, f8
|
||||
madd.s f5, f3, f9
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f6, f0, f10
|
||||
madd.s f7, f1, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_2:
|
||||
|
||||
madd.s f4, f2, f12
|
||||
madd.s f5, f3, f13
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_3:
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
EE.LDF.128.IP f11, f14, f13, f12, a15, 16 // Load data from delay line
|
||||
// f12, f13, f14 - delay[N-1], delay[N-2], delay[N-3], store for the last operation
|
||||
// f11 - delay[0]
|
||||
loopnez a14, .first_fir_loop_3 // pos...N-1
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f4, f0, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f5, f1, f8
|
||||
madd.s f6, f2, f9
|
||||
madd.s f7, f3, f10
|
||||
.first_fir_loop_3:
|
||||
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_3 // 0..pos
|
||||
madd.s f4, f1, f8
|
||||
madd.s f5, f2, f9
|
||||
madd.s f6, f3, f10
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f7, f0, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
.second_fir_loop_3:
|
||||
|
||||
madd.s f4, f1, f12
|
||||
madd.s f5, f2, f13
|
||||
madd.s f4, f3, f14
|
||||
|
||||
.store_fir_result:
|
||||
|
||||
add.s f4, f4, f5
|
||||
add.s f6, f6, f7
|
||||
add.s f4, f4, f6
|
||||
|
||||
// Store result
|
||||
ssip f4, a4, 4 // y++ - save result and increment output pointer
|
||||
// Check loop length
|
||||
addi a5, a5, -1
|
||||
bnez a5, .fir_loop_len
|
||||
// store state
|
||||
|
||||
s32i a7, a2, 12 // pos = a7
|
||||
movi.n a2, 0 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fir_f32_aes3_enabled
|
||||
@@ -0,0 +1,36 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
esp_err_t dsps_fir_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len)
|
||||
{
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
float acc = 0;
|
||||
int coeff_pos = 0;
|
||||
fir->delay[fir->pos] = input[i];
|
||||
fir->pos++;
|
||||
if (fir->pos >= fir->N) {
|
||||
fir->pos = 0;
|
||||
}
|
||||
for (int n = fir->pos; n < fir->N ; n++) {
|
||||
acc += fir->coeffs[coeff_pos++] * fir->delay[n];
|
||||
}
|
||||
for (int n = 0; n < fir->pos ; n++) {
|
||||
acc += fir->coeffs[coeff_pos++] * fir->delay[n];
|
||||
}
|
||||
output[i] = acc;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "malloc.h"
|
||||
|
||||
|
||||
esp_err_t dsps_fir_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int coeffs_len)
|
||||
{
|
||||
// Allocate delay line in case if it's NULL
|
||||
if (delay == NULL) {
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
delay = (float *)memalign(16, (coeffs_len + 4) * sizeof(float));
|
||||
#else
|
||||
delay = (float *)malloc((coeffs_len + 4) * sizeof(float));
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
fir->use_delay = 1;
|
||||
} else {
|
||||
fir->use_delay = 0;
|
||||
}
|
||||
for (int i = 0; i < (coeffs_len + 4); i++) {
|
||||
delay[i] = 0;
|
||||
}
|
||||
fir->coeffs = coeffs;
|
||||
fir->delay = delay;
|
||||
fir->N = coeffs_len;
|
||||
fir->pos = 0;
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
if (fir->N % 4 != 0) {
|
||||
return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
}
|
||||
// The coeffs array should be aligned to 16
|
||||
if (((uint32_t)coeffs) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
// The delay array should be aligned to 16
|
||||
if (((uint32_t)delay) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
|
||||
for (int i = 0 ; i < coeffs_len; i++) {
|
||||
fir->delay[i] = 0;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t dsps_fir_f32_free(fir_f32_t *fir)
|
||||
{
|
||||
if (fir->use_delay != 0) {
|
||||
fir->use_delay = 0;
|
||||
free(fir->delay);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_ae32_enabled == 1)
|
||||
|
||||
#include "dsps_dotprod_f32_m_ae32.S"
|
||||
|
||||
// This is FIR filter for ESP32 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_ae32
|
||||
.type dsps_fird_f32_ae32,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fird_f32_ae32(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fird_f32_ae32:
|
||||
// fir - a2
|
||||
// input - a3
|
||||
// output - a4
|
||||
// len - a5
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
l32i a7, a2, 12 // a7 - pos
|
||||
movi a10, 4
|
||||
mull a13, a7, a10// a13 - a7*4
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
mull a6, a6, a10// a6 = a6*4
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
l32i a6, a2, 8 // a6 - N
|
||||
l32i a12, a2, 16 // a12 - decimation
|
||||
movi a8, 0 // result = 0;
|
||||
|
||||
// a13 - delay index
|
||||
fird_loop_len:
|
||||
// Store to delay line
|
||||
|
||||
loopnez a12, .fird_load_data // K loops
|
||||
lsip f0, a3, 4 // f0 = x[i++]
|
||||
ssx f0, a11, a13 // delay[a13] = f0;
|
||||
addi a13, a13, 4 // a13++
|
||||
addi a7, a7, 1 // a7++
|
||||
// verify deley line
|
||||
blt a7, a6, do_not_reset_a13
|
||||
movi a13, 0
|
||||
movi a7, 0
|
||||
do_not_reset_a13:
|
||||
const.s f2, 0
|
||||
.fird_load_data:
|
||||
|
||||
addi a8, a8, 1
|
||||
|
||||
// Calc amount for delay line before end
|
||||
mov a15, a10 // a15 - coeffs
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
loopnez a14, first_fird_loop // pos...N-1
|
||||
lsip f1, a15, 4 // a15++
|
||||
lsx f0, a11, a13 // load delay f0 = delay[pos]
|
||||
addi a13, a13, 4 // a13++, pos++
|
||||
madd.s f2, f0, f1 // f2 += f0*f1
|
||||
first_fird_loop:
|
||||
movi a13, 0 // load delay line counter to 0
|
||||
loopnez a7, second_fird_loop // 0..pos
|
||||
lsip f1, a15, 4 // a15++
|
||||
lsx f0, a11, a13 // load delay f0 = delay[pos]
|
||||
addi a13, a13, 4 // a13++, pos++
|
||||
madd.s f2, f0, f1 // f2 += f0*f1
|
||||
second_fird_loop:
|
||||
|
||||
// and after end
|
||||
// Store result
|
||||
ssi f2, a4, 0
|
||||
addi a4, a4, 4 // y++ - increment output pointer
|
||||
next_itt_fir32:
|
||||
// Check loop
|
||||
addi a5, a5, -1
|
||||
bnez a5, fird_loop_len
|
||||
// store state
|
||||
|
||||
s32i a7, a2, 12 // pos = a7
|
||||
|
||||
mov a2, a8 // return status ESP_OK
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fird_f32_ae32_enabled
|
||||
@@ -0,0 +1,262 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2018-2025 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileContributor: 2025 Schuwi
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_aes3_enabled == 1)
|
||||
|
||||
// This is FIR filter for Esp32s3 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_aes3
|
||||
.type dsps_fird_f32_aes3,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fird_f32_aes3(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fird_f32_aes3:
|
||||
|
||||
// a2 - fir structure
|
||||
// a3 - input
|
||||
// a4 - output
|
||||
// a5 - length
|
||||
|
||||
// a6 - fir->N - amount of coefficients
|
||||
// a7 - fir->pos - position in delay line
|
||||
// a8 - temp
|
||||
// a9 - return value (= length)
|
||||
// a10 - fir->coeffs - pointer to constant coefficients
|
||||
// a11 - fir->delay - pointer to delay line
|
||||
// a12 - constant: -16 (= 0xFFFFFFF0)
|
||||
// a13 - constant: 15 (= 0x0000000F)
|
||||
// a14 - temp for loops
|
||||
// a15 - delay line rounded down to 16
|
||||
|
||||
entry a1, 16
|
||||
// Array increment for floating point data should be 4
|
||||
l32i a7, a2, 12 // a7 - pos
|
||||
|
||||
l32i a6, a2, 8 // a6 - N - amount of coefficients
|
||||
l32i a11, a2, 4 // a11 - delay line
|
||||
addx4 a11, a7, a11 // a11 = a11 + a7*4
|
||||
|
||||
mov.n a9, a5
|
||||
|
||||
movi.n a12, -16
|
||||
movi.n a13, 15
|
||||
// Main loop for input samples
|
||||
.fird_loop_len:
|
||||
// Store K values from input to delay line:
|
||||
|
||||
l32i a14, a2, 16 // a14 - fir->decim = K (decimation factor)
|
||||
loopnez a14, .fird_load_data // K loops
|
||||
// Store to delay line
|
||||
lsip f15, a3, 4 // a3 += 4, f15 = input[n]
|
||||
ssip f15, a11, 4 // a11 += 4, *a11 = f15
|
||||
addi a7, a7, 1 // a7++ - position in delay line
|
||||
|
||||
blt a7, a6, .do_not_reset_a11
|
||||
l32i a11, a2, 4 // Load delay line
|
||||
movi a7, 0
|
||||
.do_not_reset_a11:
|
||||
and a15, a11, a12
|
||||
.fird_load_data:
|
||||
//
|
||||
// Process data
|
||||
//
|
||||
|
||||
l32i a10, a2, 0 // a10 - coeffs
|
||||
|
||||
// Clear f4, f5, f6, f7 for multiplications
|
||||
const.s f4, 0
|
||||
const.s f5, 0
|
||||
const.s f6, 0
|
||||
const.s f7, 0
|
||||
|
||||
// Branch according to the current position (modulo 4) in delay line
|
||||
and a8, a11, a13 // a8 = a11 & 15
|
||||
beqz a8, .offset_0
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_1
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_2
|
||||
addi a8, a8, -4
|
||||
beqz a8, .offset_3
|
||||
|
||||
// a10 - coeffs
|
||||
// a11 - delay line
|
||||
.offset_0:
|
||||
// a14 = (N - pos) / 4
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
srli a14, a14, 2
|
||||
loopnez a14, .first_fir_loop_0 // d in [pos,N[ (stride of 4) ; c in [0,N-pos[ (stride of 4)
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f0, f8 // f4 += coeffs[c] * delay[d]
|
||||
madd.s f5, f1, f9 // f5 += coeffs[c+1] * delay[d+1]
|
||||
madd.s f6, f2, f10
|
||||
madd.s f7, f3, f11
|
||||
.first_fir_loop_0:
|
||||
|
||||
// Reset delay line pointer
|
||||
l32i a15, a2, 4 // a15 - delay line [0]
|
||||
|
||||
// a14 = pos / 4
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_0 // i in [0,pos[ (stride of 4) ; c in [N-pos,N[ (stride of 4)
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f0, f8 // f4 += coeffs[c] * delay[d]
|
||||
madd.s f5, f1, f9 // f5 += coeffs[c+1] * delay[d+1]
|
||||
madd.s f6, f2, f10
|
||||
madd.s f7, f3, f11
|
||||
.second_fir_loop_0:
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_1:
|
||||
// a14 = (N - pos + 3) / 4
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
|
||||
const.s f3, 0 // f3 = 0
|
||||
EE.LDF.128.IP f11, f10, f9, f12, a15, 16 // Load data from delay line
|
||||
|
||||
// f12 - delay[N-1], store for the last operation
|
||||
// f9..f11 - delay[0..2]
|
||||
loopnez a14, .first_fir_loop_1 // pos...N-1
|
||||
madd.s f4, f3, f8 // multiplies f8 by 0 in the first iteration
|
||||
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f5, f0, f9
|
||||
madd.s f6, f1, f10
|
||||
madd.s f7, f2, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line (out of bounds access on last iteration! - delay[N], delay[N+1], delay[N+2], delay[N+3])
|
||||
.first_fir_loop_1:
|
||||
|
||||
// Reset delay line pointer
|
||||
l32i a15, a2, 4 // a15 - delay line [0]
|
||||
|
||||
// a14 = pos / 4
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_1 // 0..pos
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f3, f8
|
||||
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f5, f0, f9
|
||||
madd.s f6, f1, f10
|
||||
madd.s f7, f2, f11
|
||||
.second_fir_loop_1:
|
||||
|
||||
// Both loops together evaluate to N/4 iterations (N madds)
|
||||
|
||||
madd.s f4, f3, f12
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_2:
|
||||
// a14 = (N - pos + 3) / 4
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
|
||||
const.s f2, 0 // f2 = 0
|
||||
const.s f3, 0 // f3 = 0
|
||||
EE.LDF.128.IP f11, f10, f13, f12, a15, 16 // Load data from delay line
|
||||
|
||||
// f12, f13 - delay[N-1], delay[N-2], store for the last operation
|
||||
// f10..f11 - delay[0..1]
|
||||
loopnez a14, .first_fir_loop_2 // pos...N-1
|
||||
madd.s f4, f2, f8 // multiplies f8 by 0 in the first iteration
|
||||
madd.s f5, f3, f9 // multiplies f9 by 0 in the first iteration
|
||||
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f6, f0, f10
|
||||
madd.s f7, f1, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line (out of bounds access on last iteration! - delay[N], delay[N+1], delay[N+2], delay[N+3])
|
||||
.first_fir_loop_2:
|
||||
|
||||
// Reset delay line pointer
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_2 // 0..pos
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f2, f8
|
||||
madd.s f5, f3, f9
|
||||
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f6, f0, f10
|
||||
madd.s f7, f1, f11
|
||||
.second_fir_loop_2:
|
||||
|
||||
// Both loops together evaluate to N/4 iterations (N madds)
|
||||
|
||||
madd.s f4, f2, f12
|
||||
madd.s f5, f3, f13
|
||||
j .store_fir_result;
|
||||
|
||||
.offset_3:
|
||||
// a14 = (N - pos + 3) / 4
|
||||
sub a14, a6, a7 // a14 = N-pos
|
||||
addi a14, a14, 3
|
||||
srli a14, a14, 2
|
||||
|
||||
const.s f1, 0 // f1 = 0
|
||||
const.s f2, 0 // f2 = 0
|
||||
const.s f3, 0 // f3 = 0
|
||||
EE.LDF.128.IP f11, f14, f13, f12, a15, 16 // Load data from delay line
|
||||
|
||||
// f12, f13, f14 - delay[N-1], delay[N-2], delay[N-3], store for the last operation
|
||||
// f11 - delay[0]
|
||||
loopnez a14, .first_fir_loop_3 // pos...N-1
|
||||
madd.s f4, f1, f8
|
||||
madd.s f5, f2, f9
|
||||
madd.s f6, f3, f10
|
||||
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f7, f0, f11
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line (out of bounds access on last iteration! - delay[N], delay[N+1], delay[N+2], delay[N+3])
|
||||
.first_fir_loop_3:
|
||||
|
||||
// Reset delay line pointer
|
||||
l32i a15, a2, 4 // a11 - delay line [0]
|
||||
|
||||
srli a14, a7, 2
|
||||
loopnez a14, .second_fir_loop_3 // 0..pos
|
||||
EE.LDF.128.IP f11, f10, f9, f8, a15, 16 // Load data from delay line
|
||||
madd.s f4, f1, f8
|
||||
madd.s f5, f2, f9
|
||||
madd.s f6, f3, f10
|
||||
|
||||
EE.LDF.128.IP f3, f2, f1, f0, a10, 16 // Load coeffs
|
||||
madd.s f7, f0, f11
|
||||
.second_fir_loop_3:
|
||||
|
||||
// Both loops together evaluate to N/4 iterations (N madds)
|
||||
|
||||
madd.s f4, f1, f12
|
||||
madd.s f5, f2, f13
|
||||
madd.s f6, f3, f14
|
||||
|
||||
.store_fir_result:
|
||||
|
||||
add.s f4, f4, f5
|
||||
add.s f6, f6, f7
|
||||
add.s f4, f4, f6
|
||||
|
||||
// Store result
|
||||
ssip f4, a4, 4 // y++ - save result and increment output pointer
|
||||
|
||||
// Check loop break condition
|
||||
addi a5, a5, -1
|
||||
bnez a5, .fird_loop_len
|
||||
|
||||
// Store state
|
||||
s32i a7, a2, 12 // pos = a7
|
||||
mov.n a2, a9
|
||||
retw.n
|
||||
|
||||
#endif // dsps_fir_f32_aes3_enabled
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
int dsps_fird_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len)
|
||||
{
|
||||
int result = 0;
|
||||
for (int i = 0; i < len ; i++) {
|
||||
for (int k = 0 ; k < fir->decim ; k++) {
|
||||
fir->delay[fir->pos++] = *input++;
|
||||
if (fir->pos >= fir->N) {
|
||||
fir->pos = 0;
|
||||
}
|
||||
}
|
||||
float acc = 0;
|
||||
int coeff_pos = 0;
|
||||
for (int n = fir->pos; n < fir->N ; n++) {
|
||||
acc += fir->coeffs[coeff_pos++] * fir->delay[n];
|
||||
}
|
||||
for (int n = 0; n < fir->pos ; n++) {
|
||||
acc += fir->coeffs[coeff_pos++] * fir->delay[n];
|
||||
}
|
||||
output[result++] = acc;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
// Copyright 2018-2024 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#if (dsps_fird_f32_arp4_enabled == 1)
|
||||
|
||||
// This is FIR filter for esp32p4 processor.
|
||||
.text
|
||||
.align 4
|
||||
.global dsps_fird_f32_arp4
|
||||
.type dsps_fird_f32_arp4,@function
|
||||
// The function implements the following C code:
|
||||
//esp_err_t dsps_fird_f32_arp4(fir_f32_t* fir, const float* input, float* output, int len);
|
||||
|
||||
dsps_fird_f32_arp4:
|
||||
add sp,sp,-16
|
||||
|
||||
mv a6, a3
|
||||
lw t1, 4(a0) // t1 - delay
|
||||
lw a4, 4(a0) // a4 - delay
|
||||
lw t2, 8(a0) // t2 - N :FIR filter coefficients amount
|
||||
lw t3, 12(a0) // t3 - pos
|
||||
lw t4, 16(a0) // t4 - decim
|
||||
slli t3, t3, 2 // t5 = pos*4 (bytes)
|
||||
add t1, t1, t3 // delay[pos]
|
||||
slli t6, t2, 2 // t6 = N*4 (bytes)
|
||||
add t3, a4, t6 // last position for the daly[N]
|
||||
|
||||
nop
|
||||
.fird_loop_len:
|
||||
// p.lw a1, 4(a1)
|
||||
//fmv.w.x fa5,zero
|
||||
flw fa0, 0(a1) // f0 = x[i], first load
|
||||
esp.lp.setup 0, t4, .fird_load_data // label to the last executed instruction
|
||||
add a1, a1, 4 // i++
|
||||
fsw fa0, 0(t1) // delay[pos]
|
||||
add t1, t1, 4
|
||||
blt t1, t3, .do_not_reset_pos # if t0 < t1 then target
|
||||
lw t1, 4(a0) // t1 - delay
|
||||
.do_not_reset_pos:
|
||||
.fird_load_data: flw fa0, 0(a1) // f0 = x[i]
|
||||
|
||||
lw t0, 0(a0) // t0 - coeffs
|
||||
sub t5, t3, t1 // (last_pos - pos)*4
|
||||
srli t5, t5, 2 // N-pos
|
||||
sub t6, t1, a4
|
||||
srli t6, t6, 2 // pos
|
||||
|
||||
fmv.w.x fa2,zero
|
||||
|
||||
lw a5, 0(a0) // a5 - coeffs
|
||||
esp.lp.setup 0, t5, .first_fird_loop
|
||||
flw fa1, 0(a5)
|
||||
flw fa0, 0(t1)
|
||||
addi a5, a5, 4
|
||||
fmadd.s fa2, fa1, fa0, fa2
|
||||
.first_fird_loop: addi t1, t1, 4
|
||||
|
||||
|
||||
lw t1, 4(a0) // t1 - delay
|
||||
|
||||
beqz t6, .skeep_loop
|
||||
esp.lp.setup 0, t6, .second_fird_loop
|
||||
flw fa1, 0(a5)
|
||||
flw fa0, 0(t1)
|
||||
addi a5, a5, 4
|
||||
fmadd.s fa2, fa1, fa0, fa2
|
||||
.second_fird_loop: addi t1, t1, 4
|
||||
|
||||
.skeep_loop:
|
||||
// Store result
|
||||
|
||||
fsw fa2, 0(a2)
|
||||
addi a2, a2, 4
|
||||
|
||||
addi a3, a3, -1
|
||||
BNEZ a3, .fird_loop_len// Jump if > 0
|
||||
|
||||
sub t6, t1, a4
|
||||
srli t6, t6, 2 // pos
|
||||
|
||||
sw t6, 12(a0) // t3 - pos
|
||||
|
||||
mv a0, a6
|
||||
add sp,sp,16
|
||||
ret
|
||||
|
||||
#endif //
|
||||
@@ -0,0 +1,46 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
|
||||
esp_err_t dsps_fird_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int N, int decim)
|
||||
{
|
||||
fir->coeffs = coeffs;
|
||||
fir->delay = delay;
|
||||
fir->N = N;
|
||||
fir->pos = 0;
|
||||
fir->decim = decim;
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
// The amount of coefficients should be divided to 4,
|
||||
// if not, add zero coefficients to round length to 0
|
||||
if (fir->N % 4 != 0) {
|
||||
return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
}
|
||||
// The coeffs array should be aligned to 16
|
||||
if (((uint32_t)coeffs) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
// The delay array should be aligned to 16
|
||||
if (((uint32_t)delay) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
|
||||
for (int i = 0 ; i < N; i++) {
|
||||
fir->delay[i] = 0;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_common.h"
|
||||
#include <malloc.h>
|
||||
#include "dsp_tests.h"
|
||||
|
||||
int dsps_firmr_f32_ansi(fir_f32_t *fir, const float *input, float *output, int input_len)
|
||||
{
|
||||
int m = fir->start_pos;
|
||||
int result = 0;
|
||||
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
fir->delay[fir->pos] = input[i];
|
||||
|
||||
for (m = fir->start_pos; m < fir->interp; m += fir->decim) {
|
||||
float fir_sum = 0;
|
||||
int coeff_pos = 0;
|
||||
for (int n = fir->pos; n < fir->delay_size; n++) {
|
||||
fir_sum += fir->delay[n] * fir->coeffs[coeff_pos++ * fir->interp + m];
|
||||
}
|
||||
for (int n = 0; n < fir->pos; n++) {
|
||||
fir_sum += fir->delay[n] * fir->coeffs[coeff_pos++ * fir->interp + m];
|
||||
}
|
||||
output[result++] = fir_sum;
|
||||
}
|
||||
fir->start_pos = m - fir->interp;
|
||||
|
||||
fir->pos--;
|
||||
if (fir->pos < 0) {
|
||||
fir->pos = fir->delay_size - 1;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_common.h"
|
||||
#include <malloc.h>
|
||||
#include "dsp_tests.h"
|
||||
|
||||
esp_err_t dsps_firmr_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int length, int interp, int decim, int start_pos)
|
||||
{
|
||||
fir->coeffs = coeffs;
|
||||
fir->delay = delay;
|
||||
fir->N = length;
|
||||
fir->pos = 0;
|
||||
fir->decim = decim;
|
||||
fir->use_delay = 0;
|
||||
fir->interp = interp;
|
||||
fir->interp_pos = 0;
|
||||
fir->start_pos = start_pos;
|
||||
fir->delay_size = length / interp;
|
||||
|
||||
if (delay == NULL) {
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
fir->delay = (float *)memalign(16, (fir->delay_size + 4) * sizeof(float));
|
||||
#else
|
||||
fir->delay = (float *)malloc((fir->delay_size + 4) * sizeof(float));
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
fir->use_delay = 1;
|
||||
} else {
|
||||
fir->use_delay = 0;
|
||||
}
|
||||
|
||||
if (decim == 0) {
|
||||
return ESP_ERR_DSP_INVALID_PARAM;
|
||||
}
|
||||
if (interp == 0) {
|
||||
return ESP_ERR_DSP_INVALID_PARAM;
|
||||
}
|
||||
if (length % interp != 0) {
|
||||
return ESP_ERR_DSP_INVALID_LENGTH;
|
||||
}
|
||||
if (start_pos < 0 || start_pos >= decim) {
|
||||
return ESP_ERR_DSP_INVALID_PARAM;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
// The delay array should be aligned to 16
|
||||
if (((uint32_t)delay) & 0x0f) {
|
||||
return ESP_ERR_DSP_ARRAY_NOT_ALIGNED;
|
||||
}
|
||||
#endif // CONFIG_IDF_TARGET_ESP32S3
|
||||
|
||||
for (int i = 0 ; i < fir->delay_size; i++) {
|
||||
fir->delay[i] = 0;
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
@@ -0,0 +1,386 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2018-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _dsps_fir_H_
|
||||
#define _dsps_fir_H_
|
||||
|
||||
|
||||
#include "dsp_err.h"
|
||||
|
||||
#include "dsps_fir_platform.h"
|
||||
#include "dsp_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Data struct of f32 fir filter
|
||||
*
|
||||
* This structure is used by a filter internally. A user should access this structure only in case of
|
||||
* extensions for the DSP Library.
|
||||
* All fields of this structure are initialized by the dsps_fir_init_f32(...) function.
|
||||
*/
|
||||
typedef struct fir_f32_s {
|
||||
float *coeffs; /*!< Pointer to the coefficient buffer.*/
|
||||
float *delay; /*!< Pointer to the delay line buffer.*/
|
||||
int N; /*!< FIR filter coefficients amount.*/
|
||||
int pos; /*!< Position in delay line.*/
|
||||
int decim; /*!< Decimation factor.*/
|
||||
int16_t use_delay; /*!< The delay line was allocated by init function.*/
|
||||
/**
|
||||
* @brief Interpolation parameters
|
||||
*
|
||||
* @note This is used for multi-rate FIR filter
|
||||
*/
|
||||
int delay_size; /*!< Size of the delay line.*/
|
||||
int interp; /*!< Interpolation factor.*/
|
||||
int interp_pos; /*!< Interpolation position.*/
|
||||
int start_pos; /*!< Start position for decimation counter.*/
|
||||
} fir_f32_t;
|
||||
|
||||
/**
|
||||
* @brief Data struct of s16 fir filter
|
||||
*
|
||||
* This structure is used by a filter internally. A user should access this structure only in case of
|
||||
* extensions for the DSP Library.
|
||||
* All fields of this structure are initialized by the dsps_fir_init_s16(...) function.
|
||||
*/
|
||||
typedef struct fir_s16_s {
|
||||
int16_t *coeffs; /*!< Pointer to the coefficient buffer.*/
|
||||
int16_t *delay; /*!< Pointer to the delay line buffer.*/
|
||||
int16_t coeffs_len; /*!< FIR filter coefficients amount.*/
|
||||
int16_t pos; /*!< Position in delay line.*/
|
||||
int16_t decim; /*!< Decimation factor.*/
|
||||
int16_t d_pos; /*!< Actual decimation counter.*/
|
||||
int16_t shift; /*!< Shift value of the result.*/
|
||||
int32_t *rounding_buff; /*!< Rounding buffer for the purposes of esp32s3 ee.ld.accx.ip assembly instruction */
|
||||
int32_t rounding_val; /*!< Rounding value*/
|
||||
int16_t free_status; /*!< Indicator for dsps_fird_s16_aes3_free() function*/
|
||||
|
||||
/**
|
||||
* @brief Interpolation parameters
|
||||
*
|
||||
* @note This is used for multi-rate FIR filter
|
||||
*/
|
||||
int16_t delay_size; /*!< Size of the delay line.*/
|
||||
int16_t interp; /*!< Interpolation factor.*/
|
||||
int16_t interp_pos; /*!< Interpolation position.*/
|
||||
int16_t start_pos; /*!< Start position for decimation counter.*/
|
||||
} fir_s16_t;
|
||||
|
||||
/**
|
||||
* @brief initialize structure for 32 bit FIR filter
|
||||
*
|
||||
* Function initialize structure for 32 bit floating point FIR filter
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be preallocated
|
||||
* @param coeffs: array with FIR filter coefficients. Must be length N
|
||||
* @param delay: array for FIR filter delay line. Must have a length = coeffs_len + 4
|
||||
* @param coeffs_len: FIR filter length. Length of coeffs array. For esp32s3 length should be divided by 4 and aligned to 16.
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_fir_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int coeffs_len);
|
||||
|
||||
/**
|
||||
* @brief initialize structure for 32 bit Decimation FIR filter
|
||||
* Function initialize structure for 32 bit floating point FIR filter with decimation
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be preallocated
|
||||
* @param coeffs: array with FIR filter coefficients. Must be length N
|
||||
* @param delay: array for FIR filter delay line. Must be length N
|
||||
* @param N: FIR filter length. Length of coeffs and delay arrays.
|
||||
* @param decim: decimation factor.
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_fird_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int N, int decim);
|
||||
|
||||
/**
|
||||
* @brief initialize structure for 16 bit Decimation FIR filter
|
||||
* Function initialize structure for 16 bit signed fixed point FIR filter with decimation
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be preallocated
|
||||
* @param coeffs: array with FIR filter coefficients. Must be length N
|
||||
* @param delay: array for FIR filter delay line. Must be length N
|
||||
* @param coeffs_len: FIR filter length. Length of coeffs and delay arrays.
|
||||
* @param decim: decimation factor.
|
||||
* @param start_pos: initial value of decimation counter. Must be [0..d)
|
||||
* @param shift: shift position of the result
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_fird_init_s16(fir_s16_t *fir, int16_t *coeffs, int16_t *delay, int16_t coeffs_len, int16_t decim, int16_t start_pos, int16_t shift);
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief 32 bit floating point FIR filter
|
||||
*
|
||||
* Function implements FIR filter
|
||||
* The extension (_ansi) uses ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
* @param[in] input: input array
|
||||
* @param[out] output: array with the result of FIR filter
|
||||
* @param[in] len: length of input and result arrays
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_fir_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
esp_err_t dsps_fir_f32_ae32(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
esp_err_t dsps_fir_f32_aes3(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief 32 bit floating point Decimation FIR filter
|
||||
*
|
||||
* Function implements FIR filter with decimation
|
||||
* The extension (_ansi) uses ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
* @param input: input array
|
||||
* @param output: array with the result of FIR filter
|
||||
* @param len: length of result array
|
||||
*
|
||||
* @return: function returns the number of samples stored in the output array
|
||||
* depends on the previous state value could be [0..len/decimation]
|
||||
*/
|
||||
int dsps_fird_f32_ansi(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
int dsps_fird_f32_ae32(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
int dsps_fird_f32_aes3(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
int dsps_fird_f32_arp4(fir_f32_t *fir, const float *input, float *output, int len);
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief 16 bit signed fixed point Decimation FIR filter
|
||||
*
|
||||
* Function implements FIR filter with decimation
|
||||
* The extension (_ansi) uses ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
* @param input: input array
|
||||
* @param output: array with the result of the FIR filter
|
||||
* @param len: length of the result array
|
||||
*
|
||||
* @return: function returns the number of samples stored in the output array
|
||||
* depends on the previous state value could be [0..len/decimation]
|
||||
*/
|
||||
int32_t dsps_fird_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
int32_t dsps_fird_s16_ae32(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
int32_t dsps_fird_s16_aes3(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
int32_t dsps_fird_s16_arp4(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t len);
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief initialize structure for multi-rate FIR filter
|
||||
* Function initialize structure for 32 bit floating point multi-rate FIR filter
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be preallocated
|
||||
* @param coeffs: array with FIR filter coefficients. Must be length N
|
||||
* @param delay: array for FIR filter delay line. Must be length N
|
||||
* @param length: FIR filter length. Length of coeffs and delay arrays.
|
||||
* @param interp: interpolation factor.
|
||||
* @param decim: decimation factor.
|
||||
* @param start_pos: initial value of decimation counter. Must be [0..decim)
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_firmr_init_f32(fir_f32_t *fir, float *coeffs, float *delay, int length, int interp, int decim, int start_pos );
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief initialize structure for multi-rate FIR filter
|
||||
* Function initialize structure for 16 bit signed fixed point multi-rate FIR filter
|
||||
* The implementation use ANSI C and could be compiled and run on any platform
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be preallocated
|
||||
* @param coeffs: array with FIR filter coefficients. Must be length N
|
||||
* @param delay: array for FIR filter delay line. Must be length N
|
||||
* @param length: FIR filter length. Length of coeffs and delay arrays.
|
||||
* @param interp: interpolation factor.
|
||||
* @param decim: decimation factor.
|
||||
* @param start_pos: initial value of decimation counter. Must be [0..decim)
|
||||
* @param shift: shift of accumulator value to store in the output array.
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
* - One of the error codes from DSP library
|
||||
*/
|
||||
esp_err_t dsps_firmr_init_s16(fir_s16_t *fir, int16_t *coeffs, int16_t *delay, int16_t length, int16_t interp, int16_t decim, int16_t start_pos, int16_t shift);
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief 32 bit floating point multi-rate FIR filter
|
||||
*
|
||||
* Function implements FIR filter with decimation
|
||||
* The extension (_ansi) uses ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
* @param input: input array
|
||||
* @param output: array with the result of the FIR filter
|
||||
* @param input_len: length of the input array
|
||||
*
|
||||
* @return
|
||||
* - amount of samples stored in the output array
|
||||
* - depends on the previous state value
|
||||
* - could be [0..len*intepr/decimation]
|
||||
*
|
||||
*/
|
||||
int dsps_firmr_f32_ansi(fir_f32_t *fir, const float *input, float *output, int input_len);
|
||||
/**@}*/
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief 16 bit signed fixed point multi-rate FIR filter
|
||||
*
|
||||
* Function implements FIR filter with decimation
|
||||
* The extension (_ansi) uses ANSI C and could be compiled and run on any platform.
|
||||
* The extension (_ae32) is optimized for ESP32 chip.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
* @param input: input array
|
||||
* @param output: array with the result of the FIR filter
|
||||
* @param input_len: length of the input array
|
||||
*
|
||||
* @return: function returns the number of samples stored in the output array
|
||||
* depends on the previous state value could be [0..len*intepr/decimation]
|
||||
*/
|
||||
int32_t dsps_firmr_s16_ansi(fir_s16_t *fir, const int16_t *input, int16_t *output, int32_t input_len);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief support arrays freeing function
|
||||
*
|
||||
* Function frees all the arrays, which were created during the initialization of the fir_s16_t structure
|
||||
* 1. frees allocated memory for rounding buffer, for the purposes of esp32s3 ee.ld.accx.ip assembly instruction
|
||||
* 2. frees allocated memory in case the delay line is NULL
|
||||
* 3. frees allocated memory in case the length of the filter (and the delay line) is not divisible by 8
|
||||
* and new delay line and filter coefficients arrays are created for the purpose of the esp32s3 assembly
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
*/
|
||||
esp_err_t dsps_fird_s16_aexx_free(fir_s16_t *fir);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief support arrays freeing function
|
||||
*
|
||||
* Function frees the delay line arrays, if it was allocated by the init functions.
|
||||
*
|
||||
* @param fir: pointer to fir filter structure, that must be initialized before
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
*/
|
||||
esp_err_t dsps_fir_f32_free(fir_f32_t *fir);
|
||||
/**@}*/
|
||||
|
||||
|
||||
/**@{*/
|
||||
/**
|
||||
* @brief Array reversal
|
||||
*
|
||||
* Function reverses 16-bit long array members for the purpose of the dsps_fird_s16_aes3 implementation
|
||||
* The function has to be called either during the fir struct initialization or every time the coefficients change
|
||||
*
|
||||
* @param arr: pointer to the array to be reversed
|
||||
* @param len: length of the array to be reversed
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK on success
|
||||
*/
|
||||
esp_err_t dsps_16_array_rev(int16_t *arr, int16_t len);
|
||||
/**@}*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#if (dsps_fir_f32_ae32_enabled == 1)
|
||||
#define dsps_fir_f32 dsps_fir_f32_ae32
|
||||
#elif (dsps_fir_f32_aes3_enabled == 1)
|
||||
#define dsps_fir_f32 dsps_fir_f32_aes3
|
||||
#else
|
||||
#define dsps_fir_f32 dsps_fir_f32_ansi
|
||||
#endif
|
||||
|
||||
#if (dsps_fird_f32_aes3_enabled == 1)
|
||||
#define dsps_fird_f32 dsps_fird_f32_aes3
|
||||
#define dsps_firmr_f32 dsps_firmr_f32_ansi
|
||||
#elif (dsps_fird_f32_ae32_enabled == 1)
|
||||
#define dsps_fird_f32 dsps_fird_f32_ae32
|
||||
#define dsps_firmr_f32 dsps_firmr_f32_ansi
|
||||
#elif (dsps_fird_f32_arp4_enabled == 1)
|
||||
#define dsps_fird_f32 dsps_fird_f32_arp4
|
||||
#define dsps_firmr_f32 dsps_firmr_f32_ansi
|
||||
#else
|
||||
#define dsps_fird_f32 dsps_fird_f32_ansi
|
||||
#define dsps_firmr_f32 dsps_firmr_f32_ansi
|
||||
#endif
|
||||
|
||||
|
||||
#if (dsps_fird_s16_ae32_enabled == 1)
|
||||
#define dsps_fird_s16 dsps_fird_s16_ae32
|
||||
#define dsps_firmr_s16 dsps_firmr_s16_ansi
|
||||
#elif (dsps_fird_s16_aes3_enabled == 1)
|
||||
#define dsps_fird_s16 dsps_fird_s16_aes3
|
||||
#define dsps_firmr_s16 dsps_firmr_s16_ansi
|
||||
#elif (dsps_fird_s16_arp4_enabled == 1)
|
||||
#define dsps_fird_s16 dsps_fird_s16_arp4
|
||||
#define dsps_firmr_s16 dsps_firmr_s16_ansi
|
||||
#else
|
||||
#define dsps_fird_s16 dsps_fird_s16_ansi
|
||||
#define dsps_firmr_s16 dsps_firmr_s16_ansi
|
||||
#endif
|
||||
|
||||
#else // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#define dsps_fir_f32 dsps_fir_f32_ansi
|
||||
#define dsps_fird_f32 dsps_fird_f32_ansi
|
||||
#define dsps_firmr_f32 dsps_firmr_f32_ansi
|
||||
#define dsps_fird_s16 dsps_fird_s16_ansi
|
||||
#define dsps_firmr_s16 dsps_firmr_s16_ansi
|
||||
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
|
||||
#endif // _dsps_fir_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
#ifndef _dsps_fir_platform_H_
|
||||
#define _dsps_fir_platform_H_
|
||||
|
||||
#include "sdkconfig.h"
|
||||
|
||||
#ifdef __XTENSA__
|
||||
#include <xtensa/config/core-isa.h>
|
||||
#include <xtensa/config/core-matmap.h>
|
||||
|
||||
|
||||
#if ((XCHAL_HAVE_FP == 1) && (XCHAL_HAVE_LOOPS == 1))
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32S3
|
||||
#define dsps_fird_f32_aes3_enabled 1
|
||||
#define dsps_fird_f32_ae32_enabled 1
|
||||
#define dsps_fird_s16_aes3_enabled 1
|
||||
#define dsps_fird_s16_ae32_enabled 0
|
||||
#define dsps_fir_f32_aes3_enabled 1
|
||||
#define dsps_fir_f32_ae32_enabled 0
|
||||
#else
|
||||
#define dsps_fird_f32_ae32_enabled 1
|
||||
#define dsps_fird_s16_aes3_enabled 0
|
||||
#define dsps_fird_s16_ae32_enabled 1
|
||||
#define dsps_fir_f32_aes3_enabled 0
|
||||
#define dsps_fir_f32_ae32_enabled 1
|
||||
#endif
|
||||
|
||||
#endif //
|
||||
#endif // __XTENSA__
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32P4 || CONFIG_IDF_TARGET_ESP32S31
|
||||
#ifdef CONFIG_DSP_OPTIMIZED
|
||||
#define dsps_fird_f32_arp4_enabled 1
|
||||
#define dsps_fird_s16_arp4_enabled 1
|
||||
#else
|
||||
#define dsps_fird_f32_arp4_enabled 0
|
||||
#define dsps_fird_s16_arp4_enabled 0
|
||||
#endif // CONFIG_DSP_OPTIMIZED
|
||||
#endif
|
||||
#endif // _dsps_fir_platform_H_
|
||||
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2018-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef _dsps_resampl_H_
|
||||
#define _dsps_resampl_H_
|
||||
|
||||
#include "dsp_err.h"
|
||||
#include "dsps_fir.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Data struct of f32 multi-rate resampler
|
||||
*
|
||||
* This structure is used by a resampler internally. A user should access this structure only in case of
|
||||
* extensions for the DSP Library.
|
||||
* To initialize the resampler, use the dsps_resampler_mr_init() function.
|
||||
* To execute the resampler, use the dsps_resampler_mr_exec() function.
|
||||
* To free the resampler, use the dsps_resampler_mr_free() function.
|
||||
*/
|
||||
typedef struct dsps_resample_mr_s {
|
||||
void *filter; /*!< FIR filter structure*/
|
||||
float samplerate_factor; /*!< Sample rate factor */
|
||||
int16_t decim_c; /*!< Decimation factor for ceil */
|
||||
int16_t decim_f; /*!< Decimation factor for floor */
|
||||
int16_t active_decim; /*!< Active decimation factor */
|
||||
float decim_avg_in; /*!< Average decimation factor for input */
|
||||
float decim_avg_out; /*!< Average decimation factor for output */
|
||||
int32_t (*dsps_firmr)(void *fir, void *input, void *output, int32_t length); /*!< Pointer to FIR filter function */
|
||||
int32_t fixed_point; /*!< Fixed point flag */
|
||||
} dsps_resample_mr_t;
|
||||
|
||||
/**
|
||||
* @brief Data struct of f32 poly-phase resampler
|
||||
*
|
||||
* This structure is used by a poly-phase resampler internally. A user should access this structure only in case of
|
||||
* extensions for the DSP Library.
|
||||
* To initialize the resampler, use the dsps_resampler_ph_init() function.
|
||||
* To execute the resampler, use the dsps_resampler_ph_exec() function.
|
||||
*
|
||||
* It is possible to correct the sample rate by adjust the phase parameter "on the fly".
|
||||
*/
|
||||
typedef struct dsps_resample_ph_s {
|
||||
float phase;
|
||||
float step;
|
||||
float delay[4];
|
||||
int delay_pos;
|
||||
} dsps_resample_ph_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize the multi-rate resampler
|
||||
*
|
||||
* @param resampler Pointer to the resampler structure
|
||||
* @param coeffs Pointer to the filter coefficients (float or int16_t for fixed point)
|
||||
* @param length Length of the filter coefficients
|
||||
* @param interp Interpolation factor for the filter
|
||||
* @param samplerate_factor Sample rate factor
|
||||
* @param fixed_point Fixed point flag, 0 for float, 1 for fixed point
|
||||
* @param shift Shift value for fixed point
|
||||
*
|
||||
* @return ESP_OK on success, ESP_ERR_INVALID_ARG if the parameters are invalid
|
||||
*/
|
||||
esp_err_t dsps_resampler_mr_init(dsps_resample_mr_t *resampler, void *coeffs, int16_t length, int16_t interp, float samplerate_factor, int32_t fixed_point, int16_t shift);
|
||||
|
||||
/**
|
||||
* @brief Execute the multi-rate resampler
|
||||
*
|
||||
* This function executes the multi-rate resampler. The input and output buffers can be the same.
|
||||
* The function based on multi-rate FIR filter.
|
||||
* The decimation factor is updated for each execution. The current decimation factor calculated
|
||||
* as division of average input and average output sample rate.
|
||||
* To correct the output sample rate, the length_correction parameter is used. To increase
|
||||
* the output sample rate, the length_correction parameter should be positive. To decrease
|
||||
* the output sample rate, the length_correction parameter should be negative. This parameter
|
||||
* is used when the input and output sample rates are comes from different sources.
|
||||
*
|
||||
* @param resampler: Pointer to the resampler structure
|
||||
* @param input: Pointer to the input buffer
|
||||
* @param output: Pointer to the output buffer
|
||||
* @param length: Length of the input buffers
|
||||
* @param length_correction: Length correction for the current execution. Positive value
|
||||
* increases the output sample rate, negative value decreases
|
||||
* the output sample rate.
|
||||
*
|
||||
* @return Length of the output buffer
|
||||
*/
|
||||
int32_t dsps_resampler_mr_exec(dsps_resample_mr_t *resampler, void *input, void *output, int32_t length, int32_t length_correction);
|
||||
|
||||
/**
|
||||
* @brief Free the multi-rate resampler
|
||||
*
|
||||
* @param resampler Pointer to the resampler structure
|
||||
*/
|
||||
void dsps_resampler_mr_free(dsps_resample_mr_t *resampler);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Initialize the poly-phase resampler
|
||||
*
|
||||
* The poly-phase resampler is a implementation of the poly-phase Farrow filter
|
||||
* that use cubic interpolation with 4 coefficients.
|
||||
*
|
||||
* @param resampler Pointer to the resampler structure
|
||||
* @param samplerate_factor Sample rate factor
|
||||
*
|
||||
* @return ESP_OK on success, ESP_ERR_INVALID_ARG if the parameters are invalid
|
||||
*/
|
||||
esp_err_t dsps_resampler_ph_init(dsps_resample_ph_t *resampler, float samplerate_factor);
|
||||
|
||||
/**
|
||||
* @brief Execute the poly-phase resampler
|
||||
*
|
||||
* @param resampler Pointer to the resampler structure
|
||||
* @param input Pointer to the input buffer
|
||||
* @param output Pointer to the output buffer
|
||||
* @param length Length of the input buffer
|
||||
*
|
||||
* @return Length of the output buffer
|
||||
*/
|
||||
|
||||
int32_t dsps_resampler_ph_exec(dsps_resample_ph_t *resampler, float *input, float *output, int32_t length);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif //_dsps_resampl_H_
|
||||
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_resampler.h"
|
||||
#include <math.h>
|
||||
|
||||
static const float decim_avg_coeff = 0.9999;
|
||||
|
||||
esp_err_t dsps_resampler_mr_init(dsps_resample_mr_t *resampler, void *coeffs, int16_t length, int16_t interp, float samplerate_factor, int32_t fixed_point, int16_t shift)
|
||||
{
|
||||
esp_err_t ret = ESP_OK;
|
||||
|
||||
if (samplerate_factor < 1) {
|
||||
return ESP_ERR_DSP_INVALID_PARAM;
|
||||
}
|
||||
|
||||
resampler->fixed_point = fixed_point;
|
||||
resampler->samplerate_factor = samplerate_factor;
|
||||
float decimation = (float)interp / samplerate_factor;
|
||||
resampler->decim_f = floor(decimation);
|
||||
resampler->decim_c = ceil(decimation);
|
||||
resampler->active_decim = resampler->decim_c;
|
||||
resampler->decim_avg_in = 1;
|
||||
resampler->decim_avg_out = resampler->samplerate_factor;
|
||||
|
||||
if (fixed_point == 0) {
|
||||
resampler->dsps_firmr = (int32_t (*)(void *, void *, void *, int32_t))dsps_firmr_f32;
|
||||
resampler->filter = (void *)malloc(sizeof(fir_f32_t));
|
||||
|
||||
ret = dsps_firmr_init_f32((fir_f32_t *)resampler->filter, coeffs, NULL, length, interp, resampler->decim_c, 0);
|
||||
} else {
|
||||
resampler->dsps_firmr = (int32_t (*)(void *, void *, void *, int32_t))dsps_firmr_s16;
|
||||
resampler->filter = (void *)malloc(sizeof(fir_s16_t));
|
||||
ret = dsps_firmr_init_s16((fir_s16_t *)resampler->filter, coeffs, NULL, length, interp, resampler->decim_c, 0, shift);
|
||||
}
|
||||
|
||||
if (ret != ESP_OK) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t dsps_resampler_mr_exec(dsps_resample_mr_t *resampler, void *input, void *output, int32_t length, int32_t length_correction)
|
||||
{
|
||||
int32_t result = 0;
|
||||
|
||||
if (resampler->fixed_point == 0) {
|
||||
((fir_f32_t *)resampler->filter)->decim = resampler->active_decim;
|
||||
} else {
|
||||
((fir_s16_t *)resampler->filter)->decim = resampler->active_decim;
|
||||
}
|
||||
|
||||
result = resampler->dsps_firmr(resampler->filter, input, output, length);
|
||||
|
||||
resampler->decim_avg_in = resampler->decim_avg_in * decim_avg_coeff + (float)length * (1 - decim_avg_coeff);
|
||||
resampler->decim_avg_out = resampler->decim_avg_out * decim_avg_coeff + (float)(result - length_correction) * (1 - decim_avg_coeff);
|
||||
|
||||
if ((resampler->decim_avg_in * resampler->samplerate_factor) > (resampler->decim_avg_out)) {
|
||||
resampler->active_decim = resampler->decim_f;
|
||||
} else {
|
||||
resampler->active_decim = resampler->decim_c;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void dsps_resampler_mr_free(dsps_resample_mr_t *resampler)
|
||||
{
|
||||
if (resampler->fixed_point == 0) {
|
||||
dsps_fir_f32_free((fir_f32_t *)(resampler->filter));
|
||||
} else {
|
||||
dsps_fird_s16_aexx_free((fir_s16_t *)(resampler->filter));
|
||||
}
|
||||
free(resampler->filter);
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "dsps_resampler.h"
|
||||
#include <math.h>
|
||||
|
||||
esp_err_t dsps_resampler_ph_init(dsps_resample_ph_t *resampler, float samplerate_factor)
|
||||
{
|
||||
esp_err_t ret = ESP_OK;
|
||||
|
||||
if (samplerate_factor < 1) {
|
||||
return ESP_ERR_DSP_INVALID_PARAM;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
resampler->delay[i] = 0;
|
||||
}
|
||||
resampler->delay_pos = 0;
|
||||
|
||||
resampler->step = 1.0f / samplerate_factor;
|
||||
resampler->phase = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t dsps_resampler_ph_exec(dsps_resample_ph_t *resampler, float *input, float *output, int32_t length)
|
||||
{
|
||||
int32_t result = 0;
|
||||
int input_pos = 0;
|
||||
float in_x[4];
|
||||
|
||||
int in_pos = 0;
|
||||
for (int pos = resampler->delay_pos; pos < 4; pos++) {
|
||||
in_x[in_pos++] = resampler->delay[pos];
|
||||
}
|
||||
for (int pos = 0; pos < resampler->delay_pos; pos++) {
|
||||
in_x[in_pos++] = resampler->delay[pos];
|
||||
}
|
||||
|
||||
while (input_pos < length) {
|
||||
|
||||
float c0 = in_x[1];
|
||||
float c1 = 0.5 * (in_x[2] - in_x[0]);
|
||||
float c2 = in_x[0] - 2.5 * in_x[1] + 2 * in_x[2] - 0.5 * in_x[3];
|
||||
float c3 = 0.5 * (in_x[3] - in_x[0]) + 1.5 * (in_x[1] - in_x[2]);
|
||||
|
||||
output[result] = c0 + resampler->phase * (c1 + resampler->phase * (c2 + resampler->phase * c3));
|
||||
result++;
|
||||
resampler->phase += resampler->step;
|
||||
|
||||
while (resampler->phase >= 1) {
|
||||
resampler->phase -= 1;
|
||||
resampler->delay[resampler->delay_pos] = input[input_pos];
|
||||
resampler->delay_pos++;
|
||||
if (resampler->delay_pos >= 4) {
|
||||
resampler->delay_pos = 0;
|
||||
}
|
||||
input_pos++;
|
||||
in_pos = 0;
|
||||
for (int pos = resampler->delay_pos; pos < 4; pos++) {
|
||||
in_x[in_pos++] = resampler->delay[pos];
|
||||
}
|
||||
for (int pos = 0; pos < resampler->delay_pos; pos++) {
|
||||
in_x[in_pos++] = resampler->delay[pos];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,134 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_fir_f32_aexx";
|
||||
|
||||
__attribute__((aligned(16)))
|
||||
static float x[1024];
|
||||
__attribute__((aligned(16)))
|
||||
static float y[1024];
|
||||
__attribute__((aligned(16)))
|
||||
static float y_compare[1024];
|
||||
|
||||
__attribute__((aligned(16)))
|
||||
static float coeffs[32];
|
||||
__attribute__((aligned(16)))
|
||||
static float delay[32 + 4];
|
||||
__attribute__((aligned(16)))
|
||||
static float delay_compare[32];
|
||||
|
||||
TEST_CASE("dsps_fir_f32_aexx functionality", "[dsps]")
|
||||
{
|
||||
// In the test we generate filter with cutt off frequency 0.1
|
||||
// and then filtering 0.1 and 0.3 frequencis.
|
||||
// Result must be better then 24 dB
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
|
||||
fir_f32_t fir1;
|
||||
fir_f32_t fir2;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = (fir_len - i - 1);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
esp_err_t err = dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
TEST_ESP_OK(err);
|
||||
err = dsps_fir_f32(&fir1, x, y, len);
|
||||
TEST_ESP_OK(err);
|
||||
|
||||
for (int i = 0 ; i < fir_len * 3 ; i++) {
|
||||
ESP_LOGD(TAG, "fir[%i] = %f", i, y[i]);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
if (y[i] != i) {
|
||||
TEST_ASSERT_EQUAL(y[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
x[0] = 1;
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
dsps_fir_init_f32(&fir2, coeffs, delay_compare, fir_len);
|
||||
|
||||
dsps_fir_f32(&fir1, x, y, len);
|
||||
dsps_fir_f32_ansi(&fir2, x, y_compare, len);
|
||||
dsps_fir_f32(&fir1, x, y, len);
|
||||
dsps_fir_f32_ansi(&fir2, x, y_compare, len);
|
||||
dsps_fir_f32(&fir1, x, y, len);
|
||||
dsps_fir_f32_ansi(&fir2, x, y_compare, len);
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
if (y[i] != y_compare[i]) {
|
||||
TEST_ASSERT_EQUAL(y[i], y_compare[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("dsps_fir_f32_aexx benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int repeat_count = 1;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fir_f32(&fir1, x, y, len);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_fir_f32_aexx - %f per sample for for %i coefficients, %f per tap \n", cycles, fir_len, cycles / (float)fir_len);
|
||||
|
||||
float min_exec = 3;
|
||||
float max_exec = 800;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,124 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_fir_f32_ansi";
|
||||
|
||||
static float x[1024];
|
||||
static float y[1024];
|
||||
|
||||
static float coeffs[32];
|
||||
static float delay[32 + 4];
|
||||
|
||||
TEST_CASE("dsps_fir_f32_ansi functionality", "[dsps]")
|
||||
{
|
||||
// In the test we generate filter with cutt off frequency 0.1
|
||||
// and then filtering 0.1 and 0.3 frequencis.
|
||||
// Result must be better then 24 dB
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = (fir_len - i - 1);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
dsps_fir_f32_ansi(&fir1, x, y, len);
|
||||
|
||||
for (int i = 0 ; i < fir_len * 3 ; i++) {
|
||||
ESP_LOGD(TAG, "fir[%i] = %f", i, y[i]);
|
||||
}
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
if (y[i] != i) {
|
||||
TEST_ASSERT_EQUAL(y[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
// Check even length
|
||||
#ifndef CONFIG_IDF_TARGET_ESP32S3
|
||||
fir_len--;
|
||||
#endif
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = (fir_len - i - 1);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
dsps_fir_f32_ansi(&fir1, x, y, len);
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
if (y[i] != i) {
|
||||
TEST_ASSERT_EQUAL(y[i], i);
|
||||
}
|
||||
}
|
||||
for (int i = fir_len ; i < len ; i++) {
|
||||
if (y[i] != 0) {
|
||||
TEST_ASSERT_EQUAL(y[i], 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("dsps_fir_f32_ansi benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int repeat_count = 1;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fir_f32_ansi(&fir1, x, y, len);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_fir_f32_ansi - %f per sample for for %i coefficients, %f per tap \n", cycles, fir_len, cycles / (float)fir_len);
|
||||
|
||||
float min_exec = 10;
|
||||
float max_exec = 800;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_fird_f32";
|
||||
|
||||
static float x[1024];
|
||||
static float y[1024];
|
||||
static float y_compare[1024];
|
||||
|
||||
static float coeffs[32];
|
||||
static float delay[32];
|
||||
static float delay_compare[32];
|
||||
|
||||
TEST_CASE("dsps_fird_f32_aexx functionality", "[dsps]")
|
||||
{
|
||||
// In the test we generate filter with cutt off frequency 0.1
|
||||
// and then filtering 0.1 and 0.3 frequencis.
|
||||
// Result must be better then 24 dB
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int decim = 4;
|
||||
|
||||
fir_f32_t fir1;
|
||||
fir_f32_t fir2;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
coeffs[0] = 1;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
|
||||
dsps_fird_init_f32(&fir1, coeffs, delay, fir_len, decim);
|
||||
dsps_fird_init_f32(&fir2, coeffs, delay_compare, fir_len, decim);
|
||||
int total1 = dsps_fird_f32(&fir1, x, y, len / decim);
|
||||
int total2 = dsps_fird_f32_ansi(&fir2, x, y_compare, len / decim);
|
||||
total1 += dsps_fird_f32(&fir1, x, y, len / decim);
|
||||
total2 += dsps_fird_f32_ansi(&fir2, x, y_compare, len / decim);
|
||||
total1 += dsps_fird_f32(&fir1, x, y, len / decim);
|
||||
total2 += dsps_fird_f32_ansi(&fir2, x, y_compare, len / decim);
|
||||
ESP_LOGI(TAG, "Total result = %i, expected %i from %i", total1, total2, len);
|
||||
TEST_ASSERT_EQUAL(total1, total2);
|
||||
for (int i = 0 ; i < total1 ; i++) {
|
||||
ESP_LOGD(TAG, "data[%i] = %f expected %f\n", i, y[i], y_compare[i]);
|
||||
}
|
||||
for (int i = 0 ; i < total1 ; i++) {
|
||||
if (y[i] != y_compare[i]) {
|
||||
TEST_ASSERT_EQUAL(y[i], y_compare[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_f32_aexx benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int repeat_count = 1;
|
||||
int decim = 4;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fird_init_f32(&fir1, coeffs, delay, fir_len, decim);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fird_f32(&fir1, x, y, len / decim);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_fir_f32_ae32 - %f per sample for for %i coefficients, %f per decim tap\n", cycles, fir_len, cycles / (float)fir_len * decim);
|
||||
ESP_LOGI(TAG, "Total cycles = %i", end_b - start_b);
|
||||
float min_exec = 3;
|
||||
float max_exec = 300;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_fird_f32_ansi";
|
||||
|
||||
static float x[1024];
|
||||
static float y[1024];
|
||||
|
||||
static float coeffs[32];
|
||||
static float delay[32];
|
||||
|
||||
TEST_CASE("dsps_fird_f32_ansi functionality", "[dsps]")
|
||||
{
|
||||
// In the test we generate filter with cutt off frequency 0.1
|
||||
// and then filtering 0.1 and 0.3 frequencis.
|
||||
// Result must be better then 24 dB
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int decim = 4;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = 0;
|
||||
}
|
||||
coeffs[0] = 1;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = (i) % decim;
|
||||
}
|
||||
|
||||
dsps_fird_init_f32(&fir1, coeffs, delay, fir_len, decim);
|
||||
int total = dsps_fird_f32_ansi(&fir1, x, y, len / decim);
|
||||
ESP_LOGI(TAG, "Total result = %i from %i", total, len);
|
||||
TEST_ASSERT_EQUAL(total, len / decim);
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
ESP_LOGD(TAG, "data[%i] = %f\n", i, y[i]);
|
||||
}
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
TEST_ASSERT_EQUAL(y[i], 0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_f32_ansi benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int repeat_count = 1;
|
||||
int decim = 4;
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_fird_init_f32(&fir1, coeffs, delay, fir_len, decim);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fird_f32_ansi(&fir1, x, y, len / decim);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_fir_f32_ansi - %f per sample for for %i coefficients, %f per decim tap \n", cycles, fir_len, cycles / (float)fir_len * decim);
|
||||
float min_exec = 10;
|
||||
float max_exec = 300;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,371 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
#include <stdbool.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_dsp.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_wind.h"
|
||||
#include "dsps_view.h"
|
||||
#include "dsps_fft2r.h"
|
||||
|
||||
#define COEFFS 256
|
||||
#define N_IN_SAMPLES 4096
|
||||
#define DECIMATION 2
|
||||
#define Q15_MAX INT16_MAX
|
||||
#define LEAKAGE_BINS 10
|
||||
#define FIR_BUFF_LEN 16
|
||||
|
||||
#define MAX_FIR_LEN 64
|
||||
|
||||
static const char *TAG = "dsps_fird_s16_aexx";
|
||||
|
||||
const static int32_t len = N_IN_SAMPLES;
|
||||
const static int32_t N_FFT = (N_IN_SAMPLES / DECIMATION);
|
||||
const static int16_t decim = DECIMATION;
|
||||
const static int16_t fir_len = COEFFS;
|
||||
const static int32_t fir_buffer = (N_IN_SAMPLES + FIR_BUFF_LEN);
|
||||
|
||||
|
||||
// error messages for the init functions
|
||||
static void error_msg_handler(fir_s16_t *fir, esp_err_t status)
|
||||
{
|
||||
|
||||
if (status != ESP_OK) {
|
||||
dsps_fird_s16_aexx_free(fir);
|
||||
|
||||
switch (status) {
|
||||
case ESP_ERR_DSP_INVALID_LENGTH:
|
||||
TEST_ASSERT_MESSAGE(false, "Number of the coefficients must be higher than 1");
|
||||
break;
|
||||
case ESP_ERR_DSP_ARRAY_NOT_ALIGNED:
|
||||
TEST_ASSERT_MESSAGE(false, "Delay line or (and) coefficients arrays not aligned");
|
||||
break;
|
||||
case ESP_ERR_DSP_PARAM_OUTOFRANGE:
|
||||
TEST_ASSERT_MESSAGE(false, "Start position or (and) Decimation ratio or (and) Shift out of range");
|
||||
break;
|
||||
default:
|
||||
TEST_ASSERT_MESSAGE(false, "Unspecified error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_s16_aexx functionality", "[dsps]")
|
||||
{
|
||||
|
||||
const int32_t max_len[2] = {2048, 2520}; // 2520 can be divided by 3, 6, 9, 12, 15, 18 and 21
|
||||
const int16_t max_dec[2] = {32, 21};
|
||||
const int16_t min_dec[2] = {2, 3};
|
||||
const int16_t shift_vals[17] = {-15, 0, 15};
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, max_len[1] * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, max_len[1] * sizeof(int16_t));
|
||||
int16_t *y_compare = (int16_t *)memalign(16, max_len[1] * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
int16_t *coeffs_aexx = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
int16_t *coeffs_ansi = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
|
||||
int16_t *delay = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
int16_t *delay_compare = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
|
||||
int32_t combinations = 0;
|
||||
esp_err_t status1 = ESP_OK, status2 = ESP_OK;
|
||||
fir_s16_t fir1, fir2;
|
||||
|
||||
for (int i = 0 ; i < MAX_FIR_LEN ; i++) {
|
||||
coeffs[i] = i + 0x100;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < max_len[1] ; i++) {
|
||||
x[i] = 0x10;
|
||||
}
|
||||
|
||||
for (int variations = 0; variations < 2; variations++) {
|
||||
|
||||
ESP_LOGI(TAG, ": %"PRId32" input samples, coefficients range from 2 to %"PRId16", decimation range from %"PRId16" to %"PRId16", shift in range from -40 to 40 and start positions within the coeffs range",
|
||||
max_len[variations], (int16_t)MAX_FIR_LEN, min_dec[variations], max_dec[variations]);
|
||||
|
||||
// decimation increment is set as dec * 2 for input data length 2048 (2, 4, 8, 16, 32)
|
||||
// as dec + 3 for input data length 2520 (3, 6, 9, 12, 15, 18, 21)
|
||||
for (int16_t dec = min_dec[variations]; dec <= max_dec[variations]; ((variations) ? (dec += 3) : (dec *= 2)) ) {
|
||||
|
||||
const int32_t loop_len = max_len[variations] / dec;
|
||||
const int16_t start_position = 0;
|
||||
|
||||
for (int16_t fir_length = 2; fir_length <= MAX_FIR_LEN; fir_length += 16) {
|
||||
|
||||
for (int16_t shift_amount = 0; shift_amount < sizeof(shift_vals) / sizeof(uint16_t); shift_amount++) {
|
||||
|
||||
for (int k = 0 ; k < fir_length; k++) {
|
||||
coeffs_ansi[k] = coeffs[k];
|
||||
coeffs_aexx[k] = coeffs[k];
|
||||
}
|
||||
|
||||
status1 = dsps_fird_init_s16(&fir1, coeffs_aexx, delay, fir_length, dec, start_position, shift_vals[shift_amount]);
|
||||
error_msg_handler(&fir1, status1);
|
||||
status2 = dsps_fird_init_s16(&fir2, coeffs_ansi, delay_compare, fir_length, dec, start_position, shift_vals[shift_amount]);
|
||||
error_msg_handler(&fir2, status2);
|
||||
|
||||
#if(dsps_fird_s16_aes3_enabled)
|
||||
dsps_16_array_rev(fir1.coeffs, fir1.coeffs_len); // coefficients are being reverted for the purposes of the aes3 TIE implementation
|
||||
#endif
|
||||
|
||||
for (int16_t start_pos = 0; start_pos < dec; start_pos++) {
|
||||
|
||||
fir1.d_pos = start_pos;
|
||||
fir2.d_pos = start_pos;
|
||||
|
||||
for (int j = 0; j < fir1.coeffs_len; j++) {
|
||||
fir1.delay[j] = 0;
|
||||
fir2.delay[j] = 0;
|
||||
}
|
||||
|
||||
fir1.pos = 0;
|
||||
fir2.pos = 0;
|
||||
|
||||
const int32_t total1 = dsps_fird_s16(&fir1, x, y, loop_len);
|
||||
const int32_t total2 = dsps_fird_s16_ansi(&fir2, x, y_compare, loop_len);
|
||||
|
||||
TEST_ASSERT_EQUAL(total1, total2);
|
||||
for (int i = 0 ; i < total1 ; i++) {
|
||||
TEST_ASSERT_EQUAL(y[i], y_compare[i]);
|
||||
}
|
||||
|
||||
combinations++;
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
dsps_fird_s16_aexx_free(&fir2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ESP_LOGI(TAG, ": %"PRId32" total filter combinations\n", combinations);
|
||||
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
free(y_compare);
|
||||
free(coeffs_ansi);
|
||||
free(coeffs_aexx);
|
||||
free(delay_compare);
|
||||
|
||||
}
|
||||
|
||||
TEST_CASE("dsps_fird_s16_aexx benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
const int16_t local_dec = 2;
|
||||
const int32_t local_len = (len % 16) ? (4096) : (len); // length must be divisible by 16
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, local_len * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, local_len * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
int16_t *delay = (int16_t *)memalign(16, MAX_FIR_LEN * sizeof(int16_t));
|
||||
|
||||
const int repeat_count = 100;
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
int32_t loop_len = 0;
|
||||
|
||||
fir_s16_t fir;
|
||||
esp_err_t status = ESP_OK;
|
||||
|
||||
status = dsps_fird_init_s16(&fir, coeffs, delay, MAX_FIR_LEN, local_dec, start_pos, shift);
|
||||
error_msg_handler(&fir, status);
|
||||
|
||||
#if(dsps_fird_s16_aes3_enabled)
|
||||
dsps_16_array_rev(fir.coeffs, fir.coeffs_len);
|
||||
#endif
|
||||
|
||||
// Test for decimations 2, 4, 8, 16
|
||||
for (int dec = local_dec; dec <= 16 ; dec *= 2) {
|
||||
|
||||
loop_len = (local_len / dec);
|
||||
fir.decim = dec;
|
||||
|
||||
const unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int j = 0 ; j < repeat_count ; j++) {
|
||||
dsps_fird_s16(&fir, x, y, loop_len);
|
||||
}
|
||||
const unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
const float total_b = end_b - start_b;
|
||||
const float cycles = total_b / (float)(repeat_count);
|
||||
const float cycles_per_sample = total_b / (float)(local_len * repeat_count);
|
||||
const float cycles_per_decim_tap = cycles_per_sample / (float)(fir.coeffs_len * fir.decim);
|
||||
|
||||
ESP_LOGI(TAG, ": %.2f total cycles, %.2f cycles per sample, %.2f per decim tap, for %"PRId32" input samples, %"PRId16" coefficients and decimation %"PRId16"\n",
|
||||
cycles, cycles_per_sample, cycles_per_decim_tap, local_len, (int16_t)MAX_FIR_LEN, fir.decim);
|
||||
|
||||
const float min_exec = (((local_len / fir.decim) * fir.coeffs_len) / 2);
|
||||
const float max_exec = min_exec * 20;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
|
||||
TEST_CASE("dsps_fird_s16_aexx noise_snr", "[dsps]")
|
||||
{
|
||||
|
||||
// In the SNR-noise test we are generating a sine wave signal, filtering the signal using a fixed point FIRD filter
|
||||
// and do the FFT of the filtered signal. Afterward, a noise and SNR calculated from the FFT spectrum
|
||||
|
||||
// FIR Coeffs
|
||||
int16_t *s_coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point coefficients
|
||||
int16_t *delay_line = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point delay line
|
||||
float *f_coeffs = (float *)memalign(16, fir_len * sizeof(float)); // floating point coefficients
|
||||
|
||||
// Coefficients windowing
|
||||
dsps_wind_hann_f32(f_coeffs, fir_len);
|
||||
const float fir_order = (float)fir_len - 1;
|
||||
const float ft = 0.25; // Transition frequency
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
f_coeffs[i] *= sinf((2 * M_PI * ft * (i - fir_order / 2))) / (M_PI * (i - fir_order / 2));
|
||||
}
|
||||
|
||||
// FIR coefficients conversion to q15
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
s_coeffs[i] = f_coeffs[i] * Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_coeffs);
|
||||
|
||||
// Signal generation
|
||||
const float amplitude = 0.9;
|
||||
const float frequency = 0.05;
|
||||
const float phase = 0;
|
||||
float *f_in_signal = (float *)memalign(16, fir_buffer * sizeof(float));
|
||||
dsps_tone_gen_f32(f_in_signal, fir_buffer, amplitude, frequency, phase);
|
||||
|
||||
// Input signal conversion to q15
|
||||
int16_t *fir_x = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
int16_t *fir_y = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
int16_t *fir_y2 = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
for (int i = 0; i < fir_buffer; i++) {
|
||||
fir_x[i] = f_in_signal[i] * (int16_t)Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_in_signal);
|
||||
|
||||
// FIR
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
const int32_t loop_len = (int32_t)(fir_buffer / decim); // loop_len result must be without remainder
|
||||
fir_s16_t fir;
|
||||
esp_err_t status = dsps_fird_init_s16(&fir, s_coeffs, delay_line, fir_len, decim, start_pos, shift);
|
||||
fir_s16_t fir2;
|
||||
esp_err_t status2 = dsps_fird_init_s16(&fir2, s_coeffs, delay_line, fir_len, decim, start_pos, shift);
|
||||
error_msg_handler(&fir, status);
|
||||
error_msg_handler(&fir2, status2);
|
||||
|
||||
#if(dsps_fird_s16_aes3_enabled || dsps_fird_s16_arv4_enabled)
|
||||
dsps_16_array_rev(fir.coeffs, fir.coeffs_len);
|
||||
#endif
|
||||
|
||||
dsps_fird_s16(&fir, fir_x, fir_y, loop_len);
|
||||
dsps_fird_s16_ansi(&fir2, fir_x, fir_y2, loop_len);
|
||||
for (int i = 0 ; i < loop_len ; i++) {
|
||||
ESP_LOGD(TAG, "Data[%i] = %i vs %i, diff = %i", i, fir_y[i], fir_y2[i], fir_y[i] - fir_y2[i]);
|
||||
}
|
||||
|
||||
free(delay_line);
|
||||
free(s_coeffs);
|
||||
free(fir_x);
|
||||
|
||||
// FIR Output conversion to float
|
||||
const unsigned int ignored_fir_samples = (FIR_BUFF_LEN / 2) - 1;
|
||||
float *fir_output = (float *)memalign(16, len * sizeof(float));
|
||||
for (int i = 0; i < N_FFT; i++) {
|
||||
fir_output[i] = (float)(fir_y[ignored_fir_samples + i] / (float)Q15_MAX);
|
||||
}
|
||||
|
||||
free(fir_y);
|
||||
|
||||
// Signal windowing
|
||||
float *window = (float *)memalign(16, N_FFT * sizeof(float));
|
||||
dsps_wind_blackman_f32(window, N_FFT);
|
||||
|
||||
// Prepare FFT input, real and imaginary part
|
||||
const int32_t fft_data_len = (N_IN_SAMPLES / DECIMATION) * 2;
|
||||
float *fft_data = (float *)memalign(16, fft_data_len * sizeof(float));
|
||||
for (int i = 0 ; i < N_FFT ; i++) {
|
||||
fft_data[i * 2 + 0] = fir_output[i] * window[i];
|
||||
fft_data[i * 2 + 1] = 0;
|
||||
}
|
||||
free(fir_output);
|
||||
free(window);
|
||||
|
||||
// Initialize FFT
|
||||
esp_err_t ret = dsps_fft2r_init_fc32(NULL, N_FFT * 2);
|
||||
TEST_ESP_OK(ret);
|
||||
|
||||
// Do the FFT
|
||||
dsps_fft2r_fc32(fft_data, N_FFT);
|
||||
dsps_bit_rev_fc32(fft_data, N_FFT);
|
||||
dsps_cplx2reC_fc32(fft_data, N_FFT);
|
||||
|
||||
// Convert the FFT spectrum from amplitude to watts, find the max value and its position
|
||||
float max_val = -1000000;
|
||||
int max_pos = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
fft_data[i] = (fft_data[i * 2 + 0] * fft_data[i * 2 + 0] + fft_data[i * 2 + 1] * fft_data[i * 2 + 1]) / (N_FFT * 3);
|
||||
if (fft_data[i] > max_val) {
|
||||
max_val = fft_data[i];
|
||||
max_pos = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the power of the signal and noise of the spectrum and convert the spectrum to dB
|
||||
float signal_pow = 0, noise_pow = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
if ((i >= max_pos - LEAKAGE_BINS) && (i <= max_pos + LEAKAGE_BINS)) {
|
||||
signal_pow += fft_data[i];
|
||||
} else {
|
||||
noise_pow += fft_data[i];
|
||||
}
|
||||
|
||||
fft_data[i] = 10 * log10f(0.0000000000001 + fft_data[i]);
|
||||
}
|
||||
|
||||
// Convert the signal power and noise power from watts to dB and calculate SNR
|
||||
const float snr = 10 * log10f(signal_pow / noise_pow);
|
||||
noise_pow = 10 * log10f(noise_pow);
|
||||
signal_pow = 10 * log10f(signal_pow);
|
||||
|
||||
ESP_LOGI(TAG, "\nSignal Power: \t%f\nNoise Power: \t%f\nSNR: \t\t%f", signal_pow, noise_pow, snr);
|
||||
dsps_view(fft_data, N_FFT / 2, 64, 16, -140, 40, '|');
|
||||
free(fft_data);
|
||||
|
||||
const float min_exec_snr = 50;
|
||||
const float max_exec_snr = 120;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec_snr, max_exec_snr, snr);
|
||||
dsps_fird_s16_aexx_free(&fir);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdint.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_dsp.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_wind.h"
|
||||
#include "dsps_view.h"
|
||||
#include "dsps_fft2r.h"
|
||||
|
||||
#define COEFFS 64
|
||||
#define N_IN_SAMPLES 1024
|
||||
#define DECIMATION 2
|
||||
#define Q15_MAX INT16_MAX
|
||||
#define LEAKAGE_BINS 10
|
||||
#define FIR_BUFF_LEN 16
|
||||
|
||||
static const char *TAG = "dsps_fird_s16_ansi";
|
||||
|
||||
const static int32_t len = N_IN_SAMPLES;
|
||||
const static int32_t N_FFT = (N_IN_SAMPLES / DECIMATION);
|
||||
const static int16_t decim = DECIMATION;
|
||||
const static int16_t fir_len = COEFFS;
|
||||
const static int32_t fir_buffer = (N_IN_SAMPLES + FIR_BUFF_LEN);
|
||||
|
||||
|
||||
// error messages for the init functions
|
||||
static void error_msg_handler(fir_s16_t *fir, esp_err_t status)
|
||||
{
|
||||
|
||||
if (status != ESP_OK) {
|
||||
dsps_fird_s16_aexx_free(fir);
|
||||
|
||||
switch (status) {
|
||||
case ESP_ERR_DSP_INVALID_LENGTH:
|
||||
TEST_ASSERT_MESSAGE(false, "Number of the coefficients must be higher than 1");
|
||||
break;
|
||||
case ESP_ERR_DSP_ARRAY_NOT_ALIGNED:
|
||||
TEST_ASSERT_MESSAGE(false, "Delay line or (and) coefficients arrays not aligned");
|
||||
break;
|
||||
case ESP_ERR_DSP_PARAM_OUTOFRANGE:
|
||||
TEST_ASSERT_MESSAGE(false, "Start position or (and) Decimation ratio or (and) Shift out of range");
|
||||
break;
|
||||
default:
|
||||
TEST_ASSERT_MESSAGE(false, "Unspecified error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_s16_ansi functionality", "[dsps]")
|
||||
{
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
int16_t *delay = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
const int16_t dec = decim;
|
||||
const int32_t output_len = (int32_t)(len / dec);
|
||||
|
||||
fir_s16_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = 0;
|
||||
}
|
||||
coeffs[0] = 0x4000;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0x4000;
|
||||
}
|
||||
|
||||
esp_err_t status = dsps_fird_init_s16(&fir1, coeffs, delay, fir_len, dec, start_pos, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
|
||||
const int32_t total = dsps_fird_s16_ansi(&fir1, x, y, output_len);
|
||||
|
||||
ESP_LOGI(TAG, "%"PRId32" input samples, decimation %"PRId16",total result = %"PRId32"\n", len, dec, total);
|
||||
TEST_ASSERT_EQUAL(total, len / decim);
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
ESP_LOGD(TAG, "data[%i] = %d\n", i, y[i]);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
TEST_ASSERT_EQUAL(y[i], (0x2000));
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_s16_ansi benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
int16_t *delay = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
|
||||
const int repeat_count = 4;
|
||||
const int16_t dec = 1;
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
int32_t output_len = 0;
|
||||
|
||||
fir_s16_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
esp_err_t status = dsps_fird_init_s16(&fir1, coeffs, delay, fir_len, dec, start_pos, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
|
||||
// Decimations 1, 2, 4, 8
|
||||
for (int i = 0 ; i < 4 ; i++) {
|
||||
|
||||
output_len = (int32_t)(len / fir1.decim);
|
||||
const unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_fird_s16_ansi(&fir1, x, y, output_len);
|
||||
}
|
||||
const unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
const float total_b = end_b - start_b;
|
||||
const float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "total cycles %f per sample for %"PRId16" coefficients, decimation %"PRId16", %f per decim tap \n",
|
||||
cycles, fir_len, fir1.decim, cycles / (float)fir_len * fir1.decim);
|
||||
float min_exec = 10;
|
||||
float max_exec = 1500;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
fir1.decim *= 2;
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_fird_s16_ansi noise_snr", "[dsps]")
|
||||
{
|
||||
|
||||
// In the SNR-noise test we are generating a sine wave signal, filtering the signal using a fixed point FIRD filter
|
||||
// and do the FFT of the filtered signal. Afterward, a noise and SNR calculated from the FFT spectrum
|
||||
|
||||
// FIR Coeffs
|
||||
int16_t *s_coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point coefficients
|
||||
int16_t *delay_line = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point delay line
|
||||
float *f_coeffs = (float *)memalign(16, fir_len * sizeof(float)); // floating point coefficients
|
||||
|
||||
// Coefficients windowing
|
||||
dsps_wind_hann_f32(f_coeffs, fir_len);
|
||||
const float fir_order = (float)fir_len - 1;
|
||||
const float ft = 0.25; // sine frequency
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
f_coeffs[i] *= sinf((2 * M_PI * ft * (i - fir_order / 2))) / (M_PI * (i - fir_order / 2));
|
||||
}
|
||||
|
||||
// FIR coefficients conversion to q15
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
s_coeffs[i] = f_coeffs[i] * Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_coeffs);
|
||||
|
||||
// Signal generation
|
||||
const float amplitude = 0.9;
|
||||
const float frequency = 0.05;
|
||||
const float phase = 0;
|
||||
float *f_in_signal = (float *)memalign(16, fir_buffer * sizeof(float));
|
||||
dsps_tone_gen_f32(f_in_signal, fir_buffer, amplitude, frequency, phase);
|
||||
|
||||
// Input signal conversion to q15
|
||||
int16_t *fir_x = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
int16_t *fir_y = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
for (int i = 0; i < fir_buffer; i++) {
|
||||
fir_x[i] = f_in_signal[i] * (int16_t)Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_in_signal);
|
||||
|
||||
// FIR
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
const int32_t output_len = (int32_t)(fir_buffer / decim);
|
||||
fir_s16_t fir1;
|
||||
esp_err_t status = dsps_fird_init_s16(&fir1, s_coeffs, delay_line, fir_len, decim, start_pos, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
dsps_fird_s16_ansi(&fir1, fir_x, fir_y, output_len);
|
||||
|
||||
free(delay_line);
|
||||
free(s_coeffs);
|
||||
free(fir_x);
|
||||
|
||||
// FIR Output conversion to float
|
||||
const unsigned int ignored_fir_samples = (FIR_BUFF_LEN / 2) - 1;
|
||||
float *fir_output = (float *)memalign(16, len * sizeof(float));
|
||||
for (int i = 0; i < N_FFT; i++) {
|
||||
fir_output[i] = (float)(fir_y[ignored_fir_samples + i] / (float)Q15_MAX);
|
||||
}
|
||||
|
||||
free(fir_y);
|
||||
|
||||
// Signal windowing
|
||||
float *window = (float *)memalign(16, N_FFT * sizeof(float));
|
||||
dsps_wind_blackman_f32(window, N_FFT);
|
||||
|
||||
// Prepare FFT input, real and imaginary part
|
||||
const int32_t fft_data_len = (N_IN_SAMPLES / DECIMATION) * 2;
|
||||
float *fft_data = (float *)memalign(16, fft_data_len * sizeof(float));
|
||||
for (int i = 0 ; i < N_FFT ; i++) {
|
||||
fft_data[i * 2 + 0] = fir_output[i] * window[i];
|
||||
fft_data[i * 2 + 1] = 0;
|
||||
}
|
||||
free(fir_output);
|
||||
free(window);
|
||||
|
||||
// Initialize FFT
|
||||
esp_err_t ret = dsps_fft2r_init_fc32(NULL, N_FFT * 2);
|
||||
TEST_ESP_OK(ret);
|
||||
|
||||
// Do the FFT
|
||||
dsps_fft2r_fc32(fft_data, N_FFT);
|
||||
dsps_bit_rev_fc32(fft_data, N_FFT);
|
||||
dsps_cplx2reC_fc32(fft_data, N_FFT);
|
||||
|
||||
// Convert the FFT spectrum from amplitude to watts, find the max value and its position
|
||||
float max_val = -1000000;
|
||||
int max_pos = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
fft_data[i] = (fft_data[i * 2 + 0] * fft_data[i * 2 + 0] + fft_data[i * 2 + 1] * fft_data[i * 2 + 1]) / (N_FFT * 3);
|
||||
if (fft_data[i] > max_val) {
|
||||
max_val = fft_data[i];
|
||||
max_pos = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the power of the signal and noise of the spectrum and convert the spectrum to dB
|
||||
float signal_pow = 0, noise_pow = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
if ((i >= max_pos - LEAKAGE_BINS) && (i <= max_pos + LEAKAGE_BINS)) {
|
||||
signal_pow += fft_data[i];
|
||||
} else {
|
||||
noise_pow += fft_data[i];
|
||||
}
|
||||
|
||||
fft_data[i] = 10 * log10f(0.0000000000001 + fft_data[i]);
|
||||
}
|
||||
|
||||
// Convert the signal power and noise power to dB, calculate SNR
|
||||
const float snr = 10 * log10f(signal_pow / noise_pow);
|
||||
noise_pow = 10 * log10f(noise_pow);
|
||||
signal_pow = 10 * log10f(signal_pow);
|
||||
|
||||
ESP_LOGI(TAG, "\nSignal Power: \t%f\nNoise Power: \t%f\nSNR: \t\t%f", signal_pow, noise_pow, snr);
|
||||
dsps_view(fft_data, N_FFT / 2, 64, 16, -140, 40, '|');
|
||||
free(fft_data);
|
||||
|
||||
const float min_exec_snr = 50;
|
||||
const float max_exec_snr = 120;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec_snr, max_exec_snr, snr);
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unity.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_firmr_f32_ansi";
|
||||
|
||||
static float *x;
|
||||
static float *y;
|
||||
|
||||
static float coeffs[120];
|
||||
static float delay[32];
|
||||
|
||||
TEST_CASE("dsps_firmr_f32_ansi functionality", "[dsps]")
|
||||
{
|
||||
int len = 1024;
|
||||
int decim = 4;
|
||||
int interp = 5;
|
||||
int fir_len = interp * 12;
|
||||
|
||||
x = (float *)malloc(len * sizeof(float));
|
||||
y = (float *)malloc(interp * len / decim * sizeof(float));
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = 0;
|
||||
}
|
||||
coeffs[0] = 1;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
|
||||
dsps_firmr_init_f32(&fir1, coeffs, delay, fir_len, interp, decim, 0);
|
||||
int total = dsps_firmr_f32_ansi(&fir1, x, y, len);
|
||||
ESP_LOGI(TAG, "Total result = %i from %i", total, len);
|
||||
TEST_ASSERT_EQUAL(total, len * interp / decim);
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
ESP_LOGD(TAG, "data[%i] = %f", i, y[i]);
|
||||
}
|
||||
int decim_count = 0;
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
if (i % interp == 0) {
|
||||
TEST_ASSERT_EQUAL(y[i], decim_count * decim);
|
||||
decim_count++;
|
||||
} else {
|
||||
TEST_ASSERT_EQUAL(y[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
free(x);
|
||||
free(y);
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_firmr_f32_ansi benchmark", "[dsps]")
|
||||
{
|
||||
int len = 1024;
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
int decim = 4;
|
||||
int interp = 5;
|
||||
int repeat_count = 100;
|
||||
|
||||
x = (float *)malloc(len * sizeof(float));
|
||||
y = (float *)malloc(interp * len / decim * sizeof(float));
|
||||
|
||||
|
||||
fir_f32_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
dsps_firmr_init_f32(&fir1, coeffs, delay, fir_len, interp, decim, 0);
|
||||
|
||||
unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_firmr_f32_ansi(&fir1, x, y, len);
|
||||
}
|
||||
unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
float total_b = end_b - start_b;
|
||||
float cycles = total_b / (len * repeat_count);
|
||||
|
||||
ESP_LOGI(TAG, "dsps_firmr_f32_ansi - %f per sample for for %i coefficients, %f per decim tap \n", cycles, fir_len, cycles / (float)fir_len * decim / interp);
|
||||
float min_exec = 10;
|
||||
float max_exec = 300;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
}
|
||||
@@ -0,0 +1,288 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdint.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_dsp.h"
|
||||
|
||||
#include "dsps_tone_gen.h"
|
||||
#include "dsps_d_gen.h"
|
||||
#include "dsps_fir.h"
|
||||
#include "dsp_tests.h"
|
||||
#include "dsps_wind.h"
|
||||
#include "dsps_view.h"
|
||||
#include "dsps_fft2r.h"
|
||||
|
||||
#define INTERPOLATION 5
|
||||
#define DECIMATION 4
|
||||
#define N_IN_SAMPLES 2048
|
||||
#define COEFFS INTERPOLATION*12
|
||||
#define Q15_MAX INT16_MAX
|
||||
#define LEAKAGE_BINS 10
|
||||
#define FIR_BUFF_LEN 16
|
||||
|
||||
static const char *TAG = "dsps_firmr_s16_ansi";
|
||||
|
||||
static int32_t len = N_IN_SAMPLES;
|
||||
static int32_t N_FFT = 1024;
|
||||
static int16_t decim = DECIMATION;
|
||||
static int16_t interp = INTERPOLATION;
|
||||
static int16_t fir_len = COEFFS;
|
||||
static int32_t fir_buffer = (N_IN_SAMPLES + FIR_BUFF_LEN);
|
||||
|
||||
|
||||
// error messages for the init functions
|
||||
static void error_msg_handler(fir_s16_t *fir, esp_err_t status)
|
||||
{
|
||||
|
||||
if (status != ESP_OK) {
|
||||
dsps_fird_s16_aexx_free(fir);
|
||||
|
||||
switch (status) {
|
||||
case ESP_ERR_DSP_INVALID_LENGTH:
|
||||
TEST_ASSERT_MESSAGE(false, "Number of the coefficients must be higher than 1");
|
||||
break;
|
||||
case ESP_ERR_DSP_ARRAY_NOT_ALIGNED:
|
||||
TEST_ASSERT_MESSAGE(false, "Delay line or (and) coefficients arrays not aligned");
|
||||
break;
|
||||
case ESP_ERR_DSP_PARAM_OUTOFRANGE:
|
||||
TEST_ASSERT_MESSAGE(false, "Start position or (and) Decimation ratio or (and) Shift out of range");
|
||||
break;
|
||||
default:
|
||||
TEST_ASSERT_MESSAGE(false, "Unspecified error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_firmr_s16_ansi functionality", "[dsps]")
|
||||
{
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, INTERPOLATION * len * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
int16_t *delay = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 1;
|
||||
|
||||
fir_s16_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = 0;
|
||||
}
|
||||
coeffs[0] = 0x4000;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
|
||||
esp_err_t status = dsps_firmr_init_s16(&fir1, coeffs, delay, fir_len, interp, decim, start_pos, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
|
||||
const int32_t total = dsps_firmr_s16_ansi(&fir1, x, y, len);
|
||||
|
||||
ESP_LOGI(TAG, "%"PRId32" input samples, interpolation %"PRId16", decimation %"PRId16",total result = %"PRId32"\n", len, interp, decim, total);
|
||||
TEST_ASSERT_EQUAL(total, len * interp / decim);
|
||||
int decim_count = 0;
|
||||
for (int i = 0 ; i < total ; i++) {
|
||||
if (i % interp == 0) {
|
||||
TEST_ASSERT_EQUAL(y[i], decim_count * decim);
|
||||
decim_count++;
|
||||
} else {
|
||||
TEST_ASSERT_EQUAL(y[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_firmr_s16_ansi benchmark", "[dsps]")
|
||||
{
|
||||
|
||||
int16_t *x = (int16_t *)memalign(16, len * sizeof(int16_t));
|
||||
int16_t *y = (int16_t *)memalign(16, INTERPOLATION * len * sizeof(int16_t));
|
||||
|
||||
int16_t *coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
int16_t *delay = (int16_t *)memalign(16, fir_len * sizeof(int16_t));
|
||||
|
||||
const int repeat_count = 4;
|
||||
const int16_t shift = 0;
|
||||
|
||||
fir_s16_t fir1;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
esp_err_t status = dsps_firmr_init_s16(&fir1, coeffs, delay, fir_len, interp, decim, 0, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
|
||||
|
||||
const unsigned int start_b = dsp_get_cpu_cycle_count();
|
||||
for (int i = 0 ; i < repeat_count ; i++) {
|
||||
dsps_firmr_s16_ansi(&fir1, x, y, len);
|
||||
}
|
||||
const unsigned int end_b = dsp_get_cpu_cycle_count();
|
||||
|
||||
const float total_b = end_b - start_b;
|
||||
const float cycles = total_b / (len * repeat_count);
|
||||
ESP_LOGI(TAG, "cycles count = %f, filter length = %d, interpolation = %d, decimation = %d", cycles, (int)fir_len, (int)interp, (int)decim);
|
||||
|
||||
float min_exec = 10;
|
||||
float max_exec = 1500;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec, max_exec, cycles);
|
||||
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_firmr_s16_ansi noise_snr", "[dsps]")
|
||||
{
|
||||
|
||||
// In the SNR-noise test we are generating a sine wave signal, filtering the signal using a fixed point FIRD filter
|
||||
// and do the FFT of the filtered signal. Afterward, a noise and SNR calculated from the FFT spectrum
|
||||
|
||||
// FIR Coeffs
|
||||
int16_t *s_coeffs = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point coefficients
|
||||
int16_t *delay_line = (int16_t *)memalign(16, fir_len * sizeof(int16_t)); // fixed point delay line
|
||||
float *f_coeffs = (float *)memalign(16, fir_len * sizeof(float)); // floating point coefficients
|
||||
|
||||
// Coefficients windowing
|
||||
dsps_wind_hann_f32(f_coeffs, fir_len);
|
||||
const float fir_order = (float)fir_len - 1;
|
||||
const float ft = 0.5 / interp; // sine frequency
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
f_coeffs[i] *= sinf((2 * M_PI * ft * (i - fir_order / 2))) / (M_PI * (i - fir_order / 2));
|
||||
}
|
||||
|
||||
// FIR coefficients conversion to q15
|
||||
for (int i = 0; i < fir_len; i++) {
|
||||
s_coeffs[i] = f_coeffs[i] * Q15_MAX;
|
||||
}
|
||||
|
||||
free(f_coeffs);
|
||||
|
||||
// Signal generation
|
||||
const float amplitude = 0.1;
|
||||
const float frequency = 0.05;
|
||||
const float phase = 0;
|
||||
float *f_in_signal = (float *)memalign(16, fir_buffer * sizeof(float));
|
||||
dsps_tone_gen_f32(f_in_signal, fir_buffer, amplitude, frequency, phase);
|
||||
|
||||
// Input signal conversion to q15
|
||||
int16_t *fir_x = (int16_t *)memalign(16, fir_buffer * sizeof(int16_t));
|
||||
int16_t *fir_y = (int16_t *)memalign(16, fir_buffer * interp * sizeof(int16_t));
|
||||
for (int i = 0; i < fir_buffer; i++) {
|
||||
fir_x[i] = f_in_signal[i] * (int16_t)Q15_MAX;
|
||||
}
|
||||
free(f_in_signal);
|
||||
|
||||
// FIR
|
||||
const int16_t start_pos = 0;
|
||||
const int16_t shift = 0;
|
||||
fir_s16_t fir1;
|
||||
esp_err_t status = dsps_firmr_init_s16(&fir1, s_coeffs, delay_line, fir_len, interp, decim, start_pos, shift);
|
||||
error_msg_handler(&fir1, status);
|
||||
int result_len = dsps_firmr_s16_ansi(&fir1, fir_x, fir_y, len);
|
||||
// printf("[ ");
|
||||
// for (int i = 0; i < result_len; i++) {
|
||||
// printf("%d, ", (int)fir_y[i]);
|
||||
// }
|
||||
// printf("];\n");
|
||||
|
||||
free(delay_line);
|
||||
free(s_coeffs);
|
||||
|
||||
// FIR Output conversion to float
|
||||
const unsigned int ignored_fir_samples = (FIR_BUFF_LEN / 2) - 1;
|
||||
float *fir_output = (float *)memalign(16, result_len * sizeof(float));
|
||||
for (int i = 0; i < result_len; i++) {
|
||||
fir_output[i] = (float)(fir_y[ignored_fir_samples + i] / (float)Q15_MAX);
|
||||
}
|
||||
|
||||
free(fir_x);
|
||||
free(fir_y);
|
||||
|
||||
// Signal windowing
|
||||
float *window = (float *)memalign(16, N_FFT * sizeof(float));
|
||||
dsps_wind_blackman_f32(window, N_FFT);
|
||||
|
||||
// Prepare FFT input, real and imaginary part
|
||||
const int32_t fft_data_len = N_FFT * 2;
|
||||
float *fft_data = (float *)memalign(16, fft_data_len * sizeof(float));
|
||||
for (int i = 0 ; i < N_FFT ; i++) {
|
||||
fft_data[i * 2 + 0] = fir_output[i] * window[i];
|
||||
fft_data[i * 2 + 1] = 0;
|
||||
}
|
||||
free(fir_output);
|
||||
free(window);
|
||||
|
||||
// Initialize FFT
|
||||
esp_err_t ret = dsps_fft2r_init_fc32(NULL, fft_data_len);
|
||||
TEST_ESP_OK(ret);
|
||||
|
||||
// Do the FFT
|
||||
dsps_fft2r_fc32(fft_data, N_FFT);
|
||||
dsps_bit_rev_fc32(fft_data, N_FFT);
|
||||
dsps_cplx2reC_fc32(fft_data, N_FFT);
|
||||
|
||||
// Convert the FFT spectrum from amplitude to watts, find the max value and its position
|
||||
float max_val = -1000000;
|
||||
int max_pos = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
fft_data[i] = (fft_data[i * 2 + 0] * fft_data[i * 2 + 0] + fft_data[i * 2 + 1] * fft_data[i * 2 + 1]) / (N_FFT * 3);
|
||||
if (fft_data[i] > max_val) {
|
||||
max_val = fft_data[i];
|
||||
max_pos = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the power of the signal and noise of the spectrum and convert the spectrum to dB
|
||||
float signal_pow = 0, noise_pow = 0;
|
||||
for (int i = 0 ; i < N_FFT / 2 ; i++) {
|
||||
if ((i >= max_pos - LEAKAGE_BINS) && (i <= max_pos + LEAKAGE_BINS)) {
|
||||
signal_pow += fft_data[i];
|
||||
} else {
|
||||
noise_pow += fft_data[i];
|
||||
}
|
||||
|
||||
fft_data[i] = 10 * log10f(0.0000000000001 + fft_data[i]);
|
||||
}
|
||||
|
||||
// Convert the signal power and noise power to dB, calculate SNR
|
||||
const float snr = 10 * log10f(signal_pow / noise_pow);
|
||||
noise_pow = 10 * log10f(noise_pow);
|
||||
signal_pow = 10 * log10f(signal_pow);
|
||||
|
||||
ESP_LOGI(TAG, "\nSignal Power: \t%f\nNoise Power: \t%f\nSNR: \t\t%f", signal_pow, noise_pow, snr);
|
||||
dsps_view(fft_data, N_FFT / 2, 64, 16, -140, 40, '|');
|
||||
free(fft_data);
|
||||
|
||||
const float min_exec_snr = 50;
|
||||
const float max_exec_snr = 120;
|
||||
TEST_ASSERT_EXEC_IN_RANGE(min_exec_snr, max_exec_snr, snr);
|
||||
dsps_fird_s16_aexx_free(&fir1);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdint.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_resampler_mr_f32";
|
||||
|
||||
#define INTERPOLATION 10
|
||||
#define DECIMATION 9
|
||||
#define N_IN_SAMPLES 4096
|
||||
#define COEFFS INTERPOLATION*12
|
||||
|
||||
static int32_t len = N_IN_SAMPLES;
|
||||
static int16_t fir_len = COEFFS;
|
||||
static float samplerate_factor = (float)480 / 441;
|
||||
|
||||
TEST_CASE("dsps_resampler_mr_f32 functionality", "[dsps]")
|
||||
{
|
||||
|
||||
float *x = (float *)memalign(16, len * sizeof(float));
|
||||
float *y = (float *)memalign(16, 2 * len * sizeof(float));
|
||||
|
||||
float *coeffs = (float *)memalign(16, fir_len * sizeof(float));
|
||||
float *delay = (float *)memalign(16, fir_len * sizeof(float));
|
||||
|
||||
dsps_resample_mr_t fir_resampler;
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = 0;
|
||||
}
|
||||
coeffs[0] = 0x4000;
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
|
||||
esp_err_t status = dsps_resampler_mr_init(&fir_resampler, coeffs, fir_len, INTERPOLATION, samplerate_factor, 0, 0);
|
||||
TEST_ESP_OK(status);
|
||||
|
||||
int out_pos = 0;
|
||||
int setp = 32;
|
||||
int out_count = 0;
|
||||
float expected_count = 0;
|
||||
int32_t correct_length = 0;
|
||||
for (int i = 0 ; i < 44100 ; i++) {
|
||||
correct_length = 0;
|
||||
if (expected_count > (out_pos + 1)) {
|
||||
correct_length = 1;
|
||||
}
|
||||
if (expected_count < (out_pos - 1)) {
|
||||
correct_length = -1;
|
||||
}
|
||||
|
||||
out_count = dsps_resampler_mr_exec(&fir_resampler, &x[0], &y[0], setp, correct_length);
|
||||
expected_count = setp * (i + 1) * samplerate_factor;
|
||||
out_pos += out_count;
|
||||
}
|
||||
ESP_LOGI(TAG, "current count = %d, expected_count=%i", (int)out_pos, (int)expected_count);
|
||||
int diff_count = abs(out_pos - (int)expected_count);
|
||||
if (diff_count > 2) {
|
||||
ESP_LOGE(TAG, "Expected: %i, reached: %i", (int)expected_count, (int)out_pos);
|
||||
TEST_ASSERT_MESSAGE (false, "Length difference more than expected! ");
|
||||
}
|
||||
|
||||
dsps_resampler_mr_free(&fir_resampler);
|
||||
free(x);
|
||||
free(y);
|
||||
free(coeffs);
|
||||
free(delay);
|
||||
}
|
||||
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdint.h>
|
||||
#include "unity.h"
|
||||
#include "dsp_platform.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_dsp.h"
|
||||
#include "dsp_tests.h"
|
||||
|
||||
static const char *TAG = "dsps_resampler_ph_f32";
|
||||
|
||||
TEST_CASE("dsps_resampler_ph_f32 functionality", "[dsps]")
|
||||
{
|
||||
int32_t len = 1000;
|
||||
float *x = (float *)memalign(16, len * sizeof(float));
|
||||
float *y = (float *)memalign(16, 2 * len * sizeof(float));
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
x[i] = sinf((2 * M_PI * 0.125 / 4 * i));
|
||||
}
|
||||
|
||||
float samplerate_factor = 1.1f;
|
||||
dsps_resample_ph_t resampler;
|
||||
|
||||
esp_err_t status = dsps_resampler_ph_init(&resampler, samplerate_factor);
|
||||
TEST_ESP_OK(status);
|
||||
|
||||
int frame_size = 100;
|
||||
int result_len = 0;
|
||||
for (int i = 0; i < len / frame_size; i++) {
|
||||
int32_t result = dsps_resampler_ph_exec(&resampler, &x[i * frame_size], &y[result_len], frame_size);
|
||||
result_len += result;
|
||||
ESP_LOGD(TAG, "result_len: %d, phase: %f, delay_pos: %d", result_len, resampler.phase, resampler.delay_pos);
|
||||
}
|
||||
|
||||
if (abs(result_len - (int)(len * samplerate_factor)) > 1) {
|
||||
ESP_LOGE(TAG, "Expected: %i, reached: %i", (int)(len * samplerate_factor), (int)result_len);
|
||||
TEST_ASSERT_MESSAGE (false, "Length difference more than expected! ");
|
||||
}
|
||||
free(x);
|
||||
free(y);
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("dsps_resampler_ph_f32 functionality view", "[dsps]")
|
||||
{
|
||||
int32_t len = 1500;
|
||||
float *x = (float *)memalign(16, 2048 * sizeof(float));
|
||||
float *y = (float *)memalign(16, 2 * 2048 * sizeof(float));
|
||||
float check_freq = 0.125;
|
||||
for (int i = 0; i < len; i++) {
|
||||
x[i] = sinf((2 * M_PI * check_freq * i));
|
||||
}
|
||||
|
||||
float samplerate_factor = 1.2f;
|
||||
dsps_resample_ph_t resampler;
|
||||
|
||||
esp_err_t ret = dsps_resampler_ph_init(&resampler, samplerate_factor);
|
||||
TEST_ESP_OK(ret);
|
||||
ret = dsps_fft2r_init_fc32(NULL, CONFIG_DSP_MAX_FFT_SIZE);
|
||||
TEST_ESP_OK(ret);
|
||||
|
||||
int frame_size = 100;
|
||||
int result_len = 0;
|
||||
for (int i = 0; i < len / frame_size; i++) {
|
||||
int32_t result = dsps_resampler_ph_exec(&resampler, &x[i * frame_size], &y[result_len], frame_size);
|
||||
result_len += result;
|
||||
ESP_LOGD(TAG, "result_len: %d, phase: %f, delay_pos: %d", result_len, resampler.phase, resampler.delay_pos);
|
||||
}
|
||||
|
||||
float *window = (float *)malloc(1024 * sizeof(float));
|
||||
dsps_wind_hann_f32(window, 1024);
|
||||
|
||||
for (int i = 0; i < 1024; i++) {
|
||||
x[i * 2 + 0] = (y[i] + sinf((2 * M_PI * check_freq * i))) * window[i];
|
||||
x[i * 2 + 1] = 0;
|
||||
}
|
||||
|
||||
int n_fft = 1024;
|
||||
dsps_fft2r_fc32_ansi(x, n_fft);
|
||||
dsps_bit_rev_fc32_ansi(x, n_fft);
|
||||
for (int i = 0; i < n_fft / 4; i++) {
|
||||
y[i] = 10 * log10f(x[i * 2 + 0] * x[i * 2 + 0] + x[i * 2 + 1] * x[i * 2 + 1]);
|
||||
ESP_LOGD(TAG, "y[%d]: %f", i, y[i]);
|
||||
}
|
||||
dsps_view_spectrum(y, n_fft / 4, -20, 60);
|
||||
int f1 = roundf(n_fft * check_freq);
|
||||
int f2 = roundf(n_fft * check_freq / samplerate_factor);
|
||||
ESP_LOGI(TAG, "f1: %d, f2: %d, diff: %f", f1, f2, fabs(y[f1] - y[f2]));
|
||||
if (fabs(y[f1] - y[f2]) > 2) {
|
||||
TEST_ASSERT_MESSAGE(false, "Signal difference more than expected! ");
|
||||
}
|
||||
|
||||
free(window);
|
||||
free(x);
|
||||
free(y);
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
|
||||
void test_fir();
|
||||
|
||||
int main(void)
|
||||
{
|
||||
printf("main starts!\n");
|
||||
// xt_iss_profile_enable();
|
||||
test_fir();
|
||||
// xt_iss_profile_disable();
|
||||
|
||||
printf("Test done\n");
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "dsp_common.h"
|
||||
|
||||
#include "dsps_fir.h"
|
||||
|
||||
float x[1024];
|
||||
float y[1024];
|
||||
float y_compare[1024];
|
||||
|
||||
float coeffs[256];
|
||||
float delay[256];
|
||||
float delay_compare[256];
|
||||
|
||||
|
||||
void test_fir()
|
||||
{
|
||||
int len = sizeof(x) / sizeof(float);
|
||||
int fir_len = sizeof(coeffs) / sizeof(float);
|
||||
|
||||
fir_f32_t fir1;
|
||||
fir_f32_t fir2;
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = 0;
|
||||
}
|
||||
x[0] = 1;
|
||||
|
||||
for (int i = 0 ; i < fir_len ; i++) {
|
||||
coeffs[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < len ; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
x[0] = 1;
|
||||
dsps_fir_init_f32(&fir1, coeffs, delay, fir_len / 4);
|
||||
dsps_fir_init_f32(&fir2, coeffs, delay_compare, fir_len);
|
||||
|
||||
xt_iss_profile_enable();
|
||||
dsps_fir_f32_aes3(&fir1, x, y, len);
|
||||
dsps_fir_f32_ansi(&fir2, x, y_compare, len);
|
||||
xt_iss_profile_disable();
|
||||
|
||||
printf("Test Pass!\n");
|
||||
}
|
||||
Reference in New Issue
Block a user