init commit

This commit is contained in:
2025-09-05 10:30:26 +02:00
commit c144fcd356
1526 changed files with 1115326 additions and 0 deletions

View File

@@ -0,0 +1,117 @@
cmake_minimum_required (VERSION 3.14)
project(CMSISDSPStatistics)
include(configLib)
include(configDsp)
add_library(CMSISDSPStatistics STATIC)
target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mean_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mean_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mean_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mean_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mean_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_power_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_power_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_power_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_power_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_power_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_rms_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_rms_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_rms_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_std_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_std_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_std_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_std_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_var_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_var_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_var_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_var_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f64.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f64.c)
configLib(CMSISDSPStatistics ${ROOT})
configDsp(CMSISDSPStatistics ${ROOT})
### Includes
target_include_directories(CMSISDSPStatistics PUBLIC "${DSP}/Include")
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
target_sources(CMSISDSPStatistics PRIVATE arm_max_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mean_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_power_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_rms_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_std_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_var_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f16.c)
endif()

View File

@@ -0,0 +1,100 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: StatisticsFunctions.c
* Description: Combination of all statistics function source files.
*
* $Date: 16. March 2020
* $Revision: V1.1.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_entropy_f32.c"
#include "arm_entropy_f64.c"
#include "arm_kullback_leibler_f32.c"
#include "arm_kullback_leibler_f64.c"
#include "arm_logsumexp_dot_prod_f32.c"
#include "arm_logsumexp_f32.c"
#include "arm_max_f32.c"
#include "arm_max_f64.c"
#include "arm_max_q15.c"
#include "arm_max_q31.c"
#include "arm_max_q7.c"
#include "arm_max_no_idx_f32.c"
#include "arm_max_no_idx_f64.c"
#include "arm_max_no_idx_q31.c"
#include "arm_max_no_idx_q15.c"
#include "arm_max_no_idx_q7.c"
#include "arm_mean_f32.c"
#include "arm_mean_f64.c"
#include "arm_mean_q15.c"
#include "arm_mean_q31.c"
#include "arm_mean_q7.c"
#include "arm_min_f32.c"
#include "arm_min_f64.c"
#include "arm_min_q15.c"
#include "arm_min_q31.c"
#include "arm_min_q7.c"
#include "arm_min_no_idx_f32.c"
#include "arm_min_no_idx_f64.c"
#include "arm_min_no_idx_q31.c"
#include "arm_min_no_idx_q15.c"
#include "arm_min_no_idx_q7.c"
#include "arm_power_f32.c"
#include "arm_power_f64.c"
#include "arm_power_q15.c"
#include "arm_power_q31.c"
#include "arm_power_q7.c"
#include "arm_rms_f32.c"
#include "arm_rms_q15.c"
#include "arm_rms_q31.c"
#include "arm_std_f32.c"
#include "arm_std_f64.c"
#include "arm_std_q15.c"
#include "arm_std_q31.c"
#include "arm_var_f32.c"
#include "arm_var_f64.c"
#include "arm_var_q15.c"
#include "arm_var_q31.c"
#include "arm_absmax_f32.c"
#include "arm_absmax_f64.c"
#include "arm_absmax_q15.c"
#include "arm_absmax_q31.c"
#include "arm_absmax_q7.c"
#include "arm_absmin_f32.c"
#include "arm_absmin_f64.c"
#include "arm_absmin_q15.c"
#include "arm_absmin_q31.c"
#include "arm_absmin_q7.c"
#include "arm_absmax_no_idx_f32.c"
#include "arm_absmax_no_idx_f64.c"
#include "arm_absmax_no_idx_q15.c"
#include "arm_absmax_no_idx_q31.c"
#include "arm_absmax_no_idx_q7.c"
#include "arm_absmin_no_idx_f32.c"
#include "arm_absmin_no_idx_f64.c"
#include "arm_absmin_no_idx_q15.c"
#include "arm_absmin_no_idx_q31.c"
#include "arm_absmin_no_idx_q7.c"
#include "arm_mse_q7.c"
#include "arm_mse_q15.c"
#include "arm_mse_q31.c"
#include "arm_mse_f32.c"
#include "arm_mse_f64.c"

View File

@@ -0,0 +1,46 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: StatisticsFunctions.c
* Description: Combination of all statistics function source files.
*
* $Date: 16. March 2020
* $Revision: V1.1.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_max_f16.c"
#include "arm_min_f16.c"
#include "arm_mean_f16.c"
#include "arm_power_f16.c"
#include "arm_rms_f16.c"
#include "arm_std_f16.c"
#include "arm_var_f16.c"
#include "arm_entropy_f16.c"
#include "arm_kullback_leibler_f16.c"
#include "arm_logsumexp_dot_prod_f16.c"
#include "arm_logsumexp_f16.c"
#include "arm_max_no_idx_f16.c"
#include "arm_min_no_idx_f16.c"
#include "arm_absmax_f16.c"
#include "arm_absmin_f16.c"
#include "arm_absmax_no_idx_f16.c"
#include "arm_absmin_no_idx_f16.c"
#include "arm_mse_f16.c"

View File

@@ -0,0 +1,274 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_f16.c
* Description: Maximum value of a absolute values of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmax_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
uint16_t blkCnt; /* loop counters */
f16x8_t vecSrc;
float16_t const *pSrcVec;
f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMIN);
float16_t maxValue = F16_ABSMIN;
uint16_t idx = blockSize;
uint16x8_t indexVec;
uint16x8_t curExtremIdxVec;
mve_pred16_t p0;
indexVec = vidupq_u16((uint32_t)0, 1);
curExtremIdxVec = vdupq_n_u16(0);
pSrcVec = (float16_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
vecSrc = vldrhq_f16(pSrcVec);
pSrcVec += 8;
vecSrc = vabsq(vecSrc);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = indexVec + 8;
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0U)
{
vecSrc = vldrhq_f16(pSrcVec);
pSrcVec += 8;
vecSrc = vabsq(vecSrc);
p0 = vctp16q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq_m(vecSrc, curExtremValVec, p0);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxnmvq(maxValue, curExtremValVec);
/*
* set index for lower values to max possible index
*/
p0 = vcmpgeq(curExtremValVec, maxValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
/*
* Get min index which is thus for a max value
*/
idx = vminvq(idx, indexVec);
/*
* Save result
*/
*pIndex = idx;
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
void arm_absmax_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
float16_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
/* compare for the extrema value */ \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
outIndex = index + 1U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
out = cur_absmax; \
outIndex = index + 2U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
out = cur_absmax; \
outIndex = index + 3U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
out = cur_absmax; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
out = cur_absmax; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmax_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
float16_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = (_Float16)fabsf((float32_t)*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = (_Float16)fabsf((float32_t)*pSrc++);
/* compare for the maximum value */
if ((_Float16)out < (_Float16)maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,260 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_f32.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@defgroup AbsMax Absolute Maximum
Computes the maximum value of absolute values of an array of data.
The function returns both the maximum value and its position within the array.
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmax_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
int32_t blkSize = blockSize;
f32x4_t vecSrc;
f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN);
float32_t maxValue = F32_ABSMIN;
uint32_t idx = blockSize;
uint32x4_t indexVec;
uint32x4_t curExtremIdxVec;
uint32_t curIdx = 0;
mve_pred16_t p0;
indexVec = vidupq_wb_u32(&curIdx, 1);
curExtremIdxVec = vdupq_n_u32(0);
do {
mve_pred16_t p = vctp32q(blkSize);
vecSrc = vldrwq_z_f32((float32_t const *) pSrc, p);
vecSrc = vabsq_m(vuninitializedq_f32(), vecSrc, p);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq_m(vecSrc, curExtremValVec, p);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
/* Does TP detection works here ?? */
indexVec = vidupq_wb_u32(&curIdx, 1);
blkSize -= 4;
pSrc += 4;
}
while (blkSize > 0);
/*
* Get max value across the vector
*/
maxValue = vmaxnmvq(maxValue, curExtremValVec);
/*
* set index for lower values to max possible index
*/
p0 = vcmpgeq(curExtremValVec, maxValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
/*
* Get min index which is thus for a max value
*/
idx = vminvq(idx, indexVec);
/*
* Save result
*/
*pIndex = idx;
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
void arm_absmax_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
float32_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0.0f) ? out : -out; \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
/* compare for the extrema value */ \
if (cur_absmax > out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
outIndex = index + 1U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 2U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 3U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmax_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
float32_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = fabsf(*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = fabsf(*pSrc++);
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,92 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_f64.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
void arm_absmax_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult,
uint32_t * pIndex)
{
float64_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = fabs(*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = fabs(*pSrc++);
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,228 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_f16.c
* Description: Maximum value of a absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmax_no_idx_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
uint16_t blkCnt; /* loop counters */
f16x8_t vecSrc;
float16_t const *pSrcVec;
f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMIN);
float16_t maxValue = F16_ABSMIN;
mve_pred16_t p0;
pSrcVec = (float16_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0)
{
vecSrc = vldrhq_f16(pSrcVec);
pSrcVec += 8;
/*
* update per-lane max.
*/
curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0U)
{
vecSrc = vldrhq_f16(pSrcVec);
pSrcVec += 8;
p0 = vctp16q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxnmavq(maxValue, curExtremValVec);
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
void arm_absmax_no_idx_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
float16_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \
\
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
/* compare for the extrema value */ \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
out = cur_absmax; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \
if ((_Float16)cur_absmax > (_Float16)out) \
{ \
out = cur_absmax; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmax_no_idx_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
float16_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = (_Float16)fabsf((float32_t)*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = (_Float16)fabsf((float32_t)*pSrc++);
/* compare for the maximum value */
if ((_Float16)out < (_Float16)maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,225 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_f32.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmax_no_idx_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
int32_t blkCnt; /* loop counters */
f32x4_t vecSrc;
float32_t const *pSrcVec;
f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN);
float32_t maxValue = F32_ABSMIN;
mve_pred16_t p0;
pSrcVec = (float32_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0)
{
vecSrc = vldrwq_f32(pSrcVec);
pSrcVec += 4;
/*
* update per-lane max.
*/
curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 3;
if (blkCnt > 0)
{
vecSrc = vldrwq_f32(pSrcVec);
pSrcVec += 4;
p0 = vctp32q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxnmavq(maxValue, curExtremValVec);
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
void arm_absmax_no_idx_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0.0f) ? out : -out; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
/* compare for the extrema value */ \
if (cur_absmax > out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmax_no_idx_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = fabsf(*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = fabsf(*pSrc++);
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,87 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_f64.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
void arm_absmax_no_idx_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult)
{
float64_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = fabs(*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = fabs(*pSrc++);
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
}
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,220 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_q15.c
* Description: Maximum value of absolute values of a Q15 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmax_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
uint16_t blkCnt; /* loop counters */
q15x8_t vecSrc;
q15_t const *pSrcVec;
uint16x8_t curExtremValVec = vdupq_n_s16(Q15_ABSMIN);
q15_t maxValue = Q15_ABSMIN;
mve_pred16_t p0;
pSrcVec = (q15_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
/*
* update per-lane max.
*/
curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0U)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
p0 = vctp16q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxavq(maxValue, (q15x8_t)curExtremValVec);
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmax_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
q15_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \
\
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
/* compare for the extrema value */ \
if (cur_absmax > out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmax_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
q15_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,220 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_q31.c
* Description: Maximum value of absolute values of a Q31 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmax_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
int32_t blkCnt; /* loop counters */
q31x4_t vecSrc;
q31_t const *pSrcVec;
uint32x4_t curExtremValVec = vdupq_n_s32(Q31_ABSMIN);
q31_t maxValue = Q31_ABSMIN;
mve_pred16_t p0;
pSrcVec = (q31_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
/*
* update per-lane max.
*/
curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 3;
if (blkCnt > 0)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
p0 = vctp32q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxavq(maxValue, (q31x4_t)curExtremValVec);
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmax_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
q31_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q31_t)__QSUB(0, out); \
\
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
/* compare for the extrema value */ \
if (cur_absmax > out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmax_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
q31_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,224 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_q7.c
* Description: Maximum value of absolute values of a Q7 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <stdint.h>
#include "arm_helium_utils.h"
void arm_absmax_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
int32_t blkCnt; /* loop counters */
q7x16_t vecSrc;
q7_t const *pSrcVec;
uint8x16_t curExtremValVec = vdupq_n_s8(Q7_ABSMIN);
q7_t maxValue = Q7_ABSMIN;
mve_pred16_t p0;
pSrcVec = (q7_t const *) pSrc;
blkCnt = blockSize >> 4;
while (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
/*
* update per-lane max.
*/
curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 0xF;
if (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
p0 = vctp8q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxavq(maxValue, (q7x16_t)curExtremValVec);
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmax_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
q7_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \
\
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
/* compare for the extrema value */ \
if (cur_absmax > out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmax_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
q7_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,236 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_q15.c
* Description: Maximum value of absolute values of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmax_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
q15x8_t extremValVec = vdupq_n_s16(Q15_ABSMIN);
q15_t maxValue = Q15_ABSMIN;
uint16x8_t indexVec;
uint16x8_t extremIdxVec;
mve_pred16_t p0;
uint16_t extremIdxArr[8];
indexVec = vidupq_u16(0U, 1);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp16q(blkCnt);
q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p);
extremIdxVal = vqabsq(extremIdxVal);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
/* store per-lane extrema indexes */
vst1q_p_u16(extremIdxArr, indexVec, p0);
indexVec += 8;
pSrc += 8;
blkCnt -= 8;
}
while (blkCnt > 0);
/* Get max value across the vector */
maxValue = vmaxvq(maxValue, extremValVec);
/* set index for lower values to max possible index */
p0 = vcmpgeq(extremValVec, maxValue);
extremIdxVec = vld1q_u16(extremIdxArr);
indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
*pIndex = vminvq(blockSize - 1, indexVec);
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmax_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
q15_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
/* compare for the extrema value */ \
if (cur_absmax > out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
outIndex = index + 1U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 2U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 3U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmax_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
q15_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,236 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_q31.c
* Description: Maximum value of absolute values of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmax_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
q31x4_t extremValVec = vdupq_n_s32(Q31_ABSMIN);
q31_t maxValue = Q31_ABSMIN;
uint32x4_t indexVec;
uint32x4_t extremIdxVec;
mve_pred16_t p0;
uint32_t extremIdxArr[4];
indexVec = vidupq_u32(0U, 1);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp32q(blkCnt);
q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p);
extremIdxVal = vqabsq(extremIdxVal);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
/* store per-lane extrema indexes */
vst1q_p_u32(extremIdxArr, indexVec, p0);
indexVec += 4;
pSrc += 4;
blkCnt -= 4;
}
while (blkCnt > 0);
/* Get max value across the vector */
maxValue = vmaxvq(maxValue, extremValVec);
/* set index for lower values to max possible index */
p0 = vcmpgeq(extremValVec, maxValue);
extremIdxVec = vld1q_u32(extremIdxArr);
indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
*pIndex = vminvq(blockSize - 1, indexVec);
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmax_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
q31_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q31_t)__QSUB(0, out); \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
/* compare for the extrema value */ \
if (cur_absmax > out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
outIndex = index + 1U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 2U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 3U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmax_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
q31_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,294 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_q7.c
* Description: Maximum value of absolute values of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <stdint.h>
#include "arm_helium_utils.h"
#define MAX_BLKSZ_S8 (UINT8_MAX+1)
static void arm_small_blk_absmax_q7(
const q7_t * pSrc,
uint16_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
q7x16_t extremValVec = vdupq_n_s8(Q7_ABSMIN);
q7_t maxValue = Q7_ABSMIN;
uint8x16_t indexVec;
uint8x16_t extremIdxVec;
mve_pred16_t p0;
uint8_t extremIdxArr[16];
indexVec = vidupq_u8(0U, 1);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp8q(blkCnt);
q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p);
extremIdxVal = vqabsq(extremIdxVal);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
/* store per-lane extrema indexes */
vst1q_p_u8(extremIdxArr, indexVec, p0);
indexVec += 16;
pSrc += 16;
blkCnt -= 16;
}
while (blkCnt > 0);
/* Get max value across the vector */
maxValue = vmaxvq(maxValue, extremValVec);
/* set index for lower values to max possible index */
p0 = vcmpgeq(extremValVec, maxValue);
extremIdxVec = vld1q_u8(extremIdxArr);
indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
*pIndex = vminvq_u8(blockSize - 1, indexVec);
*pResult = maxValue;
}
void arm_absmax_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
int32_t totalSize = blockSize;
if (totalSize <= MAX_BLKSZ_S8)
{
arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex);
}
else
{
uint32_t curIdx = 0;
q7_t curBlkExtr = Q7_MIN;
uint32_t curBlkPos = 0;
uint32_t curBlkIdx = 0;
/*
* process blocks of 255 elts
*/
while (totalSize >= MAX_BLKSZ_S8)
{
const q7_t *curSrc = pSrc;
arm_small_blk_absmax_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex);
if (*pResult > curBlkExtr)
{
/*
* update partial extrema
*/
curBlkExtr = *pResult;
curBlkPos = *pIndex;
curBlkIdx = curIdx;
}
curIdx++;
pSrc += MAX_BLKSZ_S8;
totalSize -= MAX_BLKSZ_S8;
}
/*
* remainder
*/
arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex);
if (*pResult > curBlkExtr)
{
curBlkExtr = *pResult;
curBlkPos = *pIndex;
curBlkIdx = curIdx;
}
*pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
*pResult = curBlkExtr;
}
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmax_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
q7_t cur_absmax, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmax to next consecutive values one by one */ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
/* compare for the extrema value */ \
if (cur_absmax > out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmax; \
outIndex = index + 1U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 2U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 3U; \
} \
\
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmax = *pSrc++; \
cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \
if (cur_absmax > out) \
{ \
out = cur_absmax; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmax_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
q7_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,276 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_f16.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
uint16_t blkCnt; /* loop counters */
f16x8_t vecSrc;
float16_t const *pSrcVec;
f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMAX);
float16_t minValue = F16_ABSMAX;
uint16_t idx = blockSize;
uint16x8_t indexVec;
uint16x8_t curExtremIdxVec;
mve_pred16_t p0;
indexVec = vidupq_u16((uint32_t)0, 1);
curExtremIdxVec = vdupq_n_u16(0);
pSrcVec = (float16_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
vecSrc = vldrhq_f16(pSrcVec);
pSrcVec += 8;
vecSrc = vabsq(vecSrc);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpleq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = indexVec + 8;
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0U)
{
p0 = vctp16q(blkCnt);
vecSrc = vldrhq_f16(pSrcVec);
pSrcVec += 8;
vecSrc = vabsq(vecSrc);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminnmvq(minValue, curExtremValVec);
/*
* set index for lower values to max possible index
*/
p0 = vcmpleq(curExtremValVec, minValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
/*
* Get min index which is thus for a max value
*/
idx = vminvq(idx, indexVec);
/*
* Save result
*/
*pIndex = idx;
*pResult = minValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
void arm_absmin_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
float16_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
/* compare for the extrema value */ \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
outIndex = index + 1U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
out = cur_absmin; \
outIndex = index + 2U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
out = cur_absmin; \
outIndex = index + 3U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
out = cur_absmin; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
out = cur_absmin; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmin_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
float16_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = (_Float16)fabsf((float32_t)*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = (_Float16)fabsf((float32_t)*pSrc++);
/* compare for the minimum value */
if ((_Float16)out > (_Float16)minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,279 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_f32.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@defgroup AbsMin Absolute Minimum
Computes the minimum value of absolute values of an array of data.
The function returns both the minimum value and its position within the array.
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
f32x4_t vecSrc;
float32_t const *pSrcVec;
f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMAX);
float32_t minValue = F32_ABSMAX;
uint32_t idx = blockSize;
uint32x4_t indexVec;
uint32x4_t curExtremIdxVec;
mve_pred16_t p0;
indexVec = vidupq_u32((uint32_t)0, 1);
curExtremIdxVec = vdupq_n_u32(0);
pSrcVec = (float32_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0)
{
vecSrc = vldrwq_f32(pSrcVec);
pSrcVec += 4;
vecSrc = vabsq(vecSrc);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpleq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = indexVec + 4;
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 3;
if (blkCnt > 0)
{
p0 = vctp32q(blkCnt);
vecSrc = vldrwq_f32(pSrcVec);
pSrcVec += 4;
vecSrc = vabsq(vecSrc);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminnmvq(minValue, curExtremValVec);
/*
* set index for lower values to max possible index
*/
p0 = vcmpleq(curExtremValVec, minValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
/*
* Get min index which is thus for a max value
*/
idx = vminvq(idx, indexVec);
/*
* Save result
*/
*pIndex = idx;
*pResult = minValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
void arm_absmin_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
float32_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0.0f) ? out : -out; \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
/* compare for the extrema value */ \
if (cur_absmin < out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
outIndex = index + 1U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 2U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 3U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmin_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
float32_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = fabsf(*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = fabsf(*pSrc++);
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,90 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_f64.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
void arm_absmin_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult,
uint32_t * pIndex)
{
float64_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = fabs(*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = fabs(*pSrc++);
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,230 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_f16.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_no_idx_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
int32_t blkCnt; /* loop counters */
f16x8_t vecSrc;
float16_t const *pSrcVec;
f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMAX);
float16_t minValue = F16_ABSMAX;
mve_pred16_t p0;
pSrcVec = (float16_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
/*
* update per-lane min.
*/
curExtremValVec = vminnmaq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
p0 = vctp16q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
curExtremValVec = vminnmaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get min value across the vector
*/
minValue = vminnmavq(minValue, curExtremValVec);
*pResult = minValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
void arm_absmin_no_idx_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
float16_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \
\
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
/* compare for the extrema value */ \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
out = cur_absmin; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \
if ((_Float16)cur_absmin < (_Float16)out) \
{ \
out = cur_absmin; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmin_no_idx_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
float16_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = (_Float16)fabsf((float32_t)*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = (_Float16)fabsf((float32_t)*pSrc++);
/* compare for the minimum value */
if ((_Float16)out > (_Float16)minVal)
{
/* Update the minimum value and it's index */
out = minVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,226 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_f32.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_no_idx_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
int32_t blkCnt; /* loop counters */
f32x4_t vecSrc;
float32_t const *pSrcVec;
f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMAX);
float32_t minValue = F32_ABSMAX;
mve_pred16_t p0;
pSrcVec = (float32_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0)
{
vecSrc = vldrwq_f32(pSrcVec);
pSrcVec += 4;
/*
* update per-lane min.
*/
curExtremValVec = vminnmaq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 3;
if (blkCnt > 0)
{
vecSrc = vldrwq_f32(pSrcVec);
pSrcVec += 4;
p0 = vctp32q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
curExtremValVec = vminnmaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get min value across the vector
*/
minValue = vminnmavq(minValue, curExtremValVec);
*pResult = minValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
void arm_absmin_no_idx_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0.0f) ? out : -out; \
\
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
/* compare for the extrema value */ \
if (cur_absmin < out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmin_no_idx_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = fabsf(*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = fabsf(*pSrc++);
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_f64.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
void arm_absmin_no_idx_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult)
{
float64_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = fabs(*pSrc++);
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = fabs(*pSrc++);
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
}
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,222 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_q15.c
* Description: Minimum value of absolute values of a Q15 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
uint16_t blkCnt; /* loop counters */
q15x8_t vecSrc;
q15_t const *pSrcVec;
uint16x8_t curExtremValVec = vdupq_n_s16(Q15_ABSMAX);
q15_t minValue = Q15_ABSMAX;
mve_pred16_t p0;
pSrcVec = (q15_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
/*
* update per-lane min.
*/
curExtremValVec = vminaq(curExtremValVec, vecSrc);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
p0 = vctp16q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get min value across the vector
*/
minValue = vminavq(minValue, (q15x8_t)curExtremValVec);
*pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
q15_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \
\
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
/* compare for the extrema value */ \
if (cur_absmin < out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmin_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
q15_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,221 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_q31.c
* Description: Minimum value of absolute values of a Q31 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
int32_t blkCnt; /* loop counters */
q31x4_t vecSrc;
q31_t const *pSrcVec;
uint32x4_t curExtremValVec = vdupq_n_s32(Q31_ABSMAX);
q31_t minValue = Q31_ABSMAX;
mve_pred16_t p0;
pSrcVec = (q31_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
/*
* update per-lane min.
*/
curExtremValVec = vminaq(curExtremValVec, vecSrc);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 3;
if (blkCnt > 0)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
p0 = vctp32q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get min value across the vector
*/
minValue = vminavq(minValue, (q31x4_t)curExtremValVec);
*pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
q31_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q31_t)__QSUB(0, out); \
\
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
/* compare for the extrema value */ \
if (cur_absmin < out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmin_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
q31_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,223 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_q7.c
* Description: Minimum value of absolute values of a Q7 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <stdint.h>
#include "arm_helium_utils.h"
void arm_absmin_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
int32_t blkCnt; /* loop counters */
q7x16_t vecSrc;
q7_t const *pSrcVec;
uint8x16_t curExtremValVec = vdupq_n_s8(Q7_ABSMAX);
q7_t minValue = Q7_ABSMAX;
mve_pred16_t p0;
pSrcVec = (q7_t const *) pSrc;
blkCnt = blockSize >> 4;
while (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
/*
* update per-lane min.
*/
curExtremValVec = vminaq(curExtremValVec, vecSrc);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 0xF;
if (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
p0 = vctp8q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
}
/*
* Get min value across the vector
*/
minValue = vminavq(minValue, (q7x16_t)curExtremValVec);
*pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
q7_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt; /* Loop counter */ \
\
\
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
/* compare for the extrema value */ \
if (cur_absmin < out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
}
#else
void arm_absmin_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
q7_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* Loop counter */
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,269 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_q15.c
* Description: Minimum value of absolute values of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
uint16_t blkCnt; /* loop counters */
q15x8_t vecSrc;
q15_t const *pSrcVec;
q15x8_t curExtremValVec = vdupq_n_s16(Q15_ABSMAX);
q15_t minValue = Q15_ABSMAX;
uint16_t idx = blockSize;
uint16x8_t indexVec;
uint16x8_t curExtremIdxVec;
uint32_t startIdx = 0;
mve_pred16_t p0;
indexVec = vidupq_wb_u16(&startIdx, 1);
curExtremIdxVec = vdupq_n_u16(0);
pSrcVec = (q15_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
vecSrc = vabsq(vecSrc);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = vidupq_wb_u16(&startIdx, 1);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0U)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
vecSrc = vabsq(vecSrc);
p0 = vctp16q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminvq(minValue, curExtremValVec);
/*
* set index for lower values to min possible index
*/
p0 = vcmpleq(curExtremValVec, minValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
/*
* Get min index which is thus for a min value
*/
idx = vminvq(idx, indexVec);
/*
* Save result
*/
*pIndex = idx;
*pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
q15_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
/* compare for the extrema value */ \
if (cur_absmin < out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
outIndex = index + 1U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 2U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 3U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmin_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
q15_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,269 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_q31.c
* Description: Minimum value of absolute values of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
uint16_t blkCnt; /* loop counters */
q31x4_t vecSrc;
q31_t const *pSrcVec;
q31x4_t curExtremValVec = vdupq_n_s32(Q31_ABSMAX);
q31_t minValue = Q31_ABSMAX;
uint16_t idx = blockSize;
uint32x4_t indexVec;
uint32x4_t curExtremIdxVec;
uint32_t startIdx = 0;
mve_pred16_t p0;
indexVec = vidupq_wb_u32(&startIdx, 1);
curExtremIdxVec = vdupq_n_u32(0);
pSrcVec = (q31_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0U)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
vecSrc = vabsq(vecSrc);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = vidupq_wb_u32(&startIdx, 1);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 3;
if (blkCnt > 0U)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
vecSrc = vabsq(vecSrc);
p0 = vctp32q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminvq(minValue, curExtremValVec);
/*
* set index for lower values to min possible index
*/
p0 = vcmpleq(curExtremValVec, minValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
/*
* Get min index which is thus for a min value
*/
idx = vminvq(idx, indexVec);
/*
* Save result
*/
*pIndex = idx;
*pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
q31_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q31_t)__QSUB(0, out); \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
/* compare for the extrema value */ \
if (cur_absmin < out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
outIndex = index + 1U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 2U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 3U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmin_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
q31_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,322 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_q7.c
* Description: Minimum value of absolute values of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <stdint.h>
#include "arm_helium_utils.h"
#define MAX_BLKSZ_S8 (UINT8_MAX+1)
static void arm_small_blk_absmin_q7(
const q7_t *pSrc,
uint32_t blockSize,
q7_t *pResult,
uint32_t *pIndex)
{
uint16_t blkCnt; /* loop counters */
q7x16_t vecSrc;
q7_t const *pSrcVec;
q7x16_t curExtremValVec = vdupq_n_s8(Q7_ABSMAX);
q7_t minValue = Q7_ABSMAX;
uint16_t idx = blockSize - 1;
uint8x16_t indexVec;
uint8x16_t curExtremIdxVec;
uint32_t startIdx = 0;
mve_pred16_t p0;
indexVec = vidupq_wb_u8(&startIdx, 1);
curExtremIdxVec = vdupq_n_u8(0);
pSrcVec = (q7_t const *) pSrc;
blkCnt = blockSize >> 4;
while (blkCnt > 0U)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
vecSrc = vabsq(vecSrc);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = vidupq_wb_u8(&startIdx, 1);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 0xF;
if (blkCnt > 0U)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
vecSrc = vabsq(vecSrc);
p0 = vctp8q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminvq(minValue, curExtremValVec);
/*
* set index for lower values to min possible index
*/
p0 = vcmpleq(curExtremValVec, minValue);
idx = vminvq_p_u8(idx, curExtremIdxVec, p0);
/*
* Save result
*/
*pIndex = idx;
*pResult = minValue;
}
void arm_absmin_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
int32_t totalSize = blockSize;
if (totalSize <= MAX_BLKSZ_S8)
{
arm_small_blk_absmin_q7(pSrc, blockSize, pResult, pIndex);
}
else
{
uint32_t curIdx = 0;
q7_t curBlkExtr = Q7_MAX;
uint32_t curBlkPos = 0;
uint32_t curBlkIdx = 0;
/*
* process blocks of 255 elts
*/
while (totalSize >= MAX_BLKSZ_S8)
{
const q7_t *curSrc = pSrc;
arm_small_blk_absmin_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex);
if (*pResult < curBlkExtr)
{
/*
* update partial extrema
*/
curBlkExtr = *pResult;
curBlkPos = *pIndex;
curBlkIdx = curIdx;
}
curIdx++;
pSrc += MAX_BLKSZ_S8;
totalSize -= MAX_BLKSZ_S8;
}
/*
* remainder
*/
arm_small_blk_absmin_q7(pSrc, totalSize, pResult, pIndex);
if (*pResult < curBlkExtr)
{
curBlkExtr = *pResult;
curBlkPos = *pIndex;
curBlkIdx = curIdx;
}
*pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
*pResult = curBlkExtr;
}
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
q7_t cur_absmin, out; /* Temporary variables to store the output value. */\
uint32_t blkCnt, outIndex; /* Loop counter */ \
uint32_t index; /* index of maximum value */ \
\
/* Initialize index value to zero. */ \
outIndex = 0U; \
/* Load first input value that act as reference value for comparision */ \
out = *pSrc++; \
out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \
/* Initialize index of extrema value. */ \
index = 0U; \
\
/* Loop unrolling: Compute 4 outputs at a time */ \
blkCnt = (blockSize - 1U) >> 2U; \
\
while (blkCnt > 0U) \
{ \
/* Initialize cur_absmin to next consecutive values one by one */ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
/* compare for the extrema value */ \
if (cur_absmin < out) \
{ \
/* Update the extrema value and it's index */ \
out = cur_absmin; \
outIndex = index + 1U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 2U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 3U; \
} \
\
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = index + 4U; \
} \
\
index += 4U; \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Loop unrolling: Compute remaining outputs */ \
blkCnt = (blockSize - 1U) % 4U; \
\
\
while (blkCnt > 0U) \
{ \
cur_absmin = *pSrc++; \
cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
if (cur_absmin < out) \
{ \
out = cur_absmin; \
outIndex = blockSize - blkCnt; \
} \
\
/* Decrement loop counter */ \
blkCnt--; \
} \
\
/* Store the extrema value and it's index into destination pointers */ \
*pResult = out; \
*pIndex = outIndex;
}
#else
void arm_absmin_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
q7_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,140 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f16.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup Entropy Entropy
Computes the entropy of a distribution
*/
/**
* @addtogroup Entropy
* @{
*/
/**
* @brief Entropy
*
* @param[in] pSrcA Array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return Entropy -Sum(p ln p)
*
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
{
uint32_t blkCnt;
_Float16 accum=0.0f16,p;
blkCnt = blockSize;
f16x8_t vSum = vdupq_n_f16(0.0f);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 3U;
while (blkCnt > 0U)
{
f16x8_t vecIn = vld1q(pSrcA);
vSum = vaddq_f16(vSum, vmulq(vecIn, vlogq_f16(vecIn)));
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pSrcA += 8;
blkCnt --;
}
accum = vecAddAcrossF16Mve(vSum);
/* Tail */
blkCnt = blockSize & 0x7;
while(blkCnt > 0)
{
p = *pSrcA++;
accum += p * (_Float16)logf((float32_t)p);
blkCnt--;
}
return (-accum);
}
#else
float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
{
const float16_t *pIn;
uint32_t blkCnt;
_Float16 accum, p;
pIn = pSrcA;
blkCnt = blockSize;
accum = 0.0f;
while(blkCnt > 0)
{
p = *pIn++;
accum += p * (_Float16)logf((float32_t)p);
blkCnt--;
}
return(-accum);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Entropy group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,174 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f32.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup Entropy
* @{
*/
/**
* @brief Entropy
*
* @param[in] pSrcA Array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return Entropy -Sum(p ln p)
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math.h"
float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
{
uint32_t blkCnt;
float32_t accum=0.0f,p;
blkCnt = blockSize;
f32x4_t vSum = vdupq_n_f32(0.0f);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
f32x4_t vecIn = vld1q(pSrcA);
vSum = vaddq_f32(vSum, vmulq(vecIn, vlogq_f32(vecIn)));
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pSrcA += 4;
blkCnt --;
}
accum = vecAddAcrossF32Mve(vSum);
/* Tail */
blkCnt = blockSize & 0x3;
while(blkCnt > 0)
{
p = *pSrcA++;
accum += p * logf(p);
blkCnt--;
}
return (-accum);
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "NEMath.h"
float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
{
const float32_t *pIn;
uint32_t blkCnt;
float32_t accum, p;
float32x4_t accumV;
float32x2_t accumV2;
float32x4_t tmpV, tmpV2;
pIn = pSrcA;
accum = 0.0f;
accumV = vdupq_n_f32(0.0f);
blkCnt = blockSize >> 2;
while(blkCnt > 0)
{
tmpV = vld1q_f32(pIn);
pIn += 4;
tmpV2 = vlogq_f32(tmpV);
accumV = vmlaq_f32(accumV, tmpV, tmpV2);
blkCnt--;
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 3;
while(blkCnt > 0)
{
p = *pIn++;
accum += p * logf(p);
blkCnt--;
}
return(-accum);
}
#else
float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
{
const float32_t *pIn;
uint32_t blkCnt;
float32_t accum, p;
pIn = pSrcA;
blkCnt = blockSize;
accum = 0.0f;
while(blkCnt > 0)
{
p = *pIn++;
accum += p * logf(p);
blkCnt--;
}
return(-accum);
}
#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Entropy group
*/

View File

@@ -0,0 +1,73 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f64.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup Entropy
* @{
*/
/**
* @brief Entropy
*
* @param[in] pSrcA Array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return Entropy -Sum(p ln p)
*
*/
float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize)
{
const float64_t *pIn;
uint32_t blkCnt;
float64_t accum, p;
pIn = pSrcA;
blkCnt = blockSize;
accum = 0.0;
while(blkCnt > 0)
{
p = *pIn++;
accum += p * log(p);
blkCnt--;
}
return(-accum);
}
/**
* @} end of Entropy group
*/

View File

@@ -0,0 +1,152 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f16.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup Kullback-Leibler Kullback-Leibler divergence
Computes the Kullback-Leibler divergence between two distributions
*/
/**
* @addtogroup Kullback-Leibler
* @{
*/
/**
* @brief Kullback-Leibler
*
* Distribution A may contain 0 with Neon version.
* Result will be right but some exception flags will be set.
*
* Distribution B must not contain 0 probability.
*
* @param[in] *pSrcA points to an array of input values for probaility distribution A.
* @param[in] *pSrcB points to an array of input values for probaility distribution B.
* @param[in] blockSize number of samples in the input array.
* @return Kullback-Leibler divergence D(A || B)
*
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
{
uint32_t blkCnt;
_Float16 accum, pA,pB;
blkCnt = blockSize;
accum = 0.0f16;
f16x8_t vSum = vdupq_n_f16(0.0f16);
blkCnt = blockSize >> 3;
while(blkCnt > 0)
{
f16x8_t vecA = vld1q(pSrcA);
f16x8_t vecB = vld1q(pSrcB);
f16x8_t vRatio;
vRatio = vdiv_f16(vecB, vecA);
vSum = vaddq_f16(vSum, vmulq(vecA, vlogq_f16(vRatio)));
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pSrcA += 8;
pSrcB += 8;
blkCnt --;
}
accum = vecAddAcrossF16Mve(vSum);
blkCnt = blockSize & 7;
while(blkCnt > 0)
{
pA = *pSrcA++;
pB = *pSrcB++;
accum += pA * (_Float16)logf((float32_t)pB / (float32_t)pA);
blkCnt--;
}
return(-accum);
}
#else
float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
{
const float16_t *pInA, *pInB;
uint32_t blkCnt;
_Float16 accum, pA,pB;
pInA = pSrcA;
pInB = pSrcB;
blkCnt = blockSize;
accum = 0.0f;
while(blkCnt > 0)
{
pA = *pInA++;
pB = *pInB++;
accum += pA * (_Float16)logf((float32_t)pB / (float32_t)pA);
blkCnt--;
}
return(-accum);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Kullback-Leibler group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,193 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f32.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup Kullback-Leibler
* @{
*/
/**
* @brief Kullback-Leibler
*
* Distribution A may contain 0 with Neon version.
* Result will be right but some exception flags will be set.
*
* Distribution B must not contain 0 probability.
*
* @param[in] *pSrcA points to an array of input values for probaility distribution A.
* @param[in] *pSrcB points to an array of input values for probaility distribution B.
* @param[in] blockSize number of samples in the input array.
* @return Kullback-Leibler divergence D(A || B)
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math.h"
float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize)
{
uint32_t blkCnt;
float32_t accum, pA,pB;
blkCnt = blockSize;
accum = 0.0f;
f32x4_t vSum = vdupq_n_f32(0.0f);
blkCnt = blockSize >> 2;
while(blkCnt > 0)
{
f32x4_t vecA = vld1q(pSrcA);
f32x4_t vecB = vld1q(pSrcB);
f32x4_t vRatio;
vRatio = vdiv_f32(vecB, vecA);
vSum = vaddq_f32(vSum, vmulq(vecA, vlogq_f32(vRatio)));
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pSrcA += 4;
pSrcB += 4;
blkCnt --;
}
accum = vecAddAcrossF32Mve(vSum);
blkCnt = blockSize & 3;
while(blkCnt > 0)
{
pA = *pSrcA++;
pB = *pSrcB++;
accum += pA * logf(pB / pA);
blkCnt--;
}
return(-accum);
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "NEMath.h"
float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize)
{
const float32_t *pInA, *pInB;
uint32_t blkCnt;
float32_t accum, pA,pB;
float32x4_t accumV;
float32x2_t accumV2;
float32x4_t tmpVA, tmpVB,tmpV;
pInA = pSrcA;
pInB = pSrcB;
accum = 0.0f;
accumV = vdupq_n_f32(0.0f);
blkCnt = blockSize >> 2;
while(blkCnt > 0)
{
tmpVA = vld1q_f32(pInA);
pInA += 4;
tmpVB = vld1q_f32(pInB);
pInB += 4;
tmpV = vinvq_f32(tmpVA);
tmpVB = vmulq_f32(tmpVB, tmpV);
tmpVB = vlogq_f32(tmpVB);
accumV = vmlaq_f32(accumV, tmpVA, tmpVB);
blkCnt--;
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 3;
while(blkCnt > 0)
{
pA = *pInA++;
pB = *pInB++;
accum += pA * logf(pB/pA);
blkCnt--;
}
return(-accum);
}
#else
float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize)
{
const float32_t *pInA, *pInB;
uint32_t blkCnt;
float32_t accum, pA,pB;
pInA = pSrcA;
pInB = pSrcB;
blkCnt = blockSize;
accum = 0.0f;
while(blkCnt > 0)
{
pA = *pInA++;
pB = *pInB++;
accum += pA * logf(pB / pA);
blkCnt--;
}
return(-accum);
}
#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Kullback-Leibler group
*/

View File

@@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f64.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup Kullback-Leibler
* @{
*/
/**
* @brief Kullback-Leibler
*
* @param[in] *pSrcA points to an array of input values for probaility distribution A.
* @param[in] *pSrcB points to an array of input values for probaility distribution B.
* @param[in] blockSize number of samples in the input array.
* @return Kullback-Leibler divergence D(A || B)
*
*/
float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, const float64_t * pSrcB, uint32_t blockSize)
{
const float64_t *pInA, *pInB;
uint32_t blkCnt;
float64_t accum, pA,pB;
pInA = pSrcA;
pInB = pSrcB;
blkCnt = blockSize;
accum = 0.0;
while(blkCnt > 0)
{
pA = *pInA++;
pB = *pInB++;
accum += pA * log(pB / pA);
blkCnt--;
}
return(-accum);
}
/**
* @} end of Kullback-Leibler group
*/

View File

@@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f16.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup LogSumExp LogSumExp
LogSumExp optimizations to compute sum of probabilities with Gaussian distributions
*/
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Dot product with log arithmetic
*
* Vectors are containing the log of the samples
*
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[in] *pTmpBuffer temporary buffer of length blockSize
* @return The log of the dot product.
*
*/
float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t *pTmpBuffer)
{
float16_t result;
arm_add_f16((float16_t*)pSrcA, (float16_t*)pSrcB, pTmpBuffer, blockSize);
result = arm_logsumexp_f16(pTmpBuffer, blockSize);
return(result);
}
/**
* @} end of LogSumExp group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,68 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f32.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Dot product with log arithmetic
*
* Vectors are containing the log of the samples
*
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[in] *pTmpBuffer temporary buffer of length blockSize
* @return The log of the dot product.
*
*/
float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t *pTmpBuffer)
{
float32_t result;
arm_add_f32((float32_t*)pSrcA, (float32_t*)pSrcB, pTmpBuffer, blockSize);
result = arm_logsumexp_f32(pTmpBuffer, blockSize);
return(result);
}
/**
* @} end of LogSumExp group
*/

View File

@@ -0,0 +1,172 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f16.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Computation of the LogSumExp
*
* In probabilistic computations, the dynamic of the probability values can be very
* wide because they come from gaussian functions.
* To avoid underflow and overflow issues, the values are represented by their log.
* In this representation, multiplying the original exp values is easy : their logs are added.
* But adding the original exp values is requiring some special handling and it is the
* goal of the LogSumExp function.
*
* If the values are x1...xn, the function is computing:
*
* ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
* rounding issues are minimised.
*
* The max xm of the values is extracted and the function is computing:
* xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
*
* @param[in] *in Pointer to an array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return LogSumExp
*
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
{
float16_t maxVal;
const float16_t *pIn;
int32_t blkCnt;
_Float16 accum=0.0f16;
_Float16 tmp;
arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal);
blkCnt = blockSize;
pIn = in;
f16x8_t vSum = vdupq_n_f16(0.0f16);
blkCnt = blockSize >> 3;
while(blkCnt > 0)
{
f16x8_t vecIn = vld1q(pIn);
f16x8_t vecExp;
vecExp = vexpq_f16(vsubq_n_f16(vecIn, maxVal));
vSum = vaddq_f16(vSum, vecExp);
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pIn += 8;
blkCnt --;
}
/* sum + log */
accum = vecAddAcrossF16Mve(vSum);
blkCnt = blockSize & 0x7;
while(blkCnt > 0)
{
tmp = *pIn++;
accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal));
blkCnt--;
}
accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum);
return (accum);
}
#else
float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
{
_Float16 maxVal;
_Float16 tmp;
const float16_t *pIn;
uint32_t blkCnt;
_Float16 accum;
pIn = in;
blkCnt = blockSize;
maxVal = *pIn++;
blkCnt--;
while(blkCnt > 0)
{
tmp = *pIn++;
if (tmp > maxVal)
{
maxVal = tmp;
}
blkCnt--;
}
blkCnt = blockSize;
pIn = in;
accum = 0;
while(blkCnt > 0)
{
tmp = *pIn++;
accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal));
blkCnt--;
}
accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum);
return(accum);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of LogSumExp group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,277 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f32.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Computation of the LogSumExp
*
* In probabilistic computations, the dynamic of the probability values can be very
* wide because they come from gaussian functions.
* To avoid underflow and overflow issues, the values are represented by their log.
* In this representation, multiplying the original exp values is easy : their logs are added.
* But adding the original exp values is requiring some special handling and it is the
* goal of the LogSumExp function.
*
* If the values are x1...xn, the function is computing:
*
* ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
* rounding issues are minimised.
*
* The max xm of the values is extracted and the function is computing:
* xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
*
* @param[in] *in Pointer to an array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return LogSumExp
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math.h"
float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
{
float32_t maxVal;
const float32_t *pIn;
int32_t blkCnt;
float32_t accum=0.0f;
float32_t tmp;
arm_max_no_idx_f32((float32_t *) in, blockSize, &maxVal);
blkCnt = blockSize;
pIn = in;
f32x4_t vSum = vdupq_n_f32(0.0f);
blkCnt = blockSize >> 2;
while(blkCnt > 0)
{
f32x4_t vecIn = vld1q(pIn);
f32x4_t vecExp;
vecExp = vexpq_f32(vsubq_n_f32(vecIn, maxVal));
vSum = vaddq_f32(vSum, vecExp);
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pIn += 4;
blkCnt --;
}
/* sum + log */
accum = vecAddAcrossF32Mve(vSum);
blkCnt = blockSize & 0x3;
while(blkCnt > 0)
{
tmp = *pIn++;
accum += expf(tmp - maxVal);
blkCnt--;
}
accum = maxVal + logf(accum);
return (accum);
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "NEMath.h"
float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
{
float32_t maxVal;
float32_t tmp;
float32x4_t tmpV, tmpVb;
float32x4_t maxValV;
uint32x4_t idxV;
float32x4_t accumV;
float32x2_t accumV2;
const float32_t *pIn;
uint32_t blkCnt;
float32_t accum;
pIn = in;
blkCnt = blockSize;
if (blockSize <= 3)
{
maxVal = *pIn++;
blkCnt--;
while(blkCnt > 0)
{
tmp = *pIn++;
if (tmp > maxVal)
{
maxVal = tmp;
}
blkCnt--;
}
}
else
{
maxValV = vld1q_f32(pIn);
pIn += 4;
blkCnt = (blockSize - 4) >> 2;
while(blkCnt > 0)
{
tmpVb = vld1q_f32(pIn);
pIn += 4;
idxV = vcgtq_f32(tmpVb, maxValV);
maxValV = vbslq_f32(idxV, tmpVb, maxValV );
blkCnt--;
}
accumV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV));
accumV2 = vpmax_f32(accumV2,accumV2);
maxVal = vget_lane_f32(accumV2, 0) ;
blkCnt = (blockSize - 4) & 3;
while(blkCnt > 0)
{
tmp = *pIn++;
if (tmp > maxVal)
{
maxVal = tmp;
}
blkCnt--;
}
}
maxValV = vdupq_n_f32(maxVal);
pIn = in;
accum = 0;
accumV = vdupq_n_f32(0.0f);
blkCnt = blockSize >> 2;
while(blkCnt > 0)
{
tmpV = vld1q_f32(pIn);
pIn += 4;
tmpV = vsubq_f32(tmpV, maxValV);
tmpV = vexpq_f32(tmpV);
accumV = vaddq_f32(accumV, tmpV);
blkCnt--;
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 0x3;
while(blkCnt > 0)
{
tmp = *pIn++;
accum += expf(tmp - maxVal);
blkCnt--;
}
accum = maxVal + logf(accum);
return(accum);
}
#else
float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
{
float32_t maxVal;
float32_t tmp;
const float32_t *pIn;
uint32_t blkCnt;
float32_t accum;
pIn = in;
blkCnt = blockSize;
maxVal = *pIn++;
blkCnt--;
while(blkCnt > 0)
{
tmp = *pIn++;
if (tmp > maxVal)
{
maxVal = tmp;
}
blkCnt--;
}
blkCnt = blockSize;
pIn = in;
accum = 0;
while(blkCnt > 0)
{
tmp = *pIn++;
accum += expf(tmp - maxVal);
blkCnt--;
}
accum = maxVal + logf(accum);
return(accum);
}
#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of LogSumExp group
*/

View File

@@ -0,0 +1,246 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_f16.c
* Description: Maximum value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt;
f16x8_t vecSrc;
f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN);
float16_t maxValue = F16_MIN;
uint32_t idx = blockSize;
uint16x8_t indexVec;
uint16x8_t curExtremIdxVec;
uint32_t curIdx = 0;
mve_pred16_t p0;
float16_t tmp;
indexVec = vidupq_wb_u16(&curIdx, 1);
curExtremIdxVec = vdupq_n_u16(0);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 3;
while (blkCnt > 0)
{
vecSrc = vldrhq_f16(pSrc);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = vidupq_wb_u16(&curIdx, 1);
pSrc += 8;
/* Decrement the loop counter */
blkCnt--;
}
/*
* Get max value across the vector
*/
maxValue = vmaxnmvq(maxValue, curExtremValVec);
/*
* set index for lower values to max possible index
*/
p0 = vcmpgeq(curExtremValVec, maxValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
/*
* Get min index which is thus for a max value
*/
idx = vminvq(idx, indexVec);
/* Tail */
blkCnt = blockSize & 7;
while (blkCnt > 0)
{
/* Initialize tmp to the next consecutive values one by one */
tmp = *pSrc++;
/* compare for the maximum value */
if ((_Float16)maxValue < (_Float16)tmp)
{
/* Update the maximum value and it's index */
maxValue = tmp;
idx = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/*
* Save result
*/
*pIndex = idx;
*pResult = maxValue;
}
#else
void arm_max_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
float16_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize maxVal to next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if ((_Float16)out < (_Float16)maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = index + 1U;
}
maxVal = *pSrc++;
if ((_Float16)out < (_Float16)maxVal)
{
out = maxVal;
outIndex = index + 2U;
}
maxVal = *pSrc++;
if ((_Float16)out < (_Float16)maxVal)
{
out = maxVal;
outIndex = index + 3U;
}
maxVal = *pSrc++;
if ((_Float16)out < (_Float16)maxVal)
{
out = maxVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if ((_Float16)out < (_Float16)maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,365 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_f32.c
* Description: Maximum value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@defgroup Max Maximum
Computes the maximum value of an array of data.
The function returns both the maximum value and its position within the array.
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
uint32_t blkCnt;
f32x4_t vecSrc;
f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN);
float32_t maxValue = F32_MIN;
uint32_t idx = blockSize;
uint32x4_t indexVec;
uint32x4_t curExtremIdxVec;
uint32_t curIdx = 0;
mve_pred16_t p0;
float32_t tmp;
indexVec = vidupq_wb_u32(&curIdx, 1);
curExtremIdxVec = vdupq_n_u32(0);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_f32(pSrc);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = vidupq_wb_u32(&curIdx, 1);
pSrc += 4;
/* Decrement the loop counter */
blkCnt--;
}
/*
* Get max value across the vector
*/
maxValue = vmaxnmvq(maxValue, curExtremValVec);
/*
* set index for lower values to max possible index
*/
p0 = vcmpgeq(curExtremValVec, maxValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
/*
* Get min index which is thus for a max value
*/
idx = vminvq(idx, indexVec);
/* Tail */
blkCnt = blockSize & 0x3;
while (blkCnt > 0U)
{
/* Initialize tmp to the next consecutive values one by one */
tmp = *pSrc++;
/* compare for the maximum value */
if (maxValue < tmp)
{
/* Update the maximum value and it's index */
maxValue = tmp;
idx = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/*
* Save result
*/
*pIndex = idx;
*pResult = maxValue;
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
float32_t maxVal1, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* loop counter */
float32x4_t outV, srcV;
float32x2_t outV2;
uint32x4_t idxV;
uint32x4_t maxIdx;
static const uint32_t indexInit[4]={4,5,6,7};
static const uint32_t countVInit[4]={0,1,2,3};
uint32x4_t index;
uint32x4_t delta;
uint32x4_t countV;
uint32x2_t countV2;
maxIdx = vdupq_n_u32(ULONG_MAX);
delta = vdupq_n_u32(4);
index = vld1q_u32(indexInit);
countV = vld1q_u32(countVInit);
/* Initialise the index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparison */
if (blockSize <= 3)
{
out = *pSrc++;
blkCnt = blockSize - 1;
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal1 = *pSrc++;
/* compare for the maximum value */
if (out < maxVal1)
{
/* Update the maximum value and it's index */
out = maxVal1;
outIndex = blockSize - blkCnt;
}
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
outV = vld1q_f32(pSrc);
pSrc += 4;
/* Compute 4 outputs at a time */
blkCnt = (blockSize - 4 ) >> 2U;
while (blkCnt > 0U)
{
srcV = vld1q_f32(pSrc);
pSrc += 4;
idxV = vcgtq_f32(srcV, outV);
outV = vbslq_f32(idxV, srcV, outV );
countV = vbslq_u32(idxV, index,countV );
index = vaddq_u32(index,delta);
/* Decrement the loop counter */
blkCnt--;
}
outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV));
outV2 = vpmax_f32(outV2,outV2);
out = vget_lane_f32(outV2, 0);
idxV = vceqq_f32(outV, vdupq_n_f32(out));
countV = vbslq_u32(idxV, countV,maxIdx);
countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
countV2 = vpmin_u32(countV2,countV2);
outIndex = vget_lane_u32(countV2,0);
/* if (blockSize - 1U) is not multiple of 4 */
blkCnt = (blockSize - 4 ) % 4U;
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal1 = *pSrc++;
/* compare for the maximum value */
if (out < maxVal1)
{
/* Update the maximum value and it's index */
out = maxVal1;
outIndex = blockSize - blkCnt ;
}
/* Decrement the loop counter */
blkCnt--;
}
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#else
void arm_max_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
float32_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize maxVal to next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = index + 1U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 2U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 3U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,90 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_f64.c
* Description: Maximum value of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
void arm_max_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult,
uint32_t * pIndex)
{
float64_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
/**
@} end of Max group
*/

View File

@@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_f16.c
* Description: Maximum value of a floating-point vector without returning the index
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_no_idx_f16(
const float16_t *pSrc,
uint32_t blockSize,
float16_t *pResult)
{
f16x8_t vecSrc;
f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN);
float16_t maxValue = F16_MIN;
float16_t newVal;
uint32_t blkCnt;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 3U;
while (blkCnt > 0U)
{
vecSrc = vldrhq_f16(pSrc);
/*
* update per-lane max.
*/
curExtremValVec = vmaxnmq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pSrc += 8;
blkCnt --;
}
/*
* Get max value across the vector
*/
maxValue = vmaxnmvq(maxValue, curExtremValVec);
blkCnt = blockSize & 7;
while (blkCnt > 0U)
{
newVal = *pSrc++;
/* compare for the maximum value */
if ((_Float16)maxValue < (_Float16)newVal)
{
/* Update the maximum value and it's index */
maxValue = newVal;
}
blkCnt --;
}
*pResult = maxValue;
}
#else
void arm_max_no_idx_f16(
const float16_t *pSrc,
uint32_t blockSize,
float16_t *pResult)
{
float16_t maxValue = F16_MIN;
float16_t newVal;
while (blockSize > 0U)
{
newVal = *pSrc++;
/* compare for the maximum value */
if ((_Float16)maxValue < (_Float16)newVal)
{
/* Update the maximum value and it's index */
maxValue = newVal;
}
blockSize --;
}
*pResult = maxValue;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,138 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_f32.c
* Description: Maximum value of a floating-point vector without returning the index
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_no_idx_f32(
const float32_t *pSrc,
uint32_t blockSize,
float32_t *pResult)
{
f32x4_t vecSrc;
f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN);
float32_t maxValue = F32_MIN;
float32_t newVal;
uint32_t blkCnt;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_f32(pSrc);
/*
* update per-lane max.
*/
curExtremValVec = vmaxnmq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pSrc += 4;
blkCnt --;
}
/*
* Get max value across the vector
*/
maxValue = vmaxnmvq(maxValue, curExtremValVec);
blkCnt = blockSize & 3;
while (blkCnt > 0U)
{
newVal = *pSrc++;
/* compare for the maximum value */
if (maxValue < newVal)
{
/* Update the maximum value and it's index */
maxValue = newVal;
}
blkCnt --;
}
*pResult = maxValue;
}
#else
void arm_max_no_idx_f32(
const float32_t *pSrc,
uint32_t blockSize,
float32_t *pResult)
{
float32_t maxValue = F32_MIN;
float32_t newVal;
while (blockSize > 0U)
{
newVal = *pSrc++;
/* compare for the maximum value */
if (maxValue < newVal)
{
/* Update the maximum value and it's index */
maxValue = newVal;
}
blockSize --;
}
*pResult = maxValue;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_f64.c
* Description: Maximum value of a floating-point vector without returning the index
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
void arm_max_no_idx_f64(
const float64_t *pSrc,
uint32_t blockSize,
float64_t *pResult)
{
float64_t maxValue = F64_MIN;
float64_t newVal;
while (blockSize > 0U)
{
newVal = *pSrc++;
/* compare for the maximum value */
if (maxValue < newVal)
{
/* Update the maximum value and it's index */
maxValue = newVal;
}
blockSize --;
}
*pResult = maxValue;
}
/**
@} end of Max group
*/

View File

@@ -0,0 +1,142 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_q15.c
* Description: Maximum value of a q15 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a q15 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_max_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
int32_t blkCnt; /* loop counters */
q15x8_t vecSrc;
q15_t const *pSrcVec;
q15x8_t curExtremValVec = vdupq_n_s16(Q15_MIN);
q15_t maxValue = Q15_MIN;
mve_pred16_t p0;
pSrcVec = (q15_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
/*
* update per-lane max.
*/
curExtremValVec = vmaxq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
p0 = vctp16q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxvq(maxValue, curExtremValVec);
*pResult = maxValue;
}
#else
void arm_max_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
q15_t maxVal1, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* loop counter */
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal1 = *pSrc++;
/* compare for the maximum value */
if (out < maxVal1)
{
/* Update the maximum value */
out = maxVal1;
}
/* Decrement the loop counter */
blkCnt--;
}
/* Store the maximum value into destination pointer */
*pResult = out;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,142 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_q31.c
* Description: Maximum value of a q31 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a q31 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_max_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
int32_t blkCnt; /* loop counters */
q31x4_t vecSrc;
q31_t const *pSrcVec;
q31x4_t curExtremValVec = vdupq_n_s32(Q31_MIN);
q31_t maxValue = Q31_MIN;
mve_pred16_t p0;
pSrcVec = (q31_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
/*
* update per-lane max.
*/
curExtremValVec = vmaxq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 3;
if (blkCnt > 0)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
p0 = vctp32q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxvq(maxValue, curExtremValVec);
*pResult = maxValue;
}
#else
void arm_max_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
q31_t maxVal1, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* loop counter */
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal1 = *pSrc++;
/* compare for the maximum value */
if (out < maxVal1)
{
/* Update the maximum value */
out = maxVal1;
}
/* Decrement the loop counter */
blkCnt--;
}
/* Store the maximum value into destination pointer */
*pResult = out;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_q7.c
* Description: Maximum value of a q7 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a q7 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_max_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
int32_t blkCnt; /* loop counters */
q7x16_t vecSrc;
q7_t const *pSrcVec;
q7x16_t curExtremValVec = vdupq_n_s8(Q7_MIN);
q7_t maxValue = Q7_MIN;
mve_pred16_t p0;
pSrcVec = (q7_t const *) pSrc;
blkCnt = blockSize >> 4;
while (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
/*
* update per-lane max.
*/
curExtremValVec = vmaxq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 0xF;
if (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
p0 = vctp8q(blkCnt);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
}
/*
* Get max value across the vector
*/
maxValue = vmaxvq(maxValue, curExtremValVec);
*pResult = maxValue;
}
#else
void arm_max_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
q7_t maxVal1, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* loop counter */
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal1 = *pSrc++;
/* compare for the maximum value */
if (out < maxVal1)
{
/* Update the maximum value */
out = maxVal1;
}
/* Decrement the loop counter */
blkCnt--;
}
/* Store the maximum value into destination pointer */
*pResult = out;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,201 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_q15.c
* Description: Maximum value of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_max_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
q15x8_t extremValVec = vdupq_n_s16(Q15_MIN);
q15_t maxValue = Q15_MIN;
uint16x8_t indexVec;
uint16x8_t extremIdxVec;
mve_pred16_t p0;
uint16_t extremIdxArr[8];
indexVec = vidupq_u16(0U, 1);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp16q(blkCnt);
q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
/* store per-lane extrema indexes */
vst1q_p_u16(extremIdxArr, indexVec, p0);
indexVec += 8;
pSrc += 8;
blkCnt -= 8;
}
while (blkCnt > 0);
/* Get max value across the vector */
maxValue = vmaxvq(maxValue, extremValVec);
/* set index for lower values to max possible index */
p0 = vcmpgeq(extremValVec, maxValue);
extremIdxVec = vld1q_u16(extremIdxArr);
indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
*pIndex = vminvq(blockSize - 1, indexVec);
*pResult = maxValue;
}
#else
void arm_max_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
q15_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize maxVal to next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = index + 1U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 2U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 3U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,202 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_q31.c
* Description: Maximum value of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_max_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
q31x4_t extremValVec = vdupq_n_s32(Q31_MIN);
q31_t maxValue = Q31_MIN;
uint32x4_t indexVec;
uint32x4_t extremIdxVec;
mve_pred16_t p0;
uint32_t extremIdxArr[4];
indexVec = vidupq_u32(0U, 1);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp32q(blkCnt);
q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
/* store per-lane extrema indexes */
vst1q_p_u32(extremIdxArr, indexVec, p0);
indexVec += 4;
pSrc += 4;
blkCnt -= 4;
}
while (blkCnt > 0);
/* Get max value across the vector */
maxValue = vmaxvq(maxValue, extremValVec);
/* set index for lower values to max possible index */
p0 = vcmpgeq(extremValVec, maxValue);
extremIdxVec = vld1q_u32(extremIdxArr);
indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
*pIndex = vminvq(blockSize - 1, indexVec);
*pResult = maxValue;
}
#else
void arm_max_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
q31_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize maxVal to next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = index + 1U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 2U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 3U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,256 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_q7.c
* Description: Maximum value of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
static void arm_small_blk_max_q7(
const q7_t * pSrc,
uint16_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
q7x16_t extremValVec = vdupq_n_s8(Q7_MIN);
q7_t maxValue = Q7_MIN;
uint8x16_t indexVec;
uint8x16_t extremIdxVec;
mve_pred16_t p0;
uint8_t extremIdxArr[16];
indexVec = vidupq_u8(0U, 1);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp8q(blkCnt);
q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p);
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
/* store per-lane extrema indexes */
vst1q_p_u8(extremIdxArr, indexVec, p0);
indexVec += 16;
pSrc += 16;
blkCnt -= 16;
}
while (blkCnt > 0);
/* Get max value across the vector */
maxValue = vmaxvq(maxValue, extremValVec);
/* set index for lower values to max possible index */
p0 = vcmpgeq(extremValVec, maxValue);
extremIdxVec = vld1q_u8(extremIdxArr);
indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
*pIndex = vminvq_u8(blockSize - 1, indexVec);
*pResult = maxValue;
}
void arm_max_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
int32_t totalSize = blockSize;
const uint16_t sub_blk_sz = UINT8_MAX + 1;
if (totalSize <= sub_blk_sz)
{
arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex);
}
else
{
uint32_t curIdx = 0;
q7_t curBlkExtr = Q7_MIN;
uint32_t curBlkPos = 0;
uint32_t curBlkIdx = 0;
/*
* process blocks of 255 elts
*/
while (totalSize >= sub_blk_sz)
{
const q7_t *curSrc = pSrc;
arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex);
if (*pResult > curBlkExtr)
{
/*
* update partial extrema
*/
curBlkExtr = *pResult;
curBlkPos = *pIndex;
curBlkIdx = curIdx;
}
curIdx++;
pSrc += sub_blk_sz;
totalSize -= sub_blk_sz;
}
/*
* remainder
*/
arm_small_blk_max_q7(pSrc, totalSize, pResult, pIndex);
if (*pResult > curBlkExtr)
{
curBlkExtr = *pResult;
curBlkPos = *pIndex;
curBlkIdx = curIdx;
}
*pIndex = curBlkIdx * sub_blk_sz + curBlkPos;
*pResult = curBlkExtr;
}
}
#else
void arm_max_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
q7_t maxVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize maxVal to next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = index + 1U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 2U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 3U;
}
maxVal = *pSrc++;
if (out < maxVal)
{
out = maxVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal = *pSrc++;
/* compare for the maximum value */
if (out < maxVal)
{
/* Update the maximum value and it's index */
out = maxVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,152 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mean_f16.c
* Description: Mean value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupStats
*/
/**
@defgroup mean Mean
Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
The underlying algorithm is used:
<pre>
Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
</pre>
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup mean
@{
*/
/**
@brief Mean value of a floating-point vector.
@param[in] pSrc points to the input vector.
@param[in] blockSize number of samples in input vector.
@param[out] pResult mean value returned here.
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_mean_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
int32_t blkCnt; /* loop counters */
f16x8_t vecSrc;
f16x8_t sumVec = vdupq_n_f16(0.0f16);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp16q(blkCnt);
vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p);
sumVec = vaddq_m_f16(sumVec, sumVec, vecSrc, p);
blkCnt -= 8;
pSrc += 8;
}
while (blkCnt > 0);
*pResult = (_Float16)vecAddAcrossF16Mve(sumVec) / (_Float16) blockSize;
}
#else
void arm_mean_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
float16_t sum = 0.0f; /* Temporary result storage */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += (_Float16)*pSrc++;
sum += (_Float16)*pSrc++;
sum += (_Float16)*pSrc++;
sum += (_Float16)*pSrc++;
/* Decrement the loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += (_Float16)*pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store result to destination */
*pResult = ((_Float16)sum / (_Float16)blockSize);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of mean group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,198 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mean_f32.c
* Description: Mean value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup mean
@{
*/
/**
@brief Mean value of a floating-point vector.
@param[in] pSrc points to the input vector.
@param[in] blockSize number of samples in input vector.
@param[out] pResult mean value returned here.
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_mean_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
uint32_t blkCnt; /* loop counters */
f32x4_t vecSrc;
f32x4_t sumVec = vdupq_n_f32(0.0f);
float32_t sum = 0.0f;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_f32(pSrc);
sumVec = vaddq_f32(sumVec, vecSrc);
blkCnt --;
pSrc += 4;
}
sum = vecAddAcrossF32Mve(sumVec);
/* Tail */
blkCnt = blockSize & 0x3;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
*pResult = sum / (float32_t) blockSize;
}
#else
#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mean_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t sum = 0.0f; /* Temporary result storage */
float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
float32x2_t sumV2;
uint32_t blkCnt; /* Loop counter */
float32x4_t inV;
blkCnt = blockSize >> 2U;
/* Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
inV = vld1q_f32(pSrc);
sumV = vaddq_f32(sumV, inV);
pSrc += 4;
/* Decrement the loop counter */
blkCnt--;
}
sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize & 3;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement the loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store the result to the destination */
*pResult = sum / (float32_t) blockSize;
}
#else
void arm_mean_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
float32_t sum = 0.0f; /* Temporary result storage */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
sum += *pSrc++;
sum += *pSrc++;
sum += *pSrc++;
/* Decrement the loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store result to destination */
*pResult = (sum / blockSize);
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of mean group
*/

View File

@@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mean_f64.c
* Description: Mean value of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup mean
@{
*/
/**
@brief Mean value of a floating-point vector.
@param[in] pSrc points to the input vector.
@param[in] blockSize number of samples in input vector.
@param[out] pResult mean value returned here.
@return none
*/
void arm_mean_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
float64_t sum = 0.; /* Temporary result storage */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store result to destination */
*pResult = (sum / blockSize);
}
/**
@} end of mean group
*/

View File

@@ -0,0 +1,156 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mean_q15.c
* Description: Mean value of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup mean
@{
*/
/**
@brief Mean value of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 32-bit internal accumulator.
The input is represented in 1.15 format and is accumulated in a 32-bit
accumulator in 17.15 format.
There is no risk of internal overflow with this approach, and the
full precision of intermediate result is preserved.
Finally, the accumulator is truncated to yield a result of 1.15 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mean_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q15x8_t vecSrc;
q31_t sum = 0L;
/* Compute 8 outputs at a time */
blkCnt = blockSize >> 3U;
while (blkCnt > 0U)
{
vecSrc = vldrhq_s16(pSrc);
/*
* sum lanes
*/
sum = vaddvaq(sum, vecSrc);
blkCnt--;
pSrc += 8;
}
/* Tail */
blkCnt = blockSize & 0x7;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store the result to the destination */
*pResult = (q15_t) (sum / (int32_t) blockSize);
}
#else
void arm_mean_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q31_t sum = 0; /* Temporary result storage */
#if defined (ARM_MATH_LOOPUNROLL)
q31_t in;
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
in = read_q15x2_ia (&pSrc);
sum += ((in << 16U) >> 16U);
sum += (in >> 16U);
in = read_q15x2_ia (&pSrc);
sum += ((in << 16U) >> 16U);
sum += (in >> 16U);
/* Decrement the loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store result to destination */
*pResult = (q15_t) (sum / (int32_t) blockSize);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of mean group
*/

View File

@@ -0,0 +1,149 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mean_q31.c
* Description: Mean value of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup mean
@{
*/
/**
@brief Mean value of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 64-bit internal accumulator.
The input is represented in 1.31 format and is accumulated in a 64-bit
accumulator in 33.31 format.
There is no risk of internal overflow with this approach, and the
full precision of intermediate result is preserved.
Finally, the accumulator is truncated to yield a result of 1.31 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mean_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q31x4_t vecSrc;
q63_t sum = 0LL;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_s32(pSrc);
/*
* sum lanes
*/
sum = vaddlvaq(sum, vecSrc);
blkCnt --;
pSrc += 4;
}
/* Tail */
blkCnt = blockSize & 0x3;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
blkCnt --;
}
*pResult = arm_div_q63_to_q31(sum, blockSize);
}
#else
void arm_mean_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary result storage */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
sum += *pSrc++;
sum += *pSrc++;
sum += *pSrc++;
/* Decrement the loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store result to destination */
*pResult = (q31_t) (sum / blockSize);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of mean group
*/

View File

@@ -0,0 +1,153 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mean_q7.c
* Description: Mean value of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup mean
@{
*/
/**
@brief Mean value of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 32-bit internal accumulator.
The input is represented in 1.7 format and is accumulated in a 32-bit
accumulator in 25.7 format.
There is no risk of internal overflow with this approach, and the
full precision of intermediate result is preserved.
Finally, the accumulator is truncated to yield a result of 1.7 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mean_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q7x16_t vecSrc;
q31_t sum = 0L;
blkCnt = blockSize >> 4;
while (blkCnt > 0U)
{
vecSrc = vldrbq_s8(pSrc);
/*
* sum lanes
*/
sum = vaddvaq(sum, vecSrc);
blkCnt--;
pSrc += 16;
}
blkCnt = blockSize & 0xF;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store the result to the destination */
*pResult = (q7_t) (sum / (int32_t) blockSize);
}
#else
void arm_mean_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q31_t sum = 0; /* Temporary result storage */
#if defined (ARM_MATH_LOOPUNROLL)
q31_t in;
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
in = read_q7x4_ia (&pSrc);
sum += ((in << 24U) >> 24U);
sum += ((in << 16U) >> 24U);
sum += ((in << 8U) >> 24U);
sum += (in >> 24U);
/* Decrement the loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
/* Store result to destination */
*pResult = (q7_t) (sum / (int32_t) blockSize);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of mean group
*/

View File

@@ -0,0 +1,240 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_f16.c
* Description: Minimum value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_min_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
f16x8_t vecSrc;
float16_t const *pSrcVec;
f16x8_t curExtremValVec = vdupq_n_f16(F16_MAX);
float16_t minValue = F16_MAX;
uint32_t idx = blockSize;
uint16x8_t indexVec;
uint16x8_t curExtremIdxVec;
mve_pred16_t p0;
indexVec = vidupq_u16((uint32_t)0, 1);
curExtremIdxVec = vdupq_n_u16(0);
pSrcVec = (float16_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0)
{
vecSrc = vldrhq_f16(pSrcVec); pSrcVec += 8;
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = indexVec + 8;
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0)
{
vecSrc = vldrhq_f16(pSrcVec); pSrcVec += 8;
p0 = vctp16q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminnmvq(minValue, curExtremValVec);
/*
* set index for lower values to min possible index
*/
p0 = vcmpleq(curExtremValVec, minValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
/*
* Get min index which is thus for a min value
*/
idx = vminvq(idx, indexVec);
/*
* Save result
*/
*pIndex = idx;
*pResult = minValue;
}
#else
void arm_min_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult,
uint32_t * pIndex)
{
float16_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize minVal to next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if ((_Float16)out > (_Float16)minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = index + 1U;
}
minVal = *pSrc++;
if ((_Float16)out > (_Float16)minVal)
{
out = minVal;
outIndex = index + 2U;
}
minVal = *pSrc++;
if ((_Float16)out > (_Float16)minVal)
{
out = minVal;
outIndex = index + 3U;
}
minVal = *pSrc++;
if ((_Float16)out > (_Float16)minVal)
{
out = minVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if ((_Float16)out > (_Float16)minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Min group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,363 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_f32.c
* Description: Minimum value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@defgroup Min Minimum
Computes the minimum value of an array of data.
The function returns both the minimum value and its position within the array.
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_min_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
uint32_t blkCnt; /* loop counters */
f32x4_t vecSrc;
float32_t const *pSrcVec;
f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX);
float32_t minValue = F32_MAX;
uint32_t idx = blockSize;
uint32x4_t indexVec;
uint32x4_t curExtremIdxVec;
float32_t tmp;
mve_pred16_t p0;
indexVec = vidupq_u32((uint32_t)0, 1);
curExtremIdxVec = vdupq_n_u32(0);
pSrcVec = (float32_t const *) pSrc;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_f32(pSrcVec);
pSrcVec += 4;
/*
* Get current max per lane and current index per lane
* when a max is selected
*/
p0 = vcmpleq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = indexVec + 4;
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* Get min value across the vector
*/
minValue = vminnmvq(minValue, curExtremValVec);
/*
* set index for lower values to max possible index
*/
p0 = vcmpleq(curExtremValVec, minValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
/*
* Get min index which is thus for a max value
*/
idx = vminvq(idx, indexVec);
/*
* tail
*/
blkCnt = blockSize & 0x3;
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
tmp = *pSrc++;
/* compare for the minimum value */
if (minValue > tmp)
{
/* Update the minimum value and it's index */
minValue = tmp;
idx = blockSize - blkCnt;
}
blkCnt--;
}
/*
* Save result
*/
*pIndex = idx;
*pResult = minValue;
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_min_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
float32_t maxVal1, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* loop counter */
float32x4_t outV, srcV;
float32x2_t outV2;
uint32x4_t idxV;
static const uint32_t indexInit[4]={4,5,6,7};
static const uint32_t countVInit[4]={0,1,2,3};
uint32x4_t maxIdx;
uint32x4_t index;
uint32x4_t delta;
uint32x4_t countV;
uint32x2_t countV2;
maxIdx = vdupq_n_u32(ULONG_MAX);
delta = vdupq_n_u32(4);
index = vld1q_u32(indexInit);
countV = vld1q_u32(countVInit);
/* Initialise the index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparison */
if (blockSize <= 3)
{
out = *pSrc++;
blkCnt = blockSize - 1;
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal1 = *pSrc++;
/* compare for the maximum value */
if (out > maxVal1)
{
/* Update the maximum value and it's index */
out = maxVal1;
outIndex = blockSize - blkCnt;
}
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
outV = vld1q_f32(pSrc);
pSrc += 4;
/* Compute 4 outputs at a time */
blkCnt = (blockSize - 4 ) >> 2U;
while (blkCnt > 0U)
{
srcV = vld1q_f32(pSrc);
pSrc += 4;
idxV = vcltq_f32(srcV, outV);
outV = vbslq_f32(idxV, srcV, outV );
countV = vbslq_u32(idxV, index,countV );
index = vaddq_u32(index,delta);
/* Decrement the loop counter */
blkCnt--;
}
outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV));
outV2 = vpmin_f32(outV2,outV2);
out = vget_lane_f32(outV2,0);
idxV = vceqq_f32(outV, vdupq_n_f32(out));
countV = vbslq_u32(idxV, countV,maxIdx);
countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
countV2 = vpmin_u32(countV2,countV2);
outIndex = vget_lane_u32(countV2,0);
/* if (blockSize - 1U) is not multiple of 4 */
blkCnt = (blockSize - 4 ) % 4U;
while (blkCnt > 0U)
{
/* Initialize maxVal to the next consecutive values one by one */
maxVal1 = *pSrc++;
/* compare for the maximum value */
if (out > maxVal1)
{
/* Update the maximum value and it's index */
out = maxVal1;
outIndex = blockSize - blkCnt ;
}
/* Decrement the loop counter */
blkCnt--;
}
}
/* Store the maximum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#else
void arm_min_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult,
uint32_t * pIndex)
{
float32_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize minVal to next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = index + 1U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 2U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 3U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Min group
*/

View File

@@ -0,0 +1,90 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_f64.c
* Description: Minimum value of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
void arm_min_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult,
uint32_t * pIndex)
{
float64_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
/**
@} end of Min group
*/

View File

@@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_no_idx_f16.c
* Description: Minimum value of a floating-point vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_min_no_idx_f16(
const float16_t *pSrc,
uint32_t blockSize,
float16_t *pResult)
{
f16x8_t vecSrc;
f16x8_t curExtremValVec = vdupq_n_f16(F16_MAX);
float16_t minValue = F16_MAX;
float16_t newVal;
uint32_t blkCnt;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 3U;
while (blkCnt > 0U)
{
vecSrc = vldrhq_f16(pSrc);
/*
* update per-lane min.
*/
curExtremValVec = vminnmq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pSrc += 8;
blkCnt --;
}
/*
* Get min value across the vector
*/
minValue = vminnmvq(minValue, curExtremValVec);
blkCnt = blockSize & 7;
while (blkCnt > 0U)
{
newVal = *pSrc++;
/* compare for the minimum value */
if ((_Float16)minValue > (_Float16)newVal)
{
/* Update the minimum value and it's index */
minValue = newVal;
}
blkCnt --;
}
*pResult = minValue;
}
#else
void arm_min_no_idx_f16(
const float16_t *pSrc,
uint32_t blockSize,
float16_t *pResult)
{
float16_t minValue = F16_MAX;
float16_t newVal;
while (blockSize > 0U)
{
newVal = *pSrc++;
/* compare for the minimum value */
if ((_Float16)minValue > (_Float16)newVal)
{
/* Update the minimum value and it's index */
minValue = newVal;
}
blockSize --;
}
*pResult = minValue;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Min group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,138 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_no_idx_f32.c
* Description: Minimum value of a floating-point vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_min_no_idx_f32(
const float32_t *pSrc,
uint32_t blockSize,
float32_t *pResult)
{
f32x4_t vecSrc;
f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX);
float32_t minValue = F32_MAX;
float32_t newVal;
uint32_t blkCnt;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_f32(pSrc);
/*
* update per-lane min.
*/
curExtremValVec = vminnmq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
* Advance vector source and destination pointers
*/
pSrc += 4;
blkCnt --;
}
/*
* Get min value across the vector
*/
minValue = vminnmvq(minValue, curExtremValVec);
blkCnt = blockSize & 3;
while (blkCnt > 0U)
{
newVal = *pSrc++;
/* compare for the minimum value */
if (minValue > newVal)
{
/* Update the minimum value and it's index */
minValue = newVal;
}
blkCnt --;
}
*pResult = minValue;
}
#else
void arm_min_no_idx_f32(
const float32_t *pSrc,
uint32_t blockSize,
float32_t *pResult)
{
float32_t minValue = F32_MAX;
float32_t newVal;
while (blockSize > 0U)
{
newVal = *pSrc++;
/* compare for the minimum value */
if (minValue > newVal)
{
/* Update the minimum value and it's index */
minValue = newVal;
}
blockSize --;
}
*pResult = minValue;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Min group
*/

View File

@@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_no_idx_f64.c
* Description: Maximum value of a floating-point vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
void arm_min_no_idx_f64(
const float64_t *pSrc,
uint32_t blockSize,
float64_t *pResult)
{
float64_t minValue = F64_MAX;
float64_t newVal;
while (blockSize > 0U)
{
newVal = *pSrc++;
/* compare for the minimum value */
if (minValue > newVal)
{
/* Update the minimum value and it's index */
minValue = newVal;
}
blockSize --;
}
*pResult = minValue;
}
/**
@} end of Min group
*/

View File

@@ -0,0 +1,142 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_no_idx_q15.c
* Description: Minimum value of a q15 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a q15 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_min_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
int32_t blkCnt; /* loop counters */
q15x8_t vecSrc;
q15_t const *pSrcVec;
q15x8_t curExtremValVec = vdupq_n_s16(Q15_MAX);
q15_t minValue = Q15_MAX;
mve_pred16_t p0;
pSrcVec = (q15_t const *) pSrc;
blkCnt = blockSize >> 3;
while (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
/*
* update per-lane min.
*/
curExtremValVec = vminq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 7;
if (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 8;
p0 = vctp16q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminvq(minValue, curExtremValVec);
*pResult = minValue;
}
#else
void arm_min_no_idx_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
q15_t minVal1, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* loop counter */
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal1 = *pSrc++;
/* compare for the minimum value */
if (out > minVal1)
{
/* Update the minimum value */
out = minVal1;
}
/* Decrement the loop counter */
blkCnt--;
}
/* Store the minimum value into destination pointer */
*pResult = out;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Min group
*/

View File

@@ -0,0 +1,141 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_no_idx_q31.c
* Description: Minimum value of a q31 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a q31 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_min_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
int32_t blkCnt; /* loop counters */
q31x4_t vecSrc;
q31_t const *pSrcVec;
q31x4_t curExtremValVec = vdupq_n_s32(Q31_MAX);
q31_t minValue = Q31_MAX;
mve_pred16_t p0;
pSrcVec = (q31_t const *) pSrc;
blkCnt = blockSize >> 2;
while (blkCnt > 0)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
/*
* update per-lane min.
*/
curExtremValVec = vminq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 3;
if (blkCnt > 0)
{
vecSrc = vldrwq_s32(pSrcVec);
pSrcVec += 4;
p0 = vctp32q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminvq(minValue, curExtremValVec);
*pResult = minValue;
}
#else
void arm_min_no_idx_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
q31_t minVal1, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* loop counter */
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal1 = *pSrc++;
/* compare for the minimum value */
if (out > minVal1)
{
/* Update the minimum value */
out = minVal1;
}
/* Decrement the loop counter */
blkCnt--;
}
/* Store the minimum value into destination pointer */
*pResult = out;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Min group
*/

View File

@@ -0,0 +1,141 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_no_idx_q7.c
* Description: Minimum value of a q7 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a q7 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_min_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
int32_t blkCnt; /* loop counters */
q7x16_t vecSrc;
q7_t const *pSrcVec;
q7x16_t curExtremValVec = vdupq_n_s8(Q7_MAX);
q7_t minValue = Q7_MAX;
mve_pred16_t p0;
pSrcVec = (q7_t const *) pSrc;
blkCnt = blockSize >> 4;
while (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
/*
* update per-lane min.
*/
curExtremValVec = vminq(vecSrc, curExtremValVec);
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* tail
* (will be merged thru tail predication)
*/
blkCnt = blockSize & 0xF;
if (blkCnt > 0)
{
vecSrc = vld1q(pSrcVec);
pSrcVec += 16;
p0 = vctp8q(blkCnt);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
}
/*
* Get min value across the vector
*/
minValue = vminvq(minValue, curExtremValVec);
*pResult = minValue;
}
#else
void arm_min_no_idx_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult)
{
q7_t minVal1, out; /* Temporary variables to store the output value. */
uint32_t blkCnt; /* loop counter */
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
blkCnt = (blockSize - 1U);
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal1 = *pSrc++;
/* compare for the minimum value */
if (out > minVal1)
{
/* Update the minimum value */
out = minVal1;
}
/* Decrement the loop counter */
blkCnt--;
}
/* Store the minimum value into destination pointer */
*pResult = out;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Min group
*/

View File

@@ -0,0 +1,203 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_q15.c
* Description: Minimum value of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_min_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
q15x8_t extremValVec = vdupq_n_s16(Q15_MAX);
q15_t minValue = Q15_MAX;
uint16x8_t indexVec;
uint16x8_t extremIdxVec;
mve_pred16_t p0;
uint16_t extremIdxArr[8];
indexVec = vidupq_u16(0U, 1);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp16q(blkCnt);
q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq_m(extremIdxVal, extremValVec, p);
extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
/* store per-lane extrema indexes */
vst1q_p_u16(extremIdxArr, indexVec, p0);
indexVec += 8;
pSrc += 8;
blkCnt -= 8;
}
while (blkCnt > 0);
/* Get min value across the vector */
minValue = vminvq(minValue, extremValVec);
/* set index for lower values to min possible index */
p0 = vcmpleq(extremValVec, minValue);
extremIdxVec = vld1q_u16(extremIdxArr);
indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
*pIndex = vminvq(blockSize - 1, indexVec);
*pResult = minValue;
}
#else
void arm_min_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult,
uint32_t * pIndex)
{
q15_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize minVal to next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = index + 1U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 2U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 3U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Min group
*/

View File

@@ -0,0 +1,203 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_q31.c
* Description: Minimum value of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_min_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
int32_t blkCnt; /* loop counters */
q31x4_t extremValVec = vdupq_n_s32(Q31_MAX);
q31_t minValue = Q31_MAX;
uint32x4_t indexVec;
uint32x4_t extremIdxVec;
mve_pred16_t p0;
uint32_t extremIdxArr[4];
indexVec = vidupq_u32(0U, 1);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp32q(blkCnt);
q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p);
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq_m(extremIdxVal, extremValVec, p);
extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
/* store per-lane extrema indexes */
vst1q_p_u32(extremIdxArr, indexVec, p0);
indexVec += 4;
pSrc += 4;
blkCnt -= 4;
}
while (blkCnt > 0);
/* Get min value across the vector */
minValue = vminvq(minValue, extremValVec);
/* set index for lower values to min possible index */
p0 = vcmpleq(extremValVec, minValue);
extremIdxVec = vld1q_u32(extremIdxArr);
indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
*pIndex = vminvq(blockSize - 1, indexVec);
*pResult = minValue;
}
#else
void arm_min_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult,
uint32_t * pIndex)
{
q31_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize minVal to next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = index + 1U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 2U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 3U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Min group
*/

View File

@@ -0,0 +1,284 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_min_q7.c
* Description: Minimum value of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Min
@{
*/
/**
@brief Minimum value of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
static void arm_small_blk_min_q7(
const q7_t * pSrc,
uint8_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
uint32_t blkCnt; /* loop counters */
q7x16_t vecSrc;
q7x16_t curExtremValVec = vdupq_n_s8(Q7_MAX);
q7_t minValue = Q7_MAX,temp;
uint32_t idx = blockSize;
uint8x16_t indexVec;
uint8x16_t curExtremIdxVec;
mve_pred16_t p0;
indexVec = vidupq_u8((uint32_t)0, 1);
curExtremIdxVec = vdupq_n_u8(0);
blkCnt = blockSize >> 4;
while (blkCnt > 0U)
{
vecSrc = vldrbq_s8(pSrc);
pSrc += 16;
/*
* Get current min per lane and current index per lane
* when a min is selected
*/
p0 = vcmpleq(vecSrc, curExtremValVec);
curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
indexVec = indexVec + 16;
/*
* Decrement the blockSize loop counter
*/
blkCnt--;
}
/*
* Get min value across the vector
*/
minValue = vminvq(minValue, curExtremValVec);
/*
* set index for lower values to min possible index
*/
p0 = vcmpleq(curExtremValVec, minValue);
indexVec = vpselq(curExtremIdxVec, vdupq_n_u8(blockSize), p0);
/*
* Get min index which is thus for a min value
*/
idx = vminvq(idx, indexVec);
blkCnt = blockSize & 0xF;
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
temp = *pSrc++;
/* compare for the minimum value */
if (minValue > temp)
{
/* Update the minimum value and it's index */
minValue = temp;
idx = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/*
* Save result
*/
*pIndex = idx;
*pResult = minValue;
}
void arm_min_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
int32_t totalSize = blockSize;
if (totalSize <= UINT8_MAX)
{
arm_small_blk_min_q7(pSrc, blockSize, pResult, pIndex);
}
else
{
uint32_t curIdx = 0;
q7_t curBlkExtr = Q7_MAX;
uint32_t curBlkPos = 0;
uint32_t curBlkIdx = 0;
/*
* process blocks of 255 elts
*/
while (totalSize >= UINT8_MAX)
{
const q7_t *curSrc = pSrc;
arm_small_blk_min_q7(curSrc, UINT8_MAX, pResult, pIndex);
if (*pResult < curBlkExtr)
{
/*
* update partial extrema
*/
curBlkExtr = *pResult;
curBlkPos = *pIndex;
curBlkIdx = curIdx;
}
curIdx++;
pSrc += UINT8_MAX;
totalSize -= UINT8_MAX;
}
/*
* remainder
*/
arm_small_blk_min_q7(pSrc, totalSize, pResult, pIndex);
if (*pResult < curBlkExtr)
{
curBlkExtr = *pResult;
curBlkPos = *pIndex;
curBlkIdx = curIdx;
}
*pIndex = curBlkIdx * UINT8_MAX + curBlkPos;
*pResult = curBlkExtr;
}
}
#else
void arm_min_q7(
const q7_t * pSrc,
uint32_t blockSize,
q7_t * pResult,
uint32_t * pIndex)
{
q7_t minVal, out; /* Temporary variables to store the output value. */
uint32_t blkCnt, outIndex; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
uint32_t index; /* index of maximum value */
#endif
/* Initialise index value to zero. */
outIndex = 0U;
/* Load first input value that act as reference value for comparision */
out = *pSrc++;
#if defined (ARM_MATH_LOOPUNROLL)
/* Initialise index of maximum value. */
index = 0U;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize - 1U) >> 2U;
while (blkCnt > 0U)
{
/* Initialize minVal to next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = index + 1U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 2U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 3U;
}
minVal = *pSrc++;
if (out > minVal)
{
out = minVal;
outIndex = index + 4U;
}
index += 4U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize - 1U) % 4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = (blockSize - 1U);
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* Initialize minVal to the next consecutive values one by one */
minVal = *pSrc++;
/* compare for the minimum value */
if (out > minVal)
{
/* Update the minimum value and it's index */
out = minVal;
outIndex = blockSize - blkCnt;
}
/* Decrement loop counter */
blkCnt--;
}
/* Store the minimum value and it's index into destination pointers */
*pResult = out;
*pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of Min group
*/

View File

@@ -0,0 +1,203 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f16.c
* Description: Half floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two half floating point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
#if !defined(ARM_MATH_AUTOVECTORIZE)
#if defined(ARM_MATH_MVE_FLOAT16)
#include "arm_helium_utils.h"
void arm_mse_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t * result)
{
float16x8_t vecA, vecB;
float16x8_t vecSum;
uint32_t blkCnt;
_Float16 sum = 0.0f16;
vecSum = vdupq_n_f16(0.0f16);
blkCnt = (blockSize) >> 3;
while (blkCnt > 0U)
{
vecA = vld1q(pSrcA);
pSrcA += 8;
vecB = vld1q(pSrcB);
pSrcB += 8;
vecA = vsubq(vecA, vecB);
vecSum = vfmaq(vecSum, vecA, vecA);
/*
* Decrement the blockSize loop counter
*/
blkCnt --;
}
blkCnt = (blockSize) & 7;
if (blkCnt > 0U)
{
mve_pred16_t p0 = vctp16q(blkCnt);
vecA = vld1q(pSrcA);
vecB = vld1q(pSrcB);
vecA = vsubq(vecA, vecB);
vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
}
sum = vecAddAcrossF16Mve(vecSum);
/* Store result in destination buffer */
*result = (_Float16)sum / (_Float16)blockSize;
}
#endif
#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
#if defined(ARM_FLOAT16_SUPPORTED)
#if (!defined(ARM_MATH_MVE_FLOAT16)) || defined(ARM_MATH_AUTOVECTORIZE)
void arm_mse_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t * result)
{
uint32_t blkCnt; /* Loop counter */
_Float16 inA, inB;
_Float16 sum = 0.0f16; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
blkCnt = (blockSize) >> 3;
while (blkCnt > 0U)
{
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize) & 7;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
inA = *pSrcA++;
inB = *pSrcB++;
inA = (_Float16)inA - (_Float16)inB;
sum += (_Float16)inA * (_Float16)inA;
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer */
*result = (_Float16)sum / (_Float16)blockSize;
}
#endif /* end of test for vector instruction availability */
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
/**
@} end of MSE group
*/

View File

@@ -0,0 +1,247 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f32.c
* Description: Floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two floating point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
#if !defined(ARM_MATH_AUTOVECTORIZE)
#if defined(ARM_MATH_MVEF)
#include "arm_helium_utils.h"
void arm_mse_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
float32x4_t vecA, vecB;
float32x4_t vecSum;
uint32_t blkCnt;
float32_t sum = 0.0f;
vecSum = vdupq_n_f32(0.0f);
/* Compute 4 outputs at a time */
blkCnt = (blockSize) >> 2;
while (blkCnt > 0U)
{
vecA = vld1q(pSrcA);
pSrcA += 4;
vecB = vld1q(pSrcB);
pSrcB += 4;
vecA = vsubq(vecA, vecB);
vecSum = vfmaq(vecSum, vecA, vecA);
/*
* Decrement the blockSize loop counter
*/
blkCnt --;
}
blkCnt = (blockSize) & 3;
if (blkCnt > 0U)
{
mve_pred16_t p0 = vctp32q(blkCnt);
vecA = vld1q(pSrcA);
vecB = vld1q(pSrcB);
vecA = vsubq(vecA, vecB);
vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
}
sum = vecAddAcrossF32Mve(vecSum);
/* Store result in destination buffer */
*result = sum / blockSize;
}
#endif
#if defined(ARM_MATH_NEON)
void arm_mse_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
float32x4_t vecA, vecB;
float32x4_t vecSum;
uint32_t blkCnt;
float32_t inA, inB;
float32_t sum = 0.0f;
vecSum = vdupq_n_f32(0.0f);
#if !defined(__aarch64__)
f32x2_t tmp = vdup_n_f32(0.0f);
#endif
/* Compute 4 outputs at a time */
blkCnt = (blockSize) >> 2;
while (blkCnt > 0U)
{
vecA = vld1q_f32(pSrcA);
pSrcA += 4;
vecB = vld1q_f32(pSrcB);
pSrcB += 4;
vecA = vsubq_f32(vecA, vecB);
vecSum = vfmaq_f32(vecSum, vecA, vecA);
/*
* Decrement the blockSize loop counter
*/
blkCnt --;
}
#if defined(__aarch64__)
sum = vpadds_f32(vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum)));
#else
tmp = vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum));
sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1);
#endif
blkCnt = (blockSize) & 3;
while (blkCnt > 0U)
{
/* Calculate dot product and store result in a temporary buffer. */
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer */
*result = sum / blockSize;
}
#endif
#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
#if (!defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON)) || defined(ARM_MATH_AUTOVECTORIZE)
void arm_mse_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
uint32_t blkCnt; /* Loop counter */
float32_t inA, inB;
float32_t sum = 0.0f; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = (blockSize) >> 2;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0U)
{
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize) & 3;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer */
*result = sum / blockSize;
}
#endif /* end of test for vector instruction availability */
/**
@} end of MSE group
*/

View File

@@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f64.c
* Description: Double floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two double floating point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
void arm_mse_f64(
const float64_t * pSrcA,
const float64_t * pSrcB,
uint32_t blockSize,
float64_t * result)
{
uint32_t blkCnt; /* Loop counter */
float64_t inA, inB;
float64_t sum = 0.0; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
blkCnt = (blockSize) >> 1;
while (blkCnt > 0U)
{
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = (blockSize) & 1;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
inA = *pSrcA++;
inB = *pSrcB++;
inA = inA - inB;
sum += inA * inA;
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer */
*result = sum / blockSize;
}
/**
@} end of MSE group
*/

View File

@@ -0,0 +1,175 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_q15.c
* Description: Mean square error between two Q15 vectors
*
* $Date: 04 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two Q15 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mse_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t blockSize,
q15_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q15x8_t vecSrcA,vecSrcB;
q63_t sum = 0LL;
blkCnt = blockSize >> 3U;
while (blkCnt > 0U)
{
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
vecSrcA = vqsubq(vecSrcA,vecSrcB);
/*
* sum lanes
*/
sum = vmlaldavaq(sum, vecSrcA, vecSrcA);
blkCnt--;
pSrcA += 8;
pSrcB += 8;
}
/*
* tail
*/
blkCnt = blockSize & 7;
if (blkCnt > 0U)
{
mve_pred16_t p0 = vctp16q(blkCnt);
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
vecSrcA = vqsubq(vecSrcA,vecSrcB);
sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
}
*pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
}
#else
void arm_mse_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t blockSize,
q15_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary result storage */
q15_t inA,inB; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
sum += (q63_t)((q31_t) inA * inA);
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
sum += (q63_t)((q31_t) inA * inA);
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
sum += (q63_t)((q31_t) inA * inA);
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
sum += (q63_t)((q31_t) inA * inA);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
sum += (q63_t)((q31_t) inA * inA);
/* Decrement loop counter */
blkCnt--;
}
/* Store result in q15 format */
*pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of MSE group
*/

View File

@@ -0,0 +1,176 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_q31.c
* Description: Mean square error between two Q31 vectors
*
* $Date: 04 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two Q31 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mse_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q31x4_t vecSrcA,vecSrcB;
q63_t sum = 0LL;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
vecSrcA = vqsubq(vecSrcA,vecSrcB);
/*
* sum lanes
*/
sum = vrmlaldavhaq(sum, vecSrcA, vecSrcA);
blkCnt--;
pSrcA += 4;
pSrcB += 4;
}
/*
* tail
*/
blkCnt = blockSize & 3;
if (blkCnt > 0U)
{
mve_pred16_t p0 = vctp32q(blkCnt);
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
vecSrcA = vqsubq(vecSrcA,vecSrcB);
sum = vrmlaldavhaq_p(sum, vecSrcA, vecSrcA, p0);
}
*pResult = (q31_t) ((sum / blockSize)>>21);
}
#else
void arm_mse_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary result storage */
q31_t inA32,inB32; /* Temporary variable to store packed input value */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
inA32 = *pSrcA++ >> 1;
inB32 = *pSrcB++ >> 1;
inA32 = __QSUB(inA32, inB32);
sum += ((q63_t) inA32 * inA32) >> 14U;
inA32 = *pSrcA++ >> 1;
inB32 = *pSrcB++ >> 1;
inA32 = __QSUB(inA32, inB32);
sum += ((q63_t) inA32 * inA32) >> 14U;
inA32 = *pSrcA++ >> 1;
inB32 = *pSrcB++ >> 1;
inA32 = __QSUB(inA32, inB32);
sum += ((q63_t) inA32 * inA32) >> 14U;
inA32 = *pSrcA++ >> 1;
inB32 = *pSrcB++ >> 1;
inA32 = __QSUB(inA32, inB32);
sum += ((q63_t) inA32 * inA32) >> 14U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
inA32 = *pSrcA++ >> 1;
inB32 = *pSrcB++ >> 1;
inA32 = __QSUB(inA32, inB32);
sum += ((q63_t) inA32 * inA32) >> 14U;
/* Decrement loop counter */
blkCnt--;
}
/* Store result in q31 format */
*pResult = (q31_t) ((sum / blockSize)>>15);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of MSE group
*/

View File

@@ -0,0 +1,179 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_q7.c
* Description: Mean square error between two Q7 vectors
*
* $Date: 04 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@defgroup MSE Mean Square Error
Calculates the mean square error between two vectors.
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two Q7 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mse_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
uint32_t blockSize,
q7_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q7x16_t vecSrcA,vecSrcB;
q31_t sum = 0LL;
/* Compute 16 outputs at a time */
blkCnt = blockSize >> 4U;
while (blkCnt > 0U)
{
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
vecSrcA = vqsubq(vecSrcA,vecSrcB);
/*
* sum lanes
*/
sum = vmladavaq(sum, vecSrcA, vecSrcA);
blkCnt--;
pSrcA += 16;
pSrcB += 16;
}
/*
* tail
*/
blkCnt = blockSize & 0xF;
if (blkCnt > 0U)
{
mve_pred16_t p0 = vctp8q(blkCnt);
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
vecSrcA = vqsubq(vecSrcA,vecSrcB);
sum = vmladavaq_p(sum, vecSrcA, vecSrcA, p0);
}
*pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);
}
#else
void arm_mse_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
uint32_t blockSize,
q7_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q31_t sum = 0; /* Temporary result storage */
q7_t inA,inB; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
/* Decrement loop counter */
blkCnt--;
}
/* Store result in q7 format */
*pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of MSE group
*/

View File

@@ -0,0 +1,152 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_power_f16.c
* Description: Sum of the squares of the elements of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupStats
*/
/**
@addtogroup power
@{
*/
/**
@brief Sum of the squares of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult sum of the squares value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_power_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
int32_t blkCnt; /* loop counters */
f16x8_t vecSrc;
f16x8_t sumVec = vdupq_n_f16(0.0f);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp16q(blkCnt);
vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p);
/*
* sum lanes
*/
sumVec = vfmaq_m(sumVec, vecSrc, vecSrc, p);
blkCnt -= 8;
pSrc += 8;
}
while (blkCnt > 0);
*pResult = vecAddAcrossF16Mve(sumVec);
}
#else
void arm_power_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
_Float16 sum = 0.0f16; /* Temporary result storage */
_Float16 in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Store result to destination */
*pResult = sum;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of power group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,229 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_power_f32.c
* Description: Sum of the squares of the elements of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@defgroup power Power
Calculates the sum of the squares of the elements in the input vector.
The underlying algorithm is used:
<pre>
Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
</pre>
There are separate functions for floating point, Q31, Q15, and Q7 data types.
Since the result is not divided by the length, those functions are in fact computing
something which is more an energy than a power.
*/
/**
@addtogroup power
@{
*/
/**
@brief Sum of the squares of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult sum of the squares value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_power_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
uint32_t blkCnt; /* loop counters */
f32x4_t vecSrc;
f32x4_t sumVec = vdupq_n_f32(0.0f);
float32_t sum = 0.0f;
float32_t in;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_f32(pSrc);
/*
* sum lanes
*/
sumVec = vfmaq(sumVec, vecSrc, vecSrc);
blkCnt --;
pSrc += 4;
}
sum = vecAddAcrossF32Mve(sumVec);
/*
* tail
*/
blkCnt = blockSize & 0x3;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
*pResult = sum;
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_power_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t sum = 0.0f; /* accumulator */
float32_t in; /* Temporary variable to store input value */
uint32_t blkCnt; /* loop counter */
float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
float32x2_t sumV2;
float32x4_t inV;
blkCnt = blockSize >> 2U;
/* Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and then store the result in a temporary variable, sum. */
inV = vld1q_f32(pSrc);
sumV = vmlaq_f32(sumV, inV, inV);
pSrc += 4;
/* Decrement the loop counter */
blkCnt--;
}
sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
/* compute power and then store the result in a temporary variable, sum. */
in = *pSrc++;
sum += in * in;
/* Decrement the loop counter */
blkCnt--;
}
/* Store the result to the destination */
*pResult = sum;
}
#else
void arm_power_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
float32_t sum = 0.0f; /* Temporary result storage */
float32_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Store result to destination */
*pResult = sum;
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of power group
*/

View File

@@ -0,0 +1,77 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_power_f64.c
* Description: Sum of the squares of the elements of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup power
@{
*/
/**
@brief Sum of the squares of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult sum of the squares value returned here
@return none
*/
void arm_power_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
float64_t sum = 0.; /* Temporary result storage */
float64_t in; /* Temporary variable to store input value */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Store result to destination */
*pResult = sum;
}
/**
@} end of power group
*/

View File

@@ -0,0 +1,177 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_power_q15.c
* Description: Sum of the squares of the elements of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup power
@{
*/
/**
@brief Sum of the squares of the elements of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult sum of the squares value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 64-bit internal accumulator.
The input is represented in 1.15 format.
Intermediate multiplication yields a 2.30 format, and this
result is added without saturation to a 64-bit accumulator in 34.30 format.
With 33 guard bits in the accumulator, there is no risk of overflow, and the
full precision of the intermediate multiplication is preserved.
Finally, the return result is in 34.30 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_power_q15(
const q15_t * pSrc,
uint32_t blockSize,
q63_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q15x8_t vecSrc;
q63_t sum = 0LL;
q15_t in;
/* Compute 8 outputs at a time */
blkCnt = blockSize >> 3U;
while (blkCnt > 0U)
{
vecSrc = vldrhq_s16(pSrc);
/*
* sum lanes
*/
sum = vmlaldavaq(sum, vecSrc, vecSrc);
blkCnt --;
pSrc += 8;
}
/*
* tail
*/
blkCnt = blockSize & 0x7;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += ((q31_t) in * in);
/* Decrement loop counter */
blkCnt--;
}
*pResult = sum;
}
#else
void arm_power_q15(
const q15_t * pSrc,
uint32_t blockSize,
q63_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary result storage */
q15_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
q31_t in32; /* Temporary variable to store packed input value */
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
in32 = read_q15x2_ia (&pSrc);
sum = __SMLALD(in32, in32, sum);
in32 = read_q15x2_ia (&pSrc);
sum = __SMLALD(in32, in32, sum);
#else
in = *pSrc++;
sum += ((q31_t) in * in);
in = *pSrc++;
sum += ((q31_t) in * in);
in = *pSrc++;
sum += ((q31_t) in * in);
in = *pSrc++;
sum += ((q31_t) in * in);
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += ((q31_t) in * in);
/* Decrement loop counter */
blkCnt--;
}
/* Store result in 34.30 format */
*pResult = sum;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of power group
*/

View File

@@ -0,0 +1,165 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_power_q31.c
* Description: Sum of the squares of the elements of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup power
@{
*/
/**
@brief Sum of the squares of the elements of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult sum of the squares value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 64-bit internal accumulator.
The input is represented in 1.31 format.
Intermediate multiplication yields a 2.62 format, and this
result is truncated to 2.48 format by discarding the lower 14 bits.
The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
With 15 guard bits in the accumulator, there is no risk of overflow, and the
full precision of the intermediate multiplication is preserved.
Finally, the return result is in 16.48 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_power_q31(
const q31_t * pSrc,
uint32_t blockSize,
q63_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q31x4_t vecSrc;
q63_t sum = 0LL;
q31_t in;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_s32(pSrc);
/*
* sum lanes
*/
sum = vrmlaldavhaq(sum, vecSrc, vecSrc);
blkCnt --;
pSrc += 4;
}
/*
* tail
*/
blkCnt = blockSize & 0x3;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += ((q63_t) in * in) >> 8;
/* Decrement loop counter */
blkCnt--;
}
*pResult = asrl(sum, 6);
}
#else
void arm_power_q31(
const q31_t * pSrc,
uint32_t blockSize,
q63_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary result storage */
q31_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and store result in a temporary variable sum, providing 15 guard bits. */
in = *pSrc++;
sum += ((q63_t) in * in) >> 14U;
in = *pSrc++;
sum += ((q63_t) in * in) >> 14U;
in = *pSrc++;
sum += ((q63_t) in * in) >> 14U;
in = *pSrc++;
sum += ((q63_t) in * in) >> 14U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += ((q63_t) in * in) >> 14U;
/* Decrement loop counter */
blkCnt--;
}
/* Store results in 16.48 format */
*pResult = sum;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of power group
*/

View File

@@ -0,0 +1,180 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_power_q7.c
* Description: Sum of the squares of the elements of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup power
@{
*/
/**
@brief Sum of the squares of the elements of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult sum of the squares value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 32-bit internal accumulator.
The input is represented in 1.7 format.
Intermediate multiplication yields a 2.14 format, and this
result is added without saturation to an accumulator in 18.14 format.
With 17 guard bits in the accumulator, there is no risk of overflow, and the
full precision of the intermediate multiplication is preserved.
Finally, the return result is in 18.14 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_power_q7(
const q7_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q7x16_t vecSrc;
q31_t sum = 0LL;
q7_t in;
/* Compute 16 outputs at a time */
blkCnt = blockSize >> 4U;
while (blkCnt > 0U)
{
vecSrc = vldrbq_s8(pSrc);
/*
* sum lanes
*/
sum = vmladavaq(sum, vecSrc, vecSrc);
blkCnt--;
pSrc += 16;
}
/*
* tail
*/
blkCnt = blockSize & 0xF;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += ((q15_t) in * in);
/* Decrement loop counter */
blkCnt--;
}
*pResult = sum;
}
#else
void arm_power_q7(
const q7_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q31_t sum = 0; /* Temporary result storage */
q7_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
q31_t in32; /* Temporary variable to store packed input value */
q31_t in1, in2; /* Temporary variables to store input value */
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
in32 = read_q7x4_ia (&pSrc);
in1 = __SXTB16(__ROR(in32, 8));
in2 = __SXTB16(in32);
/* calculate power and accumulate to accumulator */
sum = __SMLAD(in1, in1, sum);
sum = __SMLAD(in2, in2, sum);
#else
in = *pSrc++;
sum += ((q15_t) in * in);
in = *pSrc++;
sum += ((q15_t) in * in);
in = *pSrc++;
sum += ((q15_t) in * in);
in = *pSrc++;
sum += ((q15_t) in * in);
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
sum += ((q15_t) in * in);
/* Decrement loop counter */
blkCnt--;
}
/* Store result in 18.14 format */
*pResult = sum;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of power group
*/

View File

@@ -0,0 +1,147 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rms_f16.c
* Description: Root mean square value of the elements of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupStats
*/
/**
@defgroup RMS Root mean square (RMS)
Calculates the Root Mean Square of the elements in the input vector.
The underlying algorithm is used:
<pre>
Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
</pre>
There are separate functions for floating point, Q31, and Q15 data types.
*/
/**
@addtogroup RMS
@{
*/
/**
@brief Root Mean Square of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult root mean square value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_rms_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
float16_t pow = 0.0f;
arm_power_f16(pSrc, blockSize, &pow);
/* Compute Rms and store the result in the destination */
arm_sqrt_f16((_Float16)pow / (_Float16) blockSize, pResult);
}
#else
void arm_rms_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
_Float16 sum = 0.0f16; /* Temporary result storage */
_Float16 in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable, sum. */
sum += in * in;
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable. */
sum += ( in * in);
/* Decrement loop counter */
blkCnt--;
}
/* Compute Rms and store result in destination */
arm_sqrt_f16((_Float16)sum / (_Float16) blockSize, pResult);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of RMS group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,192 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rms_f32.c
* Description: Root mean square value of the elements of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@defgroup RMS Root mean square (RMS)
Calculates the Root Mean Square of the elements in the input vector.
The underlying algorithm is used:
<pre>
Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
</pre>
There are separate functions for floating point, Q31, and Q15 data types.
*/
/**
@addtogroup RMS
@{
*/
/**
@brief Root Mean Square of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult root mean square value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_rms_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t pow = 0.0f;
arm_power_f32(pSrc, blockSize, &pow);
/* Compute Rms and store the result in the destination */
arm_sqrt_f32(pow / (float32_t) blockSize, pResult);
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_rms_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t sum = 0.0f; /* accumulator */
float32_t in; /* Temporary variable to store input value */
uint32_t blkCnt; /* loop counter */
float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
float32x2_t sumV2;
float32x4_t inV;
blkCnt = blockSize >> 2U;
/* Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and then store the result in a temporary variable, sum. */
inV = vld1q_f32(pSrc);
sumV = vmlaq_f32(sumV, inV, inV);
pSrc += 4;
/* Decrement the loop counter */
blkCnt--;
}
sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
/* compute power and then store the result in a temporary variable, sum. */
in = *pSrc++;
sum += in * in;
/* Decrement the loop counter */
blkCnt--;
}
/* Compute Rms and store the result in the destination */
arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
}
#else
void arm_rms_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
float32_t sum = 0.0f; /* Temporary result storage */
float32_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable, sum. */
sum += in * in;
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
in = *pSrc++;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable. */
sum += ( in * in);
/* Decrement loop counter */
blkCnt--;
}
/* Compute Rms and store result in destination */
arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of RMS group
*/

View File

@@ -0,0 +1,149 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rms_q15.c
* Description: Root Mean Square of the elements of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup RMS
@{
*/
/**
@brief Root Mean Square of the elements of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult root mean square value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 64-bit internal accumulator.
The input is represented in 1.15 format.
Intermediate multiplication yields a 2.30 format, and this
result is added without saturation to a 64-bit accumulator in 34.30 format.
With 33 guard bits in the accumulator, there is no risk of overflow, and the
full precision of the intermediate multiplication is preserved.
Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
15 bits, and then saturated to yield a result in 1.15 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_rms_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
q63_t pow = 0.0f;
q15_t normalizedPower;
arm_power_q15(pSrc, blockSize, &pow);
normalizedPower=__SSAT((pow / (q63_t) blockSize) >> 15,16);
arm_sqrt_q15(normalizedPower, pResult);
}
#else
void arm_rms_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary result storage */
q15_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
q31_t in32; /* Temporary variable to store input value */
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute sum of squares and store result in a temporary variable. */
#if defined (ARM_MATH_DSP)
in32 = read_q15x2_ia (&pSrc);
sum = __SMLALD(in32, in32, sum);
in32 = read_q15x2_ia (&pSrc);
sum = __SMLALD(in32, in32, sum);
#else
in = *pSrc++;
sum += ((q31_t) in * in);
in = *pSrc++;
sum += ((q31_t) in * in);
in = *pSrc++;
sum += ((q31_t) in * in);
in = *pSrc++;
sum += ((q31_t) in * in);
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable. */
sum += ((q31_t) in * in);
/* Decrement loop counter */
blkCnt--;
}
/* Truncating and saturating the accumulator to 1.15 format */
/* Store result in destination */
arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of RMS group
*/

View File

@@ -0,0 +1,141 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rms_q31.c
* Description: Root Mean Square of the elements of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup RMS
@{
*/
/**
@brief Root Mean Square of the elements of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult root mean square value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using an internal 64-bit accumulator.
The input is represented in 1.31 format, and intermediate multiplication
yields a 2.62 format.
The accumulator maintains full precision of the intermediate multiplication results,
but provides only a single guard bit.
There is no saturation on intermediate additions.
If the accumulator overflows, it wraps around and distorts the result.
In order to avoid overflows completely, the input signal must be scaled down by
log2(blockSize) bits, as a total of blockSize additions are performed internally.
Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_rms_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
q63_t pow = 0.0f;
q31_t normalizedPower;
arm_power_q31(pSrc, blockSize, &pow);
normalizedPower=clip_q63_to_q31((pow / (q63_t) blockSize) >> 17);
arm_sqrt_q31(normalizedPower, pResult);
}
#else
void arm_rms_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
uint64_t sum = 0; /* Temporary result storage (can get never negative. changed type from q63 to uint64 */
q31_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable, sum. */
sum += ((q63_t) in * in);
in = *pSrc++;
sum += ((q63_t) in * in);
in = *pSrc++;
sum += ((q63_t) in * in);
in = *pSrc++;
sum += ((q63_t) in * in);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable. */
sum += ((q63_t) in * in);
/* Decrement loop counter */
blkCnt--;
}
/* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */
/* Compute Rms and store result in destination vector */
arm_sqrt_q31(clip_q63_to_q31((sum / (q63_t) blockSize) >> 31), pResult);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of RMS group
*/

View File

@@ -0,0 +1,67 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_std_f16.c
* Description: Standard deviation of the elements of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupStats
*/
/**
@addtogroup STD
@{
*/
/**
@brief Standard deviation of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult standard deviation value returned here
@return none
*/
void arm_std_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
float16_t var;
arm_var_f16(pSrc,blockSize,&var);
arm_sqrt_f16(var, pResult);
}
/**
@} end of STD group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,83 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_std_f32.c
* Description: Standard deviation of the elements of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@defgroup STD Standard deviation
Calculates the standard deviation of the elements in the input vector.
The float implementation is relying on arm_var_f32 which is using a two-pass algorithm
to avoid problem of numerical instabilities and cancellation errors.
Fixed point versions are using the standard textbook algorithm since the fixed point
numerical behavior is different from the float one.
Algorithm for fixed point versions is summarized below:
<pre>
Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
</pre>
There are separate functions for floating point, Q31, and Q15 data types.
*/
/**
@addtogroup STD
@{
*/
/**
@brief Standard deviation of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult standard deviation value returned here
@return none
*/
void arm_std_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t var;
arm_var_f32(pSrc,blockSize,&var);
arm_sqrt_f32(var, pResult);
}
/**
@} end of STD group
*/

View File

@@ -0,0 +1,59 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_std_f64.c
* Description: Standard deviation of the elements of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup STD
@{
*/
/**
@brief Standard deviation of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult standard deviation value returned here
@return none
*/
void arm_std_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult)
{
float64_t var;
arm_var_f64(pSrc,blockSize,&var);
*pResult = sqrt(var);
}
/**
@} end of STD group
*/

View File

@@ -0,0 +1,173 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_std_q15.c
* Description: Standard deviation of an array of Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup STD
@{
*/
/**
@brief Standard deviation of the elements of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult standard deviation value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 64-bit internal accumulator.
The input is represented in 1.15 format.
Intermediate multiplication yields a 2.30 format, and this
result is added without saturation to a 64-bit accumulator in 34.30 format.
With 33 guard bits in the accumulator, there is no risk of overflow, and the
full precision of the intermediate multiplication is preserved.
Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
15 bits, and then saturated to yield a result in 1.15 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_std_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
q15_t var=0;
arm_var_q15(pSrc, blockSize, &var);
arm_sqrt_q15(var,pResult);
}
#else
void arm_std_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q31_t sum = 0; /* Accumulator */
q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
q63_t sumOfSquares = 0; /* Sum of squares */
q15_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
q31_t in32; /* Temporary variable to store input value */
#endif
if (blockSize <= 1U)
{
*pResult = 0;
return;
}
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
/* Compute sum and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
in32 = read_q15x2_ia (&pSrc);
sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
sum += ((in32 << 16U) >> 16U);
sum += (in32 >> 16U);
in32 = read_q15x2_ia (&pSrc);
sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
sum += ((in32 << 16U) >> 16U);
sum += (in32 >> 16U);
#else
in = *pSrc++;
sumOfSquares += (in * in);
sum += in;
in = *pSrc++;
sumOfSquares += (in * in);
sum += in;
in = *pSrc++;
sumOfSquares += (in * in);
sum += in;
in = *pSrc++;
sumOfSquares += (in * in);
sum += in;
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
sumOfSquares += (in * in);
/* Compute sum and store result in a temporary variable, sum. */
sum += in;
/* Decrement loop counter */
blkCnt--;
}
/* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
/* Compute square of mean */
squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
/* mean of squares minus the square of mean. */
/* Compute standard deviation and store result in destination */
arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of STD group
*/

View File

@@ -0,0 +1,159 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_std_q31.c
* Description: Standard deviation of the elements of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup STD
@{
*/
/**
@brief Standard deviation of the elements of a Q31 vector.
@param[in] pSrc points to the input vector.
@param[in] blockSize number of samples in input vector.
@param[out] pResult standard deviation value returned here.
@return none
@par Scaling and Overflow Behavior
The function is implemented using an internal 64-bit accumulator.
The input is represented in 1.31 format, which is then downshifted by 8 bits
which yields 1.23, and intermediate multiplication yields a 2.46 format.
The accumulator maintains full precision of the intermediate multiplication results,
but provides only a 16 guard bits.
There is no saturation on intermediate additions.
If the accumulator overflows it wraps around and distorts the result.
In order to avoid overflows completely the input signal must be scaled down by
log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
After division, internal variables should be Q18.46
Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_std_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
q31_t var=0;
arm_var_q31(pSrc, blockSize, &var);
arm_sqrt_q31(var, pResult);
}
#else
void arm_std_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Accumulator */
q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
q63_t sumOfSquares = 0; /* Sum of squares */
q31_t in; /* Temporary variable to store input value */
if (blockSize <= 1U)
{
*pResult = 0;
return;
}
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
in = *pSrc++ >> 8U;
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
sumOfSquares += ((q63_t) (in) * (in));
/* Compute sum and store result in a temporary variable, sum. */
sum += in;
in = *pSrc++ >> 8U;
sumOfSquares += ((q63_t) (in) * (in));
sum += in;
in = *pSrc++ >> 8U;
sumOfSquares += ((q63_t) (in) * (in));
sum += in;
in = *pSrc++ >> 8U;
sumOfSquares += ((q63_t) (in) * (in));
sum += in;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
in = *pSrc++ >> 8U;
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
sumOfSquares += ((q63_t) (in) * (in));
/* Compute sum and store result in a temporary variable, sum. */
sum += in;
/* Decrement loop counter */
blkCnt--;
}
/* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
/* Compute square of mean */
squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
/* Compute standard deviation and store result in destination */
arm_sqrt_q31((meanOfSquares - squareOfMean) >> 15U, pResult);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of STD group
*/

View File

@@ -0,0 +1,209 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_var_f16.c
* Description: Variance of the elements of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupStats
*/
/**
@addtogroup variance
@{
*/
/**
@brief Variance of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult variance value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_var_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
int32_t blkCnt; /* loop counters */
f16x8_t vecSrc;
f16x8_t sumVec = vdupq_n_f16(0.0f16);
float16_t fMean;
if (blockSize <= 1U) {
*pResult = 0;
return;
}
arm_mean_f16(pSrc, blockSize, &fMean);
blkCnt = blockSize;
do {
mve_pred16_t p = vctp16q(blkCnt);
vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p);
/*
* sum lanes
*/
vecSrc = vsubq_m(vuninitializedq_f16(), vecSrc, fMean, p);
sumVec = vfmaq_m(sumVec, vecSrc, vecSrc, p);
blkCnt -= 8;
pSrc += 8;
}
while (blkCnt > 0);
/* Variance */
*pResult = (_Float16)vecAddAcrossF16Mve(sumVec) / (_Float16) (blockSize - 1.0f16);
}
#else
void arm_var_f16(
const float16_t * pSrc,
uint32_t blockSize,
float16_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
_Float16 sum = 0.0f; /* Temporary result storage */
_Float16 fSum = 0.0f;
_Float16 fMean, fValue;
const float16_t * pInput = pSrc;
if (blockSize <= 1U)
{
*pResult = 0;
return;
}
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += (_Float16)*pInput++;
sum += (_Float16)*pInput++;
sum += (_Float16)*pInput++;
sum += (_Float16)*pInput++;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += (_Float16)*pInput++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
fMean = (_Float16)sum / (_Float16) blockSize;
pInput = pSrc;
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
fValue = (_Float16)*pInput++ - (_Float16)fMean;
fSum += (_Float16)fValue * (_Float16)fValue;
fValue = (_Float16)*pInput++ - (_Float16)fMean;
fSum += (_Float16)fValue * (_Float16)fValue;
fValue = (_Float16)*pInput++ - (_Float16)fMean;
fSum += (_Float16)fValue * (_Float16)fValue;
fValue = (_Float16)*pInput++ - (_Float16)fMean;
fSum += (_Float16)fValue * (_Float16)fValue;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
fValue = (_Float16)*pInput++ - (_Float16)fMean;
fSum += (_Float16)fValue * (_Float16)fValue;
/* Decrement loop counter */
blkCnt--;
}
/* Variance */
*pResult = (_Float16)fSum / ((_Float16)blockSize - 1.0f16);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of variance group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,293 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_var_f32.c
* Description: Variance of the elements of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@defgroup variance Variance
Calculates the variance of the elements in the input vector.
The underlying algorithm used is the direct method sometimes referred to as the two-pass method:
<pre>
Result = sum(element - meanOfElements)^2) / numElement - 1
meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
</pre>
There are separate functions for floating point, Q31, and Q15 data types.
*/
/**
@addtogroup variance
@{
*/
/**
@brief Variance of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult variance value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_var_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
uint32_t blkCnt; /* loop counters */
f32x4_t vecSrc;
f32x4_t sumVec = vdupq_n_f32(0.0f);
float32_t fMean;
float32_t sum = 0.0f; /* accumulator */
float32_t in; /* Temporary variable to store input value */
if (blockSize <= 1U) {
*pResult = 0;
return;
}
arm_mean_f32(pSrc, blockSize, &fMean);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_f32(pSrc);
/*
* sum lanes
*/
vecSrc = vsubq(vecSrc, fMean);
sumVec = vfmaq(sumVec, vecSrc, vecSrc);
blkCnt --;
pSrc += 4;
}
sum = vecAddAcrossF32Mve(sumVec);
/*
* tail
*/
blkCnt = blockSize & 0x3;
while (blkCnt > 0U)
{
in = *pSrc++ - fMean;
sum += in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Variance */
*pResult = sum / (float32_t) (blockSize - 1);
}
#else
#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_var_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
float32_t mean;
float32_t sum = 0.0f; /* accumulator */
float32_t in; /* Temporary variable to store input value */
uint32_t blkCnt; /* loop counter */
float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
float32x2_t sumV2;
float32x4_t inV;
float32x4_t avg;
arm_mean_f32(pSrc,blockSize,&mean);
avg = vdupq_n_f32(mean);
blkCnt = blockSize >> 2U;
/* Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and then store the result in a temporary variable, sum. */
inV = vld1q_f32(pSrc);
inV = vsubq_f32(inV, avg);
sumV = vmlaq_f32(sumV, inV, inV);
pSrc += 4;
/* Decrement the loop counter */
blkCnt--;
}
sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
/* compute power and then store the result in a temporary variable, sum. */
in = *pSrc++;
in = in - mean;
sum += in * in;
/* Decrement the loop counter */
blkCnt--;
}
/* Variance */
*pResult = sum / (float32_t)(blockSize - 1.0f);
}
#else
void arm_var_f32(
const float32_t * pSrc,
uint32_t blockSize,
float32_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
float32_t sum = 0.0f; /* Temporary result storage */
float32_t fSum = 0.0f;
float32_t fMean, fValue;
const float32_t * pInput = pSrc;
if (blockSize <= 1U)
{
*pResult = 0;
return;
}
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pInput++;
sum += *pInput++;
sum += *pInput++;
sum += *pInput++;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pInput++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
fMean = sum / (float32_t) blockSize;
pInput = pSrc;
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
/* Decrement loop counter */
blkCnt--;
}
/* Variance */
*pResult = fSum / (float32_t)(blockSize - 1.0f);
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of variance group
*/

View File

@@ -0,0 +1,100 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_var_f64.c
* Description: Variance of the elements of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup variance
@{
*/
/**
@brief Variance of the elements of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult variance value returned here
@return none
*/
void arm_var_f64(
const float64_t * pSrc,
uint32_t blockSize,
float64_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
float64_t sum = 0.; /* Temporary result storage */
float64_t fSum = 0.;
float64_t fMean, fValue;
const float64_t * pInput = pSrc;
if (blockSize <= 1U)
{
*pResult = 0;
return;
}
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
sum += *pInput++;
/* Decrement loop counter */
blkCnt--;
}
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
fMean = sum / (float64_t) blockSize;
pInput = pSrc;
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0U)
{
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
/* Decrement loop counter */
blkCnt--;
}
/* Variance */
*pResult = fSum / (float64_t)(blockSize - 1.);
}
/**
@} end of variance group
*/

View File

@@ -0,0 +1,230 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_var_q15.c
* Description: Variance of an array of Q15 type
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup variance
@{
*/
/**
@brief Variance of the elements of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult variance value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 64-bit internal accumulator.
The input is represented in 1.15 format.
Intermediate multiplication yields a 2.30 format, and this
result is added without saturation to a 64-bit accumulator in 34.30 format.
With 33 guard bits in the accumulator, there is no risk of overflow, and the
full precision of the intermediate multiplication is preserved.
Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
15 bits, and then saturated to yield a result in 1.15 format.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_var_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q15x8_t vecSrc;
q63_t sumOfSquares = 0LL;
q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
q63_t sum = 0LL;
q15_t in;
if (blockSize <= 1U) {
*pResult = 0;
return;
}
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
vecSrc = vldrhq_s16(pSrc);
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
/* Compute Sum of squares of the input samples
* and then store the result in a temporary variable, sumOfSquares. */
sumOfSquares = vmlaldavaq_s16(sumOfSquares, vecSrc, vecSrc);
sum = vaddvaq_s16(sum, vecSrc);
blkCnt --;
pSrc += 8;
}
/* Tail */
blkCnt = blockSize & 7;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
#if defined (ARM_MATH_DSP)
sumOfSquares = __SMLALD(in, in, sumOfSquares);
#else
sumOfSquares += (in * in);
#endif /* #if defined (ARM_MATH_DSP) */
/* Compute sum and store result in a temporary variable, sum. */
sum += in;
/* Decrement loop counter */
blkCnt--;
}
/* Compute Mean of squares of the input samples
* and then store the result in a temporary variable, meanOfSquares. */
meanOfSquares = arm_div_q63_to_q31(sumOfSquares, (blockSize - 1U));
/* Compute square of mean */
squareOfMean = arm_div_q63_to_q31((q63_t)sum * sum, (q31_t)(blockSize * (blockSize - 1U)));
/* mean of the squares minus the square of the mean. */
*pResult = (meanOfSquares - squareOfMean) >> 15;
}
#else
void arm_var_q15(
const q15_t * pSrc,
uint32_t blockSize,
q15_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q31_t sum = 0; /* Accumulator */
q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
q63_t sumOfSquares = 0; /* Sum of squares */
q15_t in; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
q31_t in32; /* Temporary variable to store input value */
#endif
if (blockSize <= 1U)
{
*pResult = 0;
return;
}
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
/* Compute sum and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
in32 = read_q15x2_ia (&pSrc);
sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
sum += ((in32 << 16U) >> 16U);
sum += (in32 >> 16U);
in32 = read_q15x2_ia (&pSrc);
sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
sum += ((in32 << 16U) >> 16U);
sum += (in32 >> 16U);
#else
in = *pSrc++;
sumOfSquares += (in * in);
sum += in;
in = *pSrc++;
sumOfSquares += (in * in);
sum += in;
in = *pSrc++;
sumOfSquares += (in * in);
sum += in;
in = *pSrc++;
sumOfSquares += (in * in);
sum += in;
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
in = *pSrc++;
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
#if defined (ARM_MATH_DSP)
sumOfSquares = __SMLALD(in, in, sumOfSquares);
#else
sumOfSquares += (in * in);
#endif /* #if defined (ARM_MATH_DSP) */
/* Compute sum and store result in a temporary variable, sum. */
sum += in;
/* Decrement loop counter */
blkCnt--;
}
/* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
/* Compute square of mean */
squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
/* mean of squares minus the square of mean. */
*pResult = (meanOfSquares - squareOfMean) >> 15U;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of variance group
*/

View File

@@ -0,0 +1,214 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_var_q31.c
* Description: Variance of an array of Q31 type
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup variance
@{
*/
/**
@brief Variance of the elements of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult variance value returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using an internal 64-bit accumulator.
The input is represented in 1.31 format, which is then downshifted by 8 bits
which yields 1.23, and intermediate multiplication yields a 2.46 format.
The accumulator maintains full precision of the intermediate multiplication results,
and as a consequence has only 16 guard bits.
There is no saturation on intermediate additions.
If the accumulator overflows it wraps around and distorts the result.
In order to avoid overflows completely the input signal must be scaled down by
log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
After division, internal variables should be Q18.46
Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_var_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* loop counters */
q31x4_t vecSrc;
q63_t sumOfSquares = 0LL;
q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
q63_t sum = 0LL;
q31_t in;
if (blockSize <= 1U) {
*pResult = 0;
return;
}
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
vecSrc = vldrwq_s32(pSrc);
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
/* Compute Sum of squares of the input samples
* and then store the result in a temporary variable, sumOfSquares. */
/* downscale */
vecSrc = vshrq(vecSrc, 8);
sumOfSquares = vmlaldavaq(sumOfSquares, vecSrc, vecSrc);
sum = vaddlvaq(sum, vecSrc);
blkCnt --;
pSrc += 4;
}
/*
* tail
*/
blkCnt = blockSize & 0x3;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
in = *pSrc++ >> 8U;
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
sumOfSquares += ((q63_t) (in) * (in));
/* Compute sum and store result in a temporary variable, sum. */
sum += in;
/* Decrement loop counter */
blkCnt--;
}
/* Compute Mean of squares of the input samples
* and then store the result in a temporary variable, meanOfSquares. */
meanOfSquares = sumOfSquares / (q63_t) (blockSize - 1U);
/* Compute square of mean */
squareOfMean = sum * sum / (q63_t) (blockSize * (blockSize - 1U));
/* Compute standard deviation and then store the result to the destination */
*pResult = asrl(meanOfSquares - squareOfMean, 15U);
}
#else
void arm_var_q31(
const q31_t * pSrc,
uint32_t blockSize,
q31_t * pResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary result storage */
q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
q63_t sumOfSquares = 0; /* Sum of squares */
q31_t in; /* Temporary variable to store input value */
if (blockSize <= 1U)
{
*pResult = 0;
return;
}
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
in = *pSrc++ >> 8U;
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
sumOfSquares += ((q63_t) (in) * (in));
/* Compute sum and store result in a temporary variable, sum. */
sum += in;
in = *pSrc++ >> 8U;
sumOfSquares += ((q63_t) (in) * (in));
sum += in;
in = *pSrc++ >> 8U;
sumOfSquares += ((q63_t) (in) * (in));
sum += in;
in = *pSrc++ >> 8U;
sumOfSquares += ((q63_t) (in) * (in));
sum += in;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* C = A[0] + A[1] + ... + A[blockSize-1] */
in = *pSrc++ >> 8U;
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
sumOfSquares += ((q63_t) (in) * (in));
/* Compute sum and store result in a temporary variable, sum. */
sum += in;
/* Decrement loop counter */
blkCnt--;
}
/* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
/* Compute square of mean */
squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
/* Compute variance and store result in destination */
*pResult = (meanOfSquares - squareOfMean) >> 15U;
}
#endif
/**
@} end of variance group
*/