Skip to content

Commit

Permalink
AP_HAL_ChibiOS: hardware M4-Cortex and M7-Cortex (and H7) implementat…
Browse files Browse the repository at this point in the history
…ion of HAL FFT abstraction

implements an FFT engine based on the betaflight feature using ARM hardware accelerated CMSIS library
make the FFT feature optional
add dynamic gyro windows
add quinns and candans estimators and record in DSP state
disable DSP for boards with limited flash
calculate power spectrum rather than amplitude
start/analyse version of analysis to support threading
allocate memory in a specific region
constrain window size by CPU class
control inclusion of DSP based on board size
  • Loading branch information
andyp1per authored and tridge committed Feb 22, 2020
1 parent f4a99a1 commit 3d0cf7e
Show file tree
Hide file tree
Showing 12 changed files with 425 additions and 3 deletions.
1 change: 1 addition & 0 deletions libraries/AP_HAL_ChibiOS/AP_HAL_ChibiOS_Namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ namespace ChibiOS {
class AnalogIn;
class AnalogSource;
class DigitalSource;
class DSP;
class GPIO;
class I2CBus;
class I2CDevice;
Expand Down
1 change: 1 addition & 0 deletions libraries/AP_HAL_ChibiOS/AP_HAL_ChibiOS_Private.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
#include "RCOutput.h"
#include "I2CDevice.h"
#include "Flash.h"
#include "DSP.h"
307 changes: 307 additions & 0 deletions libraries/AP_HAL_ChibiOS/DSP.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,307 @@
/*
* This file is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Code by Andy Piper and the betaflight team
*/

#include "AP_HAL_ChibiOS.h"

#if HAL_WITH_DSP

#include <AP_HAL/AP_HAL.h>
#include <AP_Math/AP_Math.h>
#include <GCS_MAVLink/GCS.h>
#include "DSP.h"
#include <cmath>

using namespace ChibiOS;

// Optional per-step profiling helpers; they only expand to code when DEBUG_FFT is set.
#if DEBUG_FFT
// TIMER_START(timer): calls hal.scheduler->disable_interrupts_save(), keeping the returned
// state in a local named istate, then stores AP_HAL::micros() in a local named <timer>now.
// Because it declares locals it can be used at most once per scope.
#define TIMER_START(timer) \
void *istate = hal.scheduler->disable_interrupts_save(); \
uint32_t timer##now = AP_HAL::micros()
// TIMER_END(timer): hands the recorded start time to timer.time() and then passes the saved
// istate back to hal.scheduler->restore_interrupts(). Must follow a TIMER_START in the same scope.
#define TIMER_END(timer) timer.time(timer##now); \
hal.scheduler->restore_interrupts(istate)
#else
// profiling disabled: both macros expand to nothing
#define TIMER_START(timer)
#define TIMER_END(timer)
#endif

// number of StepTimer::time() calls accumulated before an average is produced
#define TICK_CYCLE 10

// HAL instance used by the profiling macros above; defined outside this file
extern const AP_HAL::HAL& hal;

// The algorithms originally came from betaflight but are now substantially modified based on theory and experiment.
// https://holometer.fnal.gov/GH_FFT.pdf "Spectrum and spectral density estimation by the Discrete Fourier transform (DFT),
// including a comprehensive list of window functions and some new flat-top windows." - Heinzel et al. is a great reference
// for understanding the underlying theory although we do not use spectral density here since time resolution is equally
// important as frequency resolution. Referred to as [Heinz] throughout the code.

// Initialize the FFT state machine.
// Creates the ARM-specific window state for the requested window size and sample rate.
// Returns nullptr if the state object itself, or any of its working buffers
// (_hanning_window, _rfft_data, _freq_bins), could not be allocated; otherwise returns
// the new state, which the caller owns.
AP_HAL::DSP::FFTWindowState* DSP::fft_init(uint16_t window_size, uint16_t sample_rate)
{
    DSP::FFTWindowStateARM* fft = new DSP::FFTWindowStateARM(window_size, sample_rate);
    // The state object must be checked before its members are inspected.
    // NOTE(review): assumes operator new reports failure by returning nullptr in this build,
    // in the same way the buffer allocations checked below do - confirm.
    if (fft == nullptr) {
        return nullptr;
    }
    // the constructor leaves the buffers null when their allocation failed
    if (fft->_hanning_window == nullptr || fft->_rfft_data == nullptr || fft->_freq_bins == nullptr) {
        delete fft;
        return nullptr;
    }
    return fft;
}

// Begin an FFT analysis: run the Hanning-window step over the supplied samples.
// state must be the object returned by fft_init().
void DSP::fft_start(AP_HAL::DSP::FFTWindowState* state, const float* samples, uint16_t buffer_index, uint16_t buffer_size)
{
    FFTWindowStateARM* const arm_state = static_cast<FFTWindowStateARM*>(state);
    step_hanning(arm_state, samples, buffer_index, buffer_size);
}

// Run the remaining steps (2 to 6) of an FFT analysis on data prepared by fft_start().
// Returns the value produced by the final step, i.e. the state's _max_energy_bin.
uint16_t DSP::fft_analyse(AP_HAL::DSP::FFTWindowState* state, uint16_t start_bin, uint16_t end_bin, uint8_t harmonics, float noise_att_cutoff)
{
    FFTWindowStateARM* const arm_state = static_cast<FFTWindowStateARM*>(state);

    // steps 2-4: complex FFT, bit reversal, complex-to-real conversion
    step_arm_cfft_f32(arm_state);
    step_bitreversal(arm_state);
    step_stage_rfft_f32(arm_state);

    // step 5: per-bin magnitudes
    step_arm_cmplx_mag_f32(arm_state, start_bin, end_bin, harmonics, noise_att_cutoff);

    // step 6: peak bin and frequency interpolation
    const uint16_t peak_bin = step_calc_frequencies_f32(arm_state, start_bin, end_bin);
    return peak_bin;
}

// Create an instance of the FFT state machine.
// The base class is constructed with the same arguments; this constructor then selects the
// CMSIS real-FFT initializer that matches window_size.
DSP::FFTWindowStateARM::FFTWindowStateARM(uint16_t window_size, uint16_t sample_rate)
    : AP_HAL::DSP::FFTWindowState::FFTWindowState(window_size, sample_rate)
{
    const bool buffers_allocated = (_freq_bins != nullptr)
                                   && (_hanning_window != nullptr)
                                   && (_rfft_data != nullptr);
    if (!buffers_allocated) {
        // warn and bail out; fft_init() detects the null buffers and discards the object
        gcs().send_text(MAV_SEVERITY_WARNING, "Failed to allocate %u bytes for window %u for DSP",
                        unsigned(sizeof(float) * (window_size * 3 + 2)), unsigned(window_size));
        return;
    }

    // Set up the ARM FFT instance using the size-specific initializers.
    // arm_rfft_fast_init_f32() is deliberately avoided because it links in every twiddle
    // table; picking only the sizes we support saves 70k of text space.
    // Window sizes not listed below leave _fft_instance untouched.
    switch (window_size) {
    case 32:
        arm_rfft_32_fast_init_f32(&_fft_instance);
        break;
    case 64:
        arm_rfft_64_fast_init_f32(&_fft_instance);
        break;
    case 128:
        arm_rfft_128_fast_init_f32(&_fft_instance);
        break;
    case 256:
        arm_rfft_256_fast_init_f32(&_fft_instance);
        break;
#if defined(STM32H7)
    // the larger FFT tables are only pulled in on H7
    case 512:
        arm_rfft_512_fast_init_f32(&_fft_instance);
        break;
    case 1024:
        arm_rfft_1024_fast_init_f32(&_fft_instance);
        break;
#endif
    }
}

// nothing ARM-specific to release here
DSP::FFTWindowStateARM::~FFTWindowStateARM() = default;

// Forward declarations of the C-linkage routines that the individual FFT steps below call directly.
// NOTE(review): presumably these are CMSIS-DSP internals that its public header does not declare - confirm.
extern "C" {
// complex-to-real conversion stage, called from step_stage_rfft_f32()
void stage_rfft_f32(arm_rfft_fast_instance_f32 *S, float32_t *p, float32_t *pOut);
// complex FFT kernel used by step_arm_cfft_f32() for 16 and 128 bins
void arm_cfft_radix8by2_f32(arm_cfft_instance_f32 *S, float32_t *p1);
// complex FFT kernel used by step_arm_cfft_f32() for 32 and 256 bins
void arm_cfft_radix8by4_f32(arm_cfft_instance_f32 *S, float32_t *p1);
// complex FFT kernel used by step_arm_cfft_f32() for 64 and 512 bins
void arm_radix8_butterfly_f32(float32_t *pSrc, uint16_t fftLen, const float32_t *pCoef, uint16_t twidCoefModifier);
// bit-reversal reordering, called from step_bitreversal()
void arm_bitreversal_32(uint32_t *pSrc, const uint16_t bitRevLen, const uint16_t *pBitRevTable);
}

// step 1: multiply the incoming samples by the Hanning window, writing the result to _freq_bins.
// Samples are read starting at buffer_index; when buffer_index is non-zero the rest of the
// window is taken from the start of the sample buffer.
void DSP::step_hanning(FFTWindowStateARM* fft, const float* samples, uint16_t buffer_index, uint16_t buffer_size)
{
    TIMER_START(_hanning_timer);
    // 5us
    // the Hanning window starts and ends with 0, so those samples could be skipped for a
    // minor speed improvement

    // number of samples available from buffer_index up to the end of the buffer, capped at one window
    const uint16_t head_len = MIN(buffer_size - buffer_index, fft->_window_size);
    arm_mult_f32(samples + buffer_index, fft->_hanning_window, fft->_freq_bins, head_len);

    if (buffer_index > 0) {
        // fill the rest of the window from the beginning of the sample buffer
        const auto tail_len = fft->_window_size - head_len;
        arm_mult_f32(samples, fft->_hanning_window + head_len, fft->_freq_bins + head_len, tail_len);
    }

    TIMER_END(_hanning_timer);
}

// step 2: the core complex FFT, performed in place on _freq_bins.
// The kernel is chosen from the bin count; bin counts not listed are left unprocessed.
void DSP::step_arm_cfft_f32(FFTWindowStateARM* fft)
{
    arm_cfft_instance_f32* const cfft = &fft->_fft_instance.Sint;
    // the complex FFT runs at half the length of the real FFT
    cfft->fftLen = fft->_fft_instance.fftLenRFFT / 2;

    TIMER_START(_arm_cfft_f32_timer);

    const auto bin_count = fft->_bin_count;
    switch (bin_count) {
    // window 32: 16us (BF); 5us F7, 7us F4, 8us H7
    case 16:
    // window 256: 37us F7, 81us F4, 17us H7
    case 128:
        arm_cfft_radix8by2_f32(cfft, fft->_freq_bins);
        break;

    // window 64: 35us (BF); 10us F7, 24us F4
    case 32:
    // window 512: 66us F7, 174us F4, 37us H7
    case 256:
        arm_cfft_radix8by4_f32(cfft, fft->_freq_bins);
        break;

    // window 128: 70us BF; 21us F7, 34us F4
    case 64:
    // window 1024: 152us F7, 73us H7
    case 512:
        arm_radix8_butterfly_f32(fft->_freq_bins, bin_count, cfft->pTwiddle, 1);
        break;
    }

    TIMER_END(_arm_cfft_f32_timer);
}

// step 3: apply the bit-reversal reordering to the FFT output held in _freq_bins
void DSP::step_bitreversal(FFTWindowStateARM* fft)
{
    TIMER_START(_bitreversal_timer);
    // measured cost, by window size (BF reference: 6us):
    //   32   - 2us F7,  3us F4,  1us H7
    //   64   - 3us F7,  6us F4
    //   128  - 4us F7,  9us F4
    //   256  - 10us F7, 20us F4, 5us H7
    //   512  - 22us F7, 54us F4, 15us H7
    //   1024 - 42us F7, 15us H7
    const arm_cfft_instance_f32& cfft = fft->_fft_instance.Sint;
    uint32_t* const words = reinterpret_cast<uint32_t*>(fft->_freq_bins);
    arm_bitreversal_32(words, cfft.bitRevLength, cfft.pBitRevTable);

    TIMER_END(_bitreversal_timer);
}

// step 4: convert the complex FFT output in _freq_bins into real-FFT data in _rfft_data
void DSP::step_stage_rfft_f32(FFTWindowStateARM* fft)
{
    TIMER_START(_stage_rfft_f32_timer);
    // measured cost, by window size (BF reference: 14us):
    //   32   - 2us F7,  5us F4,  2us H7
    //   64   - 5us F7,  16us F4
    //   128  - 17us F7, 26us F4
    //   256  - 21us F7, 70us F4, 9us H7
    //   512  - 35us F7, 71us F4, 17us H7
    //   1024 - 76us F7, 33us H7
    // the conversion cannot run in place, which is why both _freq_bins and _rfft_data exist
    auto* const rfft_instance = &fft->_fft_instance;
    stage_rfft_f32(rfft_instance, fft->_freq_bins, fft->_rfft_data);

    TIMER_END(_stage_rfft_f32_timer);
}

// step 5: compute the squared magnitude of each complex bin in _rfft_data into _freq_bins,
// then hand over to step_cmplx_mag() for the remaining processing
void DSP::step_arm_cmplx_mag_f32(FFTWindowStateARM* fft, uint16_t start_bin, uint16_t end_bin, uint8_t harmonics, float noise_att_cutoff)
{
    TIMER_START(_arm_cmplx_mag_f32_timer);
    // measured cost, by window size (BF reference: 8us):
    //   32   - 4us F7,   5us F4,  5us H7
    //   64   - 7us F7,   13us F4
    //   128  - 14us F7,  17us F4
    //   256  - 29us F7,  28us F4, 7us H7
    //   512  - 55us F7,  93us F4, 13us H7
    //   1024 - 131us F7, 25us H7
    // General case for the magnitudes - see https://stackoverflow.com/questions/42299932/dsp-libraries-rfft-strange-results
    // The frequency of each of those frequency components are given by k*fs/N

    auto* const power = fft->_freq_bins;
    auto* const spectrum = fft->_rfft_data;
    const auto bin_count = fft->_bin_count;
    const auto window_size = fft->_window_size;

    // bins 1 .. bin_count-1 are ordinary (real, imaginary) pairs starting at spectrum[2]
    arm_cmplx_mag_squared_f32(spectrum + 2, power + 1, bin_count - 1);
    // DC and Nyquist are purely real and are held in the first two slots
    power[0] = sq(spectrum[0]);
    power[bin_count] = sq(spectrum[1]);
    // append the Nyquist value as a (real, 0) pair so the interpolator can read it as a bin
    spectrum[window_size] = spectrum[1];
    spectrum[window_size + 1] = 0;

    step_cmplx_mag(fft, start_bin, end_bin, harmonics, noise_att_cutoff);

    TIMER_END(_arm_cmplx_mag_f32_timer);
}

// step 6: locate the bin with the highest energy and interpolate the required frequency.
// Returns the state's _max_energy_bin once step_calc_frequencies() has run.
uint16_t DSP::step_calc_frequencies_f32(FFTWindowStateARM* fft, uint16_t start_bin, uint16_t end_bin)
{
    TIMER_START(_step_calc_frequencies);
    // 4us H7

    step_calc_frequencies(fft, start_bin, end_bin);

    TIMER_END(_step_calc_frequencies);

#if DEBUG_FFT
    // report the averaged step timings every 400 calls, which is roughly 1hz
    ++_output_count;
    const bool report_due = (_output_count % 400) == 0;
    if (report_due) {
        gcs().send_text(MAV_SEVERITY_WARNING, "FFT(us): t1:%lu,t2:%lu,t3:%lu,t4:%lu,t5:%lu,t6:%lu",
                        _hanning_timer._timer_avg,
                        _arm_cfft_f32_timer._timer_avg,
                        _bitreversal_timer._timer_avg,
                        _stage_rfft_f32_timer._timer_avg,
                        _arm_cmplx_mag_f32_timer._timer_avg,
                        _step_calc_frequencies._timer_avg);
    }
#endif

    return fft->_max_energy_bin;
}

// Scale factor tan(pi/N) / (pi/N) applied by calculate_candans_estimator(), evaluated for N = 32.
// NOTE(review): N is fixed at 32 here irrespective of the configured window size; in Candan's
// paper N is presumably the DFT length - confirm that the fixed value is intended.
static const float PI_N = M_PI / 32.0f;
static const float CANDAN_FACTOR = tanf(PI_N) / PI_N;

// Interpolate center frequency using http://users.metu.edu.tr/ccandan//pub_dir/FineDopplerEst_IEEE_SPL_June2011.pdf
// This is slightly less accurate than Quinn, but much cheaper to calculate
//
// fft   - window state whose _rfft_data holds interleaved (real, imaginary) pairs, one per bin
// k_max - index of the peak bin to refine
// Returns the offset of the true peak from k_max as a fraction of the bin spacing, constrained
// to [-0.5, 0.5], or 0 when k_max has no usable neighbours or the estimate is degenerate.
float DSP::calculate_candans_estimator(const FFTWindowStateARM* fft, uint16_t k_max) const
{
    // Bins 0 and 1 are rejected, as is anything at or beyond _bin_count: the neighbour lookups
    // below read _rfft_data up to index (k_max + 1) * 2 + 1, which must stay inside the buffer.
    if (k_max <= 1 || k_max >= fft->_bin_count) {
        return 0.0f;
    }

    // offsets of the real parts of the previous, next and peak bins
    const uint16_t k_m1 = (k_max - 1) * 2;
    const uint16_t k_p1 = (k_max + 1) * 2;
    const uint16_t k = k_max * 2;

    // numerator (previous - next) and denominator (2 * peak - previous - next), as complex values
    const float npr = fft->_rfft_data[k_m1] - fft->_rfft_data[k_p1];
    const float npc = fft->_rfft_data[k_m1 + 1] - fft->_rfft_data[k_p1 + 1];
    const float dpr = 2.0f * fft->_rfft_data[k] - fft->_rfft_data[k_m1] - fft->_rfft_data[k_p1];
    const float dpc = 2.0f * fft->_rfft_data[k + 1] - fft->_rfft_data[k_m1 + 1] - fft->_rfft_data[k_p1 + 1];

    // real part of numerator / denominator is realn / reald
    const float realn = npr * dpr + npc * dpc;
    const float reald = dpr * dpr + dpc * dpc;

    // sanity check: avoid dividing by zero
    if (is_zero(reald)) {
        return 0.0f;
    }

    const float d = CANDAN_FACTOR * (realn / reald);

    // -0.5 < d < 0.5 which is the fraction of the sample spacing about the center element
    return constrain_float(d, -0.5f, 0.5f);
}

#if DEBUG_FFT
// Accumulate the time elapsed since start (in microseconds) and, once every TICK_CYCLE calls,
// publish the mean in _timer_avg and reset the accumulator.
void DSP::StepTimer::time(uint32_t start)
{
    const auto elapsed = AP_HAL::micros() - start;
    _timer_total += elapsed;

    _time_ticks = (_time_ticks + 1) % TICK_CYCLE;
    if (_time_ticks != 0) {
        // still within the current averaging cycle
        return;
    }

    _timer_avg = _timer_total / TICK_CYCLE;
    _timer_total = 0;
}
#endif

#endif
Loading

0 comments on commit 3d0cf7e

Please sign in to comment.