You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
PaddleSpeech/runtime/engine/common/utils/audio_process.cc

83 lines
2.6 KiB

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "utils/audio_process.h"
namespace ppspeech{
// Divides every sample of `waveform` by 32768.0 (2^15), in place.
//
// NOTE(review): the divisor suggests the input holds 16-bit PCM values
// stored as float, which this would map to roughly [-1, 1) -- confirm
// against callers.
//
// Args:
//   waveform: samples to rescale; must not be null (it is dereferenced
//             without a check).
//
// Returns:
//   Always 0.
int WaveformFloatNormal(std::vector<float>* waveform) {
    for (float& sample : *waveform) {
        sample = sample / 32768.0;
    }
    return 0;
}
// Normalizes the amplitude of `waveform` in place.
//
// Args:
//   waveform: samples to normalize; must not be null (it is dereferenced
//             without a check).
//   wav_normal: when false the function does nothing and returns 0.
//   wav_normal_type:
//       "linear"   - scale every sample by 1 / (max|sample| + 1e-8).
//       "gaussian" - subtract the mean and divide by the sample standard
//                    deviation (n - 1 denominator, clamped to >= 1e-8).
//   wav_norm_mul_factor: multiplier applied to every normalized sample.
//
// Returns:
//   0 on success (including the no-op case), -1 when `wav_normal_type` is
//   neither "linear" nor "gaussian".
int WaveformNormal(std::vector<float>* waveform,
                   bool wav_normal,
                   const std::string& wav_normal_type,
                   float wav_norm_mul_factor) {
    if (!wav_normal) {
        return 0;
    }
    if (wav_normal_type == "linear") {
        // Magnitudes are never negative, so 0 is a sufficient starting peak.
        float amax = 0.0f;
        for (const float sample : *waveform) {
            amax = std::max(amax, std::abs(sample));
        }
        // The epsilon keeps the factor finite for an all-zero waveform.
        const float factor = 1.0 / (amax + 1e-8);
        for (float& sample : *waveform) {
            sample = sample * factor * wav_norm_mul_factor;
        }
    } else if (wav_normal_type == "gaussian") {
        const auto count = waveform->size();
        if (count == 0) {
            // Nothing to normalize; also avoids computing 0 / 0 for the mean.
            return 0;
        }
        const double sum =
            std::accumulate(waveform->begin(), waveform->end(), 0.0);
        const double mean = sum / count;  // mean
        double accum = 0.0;
        for (const float sample : *waveform) {
            const double diff = sample - mean;
            accum += diff * diff;
        }
        // Sample standard deviation uses an (n - 1) denominator. A single
        // sample would make that 0 and turn the result into NaN, so the
        // denominator is kept at 1; the deviation is then 0 and is clamped
        // below, which maps the lone sample to 0.
        const double denom = count > 1 ? static_cast<double>(count - 1) : 1.0;
        double stdev = sqrt(accum / denom);  // standard deviation
        stdev = std::max(stdev, 1e-8);
        for (float& sample : *waveform) {
            sample = wav_norm_mul_factor * (sample - mean) / stdev;
        }
    } else {
        printf("don't support\n");
        return -1;
    }
    return 0;
}
// Converts a power value to decibels relative to `ref_value`:
//   10 * log10(max(amin, in)) - 10 * log10(max(ref_value, amin))
//
// Args:
//   in:        power value to convert; values below `amin` are raised to it.
//   ref_value: reference power; must be > 0.
//   amin:      lower bound applied before taking the logarithm; must be > 0.
//   top_db:    accepted but not used by this implementation.
//
// Returns:
//   The level in dB, or -1 after printing a message when `amin` or
//   `ref_value` is not strictly positive. Note that -1 is also a possible
//   valid result, so callers cannot tell the two apart from the value alone.
float PowerTodb(float in, float ref_value, float amin, float top_db) {
    (void)top_db;  // intentionally unused

    const bool amin_ok = amin > 0;
    const bool ref_ok = ref_value > 0;
    if (!(amin_ok && ref_ok)) {
        // amin is validated first, matching the order callers may rely on
        // for the printed message.
        if (!amin_ok) {
            printf("amin must be strictly positive\n");
        } else {
            printf("ref_value must be strictly positive\n");
        }
        return -1;
    }

    // The signal term is rounded to float before the subtraction.
    const float signal_db = 10.0 * log10(std::max(amin, in));
    const double reference_db = 10.0 * log10(std::max(ref_value, amin));
    return signal_db - reference_db;
}
} // namespace ppspeech