parent
a16a044fef
commit
7ae36b219d
@ -0,0 +1,15 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from . import fbank
|
||||
from . import pitch
|
@ -0,0 +1,103 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from paddlespeech.audio._internal import module_utils
|
||||
import paddlespeech.audio.ops.paddleaudio.ComputeFbank as ComputeFbank
|
||||
import paddlespeech.audio.ops.paddleaudio.ComputeKaldiPitch as ComputeKaldiPitch
|
||||
|
||||
__all__ = [
|
||||
'fbank',
|
||||
'pitch',
|
||||
]
|
||||
|
||||
@module_utils.requires_kaldi()
|
||||
def fbank(wav,
|
||||
samp_freq: int=16000,
|
||||
frame_shift_ms: float=10.0,
|
||||
frame_length_ms: float=25.0,
|
||||
dither: float=0.0,
|
||||
preemph_coeff: float=0.97,
|
||||
remove_dc_offset: bool=True,
|
||||
window_type: str='povey',
|
||||
round_to_power_of_two: bool=True,
|
||||
blackman_coeff: float=0.42,
|
||||
snip_edges: bool=True,
|
||||
allow_downsample: bool=False,
|
||||
allow_upsample: bool=False,
|
||||
max_feature_vectors: int=-1,
|
||||
num_bins: int=23,
|
||||
low_freq: float=20,
|
||||
high_freq: float=0,
|
||||
vtln_low: float=100,
|
||||
vtln_high: float=-500,
|
||||
debug_mel: bool=False,
|
||||
htk_mode: bool=False,
|
||||
use_energy: bool=False, # fbank opts
|
||||
energy_floor: float=0.0,
|
||||
raw_energy: bool=True,
|
||||
htk_compat: bool=False,
|
||||
use_log_fbank: bool=True,
|
||||
use_power: bool=True):
|
||||
feat = ComputeFbank(
|
||||
samp_freq, frame_shift_ms, frame_length_ms,
|
||||
dither, preemph_coeff, remove_dc_offset,
|
||||
window_type, round_to_power_of_two, blackman_coeff,
|
||||
snip_edges, allow_downsample, allow_upsample,
|
||||
max_feature_vectors, num_bins, low_freq,
|
||||
high_freq, vtln_low, vtln_high, debug_mel,
|
||||
htk_mode, use_energy, energy_floor,
|
||||
raw_energy, htk_compat, use_log_fbank, use_power, wav)
|
||||
return feat
|
||||
|
||||
@module_utils.requires_kaldi()
|
||||
def pitch(wav,
|
||||
samp_freq: int=16000,
|
||||
frame_shift_ms: float=10.0,
|
||||
frame_length_ms: float=25.0,
|
||||
preemph_coeff: float=0.0,
|
||||
min_f0: int=50,
|
||||
max_f0: int=400,
|
||||
soft_min_f0: float=10.0,
|
||||
penalty_factor: float=0.1,
|
||||
lowpass_cutoff: int=1000,
|
||||
resample_freq: int=4000,
|
||||
delta_pitch: float=0.005,
|
||||
nccf_ballast: int=7000,
|
||||
lowpass_filter_width: int=1,
|
||||
upsample_filter_width: int=5,
|
||||
max_frames_latency: int=0,
|
||||
frames_per_chunk: int=0,
|
||||
simulate_first_pass_online: bool=False,
|
||||
recompute_frame: int=500,
|
||||
nccf_ballast_online: bool=False,
|
||||
snip_edges: bool=True):
|
||||
pitch = ComputeKaldiPitch(samp_freq, frame_shift_ms,
|
||||
frame_length_ms,
|
||||
preemph_coeff,
|
||||
min_f0,
|
||||
max_f0,
|
||||
soft_min_f0,
|
||||
penalty_factor,
|
||||
lowpass_cutoff,
|
||||
resample_freq,
|
||||
delta_pitch,
|
||||
nccf_ballast,
|
||||
lowpass_filter_width,
|
||||
upsample_filter_width,
|
||||
max_frames_latency,
|
||||
frames_per_chunk,
|
||||
simulate_first_pass_online,
|
||||
recompute_frame,
|
||||
nccf_ballast_online,
|
||||
snip_edges, wav)
|
||||
return pitch
|
@ -0,0 +1,125 @@
|
||||
// itf/online-feature-itf.h
|
||||
|
||||
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef KALDI_ITF_ONLINE_FEATURE_ITF_H_
|
||||
#define KALDI_ITF_ONLINE_FEATURE_ITF_H_ 1
|
||||
#include "base/kaldi-common.h"
|
||||
#include "matrix/matrix-lib.h"
|
||||
|
||||
namespace kaldi {
|
||||
/// @ingroup Interfaces
|
||||
/// @{
|
||||
|
||||
/**
|
||||
OnlineFeatureInterface is an interface for online feature processing (it is
|
||||
also usable in the offline setting, but currently we're not using it for
|
||||
that). This is for use in the online2/ directory, and it supersedes the
|
||||
interface in ../online/online-feat-input.h. We have a slightly different
|
||||
model that puts more control in the hands of the calling thread, and won't
|
||||
involve waiting on semaphores in the decoding thread.
|
||||
|
||||
This interface only specifies how the object *outputs* the features.
|
||||
How it obtains the features, e.g. from a previous object or objects of type
|
||||
OnlineFeatureInterface, is not specified in the interface and you will
|
||||
likely define new constructors or methods in the derived type to do that.
|
||||
|
||||
You should appreciate that this interface is designed to allow random
|
||||
access to features, as long as they are ready. That is, the user
|
||||
can call GetFrame for any frame less than NumFramesReady(), and when
|
||||
implementing a child class you must not make assumptions about the
|
||||
order in which the user makes these calls.
|
||||
*/
|
||||
|
||||
class OnlineFeatureInterface {
|
||||
public:
|
||||
virtual int32 Dim() const = 0; /// returns the feature dimension.
|
||||
|
||||
/// Returns the total number of frames, since the start of the utterance, that
|
||||
/// are now available. In an online-decoding context, this will likely
|
||||
/// increase with time as more data becomes available.
|
||||
virtual int32 NumFramesReady() const = 0;
|
||||
|
||||
/// Returns true if this is the last frame. Frame indices are zero-based, so the
|
||||
/// first frame is zero. IsLastFrame(-1) will return false, unless the file
|
||||
/// is empty (which is a case that I'm not sure all the code will handle, so
|
||||
/// be careful). This function may return false for some frame if
|
||||
/// we haven't yet decided to terminate decoding, but later true if we decide
|
||||
/// to terminate decoding. This function exists mainly to correctly handle
|
||||
/// end effects in feature extraction, and is not a mechanism to determine how
|
||||
/// many frames are in the decodable object (as it used to be, and for backward
|
||||
/// compatibility, still is, in the Decodable interface).
|
||||
virtual bool IsLastFrame(int32 frame) const = 0;
|
||||
|
||||
/// Gets the feature vector for this frame. Before calling this for a given
|
||||
/// frame, it is assumed that you called NumFramesReady() and it returned a
|
||||
/// number greater than "frame". Otherwise this call will likely crash with
|
||||
/// an assert failure. This function is not declared const, in case there is
|
||||
/// some kind of caching going on, but most of the time it shouldn't modify
|
||||
/// the class.
|
||||
virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) = 0;
|
||||
|
||||
|
||||
/// This is like GetFrame() but for a collection of frames. There is a
|
||||
/// default implementation that just gets the frames one by one, but it
|
||||
/// may be overridden for efficiency by child classes (since sometimes
|
||||
/// it's more efficient to do things in a batch).
|
||||
virtual void GetFrames(const std::vector<int32> &frames,
|
||||
MatrixBase<BaseFloat> *feats) {
|
||||
KALDI_ASSERT(static_cast<int32>(frames.size()) == feats->NumRows());
|
||||
for (size_t i = 0; i < frames.size(); i++) {
|
||||
SubVector<BaseFloat> feat(*feats, i);
|
||||
GetFrame(frames[i], &feat);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Returns frame shift in seconds. Helps to estimate duration from frame
|
||||
// counts.
|
||||
virtual BaseFloat FrameShiftInSeconds() const = 0;
|
||||
|
||||
/// Virtual destructor. Note: constructors that take another member of
|
||||
/// type OnlineFeatureInterface are not expected to take ownership of
|
||||
/// that pointer; the caller needs to keep track of that manually.
|
||||
virtual ~OnlineFeatureInterface() { }
|
||||
|
||||
};
|
||||
|
||||
|
||||
/// Add a virtual class for "source" features such as MFCC or PLP or pitch
|
||||
/// features.
|
||||
class OnlineBaseFeature: public OnlineFeatureInterface {
|
||||
public:
|
||||
/// This would be called from the application, when you get more wave data.
|
||||
/// Note: the sampling_rate is typically only provided so the code can assert
|
||||
/// that it matches the sampling rate expected in the options.
|
||||
virtual void AcceptWaveform(BaseFloat sampling_rate,
|
||||
const VectorBase<BaseFloat> &waveform) = 0;
|
||||
|
||||
/// InputFinished() tells the class you won't be providing any
|
||||
/// more waveform. This will help flush out the last few frames
|
||||
/// of delta or LDA features (it will typically affect the return value
|
||||
/// of IsLastFrame.
|
||||
virtual void InputFinished() = 0;
|
||||
};
|
||||
|
||||
|
||||
/// @}
|
||||
} // namespace Kaldi
|
||||
|
||||
#endif // KALDI_ITF_ONLINE_FEATURE_ITF_H_
|
Loading…
Reference in new issue