commit
43b52082c3
@ -0,0 +1,127 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
from typing import Dict
|
||||||
|
from typing import List
|
||||||
|
from typing import Text
|
||||||
|
|
||||||
|
import textgrid
|
||||||
|
|
||||||
|
|
||||||
|
def segment_alignment(alignment: List[int], blank_id=0) -> List[List[int]]:
|
||||||
|
"""segment ctc alignment ids by continuous blank and repeat label.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
alignment (List[int]): ctc alignment id sequence.
|
||||||
|
e.g. [0, 0, 0, 1, 1, 1, 2, 0, 0, 3]
|
||||||
|
blank_id (int, optional): blank id. Defaults to 0.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[List[int]]: token align, segment aligment id sequence.
|
||||||
|
e.g. [[0, 0, 0, 1, 1, 1], [2], [0, 0, 3]]
|
||||||
|
"""
|
||||||
|
# convert alignment to a praat format, which is a doing phonetics
|
||||||
|
# by computer and helps analyzing alignment
|
||||||
|
align_segs = []
|
||||||
|
# get frames level duration for each token
|
||||||
|
start = 0
|
||||||
|
end = 0
|
||||||
|
while end < len(alignment):
|
||||||
|
while end < len(alignment) and alignment[end] == blank_id: # blank
|
||||||
|
end += 1
|
||||||
|
if end == len(alignment):
|
||||||
|
align_segs[-1].extend(alignment[start:])
|
||||||
|
break
|
||||||
|
end += 1
|
||||||
|
while end < len(alignment) and alignment[end - 1] == alignment[
|
||||||
|
end]: # repeat label
|
||||||
|
end += 1
|
||||||
|
align_segs.append(alignment[start:end])
|
||||||
|
start = end
|
||||||
|
return align_segs
|
||||||
|
|
||||||
|
|
||||||
|
def align_to_tierformat(align_segs: List[List[int]],
|
||||||
|
subsample: int,
|
||||||
|
token_dict: Dict[int, Text],
|
||||||
|
blank_id=0) -> List[Text]:
|
||||||
|
"""Generate textgrid.Interval format from alignment segmentations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
align_segs (List[List[int]]): segmented ctc alignment ids.
|
||||||
|
subsample (int): 25ms frame_length, 10ms hop_length, 1/subsample
|
||||||
|
token_dict (Dict[int, Text]): int -> str map.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[Text]: list of textgrid.Interval text, str(start, end, text).
|
||||||
|
"""
|
||||||
|
hop_length = 10 # ms
|
||||||
|
second_ms = 1000 # ms
|
||||||
|
frame_per_second = second_ms / hop_length # 25ms frame_length, 10ms hop_length
|
||||||
|
second_per_frame = 1.0 / frame_per_second
|
||||||
|
|
||||||
|
begin = 0
|
||||||
|
duration = 0
|
||||||
|
tierformat = []
|
||||||
|
|
||||||
|
for idx, tokens in enumerate(align_segs):
|
||||||
|
token_len = len(tokens)
|
||||||
|
token = tokens[-1]
|
||||||
|
# time duration in second
|
||||||
|
duration = token_len * subsample * second_per_frame
|
||||||
|
if idx < len(align_segs) - 1:
|
||||||
|
print(f"{begin:.2f} {begin + duration:.2f} {token_dict[token]}")
|
||||||
|
tierformat.append(
|
||||||
|
f"{begin:.2f} {begin + duration:.2f} {token_dict[token]}\n")
|
||||||
|
else:
|
||||||
|
for i in tokens:
|
||||||
|
if i != blank_id:
|
||||||
|
token = i
|
||||||
|
break
|
||||||
|
print(f"{begin:.2f} {begin + duration:.2f} {token_dict[token]}")
|
||||||
|
tierformat.append(
|
||||||
|
f"{begin:.2f} {begin + duration:.2f} {token_dict[token]}\n")
|
||||||
|
begin = begin + duration
|
||||||
|
|
||||||
|
return tierformat
|
||||||
|
|
||||||
|
|
||||||
|
def generate_textgrid(maxtime: float,
|
||||||
|
intervals: List[Text],
|
||||||
|
output: Text,
|
||||||
|
name: Text='ali') -> None:
|
||||||
|
"""Create alignment textgrid file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
maxtime (float): audio duartion.
|
||||||
|
intervals (List[Text]): ctc output alignment. e.g. "start-time end-time word" per item.
|
||||||
|
output (Text): textgrid filepath.
|
||||||
|
name (Text, optional): tier or layer name. Defaults to 'ali'.
|
||||||
|
"""
|
||||||
|
# Download Praat: https://www.fon.hum.uva.nl/praat/
|
||||||
|
avg_interval = maxtime / (len(intervals) + 1)
|
||||||
|
print(f"average second/token: {avg_interval}")
|
||||||
|
margin = 0.0001
|
||||||
|
|
||||||
|
tg = textgrid.TextGrid(maxTime=maxtime)
|
||||||
|
tier = textgrid.IntervalTier(name=name, maxTime=maxtime)
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
for dur in intervals:
|
||||||
|
s, e, text = dur.split()
|
||||||
|
tier.add(minTime=float(s) + margin, maxTime=float(e), mark=text)
|
||||||
|
|
||||||
|
tg.append(tier)
|
||||||
|
|
||||||
|
tg.write(output)
|
||||||
|
print("successfully generator textgrid {}.".format(output))
|
@ -0,0 +1,43 @@
|
|||||||
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
|
if [ $# != 2 ];then
|
||||||
|
echo "usage: ${0} config_path ckpt_path_prefix"
|
||||||
|
exit -1
|
||||||
|
fi
|
||||||
|
|
||||||
|
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
|
||||||
|
echo "using $ngpu gpus..."
|
||||||
|
|
||||||
|
device=gpu
|
||||||
|
if [ ngpu == 0 ];then
|
||||||
|
device=cpu
|
||||||
|
fi
|
||||||
|
config_path=$1
|
||||||
|
ckpt_prefix=$2
|
||||||
|
|
||||||
|
ckpt_name=$(basename ${ckpt_prefxi})
|
||||||
|
|
||||||
|
mkdir -p exp
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
batch_size=1
|
||||||
|
output_dir=${ckpt_prefix}
|
||||||
|
mkdir -p ${output_dir}
|
||||||
|
|
||||||
|
# align dump in `result_file`
|
||||||
|
# .tier, .TextGrid dump in `dir of result_file`
|
||||||
|
python3 -u ${BIN_DIR}/alignment.py \
|
||||||
|
--device ${device} \
|
||||||
|
--nproc 1 \
|
||||||
|
--config ${config_path} \
|
||||||
|
--result_file ${output_dir}/${type}.align \
|
||||||
|
--checkpoint_path ${ckpt_prefix} \
|
||||||
|
--opts decoding.batch_size ${batch_size}
|
||||||
|
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "Failed in ctc alignment!"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
@ -0,0 +1,43 @@
|
|||||||
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
|
if [ $# != 2 ];then
|
||||||
|
echo "usage: ${0} config_path ckpt_path_prefix"
|
||||||
|
exit -1
|
||||||
|
fi
|
||||||
|
|
||||||
|
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
|
||||||
|
echo "using $ngpu gpus..."
|
||||||
|
|
||||||
|
device=gpu
|
||||||
|
if [ ngpu == 0 ];then
|
||||||
|
device=cpu
|
||||||
|
fi
|
||||||
|
config_path=$1
|
||||||
|
ckpt_prefix=$2
|
||||||
|
|
||||||
|
ckpt_name=$(basename ${ckpt_prefxi})
|
||||||
|
|
||||||
|
mkdir -p exp
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
batch_size=1
|
||||||
|
output_dir=${ckpt_prefix}
|
||||||
|
mkdir -p ${output_dir}
|
||||||
|
|
||||||
|
# align dump in `result_file`
|
||||||
|
# .tier, .TextGrid dump in `dir of result_file`
|
||||||
|
python3 -u ${BIN_DIR}/alignment.py \
|
||||||
|
--device ${device} \
|
||||||
|
--nproc 1 \
|
||||||
|
--config ${config_path} \
|
||||||
|
--result_file ${output_dir}/${type}.align \
|
||||||
|
--checkpoint_path ${ckpt_prefix} \
|
||||||
|
--opts decoding.batch_size ${batch_size}
|
||||||
|
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "Failed in ctc alignment!"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
@ -0,0 +1,43 @@
|
|||||||
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
|
if [ $# != 2 ];then
|
||||||
|
echo "usage: ${0} config_path ckpt_path_prefix"
|
||||||
|
exit -1
|
||||||
|
fi
|
||||||
|
|
||||||
|
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
|
||||||
|
echo "using $ngpu gpus..."
|
||||||
|
|
||||||
|
device=gpu
|
||||||
|
if [ ngpu == 0 ];then
|
||||||
|
device=cpu
|
||||||
|
fi
|
||||||
|
config_path=$1
|
||||||
|
ckpt_prefix=$2
|
||||||
|
|
||||||
|
ckpt_name=$(basename ${ckpt_prefxi})
|
||||||
|
|
||||||
|
mkdir -p exp
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
batch_size=1
|
||||||
|
output_dir=${ckpt_prefix}
|
||||||
|
mkdir -p ${output_dir}
|
||||||
|
|
||||||
|
# align dump in `result_file`
|
||||||
|
# .tier, .TextGrid dump in `dir of result_file`
|
||||||
|
python3 -u ${BIN_DIR}/alignment.py \
|
||||||
|
--device ${device} \
|
||||||
|
--nproc 1 \
|
||||||
|
--config ${config_path} \
|
||||||
|
--result_file ${output_dir}/${type}.align \
|
||||||
|
--checkpoint_path ${ckpt_prefix} \
|
||||||
|
--opts decoding.batch_size ${batch_size}
|
||||||
|
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "Failed in ctc alignment!"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
Loading…
Reference in new issue