You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
85 lines
2.9 KiB
85 lines
2.9 KiB
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import argparse
|
|
import ast
|
|
import os
|
|
from typing import Dict
|
|
|
|
import numpy as np
|
|
|
|
from paddleaudio.utils import logger
|
|
|
|
# yapf: disable
|
|
parser = argparse.ArgumentParser(__doc__)
|
|
parser.add_argument('--tagging_file', type=str, required=True, help='')
|
|
parser.add_argument('--top_k', type=int, default=10, help='Get top k predicted results of audioset labels.')
|
|
parser.add_argument('--smooth', type=ast.literal_eval, default=True, help='Set "True" to apply posterior smoothing.')
|
|
parser.add_argument('--smooth_size', type=int, default=5, help='Window size of posterior smoothing.')
|
|
parser.add_argument('--label_file', type=str, default='./assets/audioset_labels.txt', help='File of audioset labels.')
|
|
parser.add_argument('--output_dir', type=str, default='./output_dir', help='Directory to save tagging labels.')
|
|
args = parser.parse_args()
|
|
# yapf: enable
|
|
|
|
|
|
def smooth(results: np.ndarray, win_size: int):
|
|
"""
|
|
Execute posterior smoothing in-place.
|
|
"""
|
|
for i in range(len(results) - 1, -1, -1):
|
|
if i < win_size - 1:
|
|
left = 0
|
|
else:
|
|
left = i + 1 - win_size
|
|
results[i] = np.sum(results[left:i + 1], axis=0) / (i - left + 1)
|
|
|
|
|
|
def generate_topk_label(k: int, label_map: Dict, result: np.ndarray):
|
|
"""
|
|
Return top k result.
|
|
"""
|
|
result = np.asarray(result)
|
|
topk_idx = (-result).argsort()[:k]
|
|
|
|
ret = ''
|
|
for idx in topk_idx:
|
|
label, score = label_map[idx], result[idx]
|
|
ret += f'{label}: {score}\n'
|
|
return ret
|
|
|
|
|
|
if __name__ == "__main__":
|
|
label_map = {}
|
|
with open(args.label_file, 'r') as f:
|
|
for i, l in enumerate(f.readlines()):
|
|
label_map[i] = l.strip()
|
|
|
|
results = np.load(args.tagging_file, allow_pickle=True)
|
|
times, scores = results['time'], results['scores']
|
|
|
|
if args.smooth:
|
|
logger.info('Posterior smoothing...')
|
|
smooth(scores, win_size=args.smooth_size)
|
|
|
|
if not os.path.exists(args.output_dir):
|
|
os.makedirs(args.output_dir)
|
|
output_file = os.path.join(
|
|
args.output_dir,
|
|
os.path.basename(args.tagging_file).split('.')[0] + '.txt')
|
|
with open(output_file, 'w') as f:
|
|
for time, score in zip(times, scores):
|
|
f.write(f'{time}\n')
|
|
f.write(generate_topk_label(args.top_k, label_map, score) + '\n')
|
|
|
|
logger.info(f'Saved tagging labels to {output_file}')
|