#!/usr/bin/env python3
"""Print the arcs of a CTC token transducer, one text line per arc."""
import argparse


def main(args):
    """Token Transducer

    Reads ``args.token_file`` line by line, takes the first space-separated
    field of each line as the token, and prints the transducer to standard
    output: one ``src dst ilabel olabel`` line per arc, followed by a last
    line holding the final state ``0``.

    State layout, as emitted below:
      * state 0 is where the entry arc starts, where the exit arc ends,
        and the state printed as final;
      * states 1 and 2 each carry one fixed self-loop;
      * every regular token gets its own state (numbered from 3) with an
        arc 1 -> state that outputs the token, a self-loop that consumes
        repeats of the token, and an arc state -> 2;
      * a token containing ``#`` becomes a self-loop on state 0 that
        outputs the token.

    NOTE(review): several label fields below are empty strings, so the
    printed arcs carry empty labels. This looks like special symbols
    (presumably the epsilon and blank labels) were lost from the literals;
    confirm against the symbol table before feeding the output to an FST
    compiler. The literals are kept exactly as found.

    Args:
        args: object with a ``token_file`` attribute holding the path of the
            token file (one token per line, token in the first field).

    Raises:
        OSError: if the token file cannot be opened.
        UnicodeDecodeError: if the token file is not valid UTF-8.
    """
    # entry
    print('0 1 ')
    # skip beginning and ending
    print('1 1 ')
    print('2 2 ')
    # exit
    print('2 0 ')

    # linking `token` between node 1 and node 2
    # Explicit encoding so that non-ASCII tokens do not depend on the locale.
    with open(args.token_file, 'r', encoding='utf-8') as fin:
        node = 3
        for entry in fin:
            phone = entry.strip().split(' ')[0]
            if not phone:
                # NOTE(review): the original compared `phone` against two
                # literals that are both empty here; if they were two
                # distinct special symbols, restore both comparisons.
                continue
            if '#' in phone:
                # disambiguation symbol
                # `token` may end with a disambiguation symbol
                print('{} {} {} {}'.format(0, 0, '', phone))
            else:
                # eating `token`
                print('{} {} {} {}'.format(1, node, phone, phone))
                # remove repeating `token`
                print('{} {} {} {}'.format(node, node, phone, ''))
                # leaving `token`
                print('{} {} {} {}'.format(node, 2, '', ''))
            # NOTE(review): the collapsed original does not show whether this
            # increment belongs to the `else` branch or to the loop body.
            # Loop level is used, so a `#` line also consumes a state id.
            node += 1
    # Final node
    print('0')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='FST: CTC Token FST transducer')
    parser.add_argument(
        '--token_file',
        required=True,
        help='e2e model token file. line: token(char/phone/spm/disambigous)')
    args = parser.parse_args()

    main(args)