|
|
@ -147,7 +147,8 @@ def start_server():
|
|
|
|
augmentation_config='{}',
|
|
|
|
augmentation_config='{}',
|
|
|
|
specgram_type=args.specgram_type,
|
|
|
|
specgram_type=args.specgram_type,
|
|
|
|
num_threads=1,
|
|
|
|
num_threads=1,
|
|
|
|
keep_transcription_text=True)
|
|
|
|
keep_transcription_text=True,
|
|
|
|
|
|
|
|
num_conv_layers=args.num_conv_layers)
|
|
|
|
# prepare ASR model
|
|
|
|
# prepare ASR model
|
|
|
|
ds2_model = DeepSpeech2Model(
|
|
|
|
ds2_model = DeepSpeech2Model(
|
|
|
|
vocab_size=data_generator.vocab_size,
|
|
|
|
vocab_size=data_generator.vocab_size,
|
|
|
@ -163,8 +164,20 @@ def start_server():
|
|
|
|
# prepare ASR inference handler
|
|
|
|
# prepare ASR inference handler
|
|
|
|
def file_to_transcript(filename):
|
|
|
|
def file_to_transcript(filename):
|
|
|
|
feature = data_generator.process_utterance(filename, "")
|
|
|
|
feature = data_generator.process_utterance(filename, "")
|
|
|
|
|
|
|
|
ins = []
|
|
|
|
|
|
|
|
conv0_h = (feature[0].shape[0] - 1) // 2 + 1
|
|
|
|
|
|
|
|
conv0_w = (feature[0].shape[1] - 1) // 3 + 1
|
|
|
|
|
|
|
|
ins += [feature[0], feature[1],
|
|
|
|
|
|
|
|
[0], [conv0_w],
|
|
|
|
|
|
|
|
[1, 32, 1, conv0_h, conv0_w + 1, conv0_w]]
|
|
|
|
|
|
|
|
pre_h = conv0_h
|
|
|
|
|
|
|
|
for i in xrange(args.num_conv_layers - 1):
|
|
|
|
|
|
|
|
h = (pre_h - 1) // 2 + 1
|
|
|
|
|
|
|
|
pre_h = h
|
|
|
|
|
|
|
|
ins += [[1, 32, 1, h, conv0_w + 1, conv0_w]]
|
|
|
|
|
|
|
|
|
|
|
|
result_transcript = ds2_model.infer_batch(
|
|
|
|
result_transcript = ds2_model.infer_batch(
|
|
|
|
infer_data=[feature],
|
|
|
|
infer_data=[ins],
|
|
|
|
decoding_method=args.decoding_method,
|
|
|
|
decoding_method=args.decoding_method,
|
|
|
|
beam_alpha=args.alpha,
|
|
|
|
beam_alpha=args.alpha,
|
|
|
|
beam_beta=args.beta,
|
|
|
|
beam_beta=args.beta,
|
|
|
@ -173,7 +186,8 @@ def start_server():
|
|
|
|
cutoff_top_n=args.cutoff_top_n,
|
|
|
|
cutoff_top_n=args.cutoff_top_n,
|
|
|
|
vocab_list=vocab_list,
|
|
|
|
vocab_list=vocab_list,
|
|
|
|
language_model_path=args.lang_model_path,
|
|
|
|
language_model_path=args.lang_model_path,
|
|
|
|
num_processes=1)
|
|
|
|
num_processes=1,
|
|
|
|
|
|
|
|
feeding_dict=data_generator.feeding)
|
|
|
|
return result_transcript[0]
|
|
|
|
return result_transcript[0]
|
|
|
|
|
|
|
|
|
|
|
|
# warming up with utterrances sampled from Librispeech
|
|
|
|
# warming up with utterrances sampled from Librispeech
|
|
|
|