diff --git a/egs/aishell/transformer/utils/compute_wer.py b/egs/aishell/transformer/utils/compute_wer.py index 349a3f609..26a9f491f 100755 --- a/egs/aishell/transformer/utils/compute_wer.py +++ b/egs/aishell/transformer/utils/compute_wer.py @@ -45,8 +45,8 @@ def compute_wer(ref_file, if out_item['wrong'] > 0: rst['wrong_sentences'] += 1 cer_detail_writer.write(hyp_key + print_cer_detail(out_item) + '\n') - cer_detail_writer.write("ref:" + '\t' + "".join(ref_dict[hyp_key]) + '\n') - cer_detail_writer.write("hyp:" + '\t' + "".join(hyp_dict[hyp_key]) + '\n') + cer_detail_writer.write("ref:" + '\t' + " ".join(list(map(lambda x: x.lower(), ref_dict[hyp_key]))) + '\n') + cer_detail_writer.write("hyp:" + '\t' + " ".join(list(map(lambda x: x.lower(), hyp_dict[hyp_key]))) + '\n') if rst['Wrd'] > 0: rst['Err'] = round(rst['wrong_words'] * 100 / rst['Wrd'], 2) diff --git a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer.py b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer.py index c016c19a7..77b2cbd23 100644 --- a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer.py +++ b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer.py @@ -74,7 +74,7 @@ def modelscope_infer(params): # If text exists, compute CER text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(best_recog_path, "token") + text_proc_file = os.path.join(best_recog_path, "text") compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer")) diff --git a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer_after_finetune.py b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer_after_finetune.py index b3260672c..488936c7f 100644 --- a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer_after_finetune.py +++ b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer_after_finetune.py @@ -38,7 +38,7 @@ def modelscope_infer_after_finetune(params): # computer CER if GT text is set text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(decoding_path, "1best_recog/token") + text_proc_file = os.path.join(decoding_path, "1best_recog/text") compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) diff --git a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer.py b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer.py index 54cfec0bf..0d06377e0 100644 --- a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer.py +++ b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer.py @@ -74,7 +74,7 @@ def modelscope_infer(params): # If text exists, compute CER text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(best_recog_path, "token") + text_proc_file = os.path.join(best_recog_path, "text") compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer")) diff --git a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer_after_finetune.py b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer_after_finetune.py index 2f038a85a..c94f685dc 100644 --- a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer_after_finetune.py +++ b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer_after_finetune.py @@ -38,7 +38,7 @@ def modelscope_infer_after_finetune(params): # computer CER if GT text is set text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(decoding_path, "1best_recog/token") + text_proc_file = os.path.join(decoding_path, "1best_recog/text") compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.sh b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.sh index f0802575e..221479d99 100644 --- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.sh +++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.sh @@ -63,8 +63,8 @@ fi if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then echo "Computing WER ..." - python utils/proce_text.py ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc - python utils/proce_text.py ${data_dir}/text ${output_dir}/1best_recog/text.ref + cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc + cp ${data_dir}/text ${output_dir}/1best_recog/text.ref python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer tail -n 3 ${output_dir}/1best_recog/text.cer fi diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer_after_finetune.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer_after_finetune.py index 295c95d7f..2d311ddc6 100644 --- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer_after_finetune.py +++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer_after_finetune.py @@ -34,7 +34,7 @@ def modelscope_infer_after_finetune(params): # computer CER if GT text is set text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(decoding_path, "1best_recog/token") + text_proc_file = os.path.join(decoding_path, "1best_recog/text") compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) diff --git a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.sh b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.sh index cdf81dcbf..6daf7d43e 100644 --- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.sh +++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.sh @@ -63,8 +63,8 @@ fi if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then echo "Computing WER ..." - python utils/proce_text.py ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc - python utils/proce_text.py ${data_dir}/text ${output_dir}/1best_recog/text.ref + cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc + cp ${data_dir}/text ${output_dir}/1best_recog/text.ref python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer tail -n 3 ${output_dir}/1best_recog/text.cer fi diff --git a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer_after_finetune.py b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer_after_finetune.py index e8fee02a0..747b49f96 100644 --- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer_after_finetune.py +++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer_after_finetune.py @@ -34,7 +34,7 @@ def modelscope_infer_after_finetune(params): # computer CER if GT text is set text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(decoding_path, "1best_recog/token") + text_proc_file = os.path.join(decoding_path, "1best_recog/text") compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py index 5d74837da..96db5f99f 100644 --- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py +++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py @@ -75,7 +75,7 @@ def modelscope_infer(params): # If text exists, compute CER text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(best_recog_path, "token") + text_proc_file = os.path.join(best_recog_path, "text") compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer")) diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py index 861fefb7f..74691f0aa 100644 --- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py +++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py @@ -39,7 +39,7 @@ def modelscope_infer_after_finetune(params): # computer CER if GT text is set text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(decoding_path, "1best_recog/token") + text_proc_file = os.path.join(decoding_path, "1best_recog/text") compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer.py index 5c6236255..8b4a04dd3 100644 --- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer.py +++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer.py @@ -75,7 +75,7 @@ def modelscope_infer(params): # If text exists, compute CER text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(best_recog_path, "token") + text_proc_file = os.path.join(best_recog_path, "text") compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer")) diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer_after_finetune.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer_after_finetune.py index d73cae267..fd124ffcc 100644 --- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer_after_finetune.py +++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer_after_finetune.py @@ -39,7 +39,7 @@ def modelscope_infer_after_finetune(params): # computer CER if GT text is set text_in = os.path.join(params["data_dir"], "text") if os.path.exists(text_in): - text_proc_file = os.path.join(decoding_path, "1best_recog/token") + text_proc_file = os.path.join(decoding_path, "1best_recog/text") compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) diff --git a/funasr/bin/asr_inference_paraformer.py b/funasr/bin/asr_inference_paraformer.py index 2eeffcd91..8cbd41905 100644 --- a/funasr/bin/asr_inference_paraformer.py +++ b/funasr/bin/asr_inference_paraformer.py @@ -797,7 +797,7 @@ def inference_modelscope( finish_count += 1 # asr_utils.print_progress(finish_count / file_count) if writer is not None: - ibest_writer["text"][key] = text_postprocessed + ibest_writer["text"][key] = " ".join(word_lists) logging.info("decoding, utt: {}, predictions: {}".format(key, text)) rtf_avg = "decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}".format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor)) diff --git a/funasr/bin/asr_inference_paraformer_vad.py b/funasr/bin/asr_inference_paraformer_vad.py index a0dc0aa8d..1548f9ff1 100644 --- a/funasr/bin/asr_inference_paraformer_vad.py +++ b/funasr/bin/asr_inference_paraformer_vad.py @@ -338,7 +338,7 @@ def inference_modelscope( ibest_writer["token"][key] = " ".join(token) ibest_writer["token_int"][key] = " ".join(map(str, token_int)) ibest_writer["vad"][key] = "{}".format(vadsegments) - ibest_writer["text"][key] = text_postprocessed + ibest_writer["text"][key] = " ".join(word_lists) ibest_writer["text_with_punc"][key] = text_postprocessed_punc if time_stamp_postprocessed is not None: ibest_writer["time_stamp"][key] = "{}".format(time_stamp_postprocessed) diff --git a/funasr/bin/asr_inference_paraformer_vad_punc.py b/funasr/bin/asr_inference_paraformer_vad_punc.py index ab3e1e368..9dc0b79ce 100644 --- a/funasr/bin/asr_inference_paraformer_vad_punc.py +++ b/funasr/bin/asr_inference_paraformer_vad_punc.py @@ -670,7 +670,7 @@ def inference_modelscope( ibest_writer["token"][key] = " ".join(token) ibest_writer["token_int"][key] = " ".join(map(str, token_int)) ibest_writer["vad"][key] = "{}".format(vadsegments) - ibest_writer["text"][key] = text_postprocessed + ibest_writer["text"][key] = " ".join(word_lists) ibest_writer["text_with_punc"][key] = text_postprocessed_punc if time_stamp_postprocessed is not None: ibest_writer["time_stamp"][key] = "{}".format(time_stamp_postprocessed) diff --git a/funasr/bin/asr_inference_rnnt.py b/funasr/bin/asr_inference_rnnt.py index 4a9ff0bda..2189a718d 100644 --- a/funasr/bin/asr_inference_rnnt.py +++ b/funasr/bin/asr_inference_rnnt.py @@ -738,13 +738,13 @@ def inference_modelscope( ibest_writer["rtf"][key] = rtf_cur if text is not None: - text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) + text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token) item = {'key': key, 'value': text_postprocessed} asr_result_list.append(item) finish_count += 1 # asr_utils.print_progress(finish_count / file_count) if writer is not None: - ibest_writer["text"][key] = text_postprocessed + ibest_writer["text"][key] = " ".join(word_lists) logging.info("decoding, utt: {}, predictions: {}".format(key, text)) rtf_avg = "decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}".format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor)) diff --git a/funasr/bin/asr_inference_uniasr.py b/funasr/bin/asr_inference_uniasr.py index 7961d5af3..2e5b6f54b 100644 --- a/funasr/bin/asr_inference_uniasr.py +++ b/funasr/bin/asr_inference_uniasr.py @@ -507,13 +507,13 @@ def inference_modelscope( ibest_writer["score"][key] = str(hyp.score) if text is not None: - text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) + text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token) item = {'key': key, 'value': text_postprocessed} asr_result_list.append(item) finish_count += 1 asr_utils.print_progress(finish_count / file_count) if writer is not None: - ibest_writer["text"][key] = text_postprocessed + ibest_writer["text"][key] = " ".join(word_lists) return asr_result_list return _forward diff --git a/funasr/bin/asr_inference_uniasr_vad.py b/funasr/bin/asr_inference_uniasr_vad.py index 3164d0d24..52c29b8a9 100644 --- a/funasr/bin/asr_inference_uniasr_vad.py +++ b/funasr/bin/asr_inference_uniasr_vad.py @@ -507,13 +507,13 @@ def inference_modelscope( ibest_writer["score"][key] = str(hyp.score) if text is not None: - text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) + text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token) item = {'key': key, 'value': text_postprocessed} asr_result_list.append(item) finish_count += 1 asr_utils.print_progress(finish_count / file_count) if writer is not None: - ibest_writer["text"][key] = text_postprocessed + ibest_writer["text"][key] = " ".join(word_lists) return asr_result_list return _forward diff --git a/funasr/utils/compute_wer.py b/funasr/utils/compute_wer.py index 349a3f609..26a9f491f 100755 --- a/funasr/utils/compute_wer.py +++ b/funasr/utils/compute_wer.py @@ -45,8 +45,8 @@ def compute_wer(ref_file, if out_item['wrong'] > 0: rst['wrong_sentences'] += 1 cer_detail_writer.write(hyp_key + print_cer_detail(out_item) + '\n') - cer_detail_writer.write("ref:" + '\t' + "".join(ref_dict[hyp_key]) + '\n') - cer_detail_writer.write("hyp:" + '\t' + "".join(hyp_dict[hyp_key]) + '\n') + cer_detail_writer.write("ref:" + '\t' + " ".join(list(map(lambda x: x.lower(), ref_dict[hyp_key]))) + '\n') + cer_detail_writer.write("hyp:" + '\t' + " ".join(list(map(lambda x: x.lower(), hyp_dict[hyp_key]))) + '\n') if rst['Wrd'] > 0: rst['Err'] = round(rst['wrong_words'] * 100 / rst['Wrd'], 2)