From 1d7ba1be1ad824135698e8000386c1fd55268ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AF=AD=E5=B8=86?= Date: Mon, 4 Mar 2024 13:52:21 +0800 Subject: [PATCH] atsr --- .gitignore | 1 + .../lcbnet/demo2.sh | 71 +++++++++++++++++++ .../lcbnet/demo2_tmp.sh | 71 +++++++++++++++++++ .../lcbnet/demo_pdb.sh | 9 ++- .../lcbnet/demo_pdb2.sh | 15 ++++ .../lcbnet/demo_tmp1.sh | 71 +++++++++++++++++++ 6 files changed, 236 insertions(+), 2 deletions(-) create mode 100755 examples/industrial_data_pretraining/lcbnet/demo2.sh create mode 100755 examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh create mode 100755 examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh create mode 100755 examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh diff --git a/.gitignore b/.gitignore index bdfe70f1a..d2b4c53b9 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,5 @@ outputs* emotion2vec* GPT-SoVITS* examples/*/*/outputs +examples/*/*/exp cmd_read diff --git a/examples/industrial_data_pretraining/lcbnet/demo2.sh b/examples/industrial_data_pretraining/lcbnet/demo2.sh new file mode 100755 index 000000000..69df6d16c --- /dev/null +++ b/examples/industrial_data_pretraining/lcbnet/demo2.sh @@ -0,0 +1,71 @@ +file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" +inference_device="cuda" +test_set="dev_wav" +if [ ${inference_device} == "cuda" ]; then + nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') +else + inference_batch_size=1 + CUDA_VISIBLE_DEVICES="" + for JOB in $(seq ${nj}); do + CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1," + done +fi + +inference_dir="outputs/slidespeech_dev_beamsearch_wav" +_logdir="${inference_dir}/logdir" +echo "inference_dir: ${inference_dir}" + +mkdir -p "${_logdir}" +key_file1=${file_dir}/${test_set}/wav.scp +key_file2=${file_dir}/${test_set}/ocr.txt +split_scps1= +split_scps2= +for JOB in $(seq "${nj}"); do + split_scps1+=" ${_logdir}/wav.${JOB}.scp" + split_scps2+=" ${_logdir}/ocr.${JOB}.txt" +done +utils/split_scp.pl "${key_file1}" ${split_scps1} +utils/split_scp.pl "${key_file2}" ${split_scps2} + +gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ }) +for JOB in $(seq ${nj}); do + { + id=$((JOB-1)) + gpuid=${gpuid_list_array[$id]} + + export CUDA_VISIBLE_DEVICES=${gpuid} + + python -m funasr.bin.inference \ + --config-path=${file_dir} \ + --config-name="config.yaml" \ + ++init_param=${file_dir}/model.pb \ + ++tokenizer_conf.token_list=${file_dir}/tokens.txt \ + ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \ + +data_type='["sound", "text"]' \ + ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \ + ++output_dir="${inference_dir}/${JOB}" \ + ++device="${inference_device}" \ + ++ncpu=1 \ + ++disable_log=true &> ${_logdir}/log.${JOB}.txt + + }& +done +wait + + +mkdir -p ${inference_dir}/1best_recog + +for JOB in $(seq "${nj}"); do + cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token" +done + +echo "Computing WER ..." +sed -e 's/ /\t/' -e 's/ //g' -e 's/▁/ /g' -e 's/\t /\t/' ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc +cp ${file_dir}/${test_set}/text ${inference_dir}/1best_recog/token.ref +cp ${file_dir}/${test_set}/ocr.list ${inference_dir}/1best_recog/ocr.list +python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer +tail -n 3 ${inference_dir}/1best_recog/token.cer + +./run_bwer_recall.sh ${inference_dir}/1best_recog/ +tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5 diff --git a/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh b/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh new file mode 100755 index 000000000..da6ad686d --- /dev/null +++ b/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh @@ -0,0 +1,71 @@ +file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" +inference_device="cuda" +test_set="test_wav" +if [ ${inference_device} == "cuda" ]; then + nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') +else + inference_batch_size=1 + CUDA_VISIBLE_DEVICES="" + for JOB in $(seq ${nj}); do + CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1," + done +fi + +inference_dir="outputs/slidespeech_test_beamsearch_wav" +_logdir="${inference_dir}/logdir" +echo "inference_dir: ${inference_dir}" + +mkdir -p "${_logdir}" +key_file1=${file_dir}/${test_set}/wav.scp +key_file2=${file_dir}/${test_set}/ocr.txt +split_scps1= +split_scps2= +for JOB in $(seq "${nj}"); do + split_scps1+=" ${_logdir}/wav.${JOB}.scp" + split_scps2+=" ${_logdir}/ocr.${JOB}.txt" +done +utils/split_scp.pl "${key_file1}" ${split_scps1} +utils/split_scp.pl "${key_file2}" ${split_scps2} + +gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ }) +for JOB in $(seq ${nj}); do + { + id=$((JOB-1)) + gpuid=${gpuid_list_array[$id]} + + export CUDA_VISIBLE_DEVICES=${gpuid} + + python -m funasr.bin.inference \ + --config-path=${file_dir} \ + --config-name="config.yaml" \ + ++init_param=${file_dir}/model.pb \ + ++tokenizer_conf.token_list=${file_dir}/tokens.txt \ + ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \ + +data_type='["sound", "text"]' \ + ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \ + ++output_dir="${inference_dir}/${JOB}" \ + ++device="${inference_device}" \ + ++ncpu=1 \ + ++disable_log=true &> ${_logdir}/log.${JOB}.txt + + }& +done +wait + + +mkdir -p ${inference_dir}/1best_recog + +for JOB in $(seq "${nj}"); do + cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token" +done + +echo "Computing WER ..." +sed -e 's/ /\t/' -e 's/ //g' -e 's/▁/ /g' -e 's/\t /\t/' ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc +cp ${file_dir}/${test_set}/text ${inference_dir}/1best_recog/token.ref +cp ${file_dir}/${test_set}/ocr.list ${inference_dir}/1best_recog/ocr.list +python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer +tail -n 3 ${inference_dir}/1best_recog/token.cer + +./run_bwer_recall.sh ${inference_dir}/1best_recog/ +tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5 diff --git a/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh b/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh index e435905bf..0747a8d7b 100755 --- a/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh +++ b/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh @@ -6,8 +6,13 @@ python -m funasr.bin.inference \ --config-name="config.yaml" \ ++init_param=${file_dir}/model.pb \ ++tokenizer_conf.token_list=${file_dir}/tokens.txt \ -++input=[${file_dir}/dev/wav.scp,${file_dir}/dev/ocr.txt] \ -+data_type='["kaldi_ark", "text"]' \ ++input=["${file_dir}/example/asr_example.wav","${file_dir}/example/ocr.txt"] \ ++data_type='["sound","text"]' \ ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \ ++output_dir="./outputs/debug" \ ++device="cpu" \ + +#++input=["/nfs/yufan.yf/workspace/espnet/egs2/youtube_ppt/asr/dump/raw/dev_oracle_v1_new/data/format.1/YTB+--tMoLpQI-w+00322.wav"] \ +#+data_type='["sound"]' \ +#++input=["/nfs/yufan.yf/workspace/espnet/egs2/youtube_ppt/asr/dump/raw/dev_oracle_v1_new/data/format.1/YTB+--tMoLpQI-w+00322.wav","/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch/example/ocr2.txt"] \ +#+data_type='["sound","text"]' \ diff --git a/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh b/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh new file mode 100755 index 000000000..557e9b2d8 --- /dev/null +++ b/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh @@ -0,0 +1,15 @@ +file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch" + +#CUDA_VISIBLE_DEVICES="" \ +python -m funasr.bin.inference \ +--config-path=${file_dir} \ +--config-name="config.yaml" \ +++init_param=${file_dir}/model.pb \ +++tokenizer_conf.token_list=${file_dir}/tokens.txt \ +++input=[${file_dir}/dev_wav/wav.scp,${file_dir}/dev_wav/ocr.txt] \ ++data_type='["sound", "text"]' \ +++tokenizer_conf.bpemodel=${file_dir}/bpe.model \ +++output_dir="./outputs/debug" \ +++device="cpu" \ + +#++input=[${file_dir}/dev_wav/wav.scp,${file_dir}/dev_wav/ocr.txt] \ diff --git a/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh b/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh new file mode 100755 index 000000000..488f7d2a1 --- /dev/null +++ b/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh @@ -0,0 +1,71 @@ +file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch" +CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" +inference_device="cuda" + +if [ ${inference_device} == "cuda" ]; then + nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') +else + inference_batch_size=1 + CUDA_VISIBLE_DEVICES="" + for JOB in $(seq ${nj}); do + CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1," + done +fi + +inference_dir="outputs/slidespeech_test_beamsearch_new" +_logdir="${inference_dir}/logdir" +echo "inference_dir: ${inference_dir}" + +mkdir -p "${_logdir}" +key_file1=${file_dir}/test/wav.scp +key_file2=${file_dir}/test/ocr.txt +split_scps1= +split_scps2= +for JOB in $(seq "${nj}"); do + split_scps1+=" ${_logdir}/wav.${JOB}.scp" + split_scps2+=" ${_logdir}/ocr.${JOB}.txt" +done +utils/split_scp.pl "${key_file1}" ${split_scps1} +utils/split_scp.pl "${key_file2}" ${split_scps2} + +gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ }) +for JOB in $(seq ${nj}); do + { + id=$((JOB-1)) + gpuid=${gpuid_list_array[$id]} + + export CUDA_VISIBLE_DEVICES=${gpuid} + + python -m funasr.bin.inference \ + --config-path=${file_dir} \ + --config-name="config.yaml" \ + ++init_param=${file_dir}/model.pb \ + ++tokenizer_conf.token_list=${file_dir}/tokens.txt \ + ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \ + +data_type='["kaldi_ark", "text"]' \ + ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \ + ++output_dir="${inference_dir}/${JOB}" \ + ++device="${inference_device}" \ + ++ncpu=1 \ + ++disable_log=true &> ${_logdir}/log.${JOB}.txt + + }& +done +wait + + +mkdir -p ${inference_dir}/1best_recog + +for JOB in $(seq "${nj}"); do + cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token" +done + +echo "Computing WER ..." +sed -e 's/ /\t/' -e 's/ //g' -e 's/▁/ /g' -e 's/\t /\t/' ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc +cp ${file_dir}/test/text ${inference_dir}/1best_recog/token.ref +cp ${file_dir}/test/ocr.list ${inference_dir}/1best_recog/ocr.list +python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer +tail -n 3 ${inference_dir}/1best_recog/token.cer + +./run_bwer_recall.sh ${inference_dir}/1best_recog/ +tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5