From a88b51c5442efba7bf1e8d91881f69279b27224d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AF=AD=E5=B8=86?= Date: Wed, 28 Feb 2024 16:04:35 +0800 Subject: [PATCH] test --- examples/industrial_data_pretraining/lcbnet/demo2.sh | 2 +- funasr/auto/auto_model.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/industrial_data_pretraining/lcbnet/demo2.sh b/examples/industrial_data_pretraining/lcbnet/demo2.sh index 0d5a4f031..9ba176be6 100755 --- a/examples/industrial_data_pretraining/lcbnet/demo2.sh +++ b/examples/industrial_data_pretraining/lcbnet/demo2.sh @@ -6,7 +6,7 @@ python -m funasr.bin.inference \ --config-name="config.yaml" \ ++init_param=${file_dir}/model.pb \ ++tokenizer_conf.token_list=${file_dir}/tokens.txt \ -++input=[${file_dir}/wav.scp,${file_dir}/ocr_text] \ +++input=[${file_dir}/wav.scp,${file_dir}/ocr.txt] \ +data_type='["kaldi_ark", "text"]' \ ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \ ++output_dir="./outputs/debug" \ diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py index d5225dee8..ba7dcabaa 100644 --- a/funasr/auto/auto_model.py +++ b/funasr/auto/auto_model.py @@ -39,13 +39,11 @@ def prepare_data_iterator(data_in, input_len=None, data_type=None, key=None): filelist = [".scp", ".txt", ".json", ".jsonl"] chars = string.ascii_letters + string.digits - pdb.set_trace() if isinstance(data_in, str) and data_in.startswith('http'): # url data_in = download_from_url(data_in) if isinstance(data_in, str) and os.path.exists(data_in): # wav_path; filelist: wav.scp, file.jsonl;text.txt; _, file_extension = os.path.splitext(data_in) file_extension = file_extension.lower() - pdb.set_trace() if file_extension in filelist: #filelist: wav.scp, file.jsonl;text.txt; with open(data_in, encoding='utf-8') as fin: for line in fin: