This commit is contained in:
语帆 2024-02-28 16:04:35 +08:00
parent ecd9e74b6e
commit a88b51c544
2 changed files with 1 addition and 3 deletions

View File

@@ -6,7 +6,7 @@ python -m funasr.bin.inference \
--config-name="config.yaml" \ --config-name="config.yaml" \
++init_param=${file_dir}/model.pb \ ++init_param=${file_dir}/model.pb \
++tokenizer_conf.token_list=${file_dir}/tokens.txt \ ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
++input=[${file_dir}/wav.scp,${file_dir}/ocr_text] \ ++input=[${file_dir}/wav.scp,${file_dir}/ocr.txt] \
+data_type='["kaldi_ark", "text"]' \ +data_type='["kaldi_ark", "text"]' \
++tokenizer_conf.bpemodel=${file_dir}/bpe.model \ ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
++output_dir="./outputs/debug" \ ++output_dir="./outputs/debug" \

View File

@@ -39,13 +39,11 @@ def prepare_data_iterator(data_in, input_len=None, data_type=None, key=None):
filelist = [".scp", ".txt", ".json", ".jsonl"] filelist = [".scp", ".txt", ".json", ".jsonl"]
chars = string.ascii_letters + string.digits chars = string.ascii_letters + string.digits
pdb.set_trace()
if isinstance(data_in, str) and data_in.startswith('http'): # url if isinstance(data_in, str) and data_in.startswith('http'): # url
data_in = download_from_url(data_in) data_in = download_from_url(data_in)
if isinstance(data_in, str) and os.path.exists(data_in): # wav_path; filelist: wav.scp, file.jsonl;text.txt; if isinstance(data_in, str) and os.path.exists(data_in): # wav_path; filelist: wav.scp, file.jsonl;text.txt;
_, file_extension = os.path.splitext(data_in) _, file_extension = os.path.splitext(data_in)
file_extension = file_extension.lower() file_extension = file_extension.lower()
pdb.set_trace()
if file_extension in filelist: #filelist: wav.scp, file.jsonl;text.txt; if file_extension in filelist: #filelist: wav.scp, file.jsonl;text.txt;
with open(data_in, encoding='utf-8') as fin: with open(data_in, encoding='utf-8') as fin:
for line in fin: for line in fin: