mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
aishell example
This commit is contained in:
parent
1448e021ac
commit
6a9c21a408
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
. ./path.sh || exit 1;
|
||||
workspace=`pwd`
|
||||
|
||||
# machines configuration
|
||||
CUDA_VISIBLE_DEVICES="0,1"
|
||||
@ -39,7 +39,7 @@ train_set=train
|
||||
valid_set=dev
|
||||
test_sets="dev test"
|
||||
|
||||
asr_config=conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml
|
||||
asr_config=train_asr_paraformer_conformer_12e_6d_2048_256.yaml
|
||||
model_dir="baseline_$(basename "${asr_config}" .yaml)_${lang}_${token_type}_${tag}"
|
||||
|
||||
#inference_config=conf/decode_asr_transformer_noctc_1best.yaml
|
||||
@ -74,19 +74,21 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
||||
utils/text2token.py -n 1 -s 1 ${feats_dir}/data/${x}/text > ${feats_dir}/data/${x}/text.org
|
||||
mv ${feats_dir}/data/${x}/text.org ${feats_dir}/data/${x}/text
|
||||
|
||||
python funasr/datasets/audio_datasets/scp2jsonl.py \
|
||||
++scp_file_list='["${feats_dir}/data/${x}/wav.scp", "${feats_dir}/data/${x}/text"]' \
|
||||
# convert wav.scp text to jsonl
|
||||
scp_file_list_arg="++scp_file_list='[\"${feats_dir}/data/${x}/wav.scp\",\"${feats_dir}/data/${x}/text\"]'"
|
||||
python ../../../funasr/datasets/audio_datasets/scp2jsonl.py \
|
||||
++data_type_list='["source", "target"]' \
|
||||
++jsonl_file_out=${feats_dir}/data/${x}/audio_datasets.jsonl
|
||||
++jsonl_file_out=${feats_dir}/data/${x}/audio_datasets.jsonl \
|
||||
${scp_file_list_arg}
|
||||
done
|
||||
fi
|
||||
|
||||
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
||||
echo "stage 1: Feature and CMVN Generation"
|
||||
# utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
|
||||
python funasr/bin/compute_audio_cmvn.py \
|
||||
--config-path "/Users/zhifu/funasr1.0/examples/aishell/conf" \
|
||||
--config-name "train_asr_paraformer_conformer_12e_6d_2048_256.yaml" \
|
||||
python ../../../funasr/bin/compute_audio_cmvn.py \
|
||||
--config-path "${workspace}" \
|
||||
--config-name "${asr_config}" \
|
||||
++train_data_set_list="${feats_dir}/data/${train_set}/audio_datasets.jsonl" \
|
||||
++cmvn_file="${feats_dir}/data/${train_set}/cmvn.json" \
|
||||
++dataset_conf.num_workers=$nj
|
||||
@ -116,16 +118,16 @@ fi
|
||||
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
|
||||
echo "stage 4: ASR Training"
|
||||
|
||||
torchrun \
|
||||
--nnodes 1 \
|
||||
--nproc_per_node ${gpu_num} \
|
||||
funasr/bin/train.py \
|
||||
--config-path "/Users/zhifu/funasr1.0/examples/aishell/conf" \
|
||||
--config-name "train_asr_paraformer_conformer_12e_6d_2048_256.yaml" \
|
||||
++train_data_set_list="${feats_dir}/data/${train_set}/audio_datasets.jsonl" \
|
||||
++cmvn_file="${feats_dir}/data/${train_set}/am.mvn" \
|
||||
++token_list="${token_list}" \
|
||||
++output_dir="${exp_dir}/exp/${model_dir}"
|
||||
torchrun \
|
||||
--nnodes 1 \
|
||||
--nproc_per_node ${gpu_num} \
|
||||
../../../funasr/bin/train.py \
|
||||
--config-path "${workspace}" \
|
||||
--config-name "${asr_config}" \
|
||||
++train_data_set_list="${feats_dir}/data/${train_set}/audio_datasets.jsonl" \
|
||||
++cmvn_file="${feats_dir}/data/${train_set}/am.mvn" \
|
||||
++token_list="${token_list}" \
|
||||
++output_dir="${exp_dir}/exp/${model_dir}"
|
||||
fi
|
||||
|
||||
#
|
||||
|
||||
@ -6,10 +6,12 @@
|
||||
#git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path}
|
||||
|
||||
## generate jsonl from wav.scp and text.txt
|
||||
#python funasr/datasets/audio_datasets/scp2jsonl.py \
|
||||
#++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
|
||||
#++data_type_list='["source", "target"]' \
|
||||
#++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
|
||||
python funasr/datasets/audio_datasets/scp2jsonl.py \
|
||||
++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
|
||||
++data_type_list='["source", "target"]' \
|
||||
++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
|
||||
|
||||
|
||||
# torchrun \
|
||||
# --nnodes 1 \
|
||||
# --nproc_per_node 1 \
|
||||
|
||||
@ -19,7 +19,6 @@ from funasr.train_utils.load_pretrained_model import load_pretrained_model
|
||||
from funasr.utils.load_utils import load_audio_text_image_video, extract_fbank
|
||||
from funasr.utils.timestamp_tools import timestamp_sentence
|
||||
from funasr.models.campplus.utils import sv_chunk, postprocess, distribute_spk
|
||||
from funasr.models.campplus.cluster_backend import ClusterBackend
|
||||
from funasr.auto.auto_model import prepare_data_iterator
|
||||
|
||||
|
||||
|
||||
@ -20,7 +20,10 @@ from funasr.train_utils.load_pretrained_model import load_pretrained_model
|
||||
from funasr.utils.load_utils import load_audio_text_image_video, extract_fbank
|
||||
from funasr.utils.timestamp_tools import timestamp_sentence
|
||||
from funasr.models.campplus.utils import sv_chunk, postprocess, distribute_spk
|
||||
from funasr.models.campplus.cluster_backend import ClusterBackend
|
||||
try:
|
||||
from funasr.models.campplus.cluster_backend import ClusterBackend
|
||||
except:
|
||||
print("If you want to use the speaker diarization, please `pip install hdbscan`")
|
||||
|
||||
|
||||
def prepare_data_iterator(data_in, input_len=None, data_type=None, key=None):
|
||||
|
||||
@ -19,7 +19,7 @@ def gen_jsonl_from_wav_text_list(path, data_type_list=("source", "target"), json
|
||||
world_size = 1
|
||||
|
||||
cpu_cores = os.cpu_count() or 1
|
||||
|
||||
print(f"convert wav.scp text to jsonl, ncpu: {cpu_cores}")
|
||||
if rank == 0:
|
||||
json_dict = {}
|
||||
for data_type, data_file in zip(data_type_list, path):
|
||||
@ -65,7 +65,7 @@ def parse_context_length(data_list: list, data_type: str):
|
||||
sample_num = len(waveform)
|
||||
context_len = int(sample_num//16000*1000/10)
|
||||
else:
|
||||
context_len = len(line)
|
||||
context_len = len(line.split()) if " " in line else len(line)
|
||||
res[key] = {data_type: line, f"{data_type}_len": context_len}
|
||||
return res
|
||||
|
||||
@ -83,6 +83,8 @@ def main_hydra(cfg: DictConfig):
|
||||
kwargs = OmegaConf.to_container(cfg, resolve=True)
|
||||
|
||||
scp_file_list = kwargs.get("scp_file_list", ("/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"))
|
||||
if isinstance(scp_file_list, str):
|
||||
scp_file_list = eval(scp_file_list)
|
||||
data_type_list = kwargs.get("data_type_list", ("source", "target"))
|
||||
jsonl_file_out = kwargs.get("jsonl_file_out", "/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl")
|
||||
gen_jsonl_from_wav_text_list(scp_file_list, data_type_list=data_type_list, jsonl_file_out=jsonl_file_out)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user