mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
update
This commit is contained in:
parent
df9d3438da
commit
362a6de3c8
@ -94,13 +94,16 @@ scheduler_conf:
|
||||
dataset: AudioDataset
|
||||
dataset_conf:
|
||||
index_ds: IndexDSJsonl
|
||||
batch_sampler: DynamicBatchLocalShuffleSampler
|
||||
batch_sampler: RankFullLocalShuffleBatchSampler
|
||||
batch_type: example # example or length
|
||||
batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
||||
batch_size: 32 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
||||
max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
|
||||
buffer_size: 500
|
||||
buffer_size: 1024
|
||||
shuffle: True
|
||||
num_workers: 4
|
||||
preprocessor_speech: SpeechPreprocessSpeedPerturb
|
||||
preprocessor_speech_conf:
|
||||
speed_perturb: [0.9, 1.0, 1.1]
|
||||
|
||||
tokenizer: CharTokenizer
|
||||
tokenizer_conf:
|
||||
|
||||
@ -94,13 +94,16 @@ scheduler_conf:
|
||||
dataset: AudioDataset
|
||||
dataset_conf:
|
||||
index_ds: IndexDSJsonl
|
||||
batch_sampler: DynamicBatchLocalShuffleSampler
|
||||
batch_sampler: RankFullLocalShuffleBatchSampler
|
||||
batch_type: example # example or length
|
||||
batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
||||
batch_size: 32 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
||||
max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
|
||||
buffer_size: 500
|
||||
buffer_size: 1024
|
||||
shuffle: True
|
||||
num_workers: 4
|
||||
preprocessor_speech: SpeechPreprocessSpeedPerturb
|
||||
preprocessor_speech_conf:
|
||||
speed_perturb: [0.9, 1.0, 1.1]
|
||||
|
||||
tokenizer: CharTokenizer
|
||||
tokenizer_conf:
|
||||
|
||||
@ -109,9 +109,14 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
|
||||
echo "log_file: ${log_file}"
|
||||
|
||||
gpu_num=$(echo CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
|
||||
torchrun \
|
||||
--nnodes 1 \
|
||||
--nproc_per_node ${gpu_num} \
|
||||
# torchrun \
|
||||
# --nnodes 1 \
|
||||
# --nproc_per_node ${gpu_num}
|
||||
cmd="python"
|
||||
if [ ${gpu_num} -gt 1 ];then
|
||||
cmd="torchrun --nnodes 1 --nproc_per_node ${gpu_num}"
|
||||
fi
|
||||
${cmd} \
|
||||
../../../funasr/bin/train.py \
|
||||
--config-path "${workspace}/conf" \
|
||||
--config-name "${config}" \
|
||||
|
||||
@ -94,13 +94,16 @@ scheduler_conf:
|
||||
dataset: AudioDataset
|
||||
dataset_conf:
|
||||
index_ds: IndexDSJsonl
|
||||
batch_sampler: DynamicBatchLocalShuffleSampler
|
||||
batch_sampler: RankFullLocalShuffleBatchSampler
|
||||
batch_type: example # example or length
|
||||
batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
||||
batch_size: 32 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
||||
max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
|
||||
buffer_size: 500
|
||||
buffer_size: 1024
|
||||
shuffle: True
|
||||
num_workers: 4
|
||||
preprocessor_speech: SpeechPreprocessSpeedPerturb
|
||||
preprocessor_speech_conf:
|
||||
speed_perturb: [0.9, 1.0, 1.1]
|
||||
|
||||
tokenizer: CharTokenizer
|
||||
tokenizer_conf:
|
||||
|
||||
@ -88,13 +88,16 @@ scheduler_conf:
|
||||
dataset: AudioDataset
|
||||
dataset_conf:
|
||||
index_ds: IndexDSJsonl
|
||||
batch_sampler: DynamicBatchLocalShuffleSampler
|
||||
batch_sampler: RankFullLocalShuffleBatchSampler
|
||||
batch_type: example # example or length
|
||||
batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
||||
batch_size: 32 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
||||
max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
|
||||
buffer_size: 500
|
||||
buffer_size: 1024
|
||||
shuffle: True
|
||||
num_workers: 0
|
||||
num_workers: 4
|
||||
preprocessor_speech: SpeechPreprocessSpeedPerturb
|
||||
preprocessor_speech_conf:
|
||||
speed_perturb: [0.9, 1.0, 1.1]
|
||||
|
||||
tokenizer: CharTokenizer
|
||||
tokenizer_conf:
|
||||
Loading…
Reference in New Issue
Block a user