diff --git a/examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_linear2.yaml b/examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_linear2.yaml index c2f641977..483f219d2 100644 --- a/examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_linear2.yaml +++ b/examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_linear2.yaml @@ -28,7 +28,7 @@ audio_adaptor_conf: downsample_rate: 2 llm_dim: 4096 encoder_dim: 1280 - n_layer: 2 + n_layer: 0 # frontend related frontend: WhisperFrontend diff --git a/examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_transformer.yaml b/examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_transformer.yaml new file mode 100644 index 000000000..c2f641977 --- /dev/null +++ b/examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_transformer.yaml @@ -0,0 +1,81 @@ +# This is an example that demonstrates how to configure a model file. +# You can modify the configuration according to your own requirements. + +# to print the register_table: +# from funasr.register import tables +# tables.print() + +# network architecture +model: LLMASR2 +model_conf: + lsm_weight: 0.1 # label smoothing option + length_normalized_loss: true + +# encoder +audio_encoder: "/nfs/zhifu.gzf/init_model/SenseVoiceModelscope" +audio_encoder_conf: + hub: ms + freeze: true + +llm: Qwen1.5-7b-chat +llm_conf: + hub: hf + freeze: true + init_param_path: "/nfs/zhifu.gzf/init_model/qwen/Qwen1___5-7B-Chat_raw" + +audio_adaptor: Transformer +audio_adaptor_conf: + downsample_rate: 2 + llm_dim: 4096 + encoder_dim: 1280 + n_layer: 2 + +# frontend related +frontend: WhisperFrontend +frontend_conf: + fs: 16000 + whisper_model: large-v3 + do_pad_trim: false + permute: false # true: [bs, frames, dims]; false: [bs, dims, frames] + filters_path: "/nfs/zhifu.gzf/init_model/SenseVoiceModelscope/assets/mel_filters.npz" + + + +train_conf: + accum_grad: 1 + grad_clip: 5 + max_epoch: 15 + keep_nbest_models: 10 + log_interval: 10 + +optim: adamw +optim_conf: + lr: 0.0001 + weight_decay: 0.000000 + +scheduler: warmuplr +scheduler_conf: + warmup_steps: 1500 + +dataset: OpenAIDataset +dataset_conf: + index_ds: OpenAIIndexDSJsonl + batch_sampler: BatchSampler + batch_type: token + batch_size: 900 + max_token_length: 1024 + shuffle: true + sort_size: 1024 + batch_size_scale_ratio_max: 2 + num_workers: 4 + audio_adaptor_downsample_rate: ${audio_adaptor_conf.downsample_rate} + audio_encoder_downsample_rate: 2 + data_split_num: 512 + batch_size_sample_max: 15 + retry: 20 + + +tokenizer: HuggingfaceTokenizer +tokenizer_conf: + init_param_path: "/nfs/zhifu.gzf/init_model/qwen/Qwen1___5-7B-Chat_raw" +