* qwenaudio qwenaudiochat

* qwenaudio qwenaudiochat

* whisper

* whisper

* llm

* llm

* llm

* llm

* llm

* llm

* llm

* llm

* export onnx

* export onnx

* export onnx

* dingding

* dingding

* llm

* doc

* onnx

* onnx

* onnx

* onnx

* onnx

* onnx

* v1.0.15

* qwenaudio

* qwenaudio

* issue doc
This commit is contained in:
zhifu gao 2024-03-12 09:23:04 +08:00 committed by GitHub
parent 0d9384c8c0
commit 68f0603b10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 18 additions and 10 deletions

View File

@ -4,8 +4,12 @@ about: If you have questions, please first search existing issues and docs
labels: 'question, needs triage'
---
Notice: In order to resolve issues more efficiently, please raise an issue following the template.
(注意:为了更加高效率解决您遇到的问题,请按照模板提问,补充细节)
## ❓ Questions and Help
### Before asking:
1. search the issues.
2. search the docs.

View File

@ -4,6 +4,9 @@ about: Submit a bug report to help us improve
labels: 'bug, needs triage'
---
Notice: In order to resolve issues more efficiently, please raise an issue following the template.
(注意:为了更加高效率解决您遇到的问题,请按照模板提问,补充细节)
## 🐛 Bug
<!-- A clear and concise description of what the bug is. -->

View File

@ -12,17 +12,16 @@ model_conf:
length_normalized_loss: true
# encoder
audio_encoder: iic/Whisper-large-v2 #iic/Whisper-large-v3
audio_encoder: "/nfs/zhifu.gzf/init_model/Whisper-large-v3" #iic/Whisper-large-v3
audio_encoder_conf:
hub: ms
freeze: true
init_param_path: "/nfs/maziyang.mzy/models/vicuna-7b-v1.5"
llm: Vicuna
llm: Qwen1.5-7b-chat
llm_conf:
hub: hf
freeze: true
init_param_path: "/nfs/maziyang.mzy/models/vicuna-7b-v1.5"
init_param_path: "/nfs/zhifu.gzf/init_model/qwen/Qwen1___5-7B-Chat"
audio_adaptor: Linear
audio_adaptor_conf:
@ -34,7 +33,7 @@ audio_adaptor_conf:
frontend: WhisperFrontend
frontend_conf:
fs: 16000
whisper_model: large-v2
whisper_model: large-v3
do_pad_trim: true
permute: true # true: [bs, frames, dims]; false: [bs, dims, frames]
@ -66,8 +65,9 @@ train_conf:
optim: adamw
optim_conf:
lr: 0.0001
weight_decay: 0.000001
scheduler: warmuplr
weight_decay: 0.000000
scheduler: custom_lambdalr
scheduler_conf:
warmup_steps: 1000
@ -83,12 +83,12 @@ dataset_conf:
preprocessor_text: TextPreprocessRemovePunctuation
audio_adaptor_downsample_rate: ${audio_adaptor_conf.downsample_rate}
audio_encoder_downsample_rate: 2
prompt: "<|startoftranscription|><|zh|><|transcribe|><|zh|><|notimestamps|><|wo_itn|>"
# prompt: "<|startoftranscription|><|zh|><|transcribe|><|zh|><|notimestamps|><|wo_itn|>"
tokenizer: HuggingfaceTokenizer
tokenizer_conf:
unk_symbol: <unk>
init_param_path: "/nfs/maziyang.mzy/models/vicuna-7b-v1.5"
init_param_path: "/nfs/zhifu.gzf/init_model/qwen/Qwen1___5-7B-Chat"

View File

@ -99,7 +99,8 @@ class IndexDSJsonlRankFull(torch.utils.data.Dataset):
target = data["target"]
source_len = data.get("source_len", 1)
target_len = data.get("target_len", 0)
if "aishell" in source:
target = target.replace(" ", "")
contents.append({"source": source,
"prompt": prompt,
"target": target,