diff --git a/docs/tutorial/Tables.md b/docs/tutorial/Tables.md
index 2cbfc6200..831a3ac8e 100644
--- a/docs/tutorial/Tables.md
+++ b/docs/tutorial/Tables.md
@@ -5,13 +5,13 @@
 The original intention of the funasr-1.x.x version is to make model integration easier. The core feature is the registry and AutoModel:
 
 *   The introduction of the registry enables the development of building blocks to access the model, compatible with a variety of tasks;
-    
+
 *   The newly designed AutoModel interface unifies modelscope, huggingface, and funasr inference and training interfaces, and supports free download of repositories;
-    
+
 *   Support model export, demo-level service deployment, and industrial-level multi-concurrent service deployment;
-    
+
 *   Unify academic and industrial model inference training scripts;
-    
+
 
 # Quick to get started
 
@@ -51,19 +51,19 @@ Model = AutoModel(model=[str], device=[str], ncpu=[int], output_dir=[str], batch
 ```
 
 *   `model`(str): [Model Warehouse](https://github.com/alibaba-damo-academy/FunASR/tree/main/model_zoo)The model name in, or the model path in the local disk
-    
+
 *   `device`(str): `cuda:0`(Default gpu0), using GPU for inference, specified. If`cpu`Then the CPU is used for inference
-    
+
 *   `ncpu`(int): `4`(Default), set the number of threads used for CPU internal operation parallelism
-    
+
 *   `output_dir`(str): `None`(Default) If set, the output path of the output result
-    
+
 *   `batch_size`(int): `1`(Default), batch processing during decoding, number of samples
-    
+
 *   `hub`(str)：`ms`(Default) to download the model from modelscope. If`hf`To download the model from huggingface.
-    
+
 *   `**kwargs`(dict): All in`config.yaml`Parameters, which can be specified directly here, for example, the maximum cut length in the vad model.`max_single_segment_time=6000`(Milliseconds).
-    
+
 
 #### AutoModel reasoning
 
@@ -72,13 +72,13 @@ Res = model.generate(input=[str], output_dir=[str])
 ```
 
 *   *   wav file path, for example: asr\_example.wav
-        
+
     *   pcm file path, for example: asr\_example.pcm, you need to specify the audio sampling rate fs (default is 16000)
-        
+
     *   Audio byte stream, for example: microphone byte data
-        
+
     *   wav.scp,kaldi-style wav list (`wav_id \t wav_path`), for example:
-        
+
 
 ```plaintext
 Asr_example1./audios/asr_example1.wav
@@ -89,13 +89,13 @@ Asr_example2./audios/asr_example2.wav
 In this input
 
 *   Audio sampling points, for example:`audio, rate = soundfile.read("asr_example_zh.wav")`Is numpy.ndarray. batch input is supported. The type is list:`[audio_sample1, audio_sample2, ..., audio_sampleN]`
-    
+
 *   fbank input, support group batch. shape is \[batch, frames, dim\], type is torch.Tensor, for example
-    
+
 *   `output_dir`: None (default), if set, the output path of the output result
-    
+
 *   `**kwargs`(dict): Model-related inference parameters, e.g,`beam_size=10`,`decoding_ctc_weight=0.1`.
-    
+
 
 Detailed documentation link:[https://github.com/modelscope/FunASR/blob/main/examples/README\_zh.md](https://github.com/modelscope/FunASR/blob/main/examples/README_zh.md)
 
@@ -128,7 +128,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 
@@ -208,10 +208,10 @@ Path resolution: configuration.json (not required)
   "model": {"type" : "funasr"},
   "pipeline": {"type":"funasr-pipeline"},
   "model_name_in_hub": {
-    "ms":"", 
+    "ms":"",
     "hf":""},
   "file_path_metas": {
-    "init_param":"model.pt", 
+    "init_param":"model.pt",
     "config":"config.yaml",
     "tokenizer_conf": {"bpemodel": "chn_jpn_yue_eng_ko_spectok.bpe.model"},
     "frontend_conf":{"cmvn_file": "am.mvn"}}
@@ -274,7 +274,7 @@ class SenseVoiceSmall(nn.Module):
   def forward(
       self,
       **kwargs,
-  ):  
+  ):
 
   def inference(
       self,
@@ -320,9 +320,9 @@ from funasr.models.sense_voice.model import *
 ## Principles of Registration
 
 *   Model: models are independent of each other. Each Model needs to create a new Model directory under funasr/models/. Do not use class inheritance method!!! Do not import from other model directories, and put everything you need into your own model directory!!! Do not modify the existing model code!!!
-    
+
 *   dataset,frontend,tokenizer, if you can reuse the existing one, reuse it directly, if you cannot reuse it, please register a new one, modify it again, and do not modify the original one!!!
-    
+
 
 # Independent warehouse
 
@@ -337,7 +337,7 @@ from funasr import AutoModel
 model = AutoModel (
 model="iic/SenseVoiceSmall ",
 trust_remote_code=True
-remote_code = "./model.py", 
+remote_code = "./model.py",
 )
 ```
 
@@ -360,4 +360,4 @@ res = m.inference(
 print(text)
 ```
 
-Trim reference:[https://github.com/FunAudioLLM/SenseVoice/blob/main/finetune.sh](https://github.com/FunAudioLLM/SenseVoice/blob/main/finetune.sh)
\ No newline at end of file
+Fine-tuning reference:[https://github.com/FunAudioLLM/SenseVoice/blob/main/finetune.sh](https://github.com/FunAudioLLM/SenseVoice/blob/main/finetune.sh)
diff --git a/docs/tutorial/Tables_zh.md b/docs/tutorial/Tables_zh.md
index 72af82a0b..e9360e0d4 100644
--- a/docs/tutorial/Tables_zh.md
+++ b/docs/tutorial/Tables_zh.md
@@ -5,13 +5,13 @@
 funasr-1.x.x 版本的设计初衷是【**让模型集成更简单**】，核心feature为注册表与AutoModel：
 
 *   注册表的引入，使得开发中可以用搭积木的方式接入模型，兼容多种task；
-    
+
 *   新设计的AutoModel接口，统一modelscope、huggingface与funasr推理与训练接口，支持自由选择下载仓库；
-    
+
 *   支持模型导出，demo级别服务部署，以及工业级别多并发服务部署；
-    
+
 *   统一学术与工业模型推理训练脚本；
-    
+
 
 # 快速上手
 
@@ -51,19 +51,19 @@ model = AutoModel(model=[str], device=[str], ncpu=[int], output_dir=[str], batch
 ```
 
 *   `model`(str): [模型仓库](https://github.com/alibaba-damo-academy/FunASR/tree/main/model_zoo) 中的模型名称，或本地磁盘中的模型路径
-    
+
 *   `device`(str): `cuda:0`（默认gpu0），使用 GPU 进行推理，指定。如果为`cpu`，则使用 CPU 进行推理
-    
+
 *   `ncpu`(int): `4` （默认），设置用于 CPU 内部操作并行性的线程数
-    
+
 *   `output_dir`(str): `None` （默认），如果设置，输出结果的输出路径
-    
+
 *   `batch_size`(int): `1` （默认），解码时的批处理，样本个数
-    
+
 *   `hub`(str)：`ms`（默认），从modelscope下载模型。如果为`hf`，从huggingface下载模型。
-    
+
 *   `**kwargs`(dict): 所有在`config.yaml`中参数，均可以直接在此处指定，例如，vad模型中最大切割长度 `max_single_segment_time=6000` （毫秒）。
-    
+
 
 #### AutoModel 推理
 
@@ -72,13 +72,13 @@ res = model.generate(input=[str], output_dir=[str])
 ```
 
 *   *   wav文件路径, 例如: asr\_example.wav
-        
+
     *   pcm文件路径, 例如: asr\_example.pcm，此时需要指定音频采样率fs（默认为16000）
-        
+
     *   音频字节数流，例如：麦克风的字节数数据
-        
+
     *   wav.scp，kaldi 样式的 wav 列表 (`wav_id \t wav_path`), 例如:
-        
+
 
 ```plaintext
 asr_example1  ./audios/asr_example1.wav
@@ -89,13 +89,13 @@ asr_example2  ./audios/asr_example2.wav
 在这种输入 
 
 *   音频采样点，例如：`audio, rate = soundfile.read("asr_example_zh.wav")`, 数据类型为 numpy.ndarray。支持batch输入，类型为list： `[audio_sample1, audio_sample2, ..., audio_sampleN]`
-    
+
 *   fbank输入，支持组batch。shape为\[batch, frames, dim\]，类型为torch.Tensor，例如
-    
+
 *   `output_dir`: None （默认），如果设置，输出结果的输出路径
-    
+
 *   `**kwargs`(dict): 与模型相关的推理参数，例如，`beam_size=10`，`decoding_ctc_weight=0.1`。
-    
+
 
 详细文档链接：[https://github.com/modelscope/FunASR/blob/main/examples/README\_zh.md](https://github.com/modelscope/FunASR/blob/main/examples/README_zh.md)
 
@@ -128,7 +128,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 
@@ -208,10 +208,10 @@ scheduler_conf:
   "model": {"type" : "funasr"},
   "pipeline": {"type":"funasr-pipeline"},
   "model_name_in_hub": {
-    "ms":"", 
+    "ms":"",
     "hf":""},
   "file_path_metas": {
-    "init_param":"model.pt", 
+    "init_param":"model.pt",
     "config":"config.yaml",
     "tokenizer_conf": {"bpemodel": "chn_jpn_yue_eng_ko_spectok.bpe.model"},
     "frontend_conf":{"cmvn_file": "am.mvn"}}
@@ -274,7 +274,7 @@ class SenseVoiceSmall(nn.Module):
   def forward(
       self,
       **kwargs,
-  ):  
+  ):
 
   def inference(
       self,
@@ -320,9 +320,9 @@ from funasr.models.sense_voice.model import *
 ## 注册原则
 
 *   Model：模型之间互相独立，每一个模型，都需要在funasr/models/下面新建一个模型目录，不要采用类的继承方法！！！不要从其他模型目录中import，所有需要用到的都单独放到自己的模型目录中！！！不要修改现有的模型代码！！！
-    
+
 *   dataset，frontend，tokenizer，如果能复用现有的，直接复用，如果不能复用，请注册一个新的，再修改，不要修改原来的！！！
-    
+
 
 # 独立仓库
 
@@ -336,8 +336,8 @@ from funasr import AutoModel
 # trust_remote_code：`True` 表示 model 代码实现从 `remote_code` 处加载，`remote_code` 指定 `model` 具体代码的位置（例如，当前目录下的 `model.py`），支持绝对路径与相对路径，以及网络 url。
 model = AutoModel(
     model="iic/SenseVoiceSmall",
-    trust_remote_code=True,  
-    remote_code="./model.py", 
+    trust_remote_code=True,
+    remote_code="./model.py",
 )
 ```
 
@@ -360,4 +360,4 @@ res = m.inference(
 print(text)
 ```
 
-微调参考：[https://github.com/FunAudioLLM/SenseVoice/blob/main/finetune.sh](https://github.com/FunAudioLLM/SenseVoice/blob/main/finetune.sh)
\ No newline at end of file
+微调参考：[https://github.com/FunAudioLLM/SenseVoice/blob/main/finetune.sh](https://github.com/FunAudioLLM/SenseVoice/blob/main/finetune.sh)
diff --git a/examples/industrial_data_pretraining/sanm_kws/conf/sanm_6e_320_256_fdim40_t2602.yaml b/examples/industrial_data_pretraining/sanm_kws/conf/sanm_6e_320_256_fdim40_t2602.yaml
index c4d8c1826..4fa8f35ed 100644
--- a/examples/industrial_data_pretraining/sanm_kws/conf/sanm_6e_320_256_fdim40_t2602.yaml
+++ b/examples/industrial_data_pretraining/sanm_kws/conf/sanm_6e_320_256_fdim40_t2602.yaml
@@ -18,7 +18,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 # frontend related
diff --git a/examples/industrial_data_pretraining/sanm_kws_streaming/conf/sanm_6e_320_256_fdim40_t2602.yaml b/examples/industrial_data_pretraining/sanm_kws_streaming/conf/sanm_6e_320_256_fdim40_t2602.yaml
index 664997c10..28b3ccbf0 100644
--- a/examples/industrial_data_pretraining/sanm_kws_streaming/conf/sanm_6e_320_256_fdim40_t2602.yaml
+++ b/examples/industrial_data_pretraining/sanm_kws_streaming/conf/sanm_6e_320_256_fdim40_t2602.yaml
@@ -18,7 +18,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
     chunk_size:
     - 16
diff --git a/funasr/models/bicif_paraformer/template.yaml b/funasr/models/bicif_paraformer/template.yaml
index db7ce5555..710938c3d 100644
--- a/funasr/models/bicif_paraformer/template.yaml
+++ b/funasr/models/bicif_paraformer/template.yaml
@@ -30,7 +30,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 # decoder
@@ -45,7 +45,7 @@ decoder_conf:
     src_attention_dropout_rate: 0.1
     att_layer_num: 16
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
 
 predictor: CifPredictorV3
 predictor_conf:
diff --git a/funasr/models/contextual_paraformer/decoder.py b/funasr/models/contextual_paraformer/decoder.py
index ba2ce9ade..958c46b54 100644
--- a/funasr/models/contextual_paraformer/decoder.py
+++ b/funasr/models/contextual_paraformer/decoder.py
@@ -137,7 +137,7 @@ class ContextualParaformerDecoder(ParaformerSANMDecoder):
         concat_after: bool = False,
         att_layer_num: int = 6,
         kernel_size: int = 21,
-        sanm_shfit: int = 0,
+        sanm_shift: int = 0,
     ):
         super().__init__(
             vocab_size=vocab_size,
@@ -179,14 +179,14 @@ class ContextualParaformerDecoder(ParaformerSANMDecoder):
 
         self.att_layer_num = att_layer_num
         self.num_blocks = num_blocks
-        if sanm_shfit is None:
-            sanm_shfit = (kernel_size - 1) // 2
+        if sanm_shift is None:
+            sanm_shift = (kernel_size - 1) // 2
         self.decoders = repeat(
             att_layer_num - 1,
             lambda lnum: DecoderLayerSANM(
                 attention_dim,
                 MultiHeadedAttentionSANMDecoder(
-                    attention_dim, self_attention_dropout_rate, kernel_size, sanm_shfit=sanm_shfit
+                    attention_dim, self_attention_dropout_rate, kernel_size, sanm_shift=sanm_shift
                 ),
                 MultiHeadedAttentionCrossAtt(
                     attention_heads, attention_dim, src_attention_dropout_rate
@@ -210,7 +210,7 @@ class ContextualParaformerDecoder(ParaformerSANMDecoder):
         self.last_decoder = ContextualDecoderLayer(
             attention_dim,
             MultiHeadedAttentionSANMDecoder(
-                attention_dim, self_attention_dropout_rate, kernel_size, sanm_shfit=sanm_shfit
+                attention_dim, self_attention_dropout_rate, kernel_size, sanm_shift=sanm_shift
             ),
             MultiHeadedAttentionCrossAtt(
                 attention_heads, attention_dim, src_attention_dropout_rate
@@ -228,7 +228,7 @@ class ContextualParaformerDecoder(ParaformerSANMDecoder):
                 lambda lnum: DecoderLayerSANM(
                     attention_dim,
                     MultiHeadedAttentionSANMDecoder(
-                        attention_dim, self_attention_dropout_rate, kernel_size, sanm_shfit=0
+                        attention_dim, self_attention_dropout_rate, kernel_size, sanm_shift=0
                     ),
                     None,
                     PositionwiseFeedForwardDecoderSANM(attention_dim, linear_units, dropout_rate),
diff --git a/funasr/models/contextual_paraformer/template.yaml b/funasr/models/contextual_paraformer/template.yaml
index 22052500c..17e542a65 100644
--- a/funasr/models/contextual_paraformer/template.yaml
+++ b/funasr/models/contextual_paraformer/template.yaml
@@ -30,7 +30,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 
@@ -46,7 +46,7 @@ decoder_conf:
     src_attention_dropout_rate: 0.1
     att_layer_num: 16
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
 
 predictor: CifPredictorV2
 predictor_conf:
@@ -126,4 +126,4 @@ ctc_conf:
     ctc_type: builtin
     reduce: true
     ignore_nan_grad: true
-normalize: null
\ No newline at end of file
+normalize: null
diff --git a/funasr/models/ct_transformer/template.yaml b/funasr/models/ct_transformer/template.yaml
index 2538e6b1d..7ad6e697d 100644
--- a/funasr/models/ct_transformer/template.yaml
+++ b/funasr/models/ct_transformer/template.yaml
@@ -41,7 +41,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
     padding_idx: 0
 
diff --git a/funasr/models/ct_transformer_streaming/attention.py b/funasr/models/ct_transformer_streaming/attention.py
index 97e770bbb..be7113f89 100644
--- a/funasr/models/ct_transformer_streaming/attention.py
+++ b/funasr/models/ct_transformer_streaming/attention.py
@@ -11,9 +11,9 @@ class MultiHeadedAttentionSANMwithMask(MultiHeadedAttentionSANM):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-    def forward(self, x, mask, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
+    def forward(self, x, mask, mask_shift_chunk=None, mask_att_chunk_encoder=None):
         q_h, k_h, v_h, v = self.forward_qkv(x)
-        fsmn_memory = self.forward_fsmn(v, mask[0], mask_shfit_chunk)
+        fsmn_memory = self.forward_fsmn(v, mask[0], mask_shift_chunk)
         q_h = q_h * self.d_k ** (-0.5)
         scores = torch.matmul(q_h, k_h.transpose(-2, -1))
         att_outs = self.forward_attention(v_h, scores, mask[1], mask_att_chunk_encoder)
diff --git a/funasr/models/ct_transformer_streaming/encoder.py b/funasr/models/ct_transformer_streaming/encoder.py
index a61319aa4..7d09875fb 100644
--- a/funasr/models/ct_transformer_streaming/encoder.py
+++ b/funasr/models/ct_transformer_streaming/encoder.py
@@ -56,7 +56,7 @@ class EncoderLayerSANM(torch.nn.Module):
         self.stochastic_depth_rate = stochastic_depth_rate
         self.dropout_rate = dropout_rate
 
-    def forward(self, x, mask, cache=None, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
+    def forward(self, x, mask, cache=None, mask_shift_chunk=None, mask_att_chunk_encoder=None):
         """Compute encoded features.
 
         Args:
@@ -93,7 +93,7 @@ class EncoderLayerSANM(torch.nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     ),
                 ),
@@ -109,7 +109,7 @@ class EncoderLayerSANM(torch.nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     )
                 )
@@ -118,7 +118,7 @@ class EncoderLayerSANM(torch.nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     )
                 )
@@ -132,7 +132,7 @@ class EncoderLayerSANM(torch.nn.Module):
         if not self.normalize_before:
             x = self.norm2(x)
 
-        return x, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder
+        return x, mask, cache, mask_shift_chunk, mask_att_chunk_encoder
 
     def forward_chunk(self, x, cache=None, chunk_size=None, look_back=0):
         """Compute encoded features.
@@ -198,7 +198,7 @@ class SANMVadEncoder(torch.nn.Module):
         interctc_layer_idx: List[int] = [],
         interctc_use_conditioning: bool = False,
         kernel_size: int = 11,
-        sanm_shfit: int = 0,
+        sanm_shift: int = 0,
         selfattention_layer_type: str = "sanm",
     ):
         super().__init__()
@@ -277,7 +277,7 @@ class SANMVadEncoder(torch.nn.Module):
                 output_size,
                 attention_dropout_rate,
                 kernel_size,
-                sanm_shfit,
+                sanm_shift,
             )
 
             encoder_selfattn_layer_args = (
@@ -286,7 +286,7 @@ class SANMVadEncoder(torch.nn.Module):
                 output_size,
                 attention_dropout_rate,
                 kernel_size,
-                sanm_shfit,
+                sanm_shift,
             )
 
         self.encoders0 = repeat(
diff --git a/funasr/models/ct_transformer_streaming/template.yaml b/funasr/models/ct_transformer_streaming/template.yaml
index 2477ac2be..ae59b4446 100644
--- a/funasr/models/ct_transformer_streaming/template.yaml
+++ b/funasr/models/ct_transformer_streaming/template.yaml
@@ -41,10 +41,10 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 5
+    sanm_shift: 5
     selfattention_layer_type: sanm
     padding_idx: 0
 
 tokenizer: CharTokenizer
 tokenizer_conf:
-  unk_symbol: <unk>
\ No newline at end of file
+  unk_symbol: <unk>
diff --git a/funasr/models/monotonic_aligner/template.yaml b/funasr/models/monotonic_aligner/template.yaml
index f8d5ded7d..82abf35c6 100644
--- a/funasr/models/monotonic_aligner/template.yaml
+++ b/funasr/models/monotonic_aligner/template.yaml
@@ -25,7 +25,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 predictor: CifPredictorV3
@@ -111,5 +111,5 @@ ctc_conf:
     ctc_type: builtin
     reduce: true
     ignore_nan_grad: true
-    
+
 normalize: null
diff --git a/funasr/models/paraformer/decoder.py b/funasr/models/paraformer/decoder.py
index 7edd91a2a..fafb8d41a 100644
--- a/funasr/models/paraformer/decoder.py
+++ b/funasr/models/paraformer/decoder.py
@@ -248,7 +248,7 @@ class ParaformerSANMDecoder(BaseTransformerDecoder):
         concat_after: bool = False,
         att_layer_num: int = 6,
         kernel_size: int = 21,
-        sanm_shfit: int = 0,
+        sanm_shift: int = 0,
         lora_list: List[str] = None,
         lora_rank: int = 8,
         lora_alpha: int = 16,
@@ -298,14 +298,14 @@ class ParaformerSANMDecoder(BaseTransformerDecoder):
 
         self.att_layer_num = att_layer_num
         self.num_blocks = num_blocks
-        if sanm_shfit is None:
-            sanm_shfit = (kernel_size - 1) // 2
+        if sanm_shift is None:
+            sanm_shift = (kernel_size - 1) // 2
         self.decoders = repeat(
             att_layer_num,
             lambda lnum: DecoderLayerSANM(
                 attention_dim,
                 MultiHeadedAttentionSANMDecoder(
-                    attention_dim, self_attention_dropout_rate, kernel_size, sanm_shfit=sanm_shfit
+                    attention_dim, self_attention_dropout_rate, kernel_size, sanm_shift=sanm_shift
                 ),
                 MultiHeadedAttentionCrossAtt(
                     attention_heads,
@@ -330,7 +330,7 @@ class ParaformerSANMDecoder(BaseTransformerDecoder):
                 lambda lnum: DecoderLayerSANM(
                     attention_dim,
                     MultiHeadedAttentionSANMDecoder(
-                        attention_dim, self_attention_dropout_rate, kernel_size, sanm_shfit=0
+                        attention_dim, self_attention_dropout_rate, kernel_size, sanm_shift=0
                     ),
                     None,
                     PositionwiseFeedForwardDecoderSANM(attention_dim, linear_units, dropout_rate),
@@ -785,20 +785,20 @@ class ParaformerSANMDecoderExport(torch.nn.Module):
             for _ in range(cache_num)
         ]
         return (tgt, memory, pre_acoustic_embeds, cache)
-    
+
     def is_optimizable(self):
         return True
-    
+
     def get_input_names(self):
         cache_num = len(self.model.decoders) + len(self.model.decoders2)
         return ['tgt', 'memory', 'pre_acoustic_embeds'] \
                + ['cache_%d' % i for i in range(cache_num)]
-    
+
     def get_output_names(self):
         cache_num = len(self.model.decoders) + len(self.model.decoders2)
         return ['y'] \
                + ['out_cache_%d' % i for i in range(cache_num)]
-    
+
     def get_dynamic_axes(self):
         ret = {
             'tgt': {
diff --git a/funasr/models/paraformer/template.yaml b/funasr/models/paraformer/template.yaml
index 249e88ca6..170c10be4 100644
--- a/funasr/models/paraformer/template.yaml
+++ b/funasr/models/paraformer/template.yaml
@@ -29,7 +29,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 # decoder
@@ -44,7 +44,7 @@ decoder_conf:
     src_attention_dropout_rate: 0.1
     att_layer_num: 16
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
 
 predictor: CifPredictorV2
 predictor_conf:
diff --git a/funasr/models/paraformer_streaming/model.py b/funasr/models/paraformer_streaming/model.py
index 16021ceb6..bbc9668d0 100644
--- a/funasr/models/paraformer_streaming/model.py
+++ b/funasr/models/paraformer_streaming/model.py
@@ -198,10 +198,10 @@ class ParaformerStreaming(Paraformer):
             mask_chunk_predictor = self.encoder.overlap_chunk_cls.get_mask_chunk_predictor(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            mask_shfit_chunk = self.encoder.overlap_chunk_cls.get_mask_shfit_chunk(
+            mask_shift_chunk = self.encoder.overlap_chunk_cls.get_mask_shift_chunk(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            encoder_out = encoder_out * mask_shfit_chunk
+            encoder_out = encoder_out * mask_shift_chunk
         pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor(
             encoder_out,
             ys_pad,
@@ -357,10 +357,10 @@ class ParaformerStreaming(Paraformer):
             mask_chunk_predictor = self.encoder.overlap_chunk_cls.get_mask_chunk_predictor(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            mask_shfit_chunk = self.encoder.overlap_chunk_cls.get_mask_shfit_chunk(
+            mask_shift_chunk = self.encoder.overlap_chunk_cls.get_mask_shift_chunk(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            encoder_out = encoder_out * mask_shfit_chunk
+            encoder_out = encoder_out * mask_shift_chunk
         pre_acoustic_embeds, pre_token_length, pre_alphas, pre_peak_index = self.predictor(
             encoder_out,
             None,
diff --git a/funasr/models/paraformer_streaming/template.yaml b/funasr/models/paraformer_streaming/template.yaml
index 889971ad1..44cbbc7bb 100644
--- a/funasr/models/paraformer_streaming/template.yaml
+++ b/funasr/models/paraformer_streaming/template.yaml
@@ -29,7 +29,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
     chunk_size:
     - 12
@@ -59,7 +59,7 @@ decoder_conf:
     src_attention_dropout_rate: 0.1
     att_layer_num: 16
     kernel_size: 11
-    sanm_shfit: 5
+    sanm_shift: 5
 
 predictor: CifPredictorV2
 predictor_conf:
diff --git a/funasr/models/sanm/attention.py b/funasr/models/sanm/attention.py
index 47d60cb67..a9bb70fe8 100644
--- a/funasr/models/sanm/attention.py
+++ b/funasr/models/sanm/attention.py
@@ -154,7 +154,7 @@ class MultiHeadedAttentionSANM(nn.Module):
         n_feat,
         dropout_rate,
         kernel_size,
-        sanm_shfit=0,
+        sanm_shift=0,
         lora_list=None,
         lora_rank=8,
         lora_alpha=16,
@@ -199,17 +199,17 @@ class MultiHeadedAttentionSANM(nn.Module):
         )
         # padding
         left_padding = (kernel_size - 1) // 2
-        if sanm_shfit > 0:
-            left_padding = left_padding + sanm_shfit
+        if sanm_shift > 0:
+            left_padding = left_padding + sanm_shift
         right_padding = kernel_size - 1 - left_padding
         self.pad_fn = nn.ConstantPad1d((left_padding, right_padding), 0.0)
 
-    def forward_fsmn(self, inputs, mask, mask_shfit_chunk=None):
+    def forward_fsmn(self, inputs, mask, mask_shift_chunk=None):
         b, t, d = inputs.size()
         if mask is not None:
             mask = torch.reshape(mask, (b, -1, 1))
-            if mask_shfit_chunk is not None:
-                mask = mask * mask_shfit_chunk
+            if mask_shift_chunk is not None:
+                mask = mask * mask_shift_chunk
             inputs = inputs * mask
 
         x = inputs.transpose(1, 2)
@@ -289,7 +289,7 @@ class MultiHeadedAttentionSANM(nn.Module):
 
         return self.linear_out(x)  # (batch, time1, d_model)
 
-    def forward(self, x, mask, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
+    def forward(self, x, mask, mask_shift_chunk=None, mask_att_chunk_encoder=None):
         """Compute scaled dot product attention.
 
         Args:
@@ -304,7 +304,7 @@ class MultiHeadedAttentionSANM(nn.Module):
 
         """
         q_h, k_h, v_h, v = self.forward_qkv(x)
-        fsmn_memory = self.forward_fsmn(v, mask, mask_shfit_chunk)
+        fsmn_memory = self.forward_fsmn(v, mask, mask_shift_chunk)
         q_h = q_h * self.d_k ** (-0.5)
         scores = torch.matmul(q_h, k_h.transpose(-2, -1))
         att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)
@@ -478,7 +478,7 @@ class MultiHeadedAttentionSANMDecoder(nn.Module):
 
     """
 
-    def __init__(self, n_feat, dropout_rate, kernel_size, sanm_shfit=0):
+    def __init__(self, n_feat, dropout_rate, kernel_size, sanm_shift=0):
         """Construct an MultiHeadedAttention object."""
         super().__init__()
 
@@ -490,13 +490,13 @@ class MultiHeadedAttentionSANMDecoder(nn.Module):
         # padding
         # padding
         left_padding = (kernel_size - 1) // 2
-        if sanm_shfit > 0:
-            left_padding = left_padding + sanm_shfit
+        if sanm_shift > 0:
+            left_padding = left_padding + sanm_shift
         right_padding = kernel_size - 1 - left_padding
         self.pad_fn = nn.ConstantPad1d((left_padding, right_padding), 0.0)
         self.kernel_size = kernel_size
 
-    def forward(self, inputs, mask, cache=None, mask_shfit_chunk=None):
+    def forward(self, inputs, mask, cache=None, mask_shift_chunk=None):
         """
         :param x: (#batch, time1, size).
         :param mask: Mask tensor (#batch, 1, time)
@@ -509,9 +509,9 @@ class MultiHeadedAttentionSANMDecoder(nn.Module):
         if mask is not None:
             mask = torch.reshape(mask, (b, -1, 1))
             # logging.info("in fsmn, mask: {}, {}".format(mask.size(), mask[0:100:50, :, :]))
-            if mask_shfit_chunk is not None:
-                # logging.info("in fsmn, mask_fsmn: {}, {}".format(mask_shfit_chunk.size(), mask_shfit_chunk[0:100:50, :, :]))
-                mask = mask * mask_shfit_chunk
+            if mask_shift_chunk is not None:
+                # logging.info("in fsmn, mask_fsmn: {}, {}".format(mask_shift_chunk.size(), mask_shift_chunk[0:100:50, :, :]))
+                mask = mask * mask_shift_chunk
             # logging.info("in fsmn, mask_after_fsmn: {}, {}".format(mask.size(), mask[0:100:50, :, :]))
             # print("in fsmn, mask", mask.size())
             # print("in fsmn, inputs", inputs.size())
diff --git a/funasr/models/sanm/decoder.py b/funasr/models/sanm/decoder.py
index 1a4fb26e2..01a5f0ece 100644
--- a/funasr/models/sanm/decoder.py
+++ b/funasr/models/sanm/decoder.py
@@ -226,7 +226,7 @@ class FsmnDecoder(BaseTransformerDecoder):
         concat_after: bool = False,
         att_layer_num: int = 6,
         kernel_size: int = 21,
-        sanm_shfit: int = None,
+        sanm_shift: int = None,
         concat_embeds: bool = False,
         attention_dim: int = None,
         tf2torch_tensor_name_prefix_torch: str = "decoder",
@@ -271,14 +271,14 @@ class FsmnDecoder(BaseTransformerDecoder):
 
         self.att_layer_num = att_layer_num
         self.num_blocks = num_blocks
-        if sanm_shfit is None:
-            sanm_shfit = (kernel_size - 1) // 2
+        if sanm_shift is None:
+            sanm_shift = (kernel_size - 1) // 2
         self.decoders = repeat(
             att_layer_num,
             lambda lnum: DecoderLayerSANM(
                 attention_dim,
                 MultiHeadedAttentionSANMDecoder(
-                    attention_dim, self_attention_dropout_rate, kernel_size, sanm_shfit=sanm_shfit
+                    attention_dim, self_attention_dropout_rate, kernel_size, sanm_shift=sanm_shift
                 ),
                 MultiHeadedAttentionCrossAtt(
                     attention_heads,
@@ -303,7 +303,7 @@ class FsmnDecoder(BaseTransformerDecoder):
                         attention_dim,
                         self_attention_dropout_rate,
                         kernel_size,
-                        sanm_shfit=sanm_shfit,
+                        sanm_shift=sanm_shift,
                     ),
                     None,
                     PositionwiseFeedForwardDecoderSANM(attention_dim, linear_units, dropout_rate),
diff --git a/funasr/models/sanm/encoder.py b/funasr/models/sanm/encoder.py
index 0d39ca742..b590e2489 100644
--- a/funasr/models/sanm/encoder.py
+++ b/funasr/models/sanm/encoder.py
@@ -69,7 +69,7 @@ class EncoderLayerSANM(nn.Module):
         self.stochastic_depth_rate = stochastic_depth_rate
         self.dropout_rate = dropout_rate
 
-    def forward(self, x, mask, cache=None, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
+    def forward(self, x, mask, cache=None, mask_shift_chunk=None, mask_att_chunk_encoder=None):
         """Compute encoded features.
 
         Args:
@@ -106,7 +106,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     ),
                 ),
@@ -122,7 +122,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     )
                 )
@@ -131,7 +131,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     )
                 )
@@ -145,7 +145,7 @@ class EncoderLayerSANM(nn.Module):
         if not self.normalize_before:
             x = self.norm2(x)
 
-        return x, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder
+        return x, mask, cache, mask_shift_chunk, mask_att_chunk_encoder
 
     def forward_chunk(self, x, cache=None, chunk_size=None, look_back=0):
         """Compute encoded features.
@@ -212,7 +212,7 @@ class SANMEncoder(nn.Module):
         interctc_layer_idx: List[int] = [],
         interctc_use_conditioning: bool = False,
         kernel_size: int = 11,
-        sanm_shfit: int = 0,
+        sanm_shift: int = 0,
         lora_list: List[str] = None,
         lora_rank: int = 8,
         lora_alpha: int = 16,
@@ -299,7 +299,7 @@ class SANMEncoder(nn.Module):
                 output_size,
                 attention_dropout_rate,
                 kernel_size,
-                sanm_shfit,
+                sanm_shift,
                 lora_list,
                 lora_rank,
                 lora_alpha,
@@ -312,7 +312,7 @@ class SANMEncoder(nn.Module):
                 output_size,
                 attention_dropout_rate,
                 kernel_size,
-                sanm_shfit,
+                sanm_shift,
                 lora_list,
                 lora_rank,
                 lora_alpha,
diff --git a/funasr/models/sanm/template.yaml b/funasr/models/sanm/template.yaml
index 316fe75cb..987fec2ae 100644
--- a/funasr/models/sanm/template.yaml
+++ b/funasr/models/sanm/template.yaml
@@ -26,7 +26,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 # decoder
@@ -41,7 +41,7 @@ decoder_conf:
     src_attention_dropout_rate: 0.1
     att_layer_num: 16
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
 
 
 
diff --git a/funasr/models/scama/chunk_utilis.py b/funasr/models/scama/chunk_utilis.py
index 2fe3fa4e3..d9b4aa996 100644
--- a/funasr/models/scama/chunk_utilis.py
+++ b/funasr/models/scama/chunk_utilis.py
@@ -21,7 +21,7 @@ class overlap_chunk:
         stride: tuple = (10,),
         pad_left: tuple = (0,),
         encoder_att_look_back_factor: tuple = (1,),
-        shfit_fsmn: int = 0,
+        shift_fsmn: int = 0,
         decoder_att_look_back_factor: tuple = (1,),
     ):
 
@@ -45,11 +45,11 @@ class overlap_chunk:
             encoder_att_look_back_factor,
             decoder_att_look_back_factor,
         )
-        self.shfit_fsmn = shfit_fsmn
+        self.shift_fsmn = shift_fsmn
         self.x_add_mask = None
         self.x_rm_mask = None
         self.x_len = None
-        self.mask_shfit_chunk = None
+        self.mask_shift_chunk = None
         self.mask_chunk_predictor = None
         self.mask_att_chunk_encoder = None
         self.mask_shift_att_chunk_decoder = None
@@ -88,7 +88,7 @@ class overlap_chunk:
             stride,
             pad_left,
             encoder_att_look_back_factor,
-            chunk_size + self.shfit_fsmn,
+            chunk_size + self.shift_fsmn,
             decoder_att_look_back_factor,
         )
         return (
@@ -118,13 +118,13 @@ class overlap_chunk:
             chunk_size, stride, pad_left, encoder_att_look_back_factor, chunk_size_pad_shift = (
                 self.get_chunk_size(ind)
             )
-            shfit_fsmn = self.shfit_fsmn
+            shift_fsmn = self.shift_fsmn
             pad_right = chunk_size - stride - pad_left
 
             chunk_num_batch = np.ceil(x_len / stride).astype(np.int32)
             x_len_chunk = (
                 (chunk_num_batch - 1) * chunk_size_pad_shift
-                + shfit_fsmn
+                + shift_fsmn
                 + pad_left
                 + 0
                 + x_len
@@ -138,13 +138,13 @@ class overlap_chunk:
             max_len_for_x_mask_tmp = max(chunk_size, x_len_max + pad_left)
             x_add_mask = np.zeros([0, max_len_for_x_mask_tmp], dtype=dtype)
             x_rm_mask = np.zeros([max_len_for_x_mask_tmp, 0], dtype=dtype)
-            mask_shfit_chunk = np.zeros([0, num_units], dtype=dtype)
+            mask_shift_chunk = np.zeros([0, num_units], dtype=dtype)
             mask_chunk_predictor = np.zeros([0, num_units_predictor], dtype=dtype)
             mask_shift_att_chunk_decoder = np.zeros([0, 1], dtype=dtype)
             mask_att_chunk_encoder = np.zeros([0, chunk_num * chunk_size_pad_shift], dtype=dtype)
             for chunk_ids in range(chunk_num):
                 # x_mask add
-                fsmn_padding = np.zeros((shfit_fsmn, max_len_for_x_mask_tmp), dtype=dtype)
+                fsmn_padding = np.zeros((shift_fsmn, max_len_for_x_mask_tmp), dtype=dtype)
                 x_mask_cur = np.diag(np.ones(chunk_size, dtype=np.float32))
                 x_mask_pad_left = np.zeros((chunk_size, chunk_ids * stride), dtype=dtype)
                 x_mask_pad_right = np.zeros((chunk_size, max_len_for_x_mask_tmp), dtype=dtype)
@@ -154,7 +154,7 @@ class overlap_chunk:
                 x_add_mask = np.concatenate([x_add_mask, x_add_mask_fsmn], axis=0)
 
                 # x_mask rm
-                fsmn_padding = np.zeros((max_len_for_x_mask_tmp, shfit_fsmn), dtype=dtype)
+                fsmn_padding = np.zeros((max_len_for_x_mask_tmp, shift_fsmn), dtype=dtype)
                 padding_mask_left = np.zeros((max_len_for_x_mask_tmp, pad_left), dtype=dtype)
                 padding_mask_right = np.zeros((max_len_for_x_mask_tmp, pad_right), dtype=dtype)
                 x_mask_cur = np.diag(np.ones(stride, dtype=dtype))
@@ -170,13 +170,13 @@ class overlap_chunk:
                 x_rm_mask = np.concatenate([x_rm_mask, x_rm_mask_cur_fsmn], axis=1)
 
                 # fsmn_padding_mask
-                pad_shfit_mask = np.zeros([shfit_fsmn, num_units], dtype=dtype)
+                pad_shift_mask = np.zeros([shift_fsmn, num_units], dtype=dtype)
                 ones_1 = np.ones([chunk_size, num_units], dtype=dtype)
-                mask_shfit_chunk_cur = np.concatenate([pad_shfit_mask, ones_1], axis=0)
-                mask_shfit_chunk = np.concatenate([mask_shfit_chunk, mask_shfit_chunk_cur], axis=0)
+                mask_shift_chunk_cur = np.concatenate([pad_shift_mask, ones_1], axis=0)
+                mask_shift_chunk = np.concatenate([mask_shift_chunk, mask_shift_chunk_cur], axis=0)
 
                 # predictor mask
-                zeros_1 = np.zeros([shfit_fsmn + pad_left, num_units_predictor], dtype=dtype)
+                zeros_1 = np.zeros([shift_fsmn + pad_left, num_units_predictor], dtype=dtype)
                 ones_2 = np.ones([stride, num_units_predictor], dtype=dtype)
                 zeros_3 = np.zeros(
                     [chunk_size - stride - pad_left, num_units_predictor], dtype=dtype
@@ -188,13 +188,13 @@ class overlap_chunk:
                 )
 
                 # encoder att mask
-                zeros_1_top = np.zeros([shfit_fsmn, chunk_num * chunk_size_pad_shift], dtype=dtype)
+                zeros_1_top = np.zeros([shift_fsmn, chunk_num * chunk_size_pad_shift], dtype=dtype)
 
                 zeros_2_num = max(chunk_ids - encoder_att_look_back_factor, 0)
                 zeros_2 = np.zeros([chunk_size, zeros_2_num * chunk_size_pad_shift], dtype=dtype)
 
                 encoder_att_look_back_num = max(chunk_ids - zeros_2_num, 0)
-                zeros_2_left = np.zeros([chunk_size, shfit_fsmn], dtype=dtype)
+                zeros_2_left = np.zeros([chunk_size, shift_fsmn], dtype=dtype)
                 ones_2_mid = np.ones([stride, stride], dtype=dtype)
                 zeros_2_bottom = np.zeros([chunk_size - stride, stride], dtype=dtype)
                 zeros_2_right = np.zeros([chunk_size, chunk_size - stride], dtype=dtype)
@@ -202,7 +202,7 @@ class overlap_chunk:
                 ones_2 = np.concatenate([zeros_2_left, ones_2, zeros_2_right], axis=1)
                 ones_2 = np.tile(ones_2, [1, encoder_att_look_back_num])
 
-                zeros_3_left = np.zeros([chunk_size, shfit_fsmn], dtype=dtype)
+                zeros_3_left = np.zeros([chunk_size, shift_fsmn], dtype=dtype)
                 ones_3_right = np.ones([chunk_size, chunk_size], dtype=dtype)
                 ones_3 = np.concatenate([zeros_3_left, ones_3_right], axis=1)
 
@@ -218,7 +218,7 @@ class overlap_chunk:
                 )
 
                 # decoder fsmn_shift_att_mask
-                zeros_1 = np.zeros([shfit_fsmn, 1])
+                zeros_1 = np.zeros([shift_fsmn, 1])
                 ones_1 = np.ones([chunk_size, 1])
                 mask_shift_att_chunk_decoder_cur = np.concatenate([zeros_1, ones_1], axis=0)
                 mask_shift_att_chunk_decoder = np.concatenate(
@@ -229,7 +229,7 @@ class overlap_chunk:
             self.x_len_chunk = x_len_chunk
             self.x_rm_mask = x_rm_mask[:x_len_max, :x_len_chunk_max]
             self.x_len = x_len
-            self.mask_shfit_chunk = mask_shfit_chunk[:x_len_chunk_max, :]
+            self.mask_shift_chunk = mask_shift_chunk[:x_len_chunk_max, :]
             self.mask_chunk_predictor = mask_chunk_predictor[:x_len_chunk_max, :]
             self.mask_att_chunk_encoder = mask_att_chunk_encoder[:x_len_chunk_max, :x_len_chunk_max]
             self.mask_shift_att_chunk_decoder = mask_shift_att_chunk_decoder[:x_len_chunk_max, :]
@@ -238,7 +238,7 @@ class overlap_chunk:
                 self.x_len_chunk,
                 self.x_rm_mask,
                 self.x_len,
-                self.mask_shfit_chunk,
+                self.mask_shift_chunk,
                 self.mask_chunk_predictor,
                 self.mask_att_chunk_encoder,
                 self.mask_shift_att_chunk_decoder,
@@ -309,7 +309,7 @@ class overlap_chunk:
             x = torch.from_numpy(x).type(dtype).to(device)
         return x
 
-    def get_mask_shfit_chunk(
+    def get_mask_shift_chunk(
         self, chunk_outs=None, device="cpu", batch_size=1, num_units=1, idx=4, dtype=torch.float32
     ):
         with torch.no_grad():
diff --git a/funasr/models/scama/decoder.py b/funasr/models/scama/decoder.py
index 31b235778..f457b75bf 100644
--- a/funasr/models/scama/decoder.py
+++ b/funasr/models/scama/decoder.py
@@ -226,7 +226,7 @@ class FsmnDecoderSCAMAOpt(BaseTransformerDecoder):
         concat_after: bool = False,
         att_layer_num: int = 6,
         kernel_size: int = 21,
-        sanm_shfit: int = None,
+        sanm_shift: int = None,
         concat_embeds: bool = False,
         attention_dim: int = None,
         tf2torch_tensor_name_prefix_torch: str = "decoder",
@@ -271,14 +271,14 @@ class FsmnDecoderSCAMAOpt(BaseTransformerDecoder):
 
         self.att_layer_num = att_layer_num
         self.num_blocks = num_blocks
-        if sanm_shfit is None:
-            sanm_shfit = (kernel_size - 1) // 2
+        if sanm_shift is None:
+            sanm_shift = (kernel_size - 1) // 2
         self.decoders = repeat(
             att_layer_num,
             lambda lnum: DecoderLayerSANM(
                 attention_dim,
                 MultiHeadedAttentionSANMDecoder(
-                    attention_dim, self_attention_dropout_rate, kernel_size, sanm_shfit=sanm_shfit
+                    attention_dim, self_attention_dropout_rate, kernel_size, sanm_shift=sanm_shift
                 ),
                 MultiHeadedAttentionCrossAtt(
                     attention_heads,
@@ -303,7 +303,7 @@ class FsmnDecoderSCAMAOpt(BaseTransformerDecoder):
                         attention_dim,
                         self_attention_dropout_rate,
                         kernel_size,
-                        sanm_shfit=sanm_shfit,
+                        sanm_shift=sanm_shift,
                     ),
                     None,
                     PositionwiseFeedForwardDecoderSANM(attention_dim, linear_units, dropout_rate),
diff --git a/funasr/models/scama/encoder.py b/funasr/models/scama/encoder.py
index e1fe9242e..0c871e107 100644
--- a/funasr/models/scama/encoder.py
+++ b/funasr/models/scama/encoder.py
@@ -69,7 +69,7 @@ class EncoderLayerSANM(nn.Module):
         self.stochastic_depth_rate = stochastic_depth_rate
         self.dropout_rate = dropout_rate
 
-    def forward(self, x, mask, cache=None, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
+    def forward(self, x, mask, cache=None, mask_shift_chunk=None, mask_att_chunk_encoder=None):
         """Compute encoded features.
 
         Args:
@@ -106,7 +106,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     ),
                 ),
@@ -122,7 +122,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     )
                 )
@@ -131,7 +131,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     )
                 )
@@ -145,7 +145,7 @@ class EncoderLayerSANM(nn.Module):
         if not self.normalize_before:
             x = self.norm2(x)
 
-        return x, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder
+        return x, mask, cache, mask_shift_chunk, mask_att_chunk_encoder
 
     def forward_chunk(self, x, cache=None, chunk_size=None, look_back=0):
         """Compute encoded features.
@@ -212,7 +212,7 @@ class SANMEncoderChunkOpt(nn.Module):
         interctc_layer_idx: List[int] = [],
         interctc_use_conditioning: bool = False,
         kernel_size: int = 11,
-        sanm_shfit: int = 0,
+        sanm_shift: int = 0,
         selfattention_layer_type: str = "sanm",
         chunk_size: Union[int, Sequence[int]] = (16,),
         stride: Union[int, Sequence[int]] = (10,),
@@ -299,7 +299,7 @@ class SANMEncoderChunkOpt(nn.Module):
                 output_size,
                 attention_dropout_rate,
                 kernel_size,
-                sanm_shfit,
+                sanm_shift,
             )
 
             encoder_selfattn_layer_args = (
@@ -308,7 +308,7 @@ class SANMEncoderChunkOpt(nn.Module):
                 output_size,
                 attention_dropout_rate,
                 kernel_size,
-                sanm_shfit,
+                sanm_shift,
             )
         self.encoders0 = repeat(
             1,
@@ -343,12 +343,12 @@ class SANMEncoderChunkOpt(nn.Module):
             assert 0 < min(interctc_layer_idx) and max(interctc_layer_idx) < num_blocks
         self.interctc_use_conditioning = interctc_use_conditioning
         self.conditioning_layer = None
-        shfit_fsmn = (kernel_size - 1) // 2
+        shift_fsmn = (kernel_size - 1) // 2
         self.overlap_chunk_cls = overlap_chunk(
             chunk_size=chunk_size,
             stride=stride,
             pad_left=pad_left,
-            shfit_fsmn=shfit_fsmn,
+            shift_fsmn=shift_fsmn,
             encoder_att_look_back_factor=encoder_att_look_back_factor,
             decoder_att_look_back_factor=decoder_att_look_back_factor,
         )
@@ -397,31 +397,31 @@ class SANMEncoderChunkOpt(nn.Module):
         else:
             xs_pad = self.embed(xs_pad)
 
-        mask_shfit_chunk, mask_att_chunk_encoder = None, None
+        mask_shift_chunk, mask_att_chunk_encoder = None, None
         if self.overlap_chunk_cls is not None:
             ilens = masks.squeeze(1).sum(1)
             chunk_outs = self.overlap_chunk_cls.gen_chunk_mask(ilens, ind)
             xs_pad, ilens = self.overlap_chunk_cls.split_chunk(xs_pad, ilens, chunk_outs=chunk_outs)
             masks = (~make_pad_mask(ilens)[:, None, :]).to(xs_pad.device)
-            mask_shfit_chunk = self.overlap_chunk_cls.get_mask_shfit_chunk(
+            mask_shift_chunk = self.overlap_chunk_cls.get_mask_shift_chunk(
                 chunk_outs, xs_pad.device, xs_pad.size(0), dtype=xs_pad.dtype
             )
             mask_att_chunk_encoder = self.overlap_chunk_cls.get_mask_att_chunk_encoder(
                 chunk_outs, xs_pad.device, xs_pad.size(0), dtype=xs_pad.dtype
             )
 
-        encoder_outs = self.encoders0(xs_pad, masks, None, mask_shfit_chunk, mask_att_chunk_encoder)
+        encoder_outs = self.encoders0(xs_pad, masks, None, mask_shift_chunk, mask_att_chunk_encoder)
         xs_pad, masks = encoder_outs[0], encoder_outs[1]
         intermediate_outs = []
         if len(self.interctc_layer_idx) == 0:
             encoder_outs = self.encoders(
-                xs_pad, masks, None, mask_shfit_chunk, mask_att_chunk_encoder
+                xs_pad, masks, None, mask_shift_chunk, mask_att_chunk_encoder
             )
             xs_pad, masks = encoder_outs[0], encoder_outs[1]
         else:
             for layer_idx, encoder_layer in enumerate(self.encoders):
                 encoder_outs = encoder_layer(
-                    xs_pad, masks, None, mask_shfit_chunk, mask_att_chunk_encoder
+                    xs_pad, masks, None, mask_shift_chunk, mask_att_chunk_encoder
                 )
                 xs_pad, masks = encoder_outs[0], encoder_outs[1]
                 if layer_idx + 1 in self.interctc_layer_idx:
diff --git a/funasr/models/scama/model.py b/funasr/models/scama/model.py
index c15f435a4..4a28435fb 100644
--- a/funasr/models/scama/model.py
+++ b/funasr/models/scama/model.py
@@ -321,10 +321,10 @@ class SCAMA(nn.Module):
             mask_chunk_predictor = self.encoder.overlap_chunk_cls.get_mask_chunk_predictor(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            mask_shfit_chunk = self.encoder.overlap_chunk_cls.get_mask_shfit_chunk(
+            mask_shift_chunk = self.encoder.overlap_chunk_cls.get_mask_shift_chunk(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            encoder_out = encoder_out * mask_shfit_chunk
+            encoder_out = encoder_out * mask_shift_chunk
         pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor(
             encoder_out,
             ys_out_pad,
@@ -415,10 +415,10 @@ class SCAMA(nn.Module):
         mask_chunk_predictor = self.encoder.overlap_chunk_cls.get_mask_chunk_predictor(
             None, device=encoder_out.device, batch_size=encoder_out.size(0)
         )
-        mask_shfit_chunk = self.encoder.overlap_chunk_cls.get_mask_shfit_chunk(
+        mask_shift_chunk = self.encoder.overlap_chunk_cls.get_mask_shift_chunk(
             None, device=encoder_out.device, batch_size=encoder_out.size(0)
         )
-        encoder_out = encoder_out * mask_shfit_chunk
+        encoder_out = encoder_out * mask_shift_chunk
         pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor(
             encoder_out,
             ys_out_pad,
diff --git a/funasr/models/scama/template.yaml b/funasr/models/scama/template.yaml
index bc2e210b2..8e14cd38e 100644
--- a/funasr/models/scama/template.yaml
+++ b/funasr/models/scama/template.yaml
@@ -26,7 +26,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 # decoder
@@ -41,7 +41,7 @@ decoder_conf:
     src_attention_dropout_rate: 0.1
     att_layer_num: 16
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
 
 predictor: CifPredictorV2
 predictor_conf:
diff --git a/funasr/models/seaco_paraformer/template.yaml b/funasr/models/seaco_paraformer/template.yaml
index fcaf5243d..2bf0825eb 100644
--- a/funasr/models/seaco_paraformer/template.yaml
+++ b/funasr/models/seaco_paraformer/template.yaml
@@ -36,7 +36,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
 
 # decoder
@@ -51,7 +51,7 @@ decoder_conf:
     src_attention_dropout_rate: 0.1
     att_layer_num: 16
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
 
 # seaco decoder
 seaco_decoder: ParaformerSANMDecoder
@@ -64,7 +64,7 @@ seaco_decoder_conf:
     self_attention_dropout_rate: 0.1
     src_attention_dropout_rate: 0.1
     kernel_size: 21
-    sanm_shfit: 0
+    sanm_shift: 0
     use_output_layer: false
     wo_input_layer: true
 
diff --git a/funasr/models/sense_voice/model.py b/funasr/models/sense_voice/model.py
index 9d8ef8421..ca0c40a9d 100644
--- a/funasr/models/sense_voice/model.py
+++ b/funasr/models/sense_voice/model.py
@@ -95,7 +95,7 @@ class MultiHeadedAttentionSANM(nn.Module):
         n_feat,
         dropout_rate,
         kernel_size,
-        sanm_shfit=0,
+        sanm_shift=0,
         lora_list=None,
         lora_rank=8,
         lora_alpha=16,
@@ -121,17 +121,17 @@ class MultiHeadedAttentionSANM(nn.Module):
         )
         # padding
         left_padding = (kernel_size - 1) // 2
-        if sanm_shfit > 0:
-            left_padding = left_padding + sanm_shfit
+        if sanm_shift > 0:
+            left_padding = left_padding + sanm_shift
         right_padding = kernel_size - 1 - left_padding
         self.pad_fn = nn.ConstantPad1d((left_padding, right_padding), 0.0)
 
-    def forward_fsmn(self, inputs, mask, mask_shfit_chunk=None):
+    def forward_fsmn(self, inputs, mask, mask_shift_chunk=None):
         b, t, d = inputs.size()
         if mask is not None:
             mask = torch.reshape(mask, (b, -1, 1))
-            if mask_shfit_chunk is not None:
-                mask = mask * mask_shfit_chunk
+            if mask_shift_chunk is not None:
+                mask = mask * mask_shift_chunk
             inputs = inputs * mask
 
         x = inputs.transpose(1, 2)
@@ -211,7 +211,7 @@ class MultiHeadedAttentionSANM(nn.Module):
 
         return self.linear_out(x)  # (batch, time1, d_model)
 
-    def forward(self, x, mask, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
+    def forward(self, x, mask, mask_shift_chunk=None, mask_att_chunk_encoder=None):
         """Compute scaled dot product attention.
 
         Args:
@@ -226,7 +226,7 @@ class MultiHeadedAttentionSANM(nn.Module):
 
         """
         q_h, k_h, v_h, v = self.forward_qkv(x)
-        fsmn_memory = self.forward_fsmn(v, mask, mask_shfit_chunk)
+        fsmn_memory = self.forward_fsmn(v, mask, mask_shift_chunk)
         q_h = q_h * self.d_k ** (-0.5)
         scores = torch.matmul(q_h, k_h.transpose(-2, -1))
         att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)
@@ -326,7 +326,7 @@ class EncoderLayerSANM(nn.Module):
         self.stochastic_depth_rate = stochastic_depth_rate
         self.dropout_rate = dropout_rate
 
-    def forward(self, x, mask, cache=None, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
+    def forward(self, x, mask, cache=None, mask_shift_chunk=None, mask_att_chunk_encoder=None):
         """Compute encoded features.
 
         Args:
@@ -363,7 +363,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     ),
                 ),
@@ -379,7 +379,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     )
                 )
@@ -388,7 +388,7 @@ class EncoderLayerSANM(nn.Module):
                     self.self_attn(
                         x,
                         mask,
-                        mask_shfit_chunk=mask_shfit_chunk,
+                        mask_shift_chunk=mask_shift_chunk,
                         mask_att_chunk_encoder=mask_att_chunk_encoder,
                     )
                 )
@@ -402,7 +402,7 @@ class EncoderLayerSANM(nn.Module):
         if not self.normalize_before:
             x = self.norm2(x)
 
-        return x, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder
+        return x, mask, cache, mask_shift_chunk, mask_att_chunk_encoder
 
     def forward_chunk(self, x, cache=None, chunk_size=None, look_back=0):
         """Compute encoded features.
@@ -469,7 +469,7 @@ class SenseVoiceEncoderSmall(nn.Module):
         positionwise_conv_kernel_size: int = 1,
         padding_idx: int = -1,
         kernel_size: int = 11,
-        sanm_shfit: int = 0,
+        sanm_shift: int = 0,
         selfattention_layer_type: str = "sanm",
         **kwargs,
     ):
@@ -494,7 +494,7 @@ class SenseVoiceEncoderSmall(nn.Module):
             output_size,
             attention_dropout_rate,
             kernel_size,
-            sanm_shfit,
+            sanm_shift,
         )
         encoder_selfattn_layer_args = (
             attention_heads,
@@ -502,7 +502,7 @@ class SenseVoiceEncoderSmall(nn.Module):
             output_size,
             attention_dropout_rate,
             kernel_size,
-            sanm_shfit,
+            sanm_shift,
         )
 
         self.encoders0 = nn.ModuleList(
diff --git a/funasr/models/sond/encoder/fsmn_encoder.py b/funasr/models/sond/encoder/fsmn_encoder.py
index 9ec9912a4..5bccf433b 100644
--- a/funasr/models/sond/encoder/fsmn_encoder.py
+++ b/funasr/models/sond/encoder/fsmn_encoder.py
@@ -36,12 +36,12 @@ class FsmnBlock(torch.nn.Module):
         right_padding = kernel_size - 1 - left_padding
         self.pad_fn = nn.ConstantPad1d((left_padding, right_padding), 0.0)
 
-    def forward(self, inputs, mask, mask_shfit_chunk=None):
+    def forward(self, inputs, mask, mask_shift_chunk=None):
         b, t, d = inputs.size()
         if mask is not None:
             mask = torch.reshape(mask, (b, -1, 1))
-            if mask_shfit_chunk is not None:
-                mask = mask * mask_shfit_chunk
+            if mask_shift_chunk is not None:
+                mask = mask * mask_shift_chunk
 
         inputs = inputs * mask
         x = inputs.transpose(1, 2)
diff --git a/funasr/models/uniasr/model.py b/funasr/models/uniasr/model.py
index bde637777..002dcdda2 100644
--- a/funasr/models/uniasr/model.py
+++ b/funasr/models/uniasr/model.py
@@ -521,10 +521,10 @@ class UniASR(torch.nn.Module):
             mask_chunk_predictor = self.encoder.overlap_chunk_cls.get_mask_chunk_predictor(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            mask_shfit_chunk = self.encoder.overlap_chunk_cls.get_mask_shfit_chunk(
+            mask_shift_chunk = self.encoder.overlap_chunk_cls.get_mask_shift_chunk(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            encoder_out = encoder_out * mask_shfit_chunk
+            encoder_out = encoder_out * mask_shift_chunk
         pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor(
             encoder_out,
             ys_out_pad,
@@ -622,10 +622,10 @@ class UniASR(torch.nn.Module):
             mask_chunk_predictor = self.encoder2.overlap_chunk_cls.get_mask_chunk_predictor(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            mask_shfit_chunk = self.encoder2.overlap_chunk_cls.get_mask_shfit_chunk(
+            mask_shift_chunk = self.encoder2.overlap_chunk_cls.get_mask_shift_chunk(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            encoder_out = encoder_out * mask_shfit_chunk
+            encoder_out = encoder_out * mask_shift_chunk
         pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor2(
             encoder_out,
             ys_out_pad,
@@ -724,10 +724,10 @@ class UniASR(torch.nn.Module):
             mask_chunk_predictor = self.encoder.overlap_chunk_cls.get_mask_chunk_predictor(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            mask_shfit_chunk = self.encoder.overlap_chunk_cls.get_mask_shfit_chunk(
+            mask_shift_chunk = self.encoder.overlap_chunk_cls.get_mask_shift_chunk(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            encoder_out = encoder_out * mask_shfit_chunk
+            encoder_out = encoder_out * mask_shift_chunk
         pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor(
             encoder_out,
             ys_out_pad,
@@ -806,10 +806,10 @@ class UniASR(torch.nn.Module):
             mask_chunk_predictor = self.encoder2.overlap_chunk_cls.get_mask_chunk_predictor(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            mask_shfit_chunk = self.encoder2.overlap_chunk_cls.get_mask_shfit_chunk(
+            mask_shift_chunk = self.encoder2.overlap_chunk_cls.get_mask_shift_chunk(
                 None, device=encoder_out.device, batch_size=encoder_out.size(0)
             )
-            encoder_out = encoder_out * mask_shfit_chunk
+            encoder_out = encoder_out * mask_shift_chunk
         pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor2(
             encoder_out,
             ys_out_pad,
diff --git a/funasr/models/uniasr/template.yaml b/funasr/models/uniasr/template.yaml
index 43d55fc26..c4a4deb4e 100644
--- a/funasr/models/uniasr/template.yaml
+++ b/funasr/models/uniasr/template.yaml
@@ -33,7 +33,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
     chunk_size:
     - 20
@@ -89,7 +89,7 @@ encoder2_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 21
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
     chunk_size:
     - 45
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/DecoderConfEntity.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/DecoderConfEntity.cs
index d5c00573b..97b1d906b 100644
--- a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/DecoderConfEntity.cs
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/DecoderConfEntity.cs
@@ -19,7 +19,7 @@ namespace AliParaformerAsr.Model
         private float _src_attention_dropout_rate = 0.1F;
         private int _att_layer_num = 16;
         private int _kernel_size = 11;
-        private int _sanm_shfit = 0;
+        private int _sanm_shift = 0;
 
         public int attention_heads { get => _attention_heads; set => _attention_heads = value; }
         public int linear_units { get => _linear_units; set => _linear_units = value; }
@@ -30,7 +30,7 @@ namespace AliParaformerAsr.Model
         public float src_attention_dropout_rate { get => _src_attention_dropout_rate; set => _src_attention_dropout_rate = value; }
         public int att_layer_num { get => _att_layer_num; set => _att_layer_num = value; }
         public int kernel_size { get => _kernel_size; set => _kernel_size = value; }
-        public int sanm_shfit { get => _sanm_shfit; set => _sanm_shfit = value; }
-        
+        public int sanm_shift { get => _sanm_shift; set => _sanm_shift = value; }
+
     }
 }
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/EncoderConfEntity.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/EncoderConfEntity.cs
index ffe505e94..5d01266da 100644
--- a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/EncoderConfEntity.cs
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/EncoderConfEntity.cs
@@ -21,7 +21,7 @@ namespace AliParaformerAsr.Model
         private string _pos_enc_class = "SinusoidalPositionEncoder";
         private bool _normalize_before = true;
         private int _kernel_size = 11;
-        private int _sanm_shfit = 0;
+        private int _sanm_shift = 0;
         private string _selfattention_layer_type = "sanm";
 
         public int output_size { get => _output_size; set => _output_size = value; }
@@ -35,7 +35,7 @@ namespace AliParaformerAsr.Model
         public string pos_enc_class { get => _pos_enc_class; set => _pos_enc_class = value; }
         public bool normalize_before { get => _normalize_before; set => _normalize_before = value; }
         public int kernel_size { get => _kernel_size; set => _kernel_size = value; }
-        public int sanm_shfit { get => _sanm_shfit; set => _sanm_shfit = value; }
+        public int sanm_shift { get => _sanm_shift; set => _sanm_shift = value; }
         public string selfattention_layer_type { get => _selfattention_layer_type; set => _selfattention_layer_type = value; }
     }
 }
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.yaml b/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.yaml
index 9b2266f06..cf5750441 100755
--- a/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.yaml
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.yaml
@@ -8593,7 +8593,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm
     chunk_size:
     - 12
@@ -8623,7 +8623,7 @@ decoder_conf:
     src_attention_dropout_rate: 0.1
     att_layer_num: 16
     kernel_size: 11
-    sanm_shfit: 5
+    sanm_shift: 5
 predictor: cif_predictor_v2
 predictor_conf:
     idim: 512
diff --git a/runtime/triton_gpu/model_repo_sense_voice_small/feature_extractor/config.yaml b/runtime/triton_gpu/model_repo_sense_voice_small/feature_extractor/config.yaml
index 26bb9d3d2..a66f1ca45 100644
--- a/runtime/triton_gpu/model_repo_sense_voice_small/feature_extractor/config.yaml
+++ b/runtime/triton_gpu/model_repo_sense_voice_small/feature_extractor/config.yaml
@@ -12,7 +12,7 @@ encoder_conf:
     pos_enc_class: SinusoidalPositionEncoder
     normalize_before: true
     kernel_size: 11
-    sanm_shfit: 0
+    sanm_shift: 0
     selfattention_layer_type: sanm