Compare commits

...

16 Commits

Author SHA1 Message Date
wuhongsheng
e06a737112
Merge bd4340fdfc into 4402e95b0f 2025-08-15 18:19:53 +00:00
游雁
4402e95b0f v1.2.7 2025-08-15 15:22:18 +08:00
游雁
f5051c55cd trust_remote_code 2025-08-15 15:10:37 +08:00
wuhongsheng
bd4340fdfc
Merge branch 'modelscope:main' into main 2024-08-27 09:12:35 +08:00
wuhongsheng
dc06a80dbc fix:情绪识别后处理bug 2024-08-19 09:27:34 +08:00
wuhongsheng
c529ac9b45
Merge branch 'modelscope:main' into main 2024-08-15 10:19:57 +08:00
wuhongsheng
9b423d3d6a 增加emotion2vec模型的支持 2024-08-12 11:24:47 +08:00
wuhongsheng
3ad0599437 优化情绪识别后处理 2024-08-09 13:43:59 +08:00
wuhongsheng
b407b4c345 说话人日志pipline增加情绪识别 2024-08-07 16:05:21 +08:00
wuhongsheng
2add00c614
Merge branch 'modelscope:main' into main 2024-08-05 09:01:31 +08:00
wuhongsheng
be015ec75d 优化speakid和语句匹配逻辑,部分解决speakid不从0递增问题 2024-07-03 11:42:01 +08:00
wuhongsheng
0528806aa7 Revert "优化speakid和语句匹配逻辑,部分解决speakid不从0递增问题"
This reverts commit b31592acd7.
2024-07-03 11:41:17 +08:00
wuhongsheng
ebb0940f2b
Merge branch 'modelscope:main' into main 2024-07-03 11:34:19 +08:00
wuhongsheng
b31592acd7 优化speakid和语句匹配逻辑,部分解决speakid不从0递增问题 2024-07-03 11:33:21 +08:00
wuhongsheng
b1836414b5 修复断句之间时间戳bug 2024-06-28 15:52:52 +08:00
wuhongsheng
fa74a6e26c 优化merge segments 参数,解决新闻联播男女主持人“晚上好”合并一个speakid问题 2024-06-28 15:03:39 +08:00
4 changed files with 51 additions and 13 deletions

View File

@ -1,10 +0,0 @@
"""Export the emotion2vec_base model to ONNX via FunASR's AutoModel API."""
# method2, inference from local path
from funasr import AutoModel

# Pull the pretrained model from the ModelScope ("ms") hub by model id.
exporter = AutoModel(
    model="iic/emotion2vec_base",
    hub="ms",
)

# Emit a float32 ONNX graph for CPU inference.
export_result = exporter.export(type="onnx", quantize=False, opset_version=13, device='cpu')  # fp32 onnx-gpu
# Alternative: float16 ONNX graph for GPU inference.
# export_result = exporter.export(type="onnx_fp16", quantize=False, opset_version=13, device='cuda')  # fp16 onnx-gpu

View File

@ -108,6 +108,24 @@ def prepare_data_iterator(data_in, input_len=None, data_type=None, key=None):
return key_list, data_list
def distribute_emotion(sentence_list, ser_time_list):
    """Attach an emotion label to each sentence from SER time segments.

    For every sentence dict (with 'start'/'end' timestamps in
    milliseconds), sets an 'emotion' key to the emotion whose segments
    overlap the sentence the most in total duration; "EMO_UNKNOWN" when
    no segment overlaps at all.

    Args:
        sentence_list: list of dicts carrying 'start' and 'end' in
            milliseconds; mutated in place.
        ser_time_list: list of (start, end, emotion) tuples with times
            in seconds.

    Returns:
        The same sentence_list, with 'emotion' filled in on every entry.
    """
    # SER segments arrive in seconds; sentence timestamps are in ms.
    ser_time_list = [(st * 1000, ed * 1000, emotion) for st, ed, emotion in ser_time_list]
    for sentence in sentence_list:
        sent_start = sentence['start']
        sent_end = sentence['end']
        # Sum the overlap per emotion, then take the argmax.
        # (The previous running-max scheme double-counted the first
        # overlapping segment of the current leading emotion and threw
        # away accumulated totals whenever the leader changed, so an
        # emotion with a smaller total overlap could win.)
        overlap_totals = {}
        for st, ed, emotion in ser_time_list:
            overlap = max(min(sent_end, ed) - max(sent_start, st), 0)
            if overlap > 0:
                overlap_totals[emotion] = overlap_totals.get(emotion, 0) + overlap
        if overlap_totals:
            # Ties resolve to the emotion seen first, matching the
            # original first-wins behavior of the strict '>' compare.
            sentence['emotion'] = max(overlap_totals, key=overlap_totals.get)
        else:
            sentence['emotion'] = "EMO_UNKNOWN"
    return sentence_list
class AutoModel:
def __init__(self, **kwargs):
@ -161,7 +179,11 @@ class AutoModel:
if spk_mode not in ["default", "vad_segment", "punc_segment"]:
logging.error("spk_mode should be one of default, vad_segment and punc_segment.")
self.spk_mode = spk_mode
ser_model = kwargs.get("ser_model", None)
ser_kwargs = {} if kwargs.get("ser_kwargs", {}) is None else kwargs.get("ser_kwargs", {})
if ser_model is not None:
logging.info("Building SER model.")
ser_model, ser_kwargs = self.build_model(**ser_kwargs)
self.kwargs = kwargs
self.model = model
self.vad_model = vad_model
@ -170,6 +192,8 @@ class AutoModel:
self.punc_kwargs = punc_kwargs
self.spk_model = spk_model
self.spk_kwargs = spk_kwargs
self.ser_model = ser_model
self.ser_kwargs = ser_kwargs
self.model_path = kwargs.get("model_path")
@staticmethod
@ -502,6 +526,16 @@ class AutoModel:
speech_b, input_len=None, model=self.spk_model, kwargs=kwargs, **cfg
)
results[_b]["spk_embedding"] = spk_res[0]["spk_embedding"]
if self.ser_model is not None:
ser_res = self.inference(speech_b, input_len=None, model=self.ser_model,
kwargs=self.ser_kwargs, **cfg)
if "SenseVoiceSmall" in kwargs.get("ser_model", None):
results[_b]["ser_type"] = [i['text'].split("|><|")[1] for i in ser_res]
elif "emotion2vec" in kwargs.get("ser_model", None):
results[_b]["ser_type"] = [i['labels'][i["scores"].index(max(i["scores"]))] for i in ser_res]
beg_idx = end_idx
end_idx += 1
max_len_in_batch = sample_length
@ -594,6 +628,7 @@ class AutoModel:
"end": vadsegment[1],
"sentence": rest["text"],
"timestamp": rest["timestamp"],
"emotion": rest["ser_type"],
}
)
elif self.spk_mode == "punc_segment":
@ -617,6 +652,13 @@ class AutoModel:
raw_text,
return_raw_text=return_raw_text,
)
if "ser_type" in result:
if len(sentence_list) == len(result["ser_type"]):
for i in range(len(sentence_list)):
sentence_list[i]["emotion"] = result["ser_type"][i]
else:
merged_list = [[x[0], x[1], y] for x, y in zip(all_segments, result["ser_type"])]
distribute_emotion(sentence_list, merged_list)
distribute_spk(sentence_list, sv_output)
result["sentence_info"] = sentence_list
elif kwargs.get("sentence_timestamp", False):
@ -640,6 +682,8 @@ class AutoModel:
result["sentence_info"] = sentence_list
if "spk_embedding" in result:
del result["spk_embedding"]
if "ser_type" in result:
del result["ser_type"]
result["key"] = key
results_ret_list.append(result)

View File

@ -1,3 +1,4 @@
import logging
import os
import json
from omegaconf import OmegaConf, DictConfig
@ -79,7 +80,10 @@ def download_from_ms(**kwargs):
kwargs["jieba_usr_dict"] = os.path.join(model_or_path, "jieba_usr_dict")
if isinstance(kwargs, DictConfig):
kwargs = OmegaConf.to_container(kwargs, resolve=True)
if os.path.exists(os.path.join(model_or_path, "requirements.txt")):
logging.warning(f'trust_remote_code: {kwargs.get("trust_remote_code", False)}')
if os.path.exists(os.path.join(model_or_path, "requirements.txt")) and kwargs.get(
"trust_remote_code", False
):
requirements = os.path.join(model_or_path, "requirements.txt")
print(f"Detect model requirements, begin to install it: {requirements}")
from funasr.utils.install_model_requirements import install_requirements

View File

@ -1 +1 @@
1.2.6
1.2.7