FunASR/funasr/datasets/large_datasets/utils/hotword_utils.py
zhifu gao 861147c730
Dev gzf exp (#1654)
* sensevoice finetune

* sensevoice finetune

* sensevoice finetune

* sensevoice finetune

* sensevoice finetune

* sensevoice finetune

* sensevoice finetune

* sensevoice finetune

* sensevoice finetune

* sensevoice finetune

* bugfix

* update with main (#1631)

* update seaco finetune

* v1.0.24

---------

Co-authored-by: 维石 <shixian.shi@alibaba-inc.com>

* sensevoice

* sensevoice

* sensevoice

* update with main (#1638)

* update seaco finetune

* v1.0.24

* update rwkv template

---------

Co-authored-by: 维石 <shixian.shi@alibaba-inc.com>

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sensevoice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* sense voice

* whisper

* whisper

* update style

* update style

---------

Co-authored-by: 维石 <shixian.shi@alibaba-inc.com>
2024-04-24 16:03:38 +08:00

43 lines
1.5 KiB
Python

import random
def sample_hotword(
    length,
    hotword_min_length,
    hotword_max_length,
    sample_rate,
    double_rate,
    pre_prob,
    pre_index=None,
    pre_hwlist=None,
):
    """Randomly pick zero, one or two hotword spans inside a sequence.

    All returned indices are inclusive positions in ``range(length)``.

    Args:
        length: Number of positions in the sequence to sample from
            (presumably the token count of a sentence -- confirm with caller).
        hotword_min_length: Minimum number of positions a span may cover.
        hotword_max_length: Maximum number of positions a span may cover.
            Assumed to be >= ``hotword_min_length``; this is not validated.
        sample_rate: Probability of sampling any hotword at all.
        double_rate: Probability of attempting two spans instead of one
            (only honoured when ``length > hotword_max_length +
            hotword_min_length + 2``).
        pre_prob: Probability of returning ``pre_index`` instead of sampling
            fresh spans (only when ``pre_index`` is not ``None``).
        pre_index: Pre-computed result that is returned as-is (same object,
            not a copy) when the ``pre_prob`` draw succeeds.
        pre_hwlist: Unused by this function; kept for interface compatibility.

    Returns:
        ``[-1]`` when no hotword is sampled, ``[start, end]`` for a single
        span, ``[start1, end1, start2, end2]`` for two non-overlapping spans
        (``end1 < start2``), or ``pre_index`` itself.
    """
    # Sequence too short to hold even the smallest hotword.
    if length < hotword_min_length:
        return [-1]

    # Decide whether this sequence gets a hotword at all.
    if random.random() >= sample_rate:
        return [-1]

    # Optionally reuse a pre-computed selection. The order of the operands is
    # kept so that the RNG is consumed exactly as callers already expect.
    if pre_prob > 0 and random.random() < pre_prob and pre_index is not None:
        return pre_index

    # Only one possible span: the whole sequence.
    if length == hotword_min_length:
        return [0, length - 1]

    if random.random() < double_rate and length > hotword_max_length + hotword_min_length + 2:
        # Sample two hotwords in a sentence.
        _max_hw_length = min(hotword_max_length, length // 2)

        # First hotword. Its start and end are clamped so that at least
        # ``hotword_min_length`` positions remain after it; without the
        # clamps the second ``randint`` below could receive an empty range
        # and raise ValueError. The clamps never bind for parameter sets on
        # which the unclamped sampling could not fail.
        start1 = random.randint(0, min(length // 3, length - 2 * hotword_min_length))
        end1 = random.randint(
            start1 + hotword_min_length - 1,
            min(start1 + _max_hw_length - 1, length - hotword_min_length - 1),
        )

        # Second hotword, strictly after the first one.
        start2 = random.randint(end1 + 1, length - hotword_min_length)
        end2 = random.randint(
            min(length - 1, start2 + hotword_min_length - 1),
            min(length - 1, start2 + hotword_max_length - 1),
        )
        return [start1, end1, start2, end2]

    # Single hotword; the end index is capped at the last valid position.
    start = random.randint(0, length - hotword_min_length)
    end = random.randint(
        min(length - 1, start + hotword_min_length - 1),
        min(length - 1, start + hotword_max_length - 1),
    )
    return [start, end]