fix task.py to handle the case where no dest_sample_rate is configured; fix bugs in train and infer

This commit is contained in:
mengzhe.cmz 2023-04-12 19:25:29 +08:00
parent 60d38fa9ca
commit 1ad439f96b
6 changed files with 9 additions and 50 deletions

View File

@ -1,44 +0,0 @@
#!/usr/bin/env python3
"""Launcher script for punctuation-model training via PunctuationTask."""
import os

from funasr.tasks.punctuation import PunctuationTask


def parse_args():
    """Extend the PunctuationTask parser with launcher-specific flags and parse argv."""
    parser = PunctuationTask.get_parser()
    parser.add_argument(
        "--gpu_id",
        type=int,
        default=0,
        help="local gpu id.",
    )
    parser.add_argument(
        "--punc_list",
        type=str,
        default=None,
        help="Punctuation list",
    )
    return parser.parse_args()


def main(args=None, cmd=None):
    """
    punc training.
    """
    PunctuationTask.main(args=args, cmd=cmd)


if __name__ == "__main__":
    cli_args = parse_args()
    # Pin this process to the requested local GPU before any CUDA init.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(cli_args.gpu_id)
    # Enable DDP only when more than one GPU was requested.
    cli_args.distributed = cli_args.ngpu > 1
    # This launcher assumes a single worker process per node.
    assert cli_args.num_worker_count == 1
    main(args=cli_args)

View File

@ -90,7 +90,7 @@ class Text2Punc:
data = {
"text": torch.unsqueeze(torch.from_numpy(mini_sentence_id), 0),
"text_lengths": torch.from_numpy(np.array([len(mini_sentence_id)], dtype='int32')),
"vad_indexes": torch.from_numpy(np.array([len(cache)-1], dtype='int32')),
"vad_indexes": torch.from_numpy(np.array([len(cache)], dtype='int32')),
}
data = to_device(data, self.device)
y, _ = self.wrapped_model(**data)

View File

@ -47,8 +47,8 @@ def tokenize(data,
length = len(text)
for i in range(length):
x = text[i]
if i == length-1 and "punc" in data and text[i].startswith("vad:"):
vad = x[-1][4:]
if i == length-1 and "punc" in data and x.startswith("vad:"):
vad = x[4:]
if len(vad) == 0:
vad = -1
else:

View File

@ -786,6 +786,7 @@ class PuncTrainTokenizerCommonPreprocessor(CommonPreprocessor):
) -> Dict[str, np.ndarray]:
for i in range(self.num_tokenizer):
text_name = self.text_name[i]
#import pdb; pdb.set_trace()
if text_name in data and self.tokenizer[i] is not None:
text = data[text_name]
text = self.text_cleaner(text)
@ -800,7 +801,7 @@ class PuncTrainTokenizerCommonPreprocessor(CommonPreprocessor):
data[self.vad_name] = np.array([vad], dtype=np.int64)
text_ints = self.token_id_converter[i].tokens2ids(tokens)
data[text_name] = np.array(text_ints, dtype=np.int64)
return data
def split_to_mini_sentence(words: list, word_limit: int = 20):
assert word_limit > 1
@ -813,4 +814,4 @@ def split_to_mini_sentence(words: list, word_limit: int = 20):
sentences.append(words[i * word_limit:(i + 1) * word_limit])
if length % word_limit > 0:
sentences.append(words[sentence_len * word_limit:])
return sentences
return sentences

View File

@ -159,7 +159,7 @@ class CT_Transformer_VadRealtime(CT_Transformer):
data = {
"input": mini_sentence_id[None,:],
"text_lengths": np.array([text_length], dtype='int32'),
"vad_mask": self.vad_mask(text_length, len(cache) - 1)[None, None, :, :].astype(np.float32),
"vad_mask": self.vad_mask(text_length, len(cache))[None, None, :, :].astype(np.float32),
"sub_masks": np.tril(np.ones((text_length, text_length), dtype=np.float32))[None, None, :, :].astype(np.float32)
}
try:

View File

@ -1587,6 +1587,8 @@ class AbsTask(ABC):
dest_sample_rate = args.frontend_conf["fs"]
else:
dest_sample_rate = 16000
else:
dest_sample_rate = 16000
dataset = ESPnetDataset(
iter_options.data_path_and_name_and_type,