diff --git a/funasr/models/monotonic_aligner/model.py b/funasr/models/monotonic_aligner/model.py
index ece319dea..cc03ee46a 100644
--- a/funasr/models/monotonic_aligner/model.py
+++ b/funasr/models/monotonic_aligner/model.py
@@ -4,11 +4,13 @@ import torch
 from torch.cuda.amp import autocast
 from typing import Union, Dict, List, Tuple, Optional
 
+from funasr.register import tables
+from funasr.utils import postprocess_utils
+from funasr.utils.datadir_writer import DatadirWriter
 from funasr.models.paraformer.cif_predictor import mae_loss
-from funasr.models.transformer.utils.add_sos_eos import add_sos_eos
-from funasr.models.transformer.utils.nets_utils import make_pad_mask, pad_list
-from funasr.metrics.compute_acc import th_accuracy
 from funasr.train_utils.device_funcs import force_gatherable
+from funasr.models.transformer.utils.add_sos_eos import add_sos_eos
+from funasr.models.transformer.utils.nets_utils import make_pad_mask
 from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard
 from funasr.utils import postprocess_utils
 from funasr.utils.datadir_writer import DatadirWriter
@@ -25,7 +27,6 @@ class MonotonicAligner(torch.nn.Module):
     Achieving timestamp prediction while recognizing with non-autoregressive end-to-end ASR model
     https://arxiv.org/abs/2301.12343
     """
-
     def __init__(
         self,
         input_size: int = 80,
@@ -41,7 +42,6 @@ class MonotonicAligner(torch.nn.Module):
         length_normalized_loss: bool = False,
         **kwargs,
     ):
-
         super().__init__()
 
         if specaug is not None:
@@ -155,7 +155,6 @@ class MonotonicAligner(torch.nn.Module):
         frontend=None,
         **kwargs,
     ):
-
         meta_data = {}
         # extract fbank feats
         time1 = time.perf_counter()
@@ -190,8 +189,7 @@ class MonotonicAligner(torch.nn.Module):
             timestamp_str, timestamp = ts_prediction_lfr6_standard(us_alpha[:encoder_out_lens[i] * 3],
                                                                    us_peak[:encoder_out_lens[i] * 3],
                                                                    copy.copy(token))
-            text_postprocessed, time_stamp_postprocessed, word_lists = postprocess_utils.sentence_postprocess(
-                token, timestamp)
+            text_postprocessed, time_stamp_postprocessed, _ = postprocess_utils.sentence_postprocess(token, timestamp)
             result_i = {"key": key[i], "text": text_postprocessed,
                         "timestamp": time_stamp_postprocessed,
                         }