From 435a5906e538de4c975c7847acfd99772881e3f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=87=8C=E5=8C=80?= Date: Wed, 12 Apr 2023 17:43:29 +0800 Subject: [PATCH] support onnxruntime of streaming vad & bug fix --- .../python/onnxruntime/demo_vad_offline.py | 11 ++ .../{demo_vad.py => demo_vad_online.py} | 18 +- .../onnxruntime/funasr_onnx/utils/e2e_vad.py | 35 ++-- .../onnxruntime/funasr_onnx/utils/frontend.py | 184 +++++++++++++++++- .../python/onnxruntime/funasr_onnx/vad_bin.py | 73 ++++--- .../onnxruntime/funasr_onnx/vad_online_bin.py | 134 +++++++++++++ 6 files changed, 400 insertions(+), 55 deletions(-) create mode 100644 funasr/runtime/python/onnxruntime/demo_vad_offline.py rename funasr/runtime/python/onnxruntime/{demo_vad.py => demo_vad_online.py} (60%) create mode 100644 funasr/runtime/python/onnxruntime/funasr_onnx/vad_online_bin.py diff --git a/funasr/runtime/python/onnxruntime/demo_vad_offline.py b/funasr/runtime/python/onnxruntime/demo_vad_offline.py new file mode 100644 index 000000000..69ca94543 --- /dev/null +++ b/funasr/runtime/python/onnxruntime/demo_vad_offline.py @@ -0,0 +1,11 @@ +import soundfile +from funasr_onnx.vad_bin import Fsmn_vad + + +model_dir = "/mnt/ailsa.zly/tfbase/espnet_work/FunASR_dev_zly/export/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch" +wav_path = "/mnt/ailsa.zly/tfbase/espnet_work/FunASR_dev_zly/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common/vad_example_16k.wav" +model = Fsmn_vad(model_dir) + +#offline vad +result = model(wav_path) +print(result) diff --git a/funasr/runtime/python/onnxruntime/demo_vad.py b/funasr/runtime/python/onnxruntime/demo_vad_online.py similarity index 60% rename from funasr/runtime/python/onnxruntime/demo_vad.py rename to funasr/runtime/python/onnxruntime/demo_vad_online.py index 2e171978c..15e62da2b 100644 --- a/funasr/runtime/python/onnxruntime/demo_vad.py +++ b/funasr/runtime/python/onnxruntime/demo_vad_online.py @@ -1,21 +1,18 @@ import soundfile -from funasr_onnx import Fsmn_vad +from funasr_onnx.vad_online_bin import Fsmn_vad -model_dir = "/Users/zhifu/Downloads/speech_fsmn_vad_zh-cn-16k-common-pytorch" -wav_path = "/Users/zhifu/Downloads/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav" +model_dir = "/mnt/ailsa.zly/tfbase/espnet_work/FunASR_dev_zly/export/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch" +wav_path = "/mnt/ailsa.zly/tfbase/espnet_work/FunASR_dev_zly/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common/vad_example_16k.wav" model = Fsmn_vad(model_dir) -#offline vad -# result = model(wav_path) -# print(result) -#online vad +##online vad speech, sample_rate = soundfile.read(wav_path) speech_length = speech.shape[0] - +# sample_offset = 0 -step = 160 * 10 +step = 1600 param_dict = {'in_cache': []} for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)): if sample_offset + step >= speech_length - 1: @@ -26,5 +23,6 @@ for sample_offset in range(0, speech_length, min(step, speech_length - sample_of param_dict['is_final'] = is_final segments_result = model(audio_in=speech[sample_offset: sample_offset + step], param_dict=param_dict) - print(segments_result) + if segments_result: + print(segments_result) diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/e2e_vad.py b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/e2e_vad.py index 3f6c3d1f6..f5407652e 100644 --- a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/e2e_vad.py +++ b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/e2e_vad.py @@ -439,10 +439,9 @@ class E2EVadModel(): - 1)) / self.vad_opts.noise_frame_num_used_for_snr return frame_state - def __call__(self, score: np.ndarray, waveform: np.ndarray, - is_final: bool = False, max_end_sil: int = 800 + is_final: bool = False, max_end_sil: int = 800, online: bool = False ): self.max_end_sil_frame_cnt_thresh = max_end_sil - self.vad_opts.speech_to_sil_time_thres self.waveform = waveform # compute decibel for each frame @@ -457,20 +456,29 @@ class E2EVadModel(): segment_batch = [] if len(self.output_data_buf) > 0: for i in range(self.output_data_buf_offset, len(self.output_data_buf)): - if not self.output_data_buf[i].contain_seg_start_point: - continue - if not self.next_seg and not self.output_data_buf[i].contain_seg_end_point: - continue - start_ms = self.output_data_buf[i].start_ms if self.next_seg else -1 - if self.output_data_buf[i].contain_seg_end_point: - end_ms = self.output_data_buf[i].end_ms - self.next_seg = True - self.output_data_buf_offset += 1 + if online: + if not self.output_data_buf[i].contain_seg_start_point: + continue + if not self.next_seg and not self.output_data_buf[i].contain_seg_end_point: + continue + start_ms = self.output_data_buf[i].start_ms if self.next_seg else -1 + if self.output_data_buf[i].contain_seg_end_point: + end_ms = self.output_data_buf[i].end_ms + self.next_seg = True + self.output_data_buf_offset += 1 + else: + end_ms = -1 + self.next_seg = False else: - end_ms = -1 - self.next_seg = False + if not self.output_data_buf[i].contain_seg_start_point or not self.output_data_buf[ + i].contain_seg_end_point: + continue + start_ms = self.output_data_buf[i].start_ms + end_ms = self.output_data_buf[i].end_ms + self.output_data_buf_offset += 1 segment = [start_ms, end_ms] segment_batch.append(segment) + if segment_batch: segments.append(segment_batch) if is_final: @@ -605,3 +613,4 @@ class E2EVadModel(): if self.vad_state_machine == VadStateMachine.kVadInStateEndPointDetected and \ self.vad_opts.detect_mode == VadDetectMode.kVadMutipleUtteranceDetectMode.value: self.ResetDetection() + diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py index 11a86445d..c92db4e55 100644 --- a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py +++ b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py @@ -1,6 +1,7 @@ # -*- encoding: utf-8 -*- from pathlib import Path from typing import Any, Dict, Iterable, List, NamedTuple, Set, Tuple, Union +import copy import numpy as np from typeguard import check_argument_types @@ -153,6 +154,187 @@ class WavFrontend(): cmvn = np.array([means, vars]) return cmvn + +class WavFrontendOnline(WavFrontend): + def __init__(self, **kwargs): + super().__init__(**kwargs) + # self.fbank_fn = knf.OnlineFbank(self.opts) + # add variables + self.frame_sample_length = int(self.opts.frame_opts.frame_length_ms * self.opts.frame_opts.samp_freq / 1000) + self.frame_shift_sample_length = int(self.opts.frame_opts.frame_shift_ms * self.opts.frame_opts.samp_freq / 1000) + self.waveform = None + self.reserve_waveforms = None + self.input_cache = None + self.lfr_splice_cache = [] + + @staticmethod + # inputs has catted the cache + def apply_lfr(inputs: np.ndarray, lfr_m: int, lfr_n: int, is_final: bool = False) -> Tuple[ + np.ndarray, np.ndarray, int]: + """ + Apply lfr with data + """ + + LFR_inputs = [] + T = inputs.shape[0] # include the right context + T_lfr = int(np.ceil((T - (lfr_m - 1) // 2) / lfr_n)) # minus the right context: (lfr_m - 1) // 2 + splice_idx = T_lfr + for i in range(T_lfr): + if lfr_m <= T - i * lfr_n: + LFR_inputs.append((inputs[i * lfr_n:i * lfr_n + lfr_m]).reshape(1, -1)) + else: # process last LFR frame + if is_final: + num_padding = lfr_m - (T - i * lfr_n) + frame = (inputs[i * lfr_n:]).reshape(-1) + for _ in range(num_padding): + frame = np.hstack((frame, inputs[-1])) + LFR_inputs.append(frame) + else: + # update splice_idx and break the circle + splice_idx = i + break + splice_idx = min(T - 1, splice_idx * lfr_n) + lfr_splice_cache = inputs[splice_idx:, :] + LFR_outputs = np.vstack(LFR_inputs) + return LFR_outputs.astype(np.float32), lfr_splice_cache, splice_idx + + @staticmethod + def compute_frame_num(sample_length: int, frame_sample_length: int, frame_shift_sample_length: int) -> int: + frame_num = int((sample_length - frame_sample_length) / frame_shift_sample_length + 1) + return frame_num if frame_num >= 1 and sample_length >= frame_sample_length else 0 + + + def fbank( + self, + input: np.ndarray, + input_lengths: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + self.fbank_fn = knf.OnlineFbank(self.opts) + batch_size = input.shape[0] + if self.input_cache is None: + self.input_cache = np.empty((batch_size, 0), dtype=np.float32) + input = np.concatenate((self.input_cache, input), axis=1) + frame_num = self.compute_frame_num(input.shape[-1], self.frame_sample_length, self.frame_shift_sample_length) + # update self.in_cache + self.input_cache = input[:, -(input.shape[-1] - frame_num * self.frame_shift_sample_length):] + waveforms = np.empty(0, dtype=np.int16) + feats_pad = np.empty(0, dtype=np.float32) + feats_lens = np.empty(0, dtype=np.int32) + if frame_num: + waveforms = [] + feats = [] + feats_lens = [] + for i in range(batch_size): + waveform = input[i] + waveforms.append( + waveform[:((frame_num - 1) * self.frame_shift_sample_length + self.frame_sample_length)]) + waveform = waveform * (1 << 15) + + self.fbank_fn.accept_waveform(self.opts.frame_opts.samp_freq, waveform.tolist()) + frames = self.fbank_fn.num_frames_ready + mat = np.empty([frames, self.opts.mel_opts.num_bins]) + for i in range(frames): + mat[i, :] = self.fbank_fn.get_frame(i) + feat = mat.astype(np.float32) + feat_len = np.array(mat.shape[0]).astype(np.int32) + feats.append(mat) + feats_lens.append(feat_len) + + waveforms = np.stack(waveforms) + feats_lens = np.array(feats_lens) + feats_pad = np.array(feats) + self.fbanks = feats_pad + self.fbanks_lens = copy.deepcopy(feats_lens) + return waveforms, feats_pad, feats_lens + + def get_fbank(self) -> Tuple[np.ndarray, np.ndarray]: + return self.fbanks, self.fbanks_lens + + def lfr_cmvn( + self, + input: np.ndarray, + input_lengths: np.ndarray, + is_final: bool = False + ) -> Tuple[np.ndarray, np.ndarray, List[int]]: + batch_size = input.shape[0] + feats = [] + feats_lens = [] + lfr_splice_frame_idxs = [] + for i in range(batch_size): + mat = input[i, :input_lengths[i], :] + lfr_splice_frame_idx = -1 + if self.lfr_m != 1 or self.lfr_n != 1: + # update self.lfr_splice_cache in self.apply_lfr + mat, self.lfr_splice_cache[i], lfr_splice_frame_idx = self.apply_lfr(mat, self.lfr_m, self.lfr_n, + is_final) + if self.cmvn_file is not None: + mat = self.apply_cmvn(mat) + feat_length = mat.shape[0] + feats.append(mat) + feats_lens.append(feat_length) + lfr_splice_frame_idxs.append(lfr_splice_frame_idx) + + feats_lens = np.array(feats_lens) + feats_pad = np.array(feats) + return feats_pad, feats_lens, lfr_splice_frame_idxs + + + def extract_fbank( + self, input: np.ndarray, input_lengths: np.ndarray, is_final: bool = False + ) -> Tuple[np.ndarray, np.ndarray]: + batch_size = input.shape[0] + assert batch_size == 1, 'we support to extract feature online only when the batch size is equal to 1 now' + waveforms, feats, feats_lengths = self.fbank(input, input_lengths) # input shape: B T D + if feats.shape[0]: + self.waveforms = waveforms if self.reserve_waveforms is None else np.concatenate( + (self.reserve_waveforms, waveforms), axis=1) + if not self.lfr_splice_cache: + for i in range(batch_size): + self.lfr_splice_cache.append(np.expand_dims(feats[i][0, :], axis=0).repeat((self.lfr_m - 1) // 2, axis=0)) + + if feats_lengths[0] + self.lfr_splice_cache[0].shape[0] >= self.lfr_m: + lfr_splice_cache_np = np.stack(self.lfr_splice_cache) # B T D + feats = np.concatenate((lfr_splice_cache_np, feats), axis=1) + feats_lengths += lfr_splice_cache_np[0].shape[0] + frame_from_waveforms = int( + (self.waveforms.shape[1] - self.frame_sample_length) / self.frame_shift_sample_length + 1) + minus_frame = (self.lfr_m - 1) // 2 if self.reserve_waveforms is None else 0 + feats, feats_lengths, lfr_splice_frame_idxs = self.lfr_cmvn(feats, feats_lengths, is_final) + if self.lfr_m == 1: + self.reserve_waveforms = None + else: + reserve_frame_idx = lfr_splice_frame_idxs[0] - minus_frame + # print('reserve_frame_idx: ' + str(reserve_frame_idx)) + # print('frame_frame: ' + str(frame_from_waveforms)) + self.reserve_waveforms = self.waveforms[:, reserve_frame_idx * self.frame_shift_sample_length:frame_from_waveforms * self.frame_shift_sample_length] + sample_length = (frame_from_waveforms - 1) * self.frame_shift_sample_length + self.frame_sample_length + self.waveforms = self.waveforms[:, :sample_length] + else: + # update self.reserve_waveforms and self.lfr_splice_cache + self.reserve_waveforms = self.waveforms[:, + :-(self.frame_sample_length - self.frame_shift_sample_length)] + for i in range(batch_size): + self.lfr_splice_cache[i] = np.concatenate((self.lfr_splice_cache[i], feats[i]), axis=0) + return np.empty(0, dtype=np.float32), feats_lengths + else: + if is_final: + self.waveforms = waveforms if self.reserve_waveforms is None else self.reserve_waveforms + feats = np.stack(self.lfr_splice_cache) + feats_lengths = np.zeros(batch_size, dtype=np.int32) + feats.shape[1] + feats, feats_lengths, _ = self.lfr_cmvn(feats, feats_lengths, is_final) + if is_final: + self.cache_reset() + return feats, feats_lengths + + def get_waveforms(self): + return self.waveforms + + def cache_reset(self): + self.fbank_fn = knf.OnlineFbank(self.opts) + self.reserve_waveforms = None + self.input_cache = None + self.lfr_splice_cache = [] + def load_bytes(input): middle_data = np.frombuffer(input, dtype=np.int16) middle_data = np.asarray(middle_data) @@ -188,4 +370,4 @@ def test(): return feat, feat_len if __name__ == '__main__': - test() \ No newline at end of file + test() diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py b/funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py index 221867dbf..5ad426687 100644 --- a/funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py +++ b/funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py @@ -59,37 +59,48 @@ class Fsmn_vad(): def __call__(self, audio_in: Union[str, np.ndarray, List[str]], **kwargs) -> List: - # waveform_list = self.load_data(audio_in, self.frontend.opts.frame_opts.samp_freq) - - param_dict = kwargs.get('param_dict', dict()) - is_final = param_dict.get('is_final', False) - audio_in_cache = param_dict.get('audio_in_cache', None) - audio_in_cum = audio_in - if audio_in_cache is not None: - audio_in_cum = np.concatenate((audio_in_cache, audio_in_cum)) - param_dict['audio_in_cache'] = audio_in_cum - feats, feats_len = self.extract_feat([audio_in_cum]) - - in_cache = param_dict.get('in_cache', list()) - in_cache = self.prepare_cache(in_cache) - beg_idx = param_dict.get('beg_idx',0) - feats = feats[:, beg_idx:beg_idx+8, :] - param_dict['beg_idx'] = beg_idx + feats.shape[1] - try: - inputs = [feats] - inputs.extend(in_cache) - scores, out_caches = self.infer(inputs) - param_dict['in_cache'] = out_caches - segments = self.vad_scorer(scores, audio_in[None, :], is_final=is_final, max_end_sil=self.max_end_sil) - # print(segments) - if len(segments) == 1 and segments[0][0][1] != -1: - self.frontend.reset_status() + waveform_list = self.load_data(audio_in, self.frontend.opts.frame_opts.samp_freq) + waveform_nums = len(waveform_list) + is_final = kwargs.get('kwargs', False) + + segments = [[]] * self.batch_size + for beg_idx in range(0, waveform_nums, self.batch_size): - - except ONNXRuntimeError: - logging.warning(traceback.format_exc()) - logging.warning("input wav is silence or noise") - segments = [] + end_idx = min(waveform_nums, beg_idx + self.batch_size) + waveform = waveform_list[beg_idx:end_idx] + feats, feats_len = self.extract_feat(waveform) + waveform = np.array(waveform) + param_dict = kwargs.get('param_dict', dict()) + in_cache = param_dict.get('in_cache', list()) + in_cache = self.prepare_cache(in_cache) + try: + t_offset = 0 + step = int(min(feats_len.max(), 6000)) + for t_offset in range(0, int(feats_len), min(step, feats_len - t_offset)): + if t_offset + step >= feats_len - 1: + step = feats_len - t_offset + is_final = True + else: + is_final = False + feats_package = feats[:, t_offset:int(t_offset + step), :] + waveform_package = waveform[:, t_offset * 160:min(waveform.shape[-1], (int(t_offset + step) - 1) * 160 + 400)] + + inputs = [feats_package] + # inputs = [feats] + inputs.extend(in_cache) + scores, out_caches = self.infer(inputs) + in_cache = out_caches + segments_part = self.vad_scorer(scores, waveform_package, is_final=is_final, max_end_sil=self.max_end_sil, online=False) + # segments = self.vad_scorer(scores, waveform[0][None, :], is_final=is_final, max_end_sil=self.max_end_sil) + + if segments_part: + for batch_num in range(0, self.batch_size): + segments[batch_num] += segments_part[batch_num] + + except ONNXRuntimeError: + # logging.warning(traceback.format_exc()) + logging.warning("input wav is silence or noise") + segments = '' return segments @@ -140,4 +151,4 @@ class Fsmn_vad(): outputs = self.ort_infer(feats) scores, out_caches = outputs[0], outputs[1:] return scores, out_caches - \ No newline at end of file + diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/vad_online_bin.py b/funasr/runtime/python/onnxruntime/funasr_onnx/vad_online_bin.py new file mode 100644 index 000000000..83e9420e6 --- /dev/null +++ b/funasr/runtime/python/onnxruntime/funasr_onnx/vad_online_bin.py @@ -0,0 +1,134 @@ +# -*- encoding: utf-8 -*- + +import os.path +from pathlib import Path +from typing import List, Union, Tuple + +import copy +import librosa +import numpy as np + +from .utils.utils import (ONNXRuntimeError, + OrtInferSession, get_logger, + read_yaml) +from .utils.frontend import WavFrontendOnline +from .utils.e2e_vad import E2EVadModel + +logging = get_logger() + + +class Fsmn_vad(): + def __init__(self, model_dir: Union[str, Path] = None, + batch_size: int = 1, + device_id: Union[str, int] = "-1", + quantize: bool = False, + intra_op_num_threads: int = 4, + max_end_sil: int = None, + ): + + if not Path(model_dir).exists(): + raise FileNotFoundError(f'{model_dir} does not exist.') + + model_file = os.path.join(model_dir, 'model.onnx') + if quantize: + model_file = os.path.join(model_dir, 'model_quant.onnx') + config_file = os.path.join(model_dir, 'vad.yaml') + cmvn_file = os.path.join(model_dir, 'vad.mvn') + config = read_yaml(config_file) + + self.frontend = WavFrontendOnline( + cmvn_file=cmvn_file, + **config['frontend_conf'] + ) + self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads) + self.batch_size = batch_size + self.vad_scorer = E2EVadModel(config["vad_post_conf"]) + self.max_end_sil = max_end_sil if max_end_sil is not None else config["vad_post_conf"]["max_end_silence_time"] + self.encoder_conf = config["encoder_conf"] + + def prepare_cache(self, in_cache: list = []): + if len(in_cache) > 0: + return in_cache + fsmn_layers = self.encoder_conf["fsmn_layers"] + proj_dim = self.encoder_conf["proj_dim"] + lorder = self.encoder_conf["lorder"] + for i in range(fsmn_layers): + cache = np.zeros((1, proj_dim, lorder-1, 1)).astype(np.float32) + in_cache.append(cache) + return in_cache + + + def __call__(self, audio_in: np.ndarray, **kwargs) -> List: + waveforms = np.expand_dims(audio_in, axis=0) + + param_dict = kwargs.get('param_dict', dict()) + is_final = param_dict.get('is_final', False) + feats, feats_len = self.extract_feat(waveforms, is_final) + segments = [] + if feats.size != 0: + in_cache = param_dict.get('in_cache', list()) + in_cache = self.prepare_cache(in_cache) + try: + inputs = [feats] + inputs.extend(in_cache) + scores, out_caches = self.infer(inputs) + param_dict['in_cache'] = out_caches + waveforms = self.frontend.get_waveforms() + segments = self.vad_scorer(scores, waveforms, is_final=is_final, max_end_sil=self.max_end_sil, online=True) + + + except ONNXRuntimeError: + logging.warning(traceback.format_exc()) + logging.warning("input wav is silence or noise") + segments = [] + return segments + + def load_data(self, + wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List: + def load_wav(path: str) -> np.ndarray: + waveform, _ = librosa.load(path, sr=fs) + return waveform + + if isinstance(wav_content, np.ndarray): + return [wav_content] + + if isinstance(wav_content, str): + return [load_wav(wav_content)] + + if isinstance(wav_content, list): + return [load_wav(path) for path in wav_content] + + raise TypeError( + f'The type of {wav_content} is not in [str, np.ndarray, list]') + + def extract_feat(self, + waveforms: np.ndarray, is_final: bool = False + ) -> Tuple[np.ndarray, np.ndarray]: + waveforms_lens = np.zeros(waveforms.shape[0]).astype(np.int32) + for idx, waveform in enumerate(waveforms): + waveforms_lens[idx] = waveform.shape[-1] + + feats, feats_len = self.frontend.extract_fbank(waveforms, waveforms_lens, is_final) + # feats.append(feat) + # feats_len.append(feat_len) + + # feats = self.pad_feats(feats, np.max(feats_len)) + # feats_len = np.array(feats_len).astype(np.int32) + return feats.astype(np.float32), feats_len.astype(np.int32) + + @staticmethod + def pad_feats(feats: List[np.ndarray], max_feat_len: int) -> np.ndarray: + def pad_feat(feat: np.ndarray, cur_len: int) -> np.ndarray: + pad_width = ((0, max_feat_len - cur_len), (0, 0)) + return np.pad(feat, pad_width, 'constant', constant_values=0) + + feat_res = [pad_feat(feat, feat.shape[0]) for feat in feats] + feats = np.array(feat_res).astype(np.float32) + return feats + + def infer(self, feats: List) -> Tuple[np.ndarray, np.ndarray]: + + outputs = self.ort_infer(feats) + scores, out_caches = outputs[0], outputs[1:] + return scores, out_caches +