mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
fix c# demo project to new onnx model files (#1689)
This commit is contained in:
parent
ba0325c004
commit
dfcc5d4758
@ -8,6 +8,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="NAudio" Version="2.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
@ -1,61 +1,107 @@
|
||||
using AliFsmnVadSharp;
|
||||
using AliFsmnVadSharp.Model;
|
||||
using CommandLine;
|
||||
using NAudio.Wave;
|
||||
|
||||
internal static class Program
|
||||
{
|
||||
[STAThread]
|
||||
private static void Main()
|
||||
{
|
||||
string applicationBase = AppDomain.CurrentDomain.BaseDirectory;
|
||||
string modelFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/model.onnx";
|
||||
string configFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/vad.yaml";
|
||||
string mvnFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/vad.mvn";
|
||||
int batchSize = 2;
|
||||
TimeSpan start_time0 = new TimeSpan(DateTime.Now.Ticks);
|
||||
AliFsmnVad aliFsmnVad = new AliFsmnVad(modelFilePath, configFilePath, mvnFilePath, batchSize);
|
||||
TimeSpan end_time0 = new TimeSpan(DateTime.Now.Ticks);
|
||||
double elapsed_milliseconds0 = end_time0.TotalMilliseconds - start_time0.TotalMilliseconds;
|
||||
Console.WriteLine("load model and init config elapsed_milliseconds:{0}", elapsed_milliseconds0.ToString());
|
||||
List<float[]> samples = new List<float[]>();
|
||||
TimeSpan total_duration = new TimeSpan(0L);
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
string wavFilePath = string.Format(applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/example/{0}.wav", i.ToString());//vad_example
|
||||
if (!File.Exists(wavFilePath))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
|
||||
byte[] datas = new byte[_audioFileReader.Length];
|
||||
_audioFileReader.Read(datas, 0, datas.Length);
|
||||
TimeSpan duration = _audioFileReader.TotalTime;
|
||||
float[] wavdata = new float[datas.Length / 4];
|
||||
Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
|
||||
float[] sample = wavdata.Select((float x) => x * 32768f).ToArray();
|
||||
samples.Add(wavdata);
|
||||
total_duration += duration;
|
||||
}
|
||||
TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks);
|
||||
//SegmentEntity[] segments_duration = aliFsmnVad.GetSegments(samples);
|
||||
SegmentEntity[] segments_duration = aliFsmnVad.GetSegmentsByStep(samples);
|
||||
TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks);
|
||||
Console.WriteLine("vad infer result:");
|
||||
foreach (SegmentEntity segment in segments_duration)
|
||||
{
|
||||
Console.Write("[");
|
||||
foreach (var x in segment.Segment)
|
||||
{
|
||||
Console.Write("[" + string.Join(",", x.ToArray()) + "]");
|
||||
}
|
||||
Console.Write("]\r\n");
|
||||
}
|
||||
public class ProgramParams
|
||||
{
|
||||
[Option('i', "input", Required = true, HelpText = "Input wav file/folder path.")]
|
||||
public string WavFilePath { get; set; }
|
||||
|
||||
[Option('m', "model", Default = "speech_fsmn_vad_zh-cn-16k-common-onnx", HelpText = "Model path.")]
|
||||
public string Model { get; set; }
|
||||
}
|
||||
|
||||
[STAThread]
|
||||
private static void Main(string[] args)
|
||||
{
|
||||
var argParams = Parser.Default.ParseArguments<ProgramParams>(args).Value;
|
||||
|
||||
string modelPath = argParams.Model;
|
||||
if (!Directory.Exists(argParams.Model))
|
||||
{
|
||||
modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, modelPath);
|
||||
if (!Directory.Exists(modelPath))
|
||||
{
|
||||
throw new DirectoryNotFoundException($"Model not found: {argParams.Model}");
|
||||
}
|
||||
}
|
||||
|
||||
string modelFilePath = Path.Combine(modelPath, "model_quant.onnx");
|
||||
string configFilePath = Path.Combine(modelPath, "config.yaml");
|
||||
string mvnFilePath = Path.Combine(modelPath, "am.mvn");
|
||||
|
||||
int batchSize = 1;
|
||||
AliFsmnVad aliFsmnVad = new AliFsmnVad(modelFilePath, configFilePath, mvnFilePath, batchSize);
|
||||
|
||||
List<string> wavFiles = new List<string>();
|
||||
|
||||
if (File.Exists(argParams.WavFilePath))
|
||||
{
|
||||
wavFiles.Add(argParams.WavFilePath);
|
||||
}
|
||||
else if (Directory.Exists(argParams.WavFilePath))
|
||||
{
|
||||
foreach (var wavFilePath in Directory.GetFiles(argParams.WavFilePath, "*.wav"))
|
||||
{
|
||||
wavFiles.Add(wavFilePath);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Invalid wav input path. {argParams.WavFilePath}");
|
||||
}
|
||||
|
||||
var start_time = DateTime.Now;
|
||||
|
||||
TimeSpan total_duration = new TimeSpan(0L);
|
||||
for (int i = 0; i < wavFiles.Count; i += batchSize)
|
||||
{
|
||||
List<float[]> samples = new List<float[]>();
|
||||
|
||||
foreach(var wavFile in wavFiles.Skip(i).Take(batchSize))
|
||||
{
|
||||
(var sample, var duration) = LoadWavFile(wavFile);
|
||||
samples.Add(sample);
|
||||
total_duration += duration;
|
||||
}
|
||||
|
||||
SegmentEntity[] segments_duration = aliFsmnVad.GetSegments(samples);
|
||||
Console.WriteLine("vad infer result:");
|
||||
foreach (SegmentEntity segment in segments_duration)
|
||||
{
|
||||
Console.Write("[");
|
||||
foreach (var x in segment.Segment)
|
||||
{
|
||||
Console.Write("[" + string.Join(",", x.ToArray()) + "]");
|
||||
}
|
||||
Console.Write("]\r\n");
|
||||
}
|
||||
}
|
||||
|
||||
var end_time = DateTime.Now;
|
||||
|
||||
double elapsed_milliseconds = (end_time - start_time).TotalMilliseconds;
|
||||
|
||||
double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;
|
||||
double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
|
||||
Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
|
||||
Console.WriteLine("total_duration:{0}", total_duration.TotalMilliseconds.ToString());
|
||||
Console.WriteLine("rtf:{1}", "0".ToString(), rtf.ToString());
|
||||
Console.WriteLine("------------------------");
|
||||
}
|
||||
|
||||
private static (float[] sample, TimeSpan duration) LoadWavFile(string wavFilePath)
|
||||
{
|
||||
AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
|
||||
byte[] datas = new byte[_audioFileReader.Length];
|
||||
_audioFileReader.Read(datas, 0, datas.Length);
|
||||
var duration = _audioFileReader.TotalTime;
|
||||
float[] wavdata = new float[datas.Length / 4];
|
||||
Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
|
||||
var sample = wavdata.Select((float x) => x * 32768f).ToArray();
|
||||
|
||||
return (sample, duration);
|
||||
}
|
||||
}
|
||||
@ -4,6 +4,8 @@ using Microsoft.Extensions.Logging;
|
||||
using Microsoft.ML.OnnxRuntime;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
|
||||
// 模型文件下载地址:https://modelscope.cn/models/iic/speech_fsmn_vad_zh-cn-16k-common-onnx/
|
||||
|
||||
namespace AliFsmnVadSharp
|
||||
{
|
||||
public class AliFsmnVad : IDisposable
|
||||
@ -28,9 +30,9 @@ namespace AliFsmnVadSharp
|
||||
VadYamlEntity vadYamlEntity = YamlHelper.ReadYaml<VadYamlEntity>(configFilePath);
|
||||
_wavFrontend = new WavFrontend(mvnFilePath, vadYamlEntity.frontend_conf);
|
||||
_frontend = vadYamlEntity.frontend;
|
||||
_vad_post_conf = vadYamlEntity.vad_post_conf;
|
||||
_vad_post_conf = vadYamlEntity.model_conf;
|
||||
_batchSize = batchSize;
|
||||
_max_end_sil = _max_end_sil != int.MinValue ? _max_end_sil : vadYamlEntity.vad_post_conf.max_end_silence_time;
|
||||
_max_end_sil = _max_end_sil != int.MinValue ? _max_end_sil : vadYamlEntity.model_conf.max_end_silence_time;
|
||||
_encoderConfEntity = vadYamlEntity.encoder_conf;
|
||||
|
||||
ILoggerFactory loggerFactory = new LoggerFactory();
|
||||
|
||||
@ -22,6 +22,6 @@ namespace AliFsmnVadSharp.Model
|
||||
public string encoder { get => _encoder; set => _encoder = value; }
|
||||
public FrontendConfEntity frontend_conf { get => _frontend_conf; set => _frontend_conf = value; }
|
||||
public EncoderConfEntity encoder_conf { get => _encoder_conf; set => _encoder_conf = value; }
|
||||
public VadPostConfEntity vad_post_conf { get => _vad_post_conf; set => _vad_post_conf = value; }
|
||||
public VadPostConfEntity model_conf { get => _vad_post_conf; set => _vad_post_conf = value; }
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,6 +5,7 @@
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<Platforms>AnyCPU;x64</Platforms>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
@ -12,6 +13,7 @@
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="NAudio" Version="2.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
@ -1,66 +1,116 @@
|
||||
using AliParaformerAsr;
|
||||
using CommandLine;
|
||||
using NAudio.Wave;
|
||||
|
||||
internal static class Program
|
||||
{
|
||||
[STAThread]
|
||||
private static void Main()
|
||||
{
|
||||
string applicationBase = AppDomain.CurrentDomain.BaseDirectory;
|
||||
string modelName = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx";
|
||||
string modelFilePath = applicationBase + "./"+ modelName + "/model_quant.onnx";
|
||||
string configFilePath = applicationBase + "./" + modelName + "/asr.yaml";
|
||||
string mvnFilePath = applicationBase + "./" + modelName + "/am.mvn";
|
||||
string tokensFilePath = applicationBase + "./" + modelName + "/tokens.txt";
|
||||
AliParaformerAsr.OfflineRecognizer offlineRecognizer = new OfflineRecognizer(modelFilePath, configFilePath, mvnFilePath, tokensFilePath);
|
||||
List<float[]>? samples = null;
|
||||
TimeSpan total_duration = new TimeSpan(0L);
|
||||
if (samples == null)
|
||||
|
||||
public class ProgramParams
|
||||
{
|
||||
[Option('i', "input", Required = true, HelpText = "Input wav file/folder path.")]
|
||||
public string WavFilePath { get; set; }
|
||||
|
||||
[Option('m', "model", Default = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx", HelpText = "Model path.")]
|
||||
public string Model { get; set; }
|
||||
}
|
||||
|
||||
[STAThread]
|
||||
private static void Main(string[] args)
|
||||
{
|
||||
var argParams = Parser.Default.ParseArguments<ProgramParams>(args).Value;
|
||||
|
||||
string modelPath = argParams.Model;
|
||||
if (!Directory.Exists(argParams.Model))
|
||||
{
|
||||
samples = new List<float[]>();
|
||||
for (int i = 0; i < 5; i++)
|
||||
modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, modelPath);
|
||||
if (!Directory.Exists(modelPath))
|
||||
{
|
||||
string wavFilePath = string.Format(applicationBase + "./" + modelName + "/example/{0}.wav", i.ToString());
|
||||
if (!File.Exists(wavFilePath))
|
||||
{
|
||||
break;
|
||||
}
|
||||
AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
|
||||
byte[] datas = new byte[_audioFileReader.Length];
|
||||
_audioFileReader.Read(datas, 0, datas.Length);
|
||||
TimeSpan duration = _audioFileReader.TotalTime;
|
||||
float[] wavdata = new float[datas.Length / 4];
|
||||
Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
|
||||
float[] sample = wavdata.Select((float x) => x * 32768f).ToArray();
|
||||
samples.Add(sample);
|
||||
total_duration += duration;
|
||||
throw new DirectoryNotFoundException($"Model not found: {argParams.Model}");
|
||||
}
|
||||
}
|
||||
TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks);
|
||||
//1.Non batch method
|
||||
foreach (var sample in samples)
|
||||
|
||||
string modelFilePath = Path.Combine(modelPath, "model_quant.onnx");
|
||||
string configFilePath = Path.Combine(modelPath, "asr.yaml");
|
||||
string mvnFilePath = Path.Combine(modelPath, "am.mvn");
|
||||
string tokensFilePath = Path.Combine(modelPath, "tokens.json");
|
||||
|
||||
|
||||
var offlineRecognizer = new OfflineRecognizer(modelFilePath, configFilePath, mvnFilePath, tokensFilePath, OnnxRumtimeTypes.CPU);
|
||||
|
||||
List<float[]> samples = new List<float[]>();
|
||||
TimeSpan total_duration = new TimeSpan(0L);
|
||||
|
||||
if (File.Exists(argParams.WavFilePath))
|
||||
{
|
||||
List<float[]> temp_samples = new List<float[]>();
|
||||
temp_samples.Add(sample);
|
||||
(var sample, var duration) = LoadWavFile(argParams.WavFilePath);
|
||||
|
||||
samples.Add(sample);
|
||||
total_duration += duration;
|
||||
}
|
||||
else if (Directory.Exists(argParams.WavFilePath))
|
||||
{
|
||||
var findWavCount = 0;
|
||||
|
||||
foreach (var wavFilePath in Directory.EnumerateFiles(argParams.WavFilePath, "*.wav"))
|
||||
{
|
||||
(var sample, var duration) = LoadWavFile(wavFilePath);
|
||||
|
||||
samples.Add(sample);
|
||||
total_duration += duration;
|
||||
findWavCount++;
|
||||
}
|
||||
|
||||
Console.WriteLine($"Total WAV files found: {findWavCount} duration:{total_duration}");
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Invalid wav input path. {argParams.WavFilePath}");
|
||||
}
|
||||
|
||||
var start_time = DateTime.Now;
|
||||
|
||||
int batchSize = 1; // 输入参数支持批处理,但是实际效果提升有限,感觉还是负优化,等GPU版本优化后再试
|
||||
for (int i = 0; i < samples.Count; i += batchSize)
|
||||
{
|
||||
List<float[]> temp_samples = samples.Skip(i).Take(batchSize).ToList();
|
||||
|
||||
List<string> results = offlineRecognizer.GetResults(temp_samples);
|
||||
|
||||
foreach (string result in results)
|
||||
{
|
||||
Console.WriteLine(result);
|
||||
Console.WriteLine("");
|
||||
}
|
||||
}
|
||||
//2.batch method
|
||||
//List<string> results_batch = offlineRecognizer.GetResults(samples);
|
||||
//foreach (string result in results_batch)
|
||||
//{
|
||||
// Console.WriteLine(result);
|
||||
// Console.WriteLine("");
|
||||
//}
|
||||
TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks);
|
||||
double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;
|
||||
|
||||
|
||||
var end_time = DateTime.Now;
|
||||
|
||||
double elapsed_milliseconds = (end_time - start_time).TotalMilliseconds;
|
||||
double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
|
||||
|
||||
Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
|
||||
Console.WriteLine("total_duration:{0}", total_duration.TotalMilliseconds.ToString());
|
||||
Console.WriteLine("rtf:{1}", "0".ToString(), rtf.ToString());
|
||||
|
||||
// 实时因子是处理时间与音频时长的比值。
|
||||
// 例如,如果一个 10 秒的音频片段需要 5 秒来处理,那么实时因子就是 0.5。
|
||||
// 如果处理时间和音频时长相等,那么实时因子就是 1,这意味着系统以实时速度进行处理。
|
||||
// 数值越小,表示处理速度越快。
|
||||
// from chatgpt 解释
|
||||
Console.WriteLine("Real-Time Factor :{0}", rtf.ToString());
|
||||
Console.WriteLine("end!");
|
||||
}
|
||||
}
|
||||
|
||||
private static (float[] sample, TimeSpan duration) LoadWavFile(string wavFilePath)
|
||||
{
|
||||
AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
|
||||
byte[] datas = new byte[_audioFileReader.Length];
|
||||
_audioFileReader.Read(datas, 0, datas.Length);
|
||||
var duration = _audioFileReader.TotalTime;
|
||||
float[] wavdata = new float[datas.Length / 4];
|
||||
Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
|
||||
var sample = wavdata.Select((float x) => x * 32768f).ToArray();
|
||||
|
||||
return (sample, duration);
|
||||
}
|
||||
}
|
||||
|
||||
@ -9,7 +9,9 @@
|
||||
<ItemGroup>
|
||||
<PackageReference Include="KaldiNativeFbankSharp" Version="1.0.8" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="7.0.0" />
|
||||
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.15.1" />
|
||||
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.17.3" />
|
||||
<PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.17.3" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
|
||||
<PackageReference Include="YamlDotNet" Version="13.1.1" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
@ -9,9 +9,20 @@ using Microsoft.ML.OnnxRuntime;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Text.RegularExpressions;
|
||||
using Newtonsoft.Json.Linq;
|
||||
|
||||
// 模型文件地址: https://modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
|
||||
namespace AliParaformerAsr
|
||||
{
|
||||
public enum OnnxRumtimeTypes
|
||||
{
|
||||
CPU = 0,
|
||||
|
||||
DML = 1,
|
||||
|
||||
CUDA = 2,
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// offline recognizer package
|
||||
/// Copyright (c) 2023 by manyeyes
|
||||
@ -24,35 +35,70 @@ namespace AliParaformerAsr
|
||||
private string _frontend;
|
||||
private FrontendConfEntity _frontendConfEntity;
|
||||
private string[] _tokens;
|
||||
private int _batchSize = 1;
|
||||
|
||||
public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath,string tokensFilePath, int batchSize = 1,int threadsNum=1)
|
||||
/// <summary>
|
||||
///
|
||||
/// </summary>
|
||||
/// <param name="modelFilePath"></param>
|
||||
/// <param name="configFilePath"></param>
|
||||
/// <param name="mvnFilePath"></param>
|
||||
/// <param name="tokensFilePath"></param>
|
||||
/// <param name="rumtimeType">可以选择gpu,但是目前情况下,不建议使用,因为性能提升有限</param>
|
||||
/// <param name="deviceId">设备id,多显卡时用于指定执行的显卡</param>
|
||||
/// <param name="batchSize"></param>
|
||||
/// <param name="threadsNum"></param>
|
||||
/// <exception cref="ArgumentException"></exception>
|
||||
public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
|
||||
{
|
||||
Microsoft.ML.OnnxRuntime.SessionOptions options = new Microsoft.ML.OnnxRuntime.SessionOptions();
|
||||
var options = new SessionOptions();
|
||||
switch(rumtimeType)
|
||||
{
|
||||
case OnnxRumtimeTypes.DML:
|
||||
options.AppendExecutionProvider_DML(deviceId);
|
||||
break;
|
||||
case OnnxRumtimeTypes.CUDA:
|
||||
options.AppendExecutionProvider_CUDA(deviceId);
|
||||
break;
|
||||
default:
|
||||
options.AppendExecutionProvider_CPU(deviceId);
|
||||
break;
|
||||
}
|
||||
//options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
|
||||
//options.AppendExecutionProvider_DML(0);
|
||||
options.AppendExecutionProvider_CPU(0);
|
||||
//options.AppendExecutionProvider_CUDA(0);
|
||||
options.InterOpNumThreads = threadsNum;
|
||||
_onnxSession = new InferenceSession(modelFilePath, options);
|
||||
|
||||
_tokens = File.ReadAllLines(tokensFilePath);
|
||||
_onnxSession = new InferenceSession(modelFilePath, options);
|
||||
|
||||
string[] tokenLines;
|
||||
if (tokensFilePath.EndsWith(".txt"))
|
||||
{
|
||||
tokenLines = File.ReadAllLines(tokensFilePath);
|
||||
}
|
||||
else if (tokensFilePath.EndsWith(".json"))
|
||||
{
|
||||
string jsonContent = File.ReadAllText(tokensFilePath);
|
||||
JArray tokenArray = JArray.Parse(jsonContent);
|
||||
tokenLines = tokenArray.Select(t => t.ToString()).ToArray();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new ArgumentException("Invalid tokens file format. Only .txt and .json are supported.");
|
||||
}
|
||||
|
||||
_tokens = tokenLines;
|
||||
|
||||
OfflineYamlEntity offlineYamlEntity = YamlHelper.ReadYaml<OfflineYamlEntity>(configFilePath);
|
||||
_wavFrontend = new WavFrontend(mvnFilePath, offlineYamlEntity.frontend_conf);
|
||||
_frontend = offlineYamlEntity.frontend;
|
||||
_frontendConfEntity = offlineYamlEntity.frontend_conf;
|
||||
_batchSize = batchSize;
|
||||
ILoggerFactory loggerFactory = new LoggerFactory();
|
||||
_logger = new Logger<OfflineRecognizer>(loggerFactory);
|
||||
}
|
||||
|
||||
public List<string> GetResults(List<float[]> samples)
|
||||
{
|
||||
this._logger.LogInformation("get features begin");
|
||||
_logger.LogInformation("get features begin");
|
||||
List<OfflineInputEntity> offlineInputEntities = ExtractFeats(samples);
|
||||
OfflineOutputEntity modelOutput = this.Forward(offlineInputEntities);
|
||||
List<string> text_results = this.DecodeMulti(modelOutput.Token_nums);
|
||||
OfflineOutputEntity modelOutput = Forward(offlineInputEntities);
|
||||
List<string> text_results = DecodeMulti(modelOutput.Token_nums);
|
||||
return text_results;
|
||||
}
|
||||
|
||||
@ -156,15 +202,18 @@ namespace AliParaformerAsr
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (_tokens[token] != "</s>" && _tokens[token] != "<s>" && _tokens[token] != "<blank>")
|
||||
|
||||
string tokenChar = _tokens[token];
|
||||
|
||||
if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>")
|
||||
{
|
||||
if (IsChinese(_tokens[token],true))
|
||||
if (IsChinese(tokenChar, true))
|
||||
{
|
||||
text_result += _tokens[token];
|
||||
text_result += tokenChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
text_result += "▁" + _tokens[token]+ "▁";
|
||||
text_result += "▁" + tokenChar + "▁";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -14,16 +14,19 @@ namespace AliParaformerAsr.Utils
|
||||
/// YamlHelper
|
||||
/// Copyright (c) 2023 by manyeyes
|
||||
/// </summary>
|
||||
internal class YamlHelper
|
||||
internal class YamlHelper
|
||||
{
|
||||
public static T ReadYaml<T>(string yamlFilePath)
|
||||
public static T ReadYaml<T>(string yamlFilePath) where T:new()
|
||||
{
|
||||
if (!File.Exists(yamlFilePath))
|
||||
{
|
||||
#pragma warning disable CS8603 // 可能返回 null 引用。
|
||||
return default(T);
|
||||
#pragma warning restore CS8603 // 可能返回 null 引用。
|
||||
// 如果允许返回默认对象,则新建一个默认对象,否则应该是抛出异常
|
||||
// If allowing to return a default object, create a new default object; otherwise, throw an exception
|
||||
|
||||
return new T();
|
||||
// throw new Exception($"not find yaml config file: {yamlFilePath}");
|
||||
}
|
||||
|
||||
StreamReader yamlReader = File.OpenText(yamlFilePath);
|
||||
Deserializer yamlDeserializer = new Deserializer();
|
||||
T info = yamlDeserializer.Deserialize<T>(yamlReader);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user