mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
commit
7fe37e0352
@ -1 +1 @@
|
||||
../TEMPLATE/README.md
|
||||
../../TEMPLATE/README.md
|
||||
@ -1 +0,0 @@
|
||||
../TEMPLATE/infer.sh
|
||||
@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env bash
# Offline inference + scoring pipeline for a Paraformer ASR model.
#   Stage 1: split wav.scp across GPU/CPU jobs and decode in parallel.
#   Stage 2: compute WER/CER against the reference transcripts.
#   Stage 3: SpeechIO TIOBE text normalization and detailed error-rate report.

set -e
set -u
set -o pipefail

stage=1
stop_stage=2
model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
data_dir="./data/test"
output_dir="./results"
batch_size=64
gpu_inference=true  # whether to perform gpu decoding
gpuid_list="0,1"    # set gpus, e.g., gpuid_list="0,1"
njob=64             # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob
checkpoint_dir=
checkpoint_name="valid.cer_ctc.ave.pb"

. utils/parse_options.sh || exit 1;

# BUG FIX: the original `if ${gpu_inference} == "true"; then` ran the command
# `true` with two ignored arguments, so the test always succeeded and the CPU
# (else) branch was unreachable. Use a real string comparison instead.
if [ "${gpu_inference}" == "true" ]; then
    # one decoding job per GPU id in gpuid_list
    nj=$(echo $gpuid_list | awk -F "," '{print NF}')
else
    nj=$njob
    batch_size=1
    # fill gpuid_list with "-1" placeholders so every CPU job receives gpuid=-1
    gpuid_list=""
    for JOB in $(seq ${nj}); do
        gpuid_list=$gpuid_list"-1,"
    done
fi

# shard the input wav.scp into ${nj} pieces, one per decoding job
mkdir -p $output_dir/split
split_scps=""
for JOB in $(seq ${nj}); do
    split_scps="$split_scps $output_dir/split/wav.$JOB.scp"
done
perl utils/split_scp.pl ${data_dir}/wav.scp ${split_scps}

# optionally decode with a locally fine-tuned checkpoint instead of the hub model
if [ -n "${checkpoint_dir}" ]; then
    python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name}
    model=${checkpoint_dir}/${model}
fi

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
    echo "Decoding ..."
    gpuid_list_array=(${gpuid_list//,/ })
    for JOB in $(seq ${nj}); do
        {
            id=$((JOB-1))
            gpuid=${gpuid_list_array[$id]}
            mkdir -p ${output_dir}/output.$JOB
            python infer.py \
                --model ${model} \
                --audio_in ${output_dir}/split/wav.$JOB.scp \
                --output_dir ${output_dir}/output.$JOB \
                --batch_size ${batch_size} \
                --gpuid ${gpuid}
        }&
    done
    wait

    # merge the per-job outputs, sorted by utterance id
    mkdir -p ${output_dir}/1best_recog
    for f in token score text; do
        if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then
            for i in $(seq "${nj}"); do
                cat "${output_dir}/output.${i}/1best_recog/${f}"
            done | sort -k1 >"${output_dir}/1best_recog/${f}"
        fi
    done
fi

if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
    echo "Computing WER ..."
    cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
    cp ${data_dir}/text ${output_dir}/1best_recog/text.ref
    python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer
    tail -n 3 ${output_dir}/1best_recog/text.cer
fi

if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
    echo "SpeechIO TIOBE textnorm"
    echo "$0 --> Normalizing REF text ..."
    ./utils/textnorm_zh.py \
        --has_key --to_upper \
        ${data_dir}/text \
        ${output_dir}/1best_recog/ref.txt

    echo "$0 --> Normalizing HYP text ..."
    ./utils/textnorm_zh.py \
        --has_key --to_upper \
        ${output_dir}/1best_recog/text.proc \
        ${output_dir}/1best_recog/rec.txt
    # drop hypotheses whose text is empty (utterance key followed by a bare tab)
    grep -v $'\t$' ${output_dir}/1best_recog/rec.txt > ${output_dir}/1best_recog/rec_non_empty.txt

    echo "$0 --> computing WER/CER and alignment ..."
    ./utils/error_rate_zh \
        --tokenizer char \
        --ref ${output_dir}/1best_recog/ref.txt \
        --hyp ${output_dir}/1best_recog/rec_non_empty.txt \
        ${output_dir}/1best_recog/DETAILS.txt | tee ${output_dir}/1best_recog/RESULTS.txt
    rm -rf ${output_dir}/1best_recog/rec.txt ${output_dir}/1best_recog/rec_non_empty.txt
fi
|
||||
|
||||
@ -1 +1 @@
|
||||
../TEMPLATE/README.md
|
||||
../../TEMPLATE/README.md
|
||||
@ -1 +1 @@
|
||||
../TEMPLATE/infer.py
|
||||
../../TEMPLATE/infer.py
|
||||
@ -1 +0,0 @@
|
||||
../TEMPLATE/infer.sh
|
||||
@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env bash
# Offline inference + scoring pipeline for a Paraformer ASR model (AISHELL-1).
#   Stage 1: split wav.scp across GPU/CPU jobs and decode in parallel.
#   Stage 2: compute WER/CER against the reference transcripts.
#   Stage 3: SpeechIO TIOBE text normalization and detailed error-rate report.

set -e
set -u
set -o pipefail

stage=1
stop_stage=2
model="damo/speech_paraformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch"
data_dir="./data/test"
output_dir="./results"
batch_size=64
gpu_inference=true  # whether to perform gpu decoding
gpuid_list="0,1"    # set gpus, e.g., gpuid_list="0,1"
njob=64             # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob
checkpoint_dir=
checkpoint_name="valid.cer_ctc.ave.pb"

. utils/parse_options.sh || exit 1;

# BUG FIX: the original `if ${gpu_inference} == "true"; then` ran the command
# `true` with two ignored arguments, so the test always succeeded and the CPU
# (else) branch was unreachable. Use a real string comparison instead.
if [ "${gpu_inference}" == "true" ]; then
    # one decoding job per GPU id in gpuid_list
    nj=$(echo $gpuid_list | awk -F "," '{print NF}')
else
    nj=$njob
    batch_size=1
    # fill gpuid_list with "-1" placeholders so every CPU job receives gpuid=-1
    gpuid_list=""
    for JOB in $(seq ${nj}); do
        gpuid_list=$gpuid_list"-1,"
    done
fi

# shard the input wav.scp into ${nj} pieces, one per decoding job
mkdir -p $output_dir/split
split_scps=""
for JOB in $(seq ${nj}); do
    split_scps="$split_scps $output_dir/split/wav.$JOB.scp"
done
perl utils/split_scp.pl ${data_dir}/wav.scp ${split_scps}

# optionally decode with a locally fine-tuned checkpoint instead of the hub model
if [ -n "${checkpoint_dir}" ]; then
    python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name}
    model=${checkpoint_dir}/${model}
fi

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
    echo "Decoding ..."
    gpuid_list_array=(${gpuid_list//,/ })
    for JOB in $(seq ${nj}); do
        {
            id=$((JOB-1))
            gpuid=${gpuid_list_array[$id]}
            mkdir -p ${output_dir}/output.$JOB
            python infer.py \
                --model ${model} \
                --audio_in ${output_dir}/split/wav.$JOB.scp \
                --output_dir ${output_dir}/output.$JOB \
                --batch_size ${batch_size} \
                --gpuid ${gpuid}
        }&
    done
    wait

    # merge the per-job outputs, sorted by utterance id
    mkdir -p ${output_dir}/1best_recog
    for f in token score text; do
        if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then
            for i in $(seq "${nj}"); do
                cat "${output_dir}/output.${i}/1best_recog/${f}"
            done | sort -k1 >"${output_dir}/1best_recog/${f}"
        fi
    done
fi

if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
    echo "Computing WER ..."
    cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
    cp ${data_dir}/text ${output_dir}/1best_recog/text.ref
    python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer
    tail -n 3 ${output_dir}/1best_recog/text.cer
fi

if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
    echo "SpeechIO TIOBE textnorm"
    echo "$0 --> Normalizing REF text ..."
    ./utils/textnorm_zh.py \
        --has_key --to_upper \
        ${data_dir}/text \
        ${output_dir}/1best_recog/ref.txt

    echo "$0 --> Normalizing HYP text ..."
    ./utils/textnorm_zh.py \
        --has_key --to_upper \
        ${output_dir}/1best_recog/text.proc \
        ${output_dir}/1best_recog/rec.txt
    # drop hypotheses whose text is empty (utterance key followed by a bare tab)
    grep -v $'\t$' ${output_dir}/1best_recog/rec.txt > ${output_dir}/1best_recog/rec_non_empty.txt

    echo "$0 --> computing WER/CER and alignment ..."
    ./utils/error_rate_zh \
        --tokenizer char \
        --ref ${output_dir}/1best_recog/ref.txt \
        --hyp ${output_dir}/1best_recog/rec_non_empty.txt \
        ${output_dir}/1best_recog/DETAILS.txt | tee ${output_dir}/1best_recog/RESULTS.txt
    rm -rf ${output_dir}/1best_recog/rec.txt ${output_dir}/1best_recog/rec_non_empty.txt
fi
|
||||
|
||||
@ -1 +1 @@
|
||||
../TEMPLATE/README.md
|
||||
../../TEMPLATE/README.md
|
||||
@ -1 +1 @@
|
||||
../TEMPLATE/infer.py
|
||||
../../TEMPLATE/infer.py
|
||||
@ -1 +0,0 @@
|
||||
../TEMPLATE/infer.sh
|
||||
@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env bash
# Offline inference + scoring pipeline for a Paraformer ASR model (AISHELL-2).
#   Stage 1: split wav.scp across GPU/CPU jobs and decode in parallel.
#   Stage 2: compute WER/CER against the reference transcripts.
#   Stage 3: SpeechIO TIOBE text normalization and detailed error-rate report.

set -e
set -u
set -o pipefail

stage=1
stop_stage=2
model="damo/speech_paraformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch"
data_dir="./data/test"
output_dir="./results"
batch_size=64
gpu_inference=true  # whether to perform gpu decoding
gpuid_list="0,1"    # set gpus, e.g., gpuid_list="0,1"
njob=64             # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob
checkpoint_dir=
checkpoint_name="valid.cer_ctc.ave.pb"

. utils/parse_options.sh || exit 1;

# BUG FIX: the original `if ${gpu_inference} == "true"; then` ran the command
# `true` with two ignored arguments, so the test always succeeded and the CPU
# (else) branch was unreachable. Use a real string comparison instead.
if [ "${gpu_inference}" == "true" ]; then
    # one decoding job per GPU id in gpuid_list
    nj=$(echo $gpuid_list | awk -F "," '{print NF}')
else
    nj=$njob
    batch_size=1
    # fill gpuid_list with "-1" placeholders so every CPU job receives gpuid=-1
    gpuid_list=""
    for JOB in $(seq ${nj}); do
        gpuid_list=$gpuid_list"-1,"
    done
fi

# shard the input wav.scp into ${nj} pieces, one per decoding job
mkdir -p $output_dir/split
split_scps=""
for JOB in $(seq ${nj}); do
    split_scps="$split_scps $output_dir/split/wav.$JOB.scp"
done
perl utils/split_scp.pl ${data_dir}/wav.scp ${split_scps}

# optionally decode with a locally fine-tuned checkpoint instead of the hub model
if [ -n "${checkpoint_dir}" ]; then
    python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name}
    model=${checkpoint_dir}/${model}
fi

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
    echo "Decoding ..."
    gpuid_list_array=(${gpuid_list//,/ })
    for JOB in $(seq ${nj}); do
        {
            id=$((JOB-1))
            gpuid=${gpuid_list_array[$id]}
            mkdir -p ${output_dir}/output.$JOB
            python infer.py \
                --model ${model} \
                --audio_in ${output_dir}/split/wav.$JOB.scp \
                --output_dir ${output_dir}/output.$JOB \
                --batch_size ${batch_size} \
                --gpuid ${gpuid}
        }&
    done
    wait

    # merge the per-job outputs, sorted by utterance id
    mkdir -p ${output_dir}/1best_recog
    for f in token score text; do
        if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then
            for i in $(seq "${nj}"); do
                cat "${output_dir}/output.${i}/1best_recog/${f}"
            done | sort -k1 >"${output_dir}/1best_recog/${f}"
        fi
    done
fi

if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
    echo "Computing WER ..."
    cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
    cp ${data_dir}/text ${output_dir}/1best_recog/text.ref
    python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer
    tail -n 3 ${output_dir}/1best_recog/text.cer
fi

if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
    echo "SpeechIO TIOBE textnorm"
    echo "$0 --> Normalizing REF text ..."
    ./utils/textnorm_zh.py \
        --has_key --to_upper \
        ${data_dir}/text \
        ${output_dir}/1best_recog/ref.txt

    echo "$0 --> Normalizing HYP text ..."
    ./utils/textnorm_zh.py \
        --has_key --to_upper \
        ${output_dir}/1best_recog/text.proc \
        ${output_dir}/1best_recog/rec.txt
    # drop hypotheses whose text is empty (utterance key followed by a bare tab)
    grep -v $'\t$' ${output_dir}/1best_recog/rec.txt > ${output_dir}/1best_recog/rec_non_empty.txt

    echo "$0 --> computing WER/CER and alignment ..."
    ./utils/error_rate_zh \
        --tokenizer char \
        --ref ${output_dir}/1best_recog/ref.txt \
        --hyp ${output_dir}/1best_recog/rec_non_empty.txt \
        ${output_dir}/1best_recog/DETAILS.txt | tee ${output_dir}/1best_recog/RESULTS.txt
    rm -rf ${output_dir}/1best_recog/rec.txt ${output_dir}/1best_recog/rec_non_empty.txt
fi
|
||||
|
||||
@ -137,7 +137,7 @@ grpc::Status ASRServicer::Recognize(
|
||||
stream->Write(res);
|
||||
}
|
||||
else {
|
||||
FUNASR_RESULT Result= FunOfflineRecogPCMBuffer(AsrHanlde, tmp_data.c_str(), data_len_int, 16000, RASR_NONE, NULL);
|
||||
FUNASR_RESULT Result= FunOfflineInferBuffer(AsrHanlde, tmp_data.c_str(), data_len_int, RASR_NONE, NULL, 16000);
|
||||
std::string asr_result = ((FUNASR_RECOG_RESULT*)Result)->msg;
|
||||
|
||||
auto end_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
|
||||
|
||||
@ -46,15 +46,20 @@ typedef enum {
|
||||
FUNASR_MODEL_PARAFORMER = 3,
|
||||
}FUNASR_MODEL_TYPE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
FSMN_VAD_OFFLINE=0,
|
||||
FSMN_VAD_ONLINE = 1,
|
||||
}FSMN_VAD_MODE;
|
||||
|
||||
typedef void (* QM_CALLBACK)(int cur_step, int n_total); // n_total: total steps; cur_step: Current Step.
|
||||
|
||||
// ASR
|
||||
_FUNASRAPI FUNASR_HANDLE FunASRInit(std::map<std::string, std::string>& model_path, int thread_num);
|
||||
|
||||
_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback);
|
||||
_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback);
|
||||
_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback);
|
||||
_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback);
|
||||
// buffer
|
||||
_FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000);
|
||||
// file, support wav & pcm
|
||||
_FUNASRAPI FUNASR_RESULT FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000);
|
||||
|
||||
_FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index);
|
||||
_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result);
|
||||
@ -63,9 +68,12 @@ _FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle);
|
||||
_FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result);
|
||||
|
||||
// VAD
|
||||
_FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num);
|
||||
_FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num, FSMN_VAD_MODE mode=FSMN_VAD_OFFLINE);
|
||||
// buffer
|
||||
_FUNASRAPI FUNASR_RESULT FsmnVadInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000);
|
||||
// file, support wav & pcm
|
||||
_FUNASRAPI FUNASR_RESULT FsmnVadInfer(FUNASR_HANDLE handle, const char* sz_filename, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000);
|
||||
|
||||
_FUNASRAPI FUNASR_RESULT FsmnVadWavFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback);
|
||||
_FUNASRAPI std::vector<std::vector<int>>* FsmnVadGetResult(FUNASR_RESULT result,int n_index);
|
||||
_FUNASRAPI void FsmnVadFreeResult(FUNASR_RESULT result);
|
||||
_FUNASRAPI void FsmnVadUninit(FUNASR_HANDLE handle);
|
||||
@ -78,8 +86,10 @@ _FUNASRAPI void CTTransformerUninit(FUNASR_HANDLE handle);
|
||||
|
||||
//OfflineStream
|
||||
_FUNASRAPI FUNASR_HANDLE FunOfflineInit(std::map<std::string, std::string>& model_path, int thread_num);
|
||||
_FUNASRAPI FUNASR_RESULT FunOfflineRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback);
|
||||
_FUNASRAPI FUNASR_RESULT FunOfflineRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback);
|
||||
// buffer
|
||||
_FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000);
|
||||
// file, support wav & pcm
|
||||
_FUNASRAPI FUNASR_RESULT FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000);
|
||||
_FUNASRAPI void FunOfflineUninit(FUNASR_HANDLE handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -16,7 +16,7 @@ class VadModel {
|
||||
virtual void LoadConfigFromYaml(const char* filename)=0;
|
||||
virtual void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
|
||||
const std::vector<float> &waves)=0;
|
||||
virtual std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats)=0;
|
||||
virtual void LfrCmvn(std::vector<std::vector<float>> &vad_feats)=0;
|
||||
virtual void Forward(
|
||||
const std::vector<std::vector<float>> &chunk_feats,
|
||||
std::vector<std::vector<float>> *out_prob)=0;
|
||||
@ -24,6 +24,6 @@ class VadModel {
|
||||
virtual void InitCache()=0;
|
||||
};
|
||||
|
||||
VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num);
|
||||
VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num, int mode);
|
||||
} // namespace funasr
|
||||
#endif
|
||||
|
||||
@ -43,11 +43,10 @@ make
|
||||
|
||||
### funasr-onnx-offline
|
||||
```shell
|
||||
./funasr-onnx-offline [--wav-scp <string>] [--wav-path <string>]
|
||||
[--punc-quant <string>] [--punc-dir <string>]
|
||||
[--vad-quant <string>] [--vad-dir <string>]
|
||||
[--quantize <string>] --model-dir <string>
|
||||
[--] [--version] [-h]
|
||||
./funasr-onnx-offline --model-dir <string> [--quantize <string>]
|
||||
[--vad-dir <string>] [--vad-quant <string>]
|
||||
[--punc-dir <string>] [--punc-quant <string>]
|
||||
--wav-path <string> [--] [--version] [-h]
|
||||
Where:
|
||||
--model-dir <string>
|
||||
(required) the asr model path, which contains model.onnx, config.yaml, am.mvn
|
||||
@ -64,12 +63,13 @@ Where:
|
||||
--punc-quant <string>
|
||||
false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir
|
||||
|
||||
--wav-scp <string>
|
||||
wave scp path
|
||||
--wav-path <string>
|
||||
wave file path
|
||||
(required) the input could be:
|
||||
wav_path, e.g.: asr_example.wav;
|
||||
pcm_path, e.g.: asr_example.pcm;
|
||||
wav.scp, kaldi style wav list (wav_id \t wav_path)
|
||||
|
||||
Required: --model-dir <string>
|
||||
Required: --model-dir <string> --wav-path <string>
|
||||
If use vad, please add: --vad-dir <string>
|
||||
If use punc, please add: --punc-dir <string>
|
||||
|
||||
@ -84,20 +84,20 @@ For example:
|
||||
|
||||
### funasr-onnx-offline-vad
|
||||
```shell
|
||||
./funasr-onnx-offline-vad [--wav-scp <string>] [--wav-path <string>]
|
||||
[--quantize <string>] --model-dir <string>
|
||||
[--] [--version] [-h]
|
||||
./funasr-onnx-offline-vad --model-dir <string> [--quantize <string>]
|
||||
--wav-path <string> [--] [--version] [-h]
|
||||
Where:
|
||||
--model-dir <string>
|
||||
(required) the vad model path, which contains model.onnx, vad.yaml, vad.mvn
|
||||
--quantize <string>
|
||||
false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir
|
||||
--wav-scp <string>
|
||||
wave scp path
|
||||
--wav-path <string>
|
||||
wave file path
|
||||
(required) the input could be:
|
||||
wav_path, e.g.: asr_example.wav;
|
||||
pcm_path, e.g.: asr_example.pcm;
|
||||
wav.scp, kaldi style wav list (wav_id \t wav_path)
|
||||
|
||||
Required: --model-dir <string>
|
||||
Required: --model-dir <string> --wav-path <string>
|
||||
|
||||
For example:
|
||||
./funasr-onnx-offline-vad \
|
||||
@ -107,17 +107,17 @@ For example:
|
||||
|
||||
### funasr-onnx-offline-punc
|
||||
```shell
|
||||
./funasr-onnx-offline-punc [--txt-path <string>] [--quantize <string>]
|
||||
--model-dir <string> [--] [--version] [-h]
|
||||
./funasr-onnx-offline-punc --model-dir <string> [--quantize <string>]
|
||||
--txt-path <string> [--] [--version] [-h]
|
||||
Where:
|
||||
--model-dir <string>
|
||||
(required) the punc model path, which contains model.onnx, punc.yaml
|
||||
--quantize <string>
|
||||
false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir
|
||||
--txt-path <string>
|
||||
txt file path, one sentence per line
|
||||
(required) txt file path, one sentence per line
|
||||
|
||||
Required: --model-dir <string>
|
||||
Required: --model-dir <string> --txt-path <string>
|
||||
|
||||
For example:
|
||||
./funasr-onnx-offline-punc \
|
||||
@ -126,8 +126,8 @@ For example:
|
||||
```
|
||||
### funasr-onnx-offline-rtf
|
||||
```shell
|
||||
./funasr-onnx-offline-rtf --thread-num <int32_t> --wav-scp <string>
|
||||
[--quantize <string>] --model-dir <string>
|
||||
./funasr-onnx-offline-rtf --model-dir <string> [--quantize <string>]
|
||||
--wav-path <string> --thread-num <int32_t>
|
||||
[--] [--version] [-h]
|
||||
Where:
|
||||
--thread-num <int32_t>
|
||||
@ -136,8 +136,11 @@ Where:
|
||||
(required) the model path, which contains model.onnx, config.yaml, am.mvn
|
||||
--quantize <string>
|
||||
false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir
|
||||
--wav-scp <string>
|
||||
(required) wave scp path
|
||||
--wav-path <string>
|
||||
(required) the input could be:
|
||||
wav_path, e.g.: asr_example.wav;
|
||||
pcm_path, e.g.: asr_example.pcm;
|
||||
wav.scp, kaldi style wav list (wav_id \t wav_path)
|
||||
|
||||
For example:
|
||||
./funasr-onnx-offline-rtf \
|
||||
|
||||
@ -225,7 +225,7 @@ void FsmnVad::LoadCmvn(const char *filename)
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats) {
|
||||
void FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats) {
|
||||
|
||||
std::vector<std::vector<float>> out_feats;
|
||||
int T = vad_feats.size();
|
||||
@ -264,7 +264,6 @@ std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>
|
||||
}
|
||||
}
|
||||
vad_feats = out_feats;
|
||||
return vad_feats;
|
||||
}
|
||||
|
||||
std::vector<std::vector<int>>
|
||||
@ -272,7 +271,7 @@ FsmnVad::Infer(const std::vector<float> &waves) {
|
||||
std::vector<std::vector<float>> vad_feats;
|
||||
std::vector<std::vector<float>> vad_probs;
|
||||
FbankKaldi(vad_sample_rate_, vad_feats, waves);
|
||||
vad_feats = LfrCmvn(vad_feats);
|
||||
LfrCmvn(vad_feats);
|
||||
Forward(vad_feats, &vad_probs);
|
||||
|
||||
E2EVadModel vad_scorer = E2EVadModel();
|
||||
|
||||
@ -36,7 +36,7 @@ private:
|
||||
void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
|
||||
const std::vector<float> &waves);
|
||||
|
||||
std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats);
|
||||
void LfrCmvn(std::vector<std::vector<float>> &vad_feats);
|
||||
|
||||
void Forward(
|
||||
const std::vector<std::vector<float>> &chunk_feats,
|
||||
|
||||
@ -36,7 +36,7 @@ int main(int argc, char *argv[])
|
||||
TCLAP::CmdLine cmd("funasr-onnx-offline-punc", ' ', "1.0");
|
||||
TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the punc model path, which contains model.onnx, punc.yaml", true, "", "string");
|
||||
TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
|
||||
TCLAP::ValueArg<std::string> txt_path("", TXT_PATH, "txt file path, one sentence per line", false, "", "string");
|
||||
TCLAP::ValueArg<std::string> txt_path("", TXT_PATH, "txt file path, one sentence per line", true, "", "string");
|
||||
|
||||
cmd.add(model_dir);
|
||||
cmd.add(quantize);
|
||||
|
||||
@ -39,7 +39,7 @@ void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list,
|
||||
// warm up
|
||||
for (size_t i = 0; i < 1; i++)
|
||||
{
|
||||
FUNASR_RESULT result=FunASRRecogFile(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL);
|
||||
FUNASR_RESULT result=FunASRInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, 16000);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
@ -50,7 +50,7 @@ void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list,
|
||||
}
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result=FunASRRecogFile(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL);
|
||||
FUNASR_RESULT result=FunASRInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, 16000);
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
@ -77,6 +77,15 @@ void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list,
|
||||
}
|
||||
}
|
||||
|
||||
bool is_target_file(const std::string& filename, const std::string target) {
|
||||
std::size_t pos = filename.find_last_of(".");
|
||||
if (pos == std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
std::string extension = filename.substr(pos + 1);
|
||||
return (extension == target);
|
||||
}
|
||||
|
||||
void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
|
||||
{
|
||||
if (value_arg.isSet()){
|
||||
@ -94,19 +103,19 @@ int main(int argc, char *argv[])
|
||||
TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string");
|
||||
TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
|
||||
|
||||
TCLAP::ValueArg<std::string> wav_scp("", WAV_SCP, "wave scp path", true, "", "string");
|
||||
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
|
||||
TCLAP::ValueArg<std::int32_t> thread_num("", THREAD_NUM, "multi-thread num for rtf", true, 0, "int32_t");
|
||||
|
||||
cmd.add(model_dir);
|
||||
cmd.add(quantize);
|
||||
cmd.add(wav_scp);
|
||||
cmd.add(wav_path);
|
||||
cmd.add(thread_num);
|
||||
cmd.parse(argc, argv);
|
||||
|
||||
std::map<std::string, std::string> model_path;
|
||||
GetValue(model_dir, MODEL_DIR, model_path);
|
||||
GetValue(quantize, QUANTIZE, model_path);
|
||||
GetValue(wav_scp, WAV_SCP, model_path);
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
@ -125,10 +134,14 @@ int main(int argc, char *argv[])
|
||||
|
||||
// read wav_scp
|
||||
vector<string> wav_list;
|
||||
if(model_path.find(WAV_SCP)!=model_path.end()){
|
||||
ifstream in(model_path.at(WAV_SCP));
|
||||
string wav_path_ = model_path.at(WAV_PATH);
|
||||
if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){
|
||||
wav_list.emplace_back(wav_path_);
|
||||
}
|
||||
else if(is_target_file(wav_path_, "scp")){
|
||||
ifstream in(wav_path_);
|
||||
if (!in.is_open()) {
|
||||
LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP);
|
||||
LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ;
|
||||
return 0;
|
||||
}
|
||||
string line;
|
||||
@ -140,6 +153,9 @@ int main(int argc, char *argv[])
|
||||
wav_list.emplace_back(column2);
|
||||
}
|
||||
in.close();
|
||||
}else{
|
||||
LOG(ERROR)<<"Please check the wav extension!";
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// 多线程测试
|
||||
|
||||
@ -21,6 +21,15 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool is_target_file(const std::string& filename, const std::string target) {
|
||||
std::size_t pos = filename.find_last_of(".");
|
||||
if (pos == std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
std::string extension = filename.substr(pos + 1);
|
||||
return (extension == target);
|
||||
}
|
||||
|
||||
void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
|
||||
{
|
||||
if (value_arg.isSet()){
|
||||
@ -58,20 +67,17 @@ int main(int argc, char *argv[])
|
||||
TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the vad model path, which contains model.onnx, vad.yaml, vad.mvn", true, "", "string");
|
||||
TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
|
||||
|
||||
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "wave file path", false, "", "string");
|
||||
TCLAP::ValueArg<std::string> wav_scp("", WAV_SCP, "wave scp path", false, "", "string");
|
||||
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
|
||||
|
||||
cmd.add(model_dir);
|
||||
cmd.add(quantize);
|
||||
cmd.add(wav_path);
|
||||
cmd.add(wav_scp);
|
||||
cmd.parse(argc, argv);
|
||||
|
||||
std::map<std::string, std::string> model_path;
|
||||
GetValue(model_dir, MODEL_DIR, model_path);
|
||||
GetValue(quantize, QUANTIZE, model_path);
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
GetValue(wav_scp, WAV_SCP, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
@ -89,14 +95,14 @@ int main(int argc, char *argv[])
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
|
||||
// read wav_path and wav_scp
|
||||
// read wav_path
|
||||
vector<string> wav_list;
|
||||
|
||||
if(model_path.find(WAV_PATH)!=model_path.end()){
|
||||
wav_list.emplace_back(model_path.at(WAV_PATH));
|
||||
string wav_path_ = model_path.at(WAV_PATH);
|
||||
if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){
|
||||
wav_list.emplace_back(wav_path_);
|
||||
}
|
||||
if(model_path.find(WAV_SCP)!=model_path.end()){
|
||||
ifstream in(model_path.at(WAV_SCP));
|
||||
else if(is_target_file(wav_path_, "scp")){
|
||||
ifstream in(wav_path_);
|
||||
if (!in.is_open()) {
|
||||
LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ;
|
||||
return 0;
|
||||
@ -110,13 +116,16 @@ int main(int argc, char *argv[])
|
||||
wav_list.emplace_back(column2);
|
||||
}
|
||||
in.close();
|
||||
}else{
|
||||
LOG(ERROR)<<"Please check the wav extension!";
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
float snippet_time = 0.0f;
|
||||
long taking_micros = 0;
|
||||
for(auto& wav_file : wav_list){
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result=FsmnVadWavFile(vad_hanlde, wav_file.c_str(), RASR_NONE, NULL);
|
||||
FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), FSMN_VAD_OFFLINE, NULL, 16000);
|
||||
gettimeofday(&end, NULL);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
|
||||
@ -20,6 +20,15 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool is_target_file(const std::string& filename, const std::string target) {
|
||||
std::size_t pos = filename.find_last_of(".");
|
||||
if (pos == std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
std::string extension = filename.substr(pos + 1);
|
||||
return (extension == target);
|
||||
}
|
||||
|
||||
void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
|
||||
{
|
||||
if (value_arg.isSet()){
|
||||
@ -41,8 +50,7 @@ int main(int argc, char** argv)
|
||||
TCLAP::ValueArg<std::string> punc_dir("", PUNC_DIR, "the punc model path, which contains model.onnx, punc.yaml", false, "", "string");
|
||||
TCLAP::ValueArg<std::string> punc_quant("", PUNC_QUANT, "false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "false", "string");
|
||||
|
||||
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "wave file path", false, "", "string");
|
||||
TCLAP::ValueArg<std::string> wav_scp("", WAV_SCP, "wave scp path", false, "", "string");
|
||||
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
|
||||
|
||||
cmd.add(model_dir);
|
||||
cmd.add(quantize);
|
||||
@ -51,7 +59,6 @@ int main(int argc, char** argv)
|
||||
cmd.add(punc_dir);
|
||||
cmd.add(punc_quant);
|
||||
cmd.add(wav_path);
|
||||
cmd.add(wav_scp);
|
||||
cmd.parse(argc, argv);
|
||||
|
||||
std::map<std::string, std::string> model_path;
|
||||
@ -62,7 +69,6 @@ int main(int argc, char** argv)
|
||||
GetValue(punc_dir, PUNC_DIR, model_path);
|
||||
GetValue(punc_quant, PUNC_QUANT, model_path);
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
GetValue(wav_scp, WAV_SCP, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
@ -80,14 +86,14 @@ int main(int argc, char** argv)
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
|
||||
// read wav_path and wav_scp
|
||||
// read wav_path
|
||||
vector<string> wav_list;
|
||||
|
||||
if(model_path.find(WAV_PATH)!=model_path.end()){
|
||||
wav_list.emplace_back(model_path.at(WAV_PATH));
|
||||
string wav_path_ = model_path.at(WAV_PATH);
|
||||
if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){
|
||||
wav_list.emplace_back(wav_path_);
|
||||
}
|
||||
if(model_path.find(WAV_SCP)!=model_path.end()){
|
||||
ifstream in(model_path.at(WAV_SCP));
|
||||
else if(is_target_file(wav_path_, "scp")){
|
||||
ifstream in(wav_path_);
|
||||
if (!in.is_open()) {
|
||||
LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ;
|
||||
return 0;
|
||||
@ -101,13 +107,16 @@ int main(int argc, char** argv)
|
||||
wav_list.emplace_back(column2);
|
||||
}
|
||||
in.close();
|
||||
}else{
|
||||
LOG(ERROR)<<"Please check the wav extension!";
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
float snippet_time = 0.0f;
|
||||
long taking_micros = 0;
|
||||
for(auto& wav_file : wav_list){
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result=FunOfflineRecogFile(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL);
|
||||
FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, 16000);
|
||||
gettimeofday(&end, NULL);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
|
||||
@ -11,9 +11,9 @@ extern "C" {
|
||||
return mm;
|
||||
}
|
||||
|
||||
_FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num)
|
||||
_FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num, FSMN_VAD_MODE mode)
|
||||
{
|
||||
funasr::VadModel* mm = funasr::CreateVadModel(model_path, thread_num);
|
||||
funasr::VadModel* mm = funasr::CreateVadModel(model_path, thread_num, mode);
|
||||
return mm;
|
||||
}
|
||||
|
||||
@ -30,36 +30,7 @@ extern "C" {
|
||||
}
|
||||
|
||||
// APIs for ASR Infer
|
||||
_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback)
|
||||
{
|
||||
funasr::Model* recog_obj = (funasr::Model*)handle;
|
||||
if (!recog_obj)
|
||||
return nullptr;
|
||||
|
||||
int32_t sampling_rate = -1;
|
||||
funasr::Audio audio(1);
|
||||
if (!audio.LoadWav(sz_buf, n_len, &sampling_rate))
|
||||
return nullptr;
|
||||
|
||||
float* buff;
|
||||
int len;
|
||||
int flag=0;
|
||||
funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
|
||||
p_result->snippet_time = audio.GetTimeLen();
|
||||
int n_step = 0;
|
||||
int n_total = audio.GetQueueSize();
|
||||
while (audio.Fetch(buff, len, flag) > 0) {
|
||||
string msg = recog_obj->Forward(buff, len, flag);
|
||||
p_result->msg += msg;
|
||||
n_step++;
|
||||
if (fn_callback)
|
||||
fn_callback(n_step, n_total);
|
||||
}
|
||||
|
||||
return p_result;
|
||||
}
|
||||
|
||||
_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback)
|
||||
_FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
|
||||
{
|
||||
funasr::Model* recog_obj = (funasr::Model*)handle;
|
||||
if (!recog_obj)
|
||||
@ -87,23 +58,32 @@ extern "C" {
|
||||
return p_result;
|
||||
}
|
||||
|
||||
_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback)
|
||||
_FUNASRAPI FUNASR_RESULT FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
|
||||
{
|
||||
funasr::Model* recog_obj = (funasr::Model*)handle;
|
||||
if (!recog_obj)
|
||||
return nullptr;
|
||||
|
||||
funasr::Audio audio(1);
|
||||
if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
|
||||
return nullptr;
|
||||
if(funasr::is_target_file(sz_filename, "wav")){
|
||||
int32_t sampling_rate_ = -1;
|
||||
if(!audio.LoadWav(sz_filename, &sampling_rate_))
|
||||
return nullptr;
|
||||
}else if(funasr::is_target_file(sz_filename, "pcm")){
|
||||
if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
|
||||
return nullptr;
|
||||
}else{
|
||||
LOG(ERROR)<<"Wrong wav extension";
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
float* buff;
|
||||
int len;
|
||||
int flag = 0;
|
||||
funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
|
||||
p_result->snippet_time = audio.GetTimeLen();
|
||||
int n_step = 0;
|
||||
int n_total = audio.GetQueueSize();
|
||||
funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
|
||||
p_result->snippet_time = audio.GetTimeLen();
|
||||
while (audio.Fetch(buff, len, flag) > 0) {
|
||||
string msg = recog_obj->Forward(buff, len, flag);
|
||||
p_result->msg += msg;
|
||||
@ -115,45 +95,15 @@ extern "C" {
|
||||
return p_result;
|
||||
}
|
||||
|
||||
_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback)
|
||||
{
|
||||
funasr::Model* recog_obj = (funasr::Model*)handle;
|
||||
if (!recog_obj)
|
||||
return nullptr;
|
||||
|
||||
int32_t sampling_rate = -1;
|
||||
funasr::Audio audio(1);
|
||||
if(!audio.LoadWav(sz_wavfile, &sampling_rate))
|
||||
return nullptr;
|
||||
|
||||
float* buff;
|
||||
int len;
|
||||
int flag = 0;
|
||||
int n_step = 0;
|
||||
int n_total = audio.GetQueueSize();
|
||||
funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
|
||||
p_result->snippet_time = audio.GetTimeLen();
|
||||
while (audio.Fetch(buff, len, flag) > 0) {
|
||||
string msg = recog_obj->Forward(buff, len, flag);
|
||||
p_result->msg+= msg;
|
||||
n_step++;
|
||||
if (fn_callback)
|
||||
fn_callback(n_step, n_total);
|
||||
}
|
||||
|
||||
return p_result;
|
||||
}
|
||||
|
||||
// APIs for VAD Infer
|
||||
_FUNASRAPI FUNASR_RESULT FsmnVadWavFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback)
|
||||
_FUNASRAPI FUNASR_RESULT FsmnVadInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
|
||||
{
|
||||
funasr::VadModel* vad_obj = (funasr::VadModel*)handle;
|
||||
if (!vad_obj)
|
||||
return nullptr;
|
||||
|
||||
int32_t sampling_rate = -1;
|
||||
|
||||
funasr::Audio audio(1);
|
||||
if(!audio.LoadWav(sz_wavfile, &sampling_rate))
|
||||
if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
|
||||
return nullptr;
|
||||
|
||||
funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
|
||||
@ -166,6 +116,35 @@ extern "C" {
|
||||
return p_result;
|
||||
}
|
||||
|
||||
_FUNASRAPI FUNASR_RESULT FsmnVadInfer(FUNASR_HANDLE handle, const char* sz_filename, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
|
||||
{
|
||||
funasr::VadModel* vad_obj = (funasr::VadModel*)handle;
|
||||
if (!vad_obj)
|
||||
return nullptr;
|
||||
|
||||
funasr::Audio audio(1);
|
||||
if(funasr::is_target_file(sz_filename, "wav")){
|
||||
int32_t sampling_rate_ = -1;
|
||||
if(!audio.LoadWav(sz_filename, &sampling_rate_))
|
||||
return nullptr;
|
||||
}else if(funasr::is_target_file(sz_filename, "pcm")){
|
||||
if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
|
||||
return nullptr;
|
||||
}else{
|
||||
LOG(ERROR)<<"Wrong wav extension";
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
|
||||
p_result->snippet_time = audio.GetTimeLen();
|
||||
|
||||
vector<std::vector<int>> vad_segments;
|
||||
audio.Split(vad_obj, vad_segments);
|
||||
p_result->segments = new vector<std::vector<int>>(vad_segments);
|
||||
|
||||
return p_result;
|
||||
}
|
||||
|
||||
// APIs for PUNC Infer
|
||||
_FUNASRAPI const std::string CTTransformerInfer(FUNASR_HANDLE handle, const char* sz_sentence, FUNASR_MODE mode, QM_CALLBACK fn_callback)
|
||||
{
|
||||
@ -178,43 +157,7 @@ extern "C" {
|
||||
}
|
||||
|
||||
// APIs for Offline-stream Infer
|
||||
_FUNASRAPI FUNASR_RESULT FunOfflineRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback)
|
||||
{
|
||||
funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
|
||||
if (!offline_stream)
|
||||
return nullptr;
|
||||
|
||||
int32_t sampling_rate = -1;
|
||||
funasr::Audio audio(1);
|
||||
if(!audio.LoadWav(sz_wavfile, &sampling_rate))
|
||||
return nullptr;
|
||||
if(offline_stream->UseVad()){
|
||||
audio.Split(offline_stream);
|
||||
}
|
||||
|
||||
float* buff;
|
||||
int len;
|
||||
int flag = 0;
|
||||
int n_step = 0;
|
||||
int n_total = audio.GetQueueSize();
|
||||
funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
|
||||
p_result->snippet_time = audio.GetTimeLen();
|
||||
while (audio.Fetch(buff, len, flag) > 0) {
|
||||
string msg = (offline_stream->asr_handle)->Forward(buff, len, flag);
|
||||
p_result->msg+= msg;
|
||||
n_step++;
|
||||
if (fn_callback)
|
||||
fn_callback(n_step, n_total);
|
||||
}
|
||||
if(offline_stream->UsePunc()){
|
||||
string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str());
|
||||
p_result->msg = punc_res;
|
||||
}
|
||||
|
||||
return p_result;
|
||||
}
|
||||
|
||||
_FUNASRAPI FUNASR_RESULT FunOfflineRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback)
|
||||
_FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
|
||||
{
|
||||
funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
|
||||
if (!offline_stream)
|
||||
@ -249,6 +192,50 @@ extern "C" {
|
||||
return p_result;
|
||||
}
|
||||
|
||||
_FUNASRAPI FUNASR_RESULT FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
|
||||
{
|
||||
funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
|
||||
if (!offline_stream)
|
||||
return nullptr;
|
||||
|
||||
funasr::Audio audio(1);
|
||||
if(funasr::is_target_file(sz_filename, "wav")){
|
||||
int32_t sampling_rate_ = -1;
|
||||
if(!audio.LoadWav(sz_filename, &sampling_rate_))
|
||||
return nullptr;
|
||||
}else if(funasr::is_target_file(sz_filename, "pcm")){
|
||||
if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
|
||||
return nullptr;
|
||||
}else{
|
||||
LOG(ERROR)<<"Wrong wav extension";
|
||||
exit(-1);
|
||||
}
|
||||
if(offline_stream->UseVad()){
|
||||
audio.Split(offline_stream);
|
||||
}
|
||||
|
||||
float* buff;
|
||||
int len;
|
||||
int flag = 0;
|
||||
int n_step = 0;
|
||||
int n_total = audio.GetQueueSize();
|
||||
funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
|
||||
p_result->snippet_time = audio.GetTimeLen();
|
||||
while (audio.Fetch(buff, len, flag) > 0) {
|
||||
string msg = (offline_stream->asr_handle)->Forward(buff, len, flag);
|
||||
p_result->msg+= msg;
|
||||
n_step++;
|
||||
if (fn_callback)
|
||||
fn_callback(n_step, n_total);
|
||||
}
|
||||
if(offline_stream->UsePunc()){
|
||||
string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str());
|
||||
p_result->msg = punc_res;
|
||||
}
|
||||
|
||||
return p_result;
|
||||
}
|
||||
|
||||
_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result)
|
||||
{
|
||||
if (!result)
|
||||
|
||||
@ -180,4 +180,13 @@ void Glu(Tensor<float> *din, Tensor<float> *dout)
|
||||
}
|
||||
}
|
||||
|
||||
bool is_target_file(const std::string& filename, const std::string target) {
|
||||
std::size_t pos = filename.find_last_of(".");
|
||||
if (pos == std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
std::string extension = filename.substr(pos + 1);
|
||||
return (extension == target);
|
||||
}
|
||||
|
||||
} // namespace funasr
|
||||
@ -25,6 +25,7 @@ extern void FindMax(float *din, int len, float &max_val, int &max_idx);
|
||||
extern void Glu(Tensor<float> *din, Tensor<float> *dout);
|
||||
|
||||
string PathAppend(const string &p1, const string &p2);
|
||||
bool is_target_file(const std::string& filename, const std::string target);
|
||||
|
||||
} // namespace funasr
|
||||
#endif
|
||||
|
||||
@ -1,10 +1,14 @@
|
||||
#include "precomp.h"
|
||||
|
||||
namespace funasr {
|
||||
VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num)
|
||||
VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num, int mode)
|
||||
{
|
||||
VadModel *mm;
|
||||
mm = new FsmnVad();
|
||||
if(mode == FSMN_VAD_OFFLINE){
|
||||
mm = new FsmnVad();
|
||||
}else{
|
||||
LOG(ERROR)<<"Online fsmn vad not imp!";
|
||||
}
|
||||
|
||||
string vad_model_path;
|
||||
string vad_cmvn_path;
|
||||
|
||||
@ -25,8 +25,8 @@ void WebSocketServer::do_decoder(const std::vector<char>& buffer,
|
||||
if (!buffer.empty()) {
|
||||
// fout.write(buffer.data(), buffer.size());
|
||||
// feed data to asr engine
|
||||
FUNASR_RESULT Result = FunOfflineRecogPCMBuffer(
|
||||
asr_hanlde, buffer.data(), buffer.size(), 16000, RASR_NONE, NULL);
|
||||
FUNASR_RESULT Result = FunOfflineInferBuffer(
|
||||
asr_hanlde, buffer.data(), buffer.size(), RASR_NONE, NULL, 16000);
|
||||
|
||||
std::string asr_result =
|
||||
((FUNASR_RECOG_RESULT*)Result)->msg; // get decode result
|
||||
|
||||
Loading…
Reference in New Issue
Block a user