mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
fix some variables
This commit is contained in:
parent
4175b38a91
commit
2304327a3f
@ -25,6 +25,9 @@ ELSE()
|
||||
link_directories(${ONNXRUNTIME_DIR}/lib)
|
||||
endif()
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR}/kaldi-native-fbank)
|
||||
|
||||
add_subdirectory("./third_party/yaml-cpp")
|
||||
add_subdirectory(kaldi-native-fbank/kaldi-native-fbank/csrc)
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(tester)
|
||||
|
||||
@ -0,0 +1,9 @@
|
||||
---
|
||||
BasedOnStyle: Google
|
||||
---
|
||||
Language: Cpp
|
||||
Cpp11BracedListStyle: true
|
||||
Standard: Cpp11
|
||||
DerivePointerAlignment: false
|
||||
PointerAlignment: Right
|
||||
---
|
||||
70
funasr/runtime/onnxruntime/kaldi-native-fbank/.github/workflows/linux-macos.yaml
vendored
Normal file
70
funasr/runtime/onnxruntime/kaldi-native-fbank/.github/workflows/linux-macos.yaml
vendored
Normal file
@ -0,0 +1,70 @@
|
||||
name: linux-macos
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
concurrency:
|
||||
group: linux-macos-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
linux_macos:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure Cmake
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -D CMAKE_BUILD_TYPE=Release ..
|
||||
|
||||
- name: Build kaldi-native-fbank for ubuntu/macos
|
||||
if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos')
|
||||
run: |
|
||||
cd build
|
||||
make -j2
|
||||
ctest --output-on-failure
|
||||
|
||||
- name: Run tests for ubuntu/macos
|
||||
if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos')
|
||||
run: |
|
||||
cd build
|
||||
ctest --output-on-failure
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
|
||||
- name: Install Python dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install wheel twine setuptools
|
||||
|
||||
- name: Build Python
|
||||
shell: bash
|
||||
run: |
|
||||
python3 setup.py install
|
||||
|
||||
- name: Display Python package version
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
|
||||
97
funasr/runtime/onnxruntime/kaldi-native-fbank/.github/workflows/windows-x64.yaml
vendored
Normal file
97
funasr/runtime/onnxruntime/kaldi-native-fbank/.github/workflows/windows-x64.yaml
vendored
Normal file
@ -0,0 +1,97 @@
|
||||
name: windows-x64
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
concurrency:
|
||||
group: windows-x64-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
windows_x64:
|
||||
runs-on: ${{ matrix.os }}
|
||||
name: ${{ matrix.vs-version }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- vs-version: vs2015
|
||||
toolset-version: v140
|
||||
os: windows-2019
|
||||
|
||||
- vs-version: vs2017
|
||||
toolset-version: v141
|
||||
os: windows-2019
|
||||
|
||||
- vs-version: vs2019
|
||||
toolset-version: v142
|
||||
os: windows-2022
|
||||
|
||||
- vs-version: vs2022
|
||||
toolset-version: v143
|
||||
os: windows-2022
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure CMake
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -T ${{ matrix.toolset-version}},host=x64 -D CMAKE_BUILD_TYPE=Release ..
|
||||
|
||||
|
||||
- name: Build kaldi-native-fbank for windows
|
||||
shell: bash
|
||||
run: |
|
||||
cd build
|
||||
cmake --build . --target ALL_BUILD --config Release
|
||||
cat CMakeCache.txt
|
||||
|
||||
- name: Run tests for windows
|
||||
shell: bash
|
||||
run: |
|
||||
cd build
|
||||
ctest --verbose -C Release --output-on-failure -E py
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
|
||||
- name: Install Python dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
export KALDI_NATIVE_FBANK_CMAKE_ARGS="-T ${{ matrix.toolset-version}},host=x64 -A x64"
|
||||
python3 -m pip install --upgrade pip pytest
|
||||
python3 -m pip install wheel twine setuptools
|
||||
|
||||
- name: Build Python
|
||||
shell: bash
|
||||
run: |
|
||||
export KALDI_NATIVE_FBANK_CMAKE_ARGS="-T ${{ matrix.toolset-version}},host=x64 -A x64"
|
||||
python3 setup.py install
|
||||
|
||||
- name: Display Python package version
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
|
||||
|
||||
- name: Run Python tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd kaldi-native-fbank/python/tests
|
||||
python3 ./test_fbank_options.py
|
||||
python3 ./test_frame_extraction_options.py
|
||||
python3 ./test_mel_bank_options.py
|
||||
67
funasr/runtime/onnxruntime/kaldi-native-fbank/.github/workflows/windows-x86.yaml
vendored
Normal file
67
funasr/runtime/onnxruntime/kaldi-native-fbank/.github/workflows/windows-x86.yaml
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
name: windows-x86
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
concurrency:
|
||||
group: windows-x86-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
windows_x86:
|
||||
runs-on: ${{ matrix.os }}
|
||||
name: ${{ matrix.vs-version }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- vs-version: vs2015
|
||||
toolset-version: v140
|
||||
os: windows-2019
|
||||
|
||||
- vs-version: vs2017
|
||||
toolset-version: v141
|
||||
os: windows-2019
|
||||
|
||||
- vs-version: vs2019
|
||||
toolset-version: v142
|
||||
os: windows-2022
|
||||
|
||||
- vs-version: vs2022
|
||||
toolset-version: v143
|
||||
os: windows-2022
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure CMake
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -T ${{ matrix.toolset-version}},host=x64 -A Win32 -D CMAKE_BUILD_TYPE=Release -DKALDI_NATIVE_FBANK_BUILD_PYTHON=OFF ..
|
||||
cat CMakeCache.txt
|
||||
|
||||
|
||||
- name: Build kaldi-native-fbank for windows
|
||||
shell: bash
|
||||
run: |
|
||||
cd build
|
||||
cmake --build . --target ALL_BUILD --config Release
|
||||
|
||||
- name: Run tests for windows
|
||||
shell: bash
|
||||
run: |
|
||||
cd build
|
||||
|
||||
ctest --verbose -C Release --output-on-failure -E py
|
||||
115
funasr/runtime/onnxruntime/kaldi-native-fbank/CMakeLists.txt
Normal file
115
funasr/runtime/onnxruntime/kaldi-native-fbank/CMakeLists.txt
Normal file
@ -0,0 +1,115 @@
|
||||
if("x${CMAKE_SOURCE_DIR}" STREQUAL "x${CMAKE_BINARY_DIR}")
|
||||
message(FATAL_ERROR "\
|
||||
In-source build is not a good practice.
|
||||
Please use:
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
to build this project"
|
||||
)
|
||||
endif()
|
||||
|
||||
if(CMAKE_TOOLCHAIN_FILE)
|
||||
set(_BUILD_PYTHON OFF)
|
||||
set(_BUILD_TESTS OFF)
|
||||
else()
|
||||
set(_BUILD_PYTHON ON)
|
||||
set(_BUILD_TESTS ON)
|
||||
endif()
|
||||
|
||||
if(POLICY CMP0057)
|
||||
cmake_policy(SET CMP0057 NEW)
|
||||
endif()
|
||||
|
||||
cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
|
||||
|
||||
project(kaldi-native-fbank CXX C)
|
||||
|
||||
set(KALDI_NATIVE_FBANK_VERSION "1.13")
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
|
||||
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
|
||||
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
|
||||
|
||||
set(CMAKE_SKIP_BUILD_RPATH FALSE)
|
||||
set(BUILD_RPATH_USE_ORIGIN TRUE)
|
||||
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
|
||||
|
||||
if(NOT APPLE)
|
||||
set(kaldi_native_fbank_rpath_origin "$ORIGIN")
|
||||
else()
|
||||
set(kaldi_native_fbank_rpath_origin "@loader_path")
|
||||
endif()
|
||||
|
||||
set(CMAKE_INSTALL_RPATH ${kaldi_native_fbank_rpath_origin})
|
||||
set(CMAKE_BUILD_RPATH ${kaldi_native_fbank_rpath_origin})
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
|
||||
|
||||
if(NOT DEFINED BUILD_SHARED_LIBS)
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
endif()
|
||||
message(STATUS "BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
|
||||
|
||||
option(KALDI_NATIVE_FBANK_BUILD_TESTS "Whether to build tests or not" ${_BUILD_TESTS})
|
||||
option(KALDI_NATIVE_FBANK_BUILD_PYTHON "Whether to build Python extension" ${_BUILD_PYTHON})
|
||||
option(KALDI_NATIVE_FBANK_ENABLE_CHECK "Whether to build with log" OFF)
|
||||
|
||||
message(STATUS "KALDI_NATIVE_FBANK_BUILD_TESTS: ${KALDI_NATIVE_FBANK_BUILD_TESTS}")
|
||||
message(STATUS "KALDI_NATIVE_FBANK_BUILD_PYTHON: ${KALDI_NATIVE_FBANK_BUILD_PYTHON}")
|
||||
message(STATUS "KALDI_NATIVE_FBANK_ENABLE_CHECK: ${KALDI_NATIVE_FBANK_ENABLE_CHECK}")
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
|
||||
|
||||
message(STATUS "KALDI_NATIVE_FBANK_ENABLE_CHECK: ${KALDI_NATIVE_FBANK_ENABLE_CHECK}")
|
||||
|
||||
if(WIN32)
|
||||
add_definitions(-DNOMINMAX) # Otherwise, std::max() and std::min() won't work
|
||||
endif()
|
||||
|
||||
if(KALDI_NATIVE_FBANK_BUILD_PYTHON)
|
||||
include(pybind11)
|
||||
endif()
|
||||
|
||||
if(KALDI_NATIVE_FBANK_BUILD_TESTS)
|
||||
enable_testing()
|
||||
include(googletest)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_INSTALL_PREFIX)
|
||||
set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install")
|
||||
endif()
|
||||
|
||||
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
|
||||
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
|
||||
|
||||
include(CheckIncludeFileCXX)
|
||||
check_include_file_cxx(cxxabi.h KNF_HAVE_CXXABI_H)
|
||||
check_include_file_cxx(execinfo.h KNF_HAVE_EXECINFO_H)
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
if(WIN32 AND MSVC)
|
||||
# disable various warnings for MSVC
|
||||
# 4244: '=': conversion from 'double' to 'float', possible loss of data
|
||||
# 4267: 'return': conversion from 'size_t' to 'int32_t', possible loss of data
|
||||
# 4624: destructor was implicitly defined as deleted because a base class destructor is inaccessible or deleted
|
||||
set(disabled_warnings
|
||||
/wd4244
|
||||
/wd4267
|
||||
/wd4624
|
||||
)
|
||||
message(STATUS "Disabled warnings: ${disabled_warnings}")
|
||||
foreach(w IN LISTS disabled_warnings)
|
||||
string(APPEND CMAKE_CXX_FLAGS " ${w} ")
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
add_subdirectory(kaldi-native-fbank)
|
||||
211
funasr/runtime/onnxruntime/kaldi-native-fbank/LICENSE
Normal file
211
funasr/runtime/onnxruntime/kaldi-native-fbank/LICENSE
Normal file
@ -0,0 +1,211 @@
|
||||
|
||||
Legal Notices
|
||||
|
||||
NOTE (this is not from the Apache License): The copyright model is that
|
||||
authors (or their employers, if noted in individual files) own their
|
||||
individual contributions. The authors' contributions can be discerned
|
||||
from the git history.
|
||||
|
||||
-------------------------------------------------------------------------
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
@ -0,0 +1,5 @@
|
||||
include LICENSE
|
||||
include README.md
|
||||
include CMakeLists.txt
|
||||
recursive-include kaldi-native-fbank *.*
|
||||
recursive-include cmake *.*
|
||||
106
funasr/runtime/onnxruntime/kaldi-native-fbank/README.md
Normal file
106
funasr/runtime/onnxruntime/kaldi-native-fbank/README.md
Normal file
@ -0,0 +1,106 @@
|
||||
# Introduction
|
||||
|
||||
Kaldi-compatible online fbank feature extractor without external dependencies.
|
||||
|
||||
Tested on the following architectures and operating systems:
|
||||
|
||||
- Linux
|
||||
- macOS
|
||||
- Windows
|
||||
- Android
|
||||
- x86
|
||||
- arm
|
||||
- aarch64
|
||||
|
||||
# Usage
|
||||
|
||||
See the following CMake-based speech recognition (i.e., speech-to-text) projects
|
||||
for its usage:
|
||||
|
||||
- <https://github.com/k2-fsa/sherpa-ncnn>
|
||||
- Specifically, please have a look at <https://github.com/k2-fsa/sherpa-ncnn/blob/master/sherpa-ncnn/csrc/features.h>
|
||||
- <https://github.com/k2-fsa/sherpa-onnx>
|
||||
|
||||
They use `kaldi-native-fbank` to compute fbank features for **real-time**
|
||||
speech recognition.
|
||||
|
||||
# Python APIs
|
||||
|
||||
First, please install `kaldi-native-fbank` by
|
||||
|
||||
```bash
|
||||
git clone https://github.com/csukuangfj/kaldi-native-fbank
|
||||
cd kaldi-native-fbank
|
||||
python3 setup.py install
|
||||
```
|
||||
|
||||
or use
|
||||
|
||||
```bash
|
||||
pip install kaldi-native-fbank
|
||||
```
|
||||
|
||||
To check that you have installed `kaldi-native-fbank` successfully, please use
|
||||
|
||||
```
|
||||
python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
|
||||
```
|
||||
|
||||
which should print the version you have installed.
|
||||
|
||||
Please refer to
|
||||
<https://github.com/csukuangfj/kaldi-native-fbank/blob/master/kaldi-native-fbank/python/tests/test_online_fbank.py>
|
||||
for usages.
|
||||
|
||||
For easier reference, we post the above file below:
|
||||
|
||||
```python3
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
|
||||
try:
|
||||
import kaldifeat
|
||||
except:
|
||||
print("Please install kaldifeat first")
|
||||
sys.exit(0)
|
||||
|
||||
import kaldi_native_fbank as knf
|
||||
import torch
|
||||
|
||||
|
||||
def main():
|
||||
sampling_rate = 16000
|
||||
samples = torch.randn(16000 * 10)
|
||||
|
||||
opts = kaldifeat.FbankOptions()
|
||||
opts.frame_opts.dither = 0
|
||||
opts.mel_opts.num_bins = 80
|
||||
opts.frame_opts.snip_edges = False
|
||||
opts.mel_opts.debug_mel = False
|
||||
|
||||
online_fbank = kaldifeat.OnlineFbank(opts)
|
||||
|
||||
online_fbank.accept_waveform(sampling_rate, samples)
|
||||
|
||||
opts = knf.FbankOptions()
|
||||
opts.frame_opts.dither = 0
|
||||
opts.mel_opts.num_bins = 80
|
||||
opts.frame_opts.snip_edges = False
|
||||
opts.mel_opts.debug_mel = False
|
||||
|
||||
fbank = knf.OnlineFbank(opts)
|
||||
fbank.accept_waveform(sampling_rate, samples.tolist())
|
||||
|
||||
assert online_fbank.num_frames_ready == fbank.num_frames_ready
|
||||
for i in range(fbank.num_frames_ready):
|
||||
f1 = online_fbank.get_frame(i)
|
||||
f2 = torch.from_numpy(fbank.get_frame(i))
|
||||
assert torch.allclose(f1, f2, atol=1e-3), (i, (f1 - f2).abs().max())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
torch.manual_seed(20220825)
|
||||
main()
|
||||
print("success")
|
||||
```
|
||||
@ -0,0 +1,916 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
#[=======================================================================[.rst:
|
||||
FetchContent
|
||||
------------------
|
||||
|
||||
.. only:: html
|
||||
|
||||
.. contents::
|
||||
|
||||
Overview
|
||||
^^^^^^^^
|
||||
|
||||
This module enables populating content at configure time via any method
|
||||
supported by the :module:`ExternalProject` module. Whereas
|
||||
:command:`ExternalProject_Add` downloads at build time, the
|
||||
``FetchContent`` module makes content available immediately, allowing the
|
||||
configure step to use the content in commands like :command:`add_subdirectory`,
|
||||
:command:`include` or :command:`file` operations.
|
||||
|
||||
Content population details would normally be defined separately from the
|
||||
command that performs the actual population. Projects should also
|
||||
check whether the content has already been populated somewhere else in the
|
||||
project hierarchy. Typical usage would look something like this:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
FetchContent_Declare(
|
||||
googletest
|
||||
GIT_REPOSITORY https://github.com/google/googletest.git
|
||||
GIT_TAG release-1.8.0
|
||||
)
|
||||
|
||||
FetchContent_GetProperties(googletest)
|
||||
if(NOT googletest_POPULATED)
|
||||
FetchContent_Populate(googletest)
|
||||
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
When using the above pattern with a hierarchical project arrangement,
|
||||
projects at higher levels in the hierarchy are able to define or override
|
||||
the population details of content specified anywhere lower in the project
|
||||
hierarchy. The ability to detect whether content has already been
|
||||
populated ensures that even if multiple child projects want certain content
|
||||
to be available, the first one to populate it wins. The other child project
|
||||
can simply make use of the already available content instead of repeating
|
||||
the population for itself. See the
|
||||
:ref:`Examples <fetch-content-examples>` section which demonstrates
|
||||
this scenario.
|
||||
|
||||
The ``FetchContent`` module also supports defining and populating
|
||||
content in a single call, with no check for whether the content has been
|
||||
populated elsewhere in the project already. This is a more low level
|
||||
operation and would not normally be the way the module is used, but it is
|
||||
sometimes useful as part of implementing some higher level feature or to
|
||||
populate some content in CMake's script mode.
|
||||
|
||||
|
||||
Declaring Content Details
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. command:: FetchContent_Declare
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
FetchContent_Declare(<name> <contentOptions>...)
|
||||
|
||||
The ``FetchContent_Declare()`` function records the options that describe
|
||||
how to populate the specified content, but if such details have already
|
||||
been recorded earlier in this project (regardless of where in the project
|
||||
hierarchy), this and all later calls for the same content ``<name>`` are
|
||||
ignored. This "first to record, wins" approach is what allows hierarchical
|
||||
projects to have parent projects override content details of child projects.
|
||||
|
||||
The content ``<name>`` can be any string without spaces, but good practice
|
||||
would be to use only letters, numbers and underscores. The name will be
|
||||
treated case-insensitively and it should be obvious for the content it
|
||||
represents, often being the name of the child project or the value given
|
||||
to its top level :command:`project` command (if it is a CMake project).
|
||||
For well-known public projects, the name should generally be the official
|
||||
name of the project. Choosing an unusual name makes it unlikely that other
|
||||
projects needing that same content will use the same name, leading to
|
||||
the content being populated multiple times.
|
||||
|
||||
The ``<contentOptions>`` can be any of the download or update/patch options
|
||||
that the :command:`ExternalProject_Add` command understands. The configure,
|
||||
build, install and test steps are explicitly disabled and therefore options
|
||||
related to them will be ignored. In most cases, ``<contentOptions>`` will
|
||||
just be a couple of options defining the download method and method-specific
|
||||
details like a commit tag or archive hash. For example:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
FetchContent_Declare(
|
||||
googletest
|
||||
GIT_REPOSITORY https://github.com/google/googletest.git
|
||||
GIT_TAG release-1.8.0
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
myCompanyIcons
|
||||
URL https://intranet.mycompany.com/assets/iconset_1.12.tar.gz
|
||||
URL_HASH MD5=5588a7b18261c20068beabfb4f530b87
|
||||
)
|
||||
|
||||
FetchContent_Declare(
|
||||
myCompanyCertificates
|
||||
SVN_REPOSITORY svn+ssh://svn.mycompany.com/srv/svn/trunk/certs
|
||||
SVN_REVISION -r12345
|
||||
)
|
||||
|
||||
Populating The Content
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. command:: FetchContent_Populate
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
FetchContent_Populate( <name> )
|
||||
|
||||
In most cases, the only argument given to ``FetchContent_Populate()`` is the
|
||||
``<name>``. When used this way, the command assumes the content details have
|
||||
been recorded by an earlier call to :command:`FetchContent_Declare`. The
|
||||
details are stored in a global property, so they are unaffected by things
|
||||
like variable or directory scope. Therefore, it doesn't matter where in the
|
||||
project the details were previously declared, as long as they have been
|
||||
declared before the call to ``FetchContent_Populate()``. Those saved details
|
||||
are then used to construct a call to :command:`ExternalProject_Add` in a
|
||||
private sub-build to perform the content population immediately. The
|
||||
implementation of ``ExternalProject_Add()`` ensures that if the content has
|
||||
already been populated in a previous CMake run, that content will be reused
|
||||
rather than being populated again. For the common case where population
|
||||
involves downloading content, the cost of the download is only paid once.
|
||||
|
||||
An internal global property records when a particular content population
|
||||
request has been processed. If ``FetchContent_Populate()`` is called more
|
||||
than once for the same content name within a configure run, the second call
|
||||
will halt with an error. Projects can and should check whether content
|
||||
population has already been processed with the
|
||||
:command:`FetchContent_GetProperties` command before calling
|
||||
``FetchContent_Populate()``.
|
||||
|
||||
``FetchContent_Populate()`` will set three variables in the scope of the
|
||||
caller: ``<lcName>_POPULATED``, ``<lcName>_SOURCE_DIR`` and
|
||||
``<lcName>_BINARY_DIR``, where ``<lcName>`` is the lowercased ``<name>``.
|
||||
``<lcName>_POPULATED`` will always be set to ``True`` by the call.
|
||||
``<lcName>_SOURCE_DIR`` is the location where the
|
||||
content can be found upon return (it will have already been populated), while
|
||||
``<lcName>_BINARY_DIR`` is a directory intended for use as a corresponding
|
||||
build directory. The main use case for the two directory variables is to
|
||||
call :command:`add_subdirectory` immediately after population, i.e.:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
FetchContent_Populate(FooBar ...)
|
||||
add_subdirectory(${foobar_SOURCE_DIR} ${foobar_BINARY_DIR})
|
||||
|
||||
The values of the three variables can also be retrieved from anywhere in the
|
||||
project hierarchy using the :command:`FetchContent_GetProperties` command.
|
||||
|
||||
A number of cache variables influence the behavior of all content population
|
||||
performed using details saved from a :command:`FetchContent_Declare` call:
|
||||
|
||||
``FETCHCONTENT_BASE_DIR``
|
||||
In most cases, the saved details do not specify any options relating to the
|
||||
directories to use for the internal sub-build, final source and build areas.
|
||||
It is generally best to leave these decisions up to the ``FetchContent``
|
||||
module to handle on the project's behalf. The ``FETCHCONTENT_BASE_DIR``
|
||||
cache variable controls the point under which all content population
|
||||
directories are collected, but in most cases developers would not need to
|
||||
change this. The default location is ``${CMAKE_BINARY_DIR}/_deps``, but if
|
||||
developers change this value, they should aim to keep the path short and
|
||||
just below the top level of the build tree to avoid running into path
|
||||
length problems on Windows.
|
||||
|
||||
``FETCHCONTENT_QUIET``
|
||||
The logging output during population can be quite verbose, making the
|
||||
configure stage quite noisy. This cache option (``ON`` by default) hides
|
||||
all population output unless an error is encountered. If experiencing
|
||||
problems with hung downloads, temporarily switching this option off may
|
||||
help diagnose which content population is causing the issue.
|
||||
|
||||
``FETCHCONTENT_FULLY_DISCONNECTED``
|
||||
When this option is enabled, no attempt is made to download or update
|
||||
any content. It is assumed that all content has already been populated in
|
||||
a previous run or the source directories have been pointed at existing
|
||||
contents the developer has provided manually (using options described
|
||||
further below). When the developer knows that no changes have been made to
|
||||
any content details, turning this option ``ON`` can significantly speed up
|
||||
the configure stage. It is ``OFF`` by default.
|
||||
|
||||
``FETCHCONTENT_UPDATES_DISCONNECTED``
|
||||
This is a less severe download/update control compared to
|
||||
``FETCHCONTENT_FULLY_DISCONNECTED``. Instead of bypassing all download and
|
||||
update logic, the ``FETCHCONTENT_UPDATES_DISCONNECTED`` only disables the
|
||||
update stage. Therefore, if content has not been downloaded previously,
|
||||
it will still be downloaded when this option is enabled. This can speed up
|
||||
the configure stage, but not as much as
|
||||
``FETCHCONTENT_FULLY_DISCONNECTED``. It is ``OFF`` by default.
|
||||
|
||||
In addition to the above cache variables, the following cache variables are
|
||||
also defined for each content name (``<ucName>`` is the uppercased value of
|
||||
``<name>``):
|
||||
|
||||
``FETCHCONTENT_SOURCE_DIR_<ucName>``
|
||||
If this is set, no download or update steps are performed for the specified
|
||||
content and the ``<lcName>_SOURCE_DIR`` variable returned to the caller is
|
||||
pointed at this location. This gives developers a way to have a separate
|
||||
checkout of the content that they can modify freely without interference
|
||||
from the build. The build simply uses that existing source, but it still
|
||||
defines ``<lcName>_BINARY_DIR`` to point inside its own build area.
|
||||
Developers are strongly encouraged to use this mechanism rather than
|
||||
editing the sources populated in the default location, as changes to
|
||||
sources in the default location can be lost when content population details
|
||||
are changed by the project.
|
||||
|
||||
``FETCHCONTENT_UPDATES_DISCONNECTED_<ucName>``
|
||||
This is the per-content equivalent of
|
||||
``FETCHCONTENT_UPDATES_DISCONNECTED``. If the global option or this option
|
||||
is ``ON``, then updates will be disabled for the named content.
|
||||
Disabling updates for individual content can be useful for content whose
|
||||
details rarely change, while still leaving other frequently changing
|
||||
content with updates enabled.
|
||||
|
||||
|
||||
The ``FetchContent_Populate()`` command also supports a syntax allowing the
|
||||
content details to be specified directly rather than using any saved
|
||||
details. This is more low-level and use of this form is generally to be
|
||||
avoided in favour of using saved content details as outlined above.
|
||||
Nevertheless, in certain situations it can be useful to invoke the content
|
||||
population as an isolated operation (typically as part of implementing some
|
||||
other higher level feature or when using CMake in script mode):
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
FetchContent_Populate( <name>
|
||||
[QUIET]
|
||||
[SUBBUILD_DIR <subBuildDir>]
|
||||
[SOURCE_DIR <srcDir>]
|
||||
[BINARY_DIR <binDir>]
|
||||
...
|
||||
)
|
||||
|
||||
This form has a number of key differences to that where only ``<name>`` is
|
||||
provided:
|
||||
|
||||
- All required population details are assumed to have been provided directly
|
||||
in the call to ``FetchContent_Populate()``. Any saved details for
|
||||
``<name>`` are ignored.
|
||||
- No check is made for whether content for ``<name>`` has already been
|
||||
populated.
|
||||
- No global property is set to record that the population has occurred.
|
||||
- No global properties record the source or binary directories used for the
|
||||
populated content.
|
||||
- The ``FETCHCONTENT_FULLY_DISCONNECTED`` and
|
||||
``FETCHCONTENT_UPDATES_DISCONNECTED`` cache variables are ignored.
|
||||
|
||||
The ``<lcName>_SOURCE_DIR`` and ``<lcName>_BINARY_DIR`` variables are still
|
||||
returned to the caller, but since these locations are not stored as global
|
||||
properties when this form is used, they are only available to the calling
|
||||
scope and below rather than the entire project hierarchy. No
|
||||
``<lcName>_POPULATED`` variable is set in the caller's scope with this form.
|
||||
|
||||
The supported options for ``FetchContent_Populate()`` are the same as those
|
||||
for :command:`FetchContent_Declare()`. Those few options shown just
|
||||
above are either specific to ``FetchContent_Populate()`` or their behavior is
|
||||
slightly modified from how :command:`ExternalProject_Add` treats them.
|
||||
|
||||
``QUIET``
|
||||
The ``QUIET`` option can be given to hide the output associated with
|
||||
populating the specified content. If the population fails, the output will
|
||||
be shown regardless of whether this option was given or not so that the
|
||||
cause of the failure can be diagnosed. The global ``FETCHCONTENT_QUIET``
|
||||
cache variable has no effect on ``FetchContent_Populate()`` calls where the
|
||||
content details are provided directly.
|
||||
|
||||
``SUBBUILD_DIR``
|
||||
The ``SUBBUILD_DIR`` argument can be provided to change the location of the
|
||||
sub-build created to perform the population. The default value is
|
||||
``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-subbuild`` and it would be unusual
|
||||
to need to override this default. If a relative path is specified, it will
|
||||
be interpreted as relative to :variable:`CMAKE_CURRENT_BINARY_DIR`.
|
||||
|
||||
``SOURCE_DIR``, ``BINARY_DIR``
|
||||
The ``SOURCE_DIR`` and ``BINARY_DIR`` arguments are supported by
|
||||
:command:`ExternalProject_Add`, but different default values are used by
|
||||
``FetchContent_Populate()``. ``SOURCE_DIR`` defaults to
|
||||
``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-src`` and ``BINARY_DIR`` defaults to
|
||||
``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-build``. If a relative path is
|
||||
specified, it will be interpreted as relative to
|
||||
:variable:`CMAKE_CURRENT_BINARY_DIR`.
|
||||
|
||||
In addition to the above explicit options, any other unrecognized options are
|
||||
passed through unmodified to :command:`ExternalProject_Add` to perform the
|
||||
download, patch and update steps. The following options are explicitly
|
||||
prohibited (they are disabled by the ``FetchContent_Populate()`` command):
|
||||
|
||||
- ``CONFIGURE_COMMAND``
|
||||
- ``BUILD_COMMAND``
|
||||
- ``INSTALL_COMMAND``
|
||||
- ``TEST_COMMAND``
|
||||
|
||||
If using ``FetchContent_Populate()`` within CMake's script mode, be aware
|
||||
that the implementation sets up a sub-build which therefore requires a CMake
|
||||
generator and build tool to be available. If these cannot be found by
|
||||
default, then the :variable:`CMAKE_GENERATOR` and/or
|
||||
:variable:`CMAKE_MAKE_PROGRAM` variables will need to be set appropriately
|
||||
on the command line invoking the script.
|
||||
|
||||
|
||||
Retrieve Population Properties
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. command:: FetchContent_GetProperties
|
||||
|
||||
When using saved content details, a call to :command:`FetchContent_Populate`
|
||||
records information in global properties which can be queried at any time.
|
||||
This information includes the source and binary directories associated with
|
||||
the content and also whether or not the content population has been processed
|
||||
during the current configure run.
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
FetchContent_GetProperties( <name>
|
||||
[SOURCE_DIR <srcDirVar>]
|
||||
[BINARY_DIR <binDirVar>]
|
||||
[POPULATED <doneVar>]
|
||||
)
|
||||
|
||||
The ``SOURCE_DIR``, ``BINARY_DIR`` and ``POPULATED`` options can be used to
|
||||
specify which properties should be retrieved. Each option accepts a value
|
||||
which is the name of the variable in which to store that property. Most of
|
||||
the time though, only ``<name>`` is given, in which case the call will then
|
||||
set the same variables as a call to
|
||||
:command:`FetchContent_Populate(name) <FetchContent_Populate>`. This allows
|
||||
the following canonical pattern to be used, which ensures that the relevant
|
||||
variables will always be defined regardless of whether or not the population
|
||||
has been performed elsewhere in the project already:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
FetchContent_GetProperties(foobar)
|
||||
if(NOT foobar_POPULATED)
|
||||
FetchContent_Populate(foobar)
|
||||
|
||||
# Set any custom variables, etc. here, then
|
||||
# populate the content as part of this build
|
||||
|
||||
add_subdirectory(${foobar_SOURCE_DIR} ${foobar_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
The above pattern allows other parts of the overall project hierarchy to
|
||||
re-use the same content and ensure that it is only populated once.
|
||||
|
||||
|
||||
.. _`fetch-content-examples`:
|
||||
|
||||
Examples
|
||||
^^^^^^^^
|
||||
|
||||
Consider a project hierarchy where ``projA`` is the top level project and it
|
||||
depends on projects ``projB`` and ``projC``. Both ``projB`` and ``projC``
|
||||
can be built standalone and they also both depend on another project
|
||||
``projD``. For simplicity, this example will assume that all four projects
|
||||
are available on a company git server. The ``CMakeLists.txt`` of each project
|
||||
might have sections like the following:
|
||||
|
||||
*projA*:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(
|
||||
projB
|
||||
GIT_REPOSITORY git@mycompany.com:git/projB.git
|
||||
GIT_TAG 4a89dc7e24ff212a7b5167bef7ab079d
|
||||
)
|
||||
FetchContent_Declare(
|
||||
projC
|
||||
GIT_REPOSITORY git@mycompany.com:git/projC.git
|
||||
GIT_TAG 4ad4016bd1d8d5412d135cf8ceea1bb9
|
||||
)
|
||||
FetchContent_Declare(
|
||||
projD
|
||||
GIT_REPOSITORY git@mycompany.com:git/projD.git
|
||||
GIT_TAG origin/integrationBranch
|
||||
)
|
||||
|
||||
FetchContent_GetProperties(projB)
|
||||
if(NOT projb_POPULATED)
|
||||
FetchContent_Populate(projB)
|
||||
add_subdirectory(${projb_SOURCE_DIR} ${projb_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
FetchContent_GetProperties(projC)
|
||||
if(NOT projc_POPULATED)
|
||||
FetchContent_Populate(projC)
|
||||
add_subdirectory(${projc_SOURCE_DIR} ${projc_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
*projB*:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(
|
||||
projD
|
||||
GIT_REPOSITORY git@mycompany.com:git/projD.git
|
||||
GIT_TAG 20b415f9034bbd2a2e8216e9a5c9e632
|
||||
)
|
||||
|
||||
FetchContent_GetProperties(projD)
|
||||
if(NOT projd_POPULATED)
|
||||
FetchContent_Populate(projD)
|
||||
add_subdirectory(${projd_SOURCE_DIR} ${projd_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
|
||||
*projC*:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(
|
||||
projD
|
||||
GIT_REPOSITORY git@mycompany.com:git/projD.git
|
||||
GIT_TAG 7d9a17ad2c962aa13e2fbb8043fb6b8a
|
||||
)
|
||||
|
||||
FetchContent_GetProperties(projD)
|
||||
if(NOT projd_POPULATED)
|
||||
FetchContent_Populate(projD)
|
||||
add_subdirectory(${projd_SOURCE_DIR} ${projd_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
A few key points should be noted in the above:
|
||||
|
||||
- ``projB`` and ``projC`` define different content details for ``projD``,
|
||||
but ``projA`` also defines a set of content details for ``projD`` and
|
||||
because ``projA`` will define them first, the details from ``projB`` and
|
||||
``projC`` will not be used. The override details defined by ``projA``
|
||||
are not required to match either of those from ``projB`` or ``projC``, but
|
||||
it is up to the higher level project to ensure that the details it does
|
||||
define still make sense for the child projects.
|
||||
- While ``projA`` defined content details for ``projD``, it did not need
|
||||
to explicitly call ``FetchContent_Populate(projD)`` itself. Instead, it
|
||||
leaves that to a child project to do (in this case it will be ``projB``
|
||||
since it is added to the build ahead of ``projC``). If ``projA`` needed to
|
||||
customize how the ``projD`` content was brought into the build as well
|
||||
(e.g. define some CMake variables before calling
|
||||
:command:`add_subdirectory` after populating), it would do the call to
|
||||
``FetchContent_Populate()``, etc. just as it did for the ``projB`` and
|
||||
``projC`` content. For higher level projects, it is usually enough to
|
||||
just define the override content details and leave the actual population
|
||||
to the child projects. This saves repeating the same thing at each level
|
||||
of the project hierarchy unnecessarily.
|
||||
- Even though ``projA`` is the top level project in this example, it still
|
||||
checks whether ``projB`` and ``projC`` have already been populated before
|
||||
going ahead to do those populations. This makes ``projA`` able to be more
|
||||
easily incorporated as a child of some other higher level project in the
|
||||
future if required. Always protect a call to
|
||||
:command:`FetchContent_Populate` with a check to
|
||||
:command:`FetchContent_GetProperties`, even in what may be considered a top
|
||||
level project at the time.
|
||||
|
||||
|
||||
The following example demonstrates how one might download and unpack a
|
||||
firmware tarball using CMake's :manual:`script mode <cmake(1)>`. The call to
|
||||
:command:`FetchContent_Populate` specifies all the content details and the
|
||||
unpacked firmware will be placed in a ``firmware`` directory below the
|
||||
current working directory.
|
||||
|
||||
*getFirmware.cmake*:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
# NOTE: Intended to be run in script mode with cmake -P
|
||||
include(FetchContent)
|
||||
FetchContent_Populate(
|
||||
firmware
|
||||
URL https://mycompany.com/assets/firmware-1.23-arm.tar.gz
|
||||
URL_HASH MD5=68247684da89b608d466253762b0ff11
|
||||
SOURCE_DIR firmware
|
||||
)
|
||||
|
||||
#]=======================================================================]
|
||||
|
||||
|
||||
# Directory of private support files that sits next to this module file.
# The CMakeLists.cmake.in template for the population sub-build is read
# from this location.
set(__FetchContent_privateDir "${CMAKE_CURRENT_LIST_DIR}/FetchContent")
|
||||
|
||||
#=======================================================================
|
||||
# Recording and retrieving content details for later population
|
||||
#=======================================================================
|
||||
|
||||
# Internal use, projects must not call this directly. It is
|
||||
# intended for use by FetchContent_Declare() only.
|
||||
#
|
||||
# Sets a content-specific global property (not meant for use
|
||||
# outside of functions defined here in this file) which can later
|
||||
# be retrieved using __FetchContent_getSavedDetails() with just the
|
||||
# same content name. If there is already a value stored in the
|
||||
# property, it is left unchanged and this call has no effect.
|
||||
# This allows parent projects to define the content details,
|
||||
# overriding anything a child project may try to set (properties
|
||||
# are not cached between runs, so the first thing to set it in a
|
||||
# build will be in control).
|
||||
# Store the given details (everything after contentName) in a global property
# keyed on the lowercased content name. Only the first call for a given name
# has any effect; later calls leave the stored details untouched.
function(__FetchContent_declareDetails contentName)

  string(TOLOWER ${contentName} lcName)
  set(detailsProp "_FetchContent_${lcName}_savedDetails")

  get_property(haveDetails GLOBAL PROPERTY ${detailsProp} DEFINED)
  if(haveDetails)
    # Somebody already declared this content: first declaration wins.
    return()
  endif()

  define_property(GLOBAL PROPERTY ${detailsProp}
    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
  )
  set_property(GLOBAL PROPERTY ${detailsProp} ${ARGN})

endfunction()
|
||||
|
||||
|
||||
# Internal use, projects must not call this directly. It is
|
||||
# intended for use by the FetchContent_Declare() function.
|
||||
#
|
||||
# Retrieves details saved for the specified content in an
|
||||
# earlier call to __FetchContent_declareDetails().
|
||||
# Look up the details previously stored for contentName (matched
# case-insensitively) and return them to the caller in the variable named by
# outVar. Halts with a fatal error when nothing was recorded for that name.
function(__FetchContent_getSavedDetails contentName outVar)

  string(TOLOWER ${contentName} lcName)
  set(detailsProp "_FetchContent_${lcName}_savedDetails")

  get_property(haveDetails GLOBAL PROPERTY ${detailsProp} DEFINED)
  if(NOT haveDetails)
    message(FATAL_ERROR "No content details recorded for ${contentName}")
  endif()

  get_property(details GLOBAL PROPERTY ${detailsProp})
  set(${outVar} "${details}" PARENT_SCOPE)

endfunction()
|
||||
|
||||
|
||||
# Saves population details of the content, sets defaults for the
|
||||
# SOURCE_DIR and BINARY_DIR.
|
||||
# Record how to obtain the named content. Default SOURCE_DIR and BINARY_DIR
# locations under FETCHCONTENT_BASE_DIR are placed ahead of the caller's own
# options, so anything the caller specifies explicitly takes precedence.
function(FetchContent_Declare contentName)

  string(TOLOWER ${contentName} lcName)

  # SVN_REPOSITORY is the only keyword inspected here; every other argument
  # ends up in ARG_UNPARSED_ARGUMENTS and is forwarded as given.
  cmake_parse_arguments(ARG "" "SVN_REPOSITORY" "" ${ARGN})

  unset(sourceSuffix)
  unset(svnArgs)
  if(ARG_SVN_REPOSITORY)
    # Make the source directory depend on the repository URL by appending a
    # short hash of it. If the URL changes, a fresh directory is used rather
    # than an attempted checkout/update of the old URL's working copy, which
    # would fail. Only 7 hash characters are kept so the path stays short
    # (path length limits can be a problem on Windows).
    string(SHA1 repoHash ${ARG_SVN_REPOSITORY})
    string(SUBSTRING ${repoHash} 0 7 repoHash)
    set(sourceSuffix "-${repoHash}")
    set(svnArgs SVN_REPOSITORY ${ARG_SVN_REPOSITORY})
  endif()

  __FetchContent_declareDetails(
    ${lcName}
    SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${lcName}-src${sourceSuffix}"
    BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${lcName}-build"
    ${svnArgs}
    # Caller-supplied options come last so they can replace the defaults above
    ${ARG_UNPARSED_ARGUMENTS}
  )

endfunction()
|
||||
|
||||
|
||||
#=======================================================================
|
||||
# Set/get whether the specified content has been populated yet.
|
||||
# The setter also records the source and binary dirs used.
|
||||
#=======================================================================
|
||||
|
||||
# Internal use, projects must not call this directly. It is
|
||||
# intended for use by the FetchContent_Populate() function to
|
||||
# record when FetchContent_Populate() is called for a particular
|
||||
# content name.
|
||||
# Mark contentName as populated by defining three global properties for it:
# the source directory, the binary directory and a "populated" flag set to
# True. The property names are built from the lowercased content name.
function(__FetchContent_setPopulated contentName sourceDir binaryDir)

  string(TOLOWER ${contentName} lcName)
  set(propBase "_FetchContent_${lcName}")

  # All three properties share the same documentation strings.
  foreach(suffix IN ITEMS sourceDir binaryDir populated)
    define_property(GLOBAL PROPERTY ${propBase}_${suffix}
      BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
      FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
    )
  endforeach()

  set_property(GLOBAL PROPERTY ${propBase}_sourceDir ${sourceDir})
  set_property(GLOBAL PROPERTY ${propBase}_binaryDir ${binaryDir})
  set_property(GLOBAL PROPERTY ${propBase}_populated True)

endfunction()
|
||||
|
||||
|
||||
# Set variables in the calling scope for any of the retrievable
|
||||
# properties. If no specific properties are requested, variables
|
||||
# will be set for all retrievable properties.
|
||||
#
|
||||
# This function is intended to also be used by projects as the canonical
|
||||
# way to detect whether they should call FetchContent_Populate()
|
||||
# and pull the populated source into the build with add_subdirectory(),
|
||||
# if they are using the populated content in that way.
|
||||
# Copy the recorded population properties of contentName into variables in
# the caller's scope.
#
#   SOURCE_DIR <var>  receives the recorded source directory (only set when a
#                     non-empty value has been recorded)
#   BINARY_DIR <var>  receives the recorded binary directory (same condition)
#   POPULATED <var>   receives whether the "populated" property is defined
#
# When none of the three keywords is given, all three are returned in
# <lcName>_SOURCE_DIR, <lcName>_BINARY_DIR and <lcName>_POPULATED, where
# <lcName> is the lowercased contentName.
function(FetchContent_GetProperties contentName)

  string(TOLOWER ${contentName} lcName)

  cmake_parse_arguments(ARG "" "SOURCE_DIR;BINARY_DIR;POPULATED" "" ${ARGN})

  if(NOT (ARG_SOURCE_DIR OR ARG_BINARY_DIR OR ARG_POPULATED))
    # Nothing was asked for explicitly, so hand back everything under the
    # default variable names.
    set(ARG_SOURCE_DIR ${lcName}_SOURCE_DIR)
    set(ARG_BINARY_DIR ${lcName}_BINARY_DIR)
    set(ARG_POPULATED ${lcName}_POPULATED)
  endif()

  set(propBase "_FetchContent_${lcName}")

  if(ARG_SOURCE_DIR)
    get_property(recordedSrcDir GLOBAL PROPERTY ${propBase}_sourceDir)
    if(recordedSrcDir)
      set(${ARG_SOURCE_DIR} ${recordedSrcDir} PARENT_SCOPE)
    endif()
  endif()

  if(ARG_BINARY_DIR)
    get_property(recordedBinDir GLOBAL PROPERTY ${propBase}_binaryDir)
    if(recordedBinDir)
      set(${ARG_BINARY_DIR} ${recordedBinDir} PARENT_SCOPE)
    endif()
  endif()

  if(ARG_POPULATED)
    # The flag reported is whether the property is defined, not its value.
    get_property(isPopulated GLOBAL PROPERTY ${propBase}_populated DEFINED)
    set(${ARG_POPULATED} ${isPopulated} PARENT_SCOPE)
  endif()

endfunction()
|
||||
|
||||
|
||||
#=======================================================================
|
||||
# Performing the population
|
||||
#=======================================================================
|
||||
|
||||
# The value of contentName will always have been lowercased by the caller.
|
||||
# All other arguments are assumed to be options that are understood by
|
||||
# ExternalProject_Add(), except for QUIET and SUBBUILD_DIR.
|
||||
# Carry out a population by generating a small CMake project in SUBBUILD_DIR
# from the CMakeLists.cmake.in template and then configuring and building it.
#
# SUBBUILD_DIR, SOURCE_DIR and BINARY_DIR are mandatory; relative values are
# taken relative to CMAKE_CURRENT_BINARY_DIR. QUIET captures the sub-build
# output and prints it only when a step fails. All remaining arguments are
# collected, individually quoted, into ARG_EXTRA.
#
# On return, <contentName>_SOURCE_DIR and <contentName>_BINARY_DIR are set in
# the caller's scope.
#
# NOTE: the template is expanded by configure_file() using the variables of
# this function, so the names of the variables here must not be changed
# without checking the template.
function(__FetchContent_directPopulate contentName)

  set(options QUIET)
  set(oneValueArgs
    SUBBUILD_DIR
    SOURCE_DIR
    BINARY_DIR
    # Parsed here only so that they are kept out of ARG_UNPARSED_ARGUMENTS
    CONFIGURE_COMMAND
    BUILD_COMMAND
    INSTALL_COMMAND
    TEST_COMMAND
  )
  set(multiValueArgs "")

  cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Each of the three directories must be given; make relative ones absolute.
  foreach(dirKind IN ITEMS SUBBUILD_DIR SOURCE_DIR BINARY_DIR)
    if(NOT ARG_${dirKind})
      message(FATAL_ERROR "Internal error: ${dirKind} not set")
    elseif(NOT IS_ABSOLUTE "${ARG_${dirKind}}")
      set(ARG_${dirKind} "${CMAKE_CURRENT_BINARY_DIR}/${ARG_${dirKind}}")
    endif()
  endforeach()

  # Report the final source and build locations to the caller. This is done
  # after the checks above so that the values reflect any overrides and are
  # always absolute.
  set(${contentName}_SOURCE_DIR "${ARG_SOURCE_DIR}" PARENT_SCOPE)
  set(${contentName}_BINARY_DIR "${ARG_BINARY_DIR}" PARENT_SCOPE)

  # Individual pass-through arguments may contain spaces. Wrap each one in
  # double quotes so that it remains a single argument once substituted into
  # the generated CMakeLists.txt.
  unset(ARG_EXTRA)
  foreach(extraArg IN LISTS ARG_UNPARSED_ARGUMENTS)
    set(ARG_EXTRA "${ARG_EXTRA} \"${extraArg}\"")
  endforeach()

  # In the normal (non-quiet) case the output is not captured, so the user
  # sees it as it happens (e.g. progress of a long download). In quiet mode
  # stdout and stderr both go to the same variable, which keeps them in order
  # and allows them to be shown if a step fails.
  if(NOT ARG_QUIET)
    set(capturedOutput)
    set(outputOptions)
    message(STATUS "Populating ${contentName}")
  else()
    set(outputOptions
      OUTPUT_VARIABLE capturedOutput
      ERROR_VARIABLE capturedOutput
    )
  endif()

  # Forward the generator settings of the current build to the sub-build.
  # With no generator known (most likely CMake's script mode, where
  # CMAKE_MAKE_PROGRAM cannot be trusted even if provided) nothing is
  # forwarded and the default generator and build tool must be findable.
  unset(generatorOpts)
  if(CMAKE_GENERATOR)
    set(generatorOpts "-G${CMAKE_GENERATOR}")
    if(CMAKE_GENERATOR_PLATFORM)
      list(APPEND generatorOpts "-A${CMAKE_GENERATOR_PLATFORM}")
    endif()
    if(CMAKE_GENERATOR_TOOLSET)
      list(APPEND generatorOpts "-T${CMAKE_GENERATOR_TOOLSET}")
    endif()
    if(CMAKE_MAKE_PROGRAM)
      # The main project may use a build tool that is not on the PATH
      list(APPEND generatorOpts "-DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}")
    endif()
  endif()

  # Generate the sub-build project, then configure it ...
  configure_file("${__FetchContent_privateDir}/CMakeLists.cmake.in"
                 "${ARG_SUBBUILD_DIR}/CMakeLists.txt")
  execute_process(
    COMMAND ${CMAKE_COMMAND} ${generatorOpts} .
    RESULT_VARIABLE result
    ${outputOptions}
    WORKING_DIRECTORY "${ARG_SUBBUILD_DIR}"
  )
  if(result)
    if(capturedOutput)
      message("${capturedOutput}")
    endif()
    message(FATAL_ERROR "CMake step for ${contentName} failed: ${result}")
  endif()

  # ... and build it, which performs the actual population.
  execute_process(
    COMMAND ${CMAKE_COMMAND} --build .
    RESULT_VARIABLE result
    ${outputOptions}
    WORKING_DIRECTORY "${ARG_SUBBUILD_DIR}"
  )
  if(result)
    if(capturedOutput)
      message("${capturedOutput}")
    endif()
    message(FATAL_ERROR "Build step for ${contentName} failed: ${result}")
  endif()

endfunction()
|
||||
|
||||
|
||||
# User-facing cache entries that control content population.
#
# No initial value is given for this option, so it starts out OFF.
option(FETCHCONTENT_FULLY_DISCONNECTED "Disables all attempts to download or update content and assumes source dirs already exist")
|
||||
# No initial value is given for this option either, so it also starts out OFF.
option(FETCHCONTENT_UPDATES_DISCONNECTED "Enables UPDATE_DISCONNECTED behavior for all content population")
|
||||
# Quiet population is the default (ON).
option(FETCHCONTENT_QUIET "Enables QUIET option for all content population" ON)
|
||||
# Root directory for populated content; defaults to _deps in the top-level build directory.
set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/_deps" CACHE PATH "Directory under which to collect all populated content")
|
||||
|
||||
# FetchContent_Populate(<contentName> [<details>...])
#
# Populates <contentName>. Two forms are supported:
#
#   * Direct form (extra arguments given): the arguments are forwarded to
#     __FetchContent_directPopulate() and only <lcName>_SOURCE_DIR and
#     <lcName>_BINARY_DIR are returned to the caller.
#   * Declared form (name only): the details saved by an earlier
#     FetchContent_Declare() are used, the content is recorded as populated,
#     and <lcName>_SOURCE_DIR, <lcName>_BINARY_DIR and <lcName>_POPULATED
#     are returned to the caller.
function(FetchContent_Populate contentName)

  if(NOT contentName)
    message(FATAL_ERROR "Empty contentName not allowed for FetchContent_Populate()")
  endif()

  string(TOLOWER ${contentName} contentNameLower)

  if(ARGN)
    # Direct form: everything needed is part of this call. The default
    # directories live under the current binary dir; ${ARGN} comes last so
    # the caller can override any of the ..._DIR values.
    __FetchContent_directPopulate(
      ${contentNameLower}
      SUBBUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-subbuild"
      SOURCE_DIR   "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-src"
      BINARY_DIR   "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-build"
      ${ARGN}
    )

    # Hand the resulting directories back to the caller.
    set(${contentNameLower}_SOURCE_DIR "${${contentNameLower}_SOURCE_DIR}" PARENT_SCOPE)
    set(${contentNameLower}_BINARY_DIR "${${contentNameLower}_BINARY_DIR}" PARENT_SCOPE)

    # Deliberately no global properties and no "populated" record here:
    # a direct call bypasses the declared-details mechanism, and only
    # content that honours that hierarchical mechanism is saved, so that
    # projects keep the ability to override details set in nested projects.
    return()
  endif()

  # Declared form. Guard against populating the same content twice in case
  # the caller forgot to check <lcName>_POPULATED first.
  FetchContent_GetProperties(${contentName})
  if(${contentNameLower}_POPULATED)
    message(FATAL_ERROR "Content ${contentName} already populated in ${${contentNameLower}_SOURCE_DIR}")
  endif()

  string(TOUPPER ${contentName} contentNameUpper)
  set(FETCHCONTENT_SOURCE_DIR_${contentNameUpper}
      "${FETCHCONTENT_SOURCE_DIR_${contentNameUpper}}"
      CACHE PATH "When not empty, overrides where to find pre-populated content for ${contentName}")

  if(FETCHCONTENT_SOURCE_DIR_${contentNameUpper})
    # An explicit source directory was supplied through the cache:
    # use it as-is and skip population.
    set(${contentNameLower}_SOURCE_DIR "${FETCHCONTENT_SOURCE_DIR_${contentNameUpper}}")
    set(${contentNameLower}_BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build")

  elseif(FETCHCONTENT_FULLY_DISCONNECTED)
    # Skip population and assume the sources are still there from an
    # earlier run.
    set(${contentNameLower}_SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src")
    set(${contentNameLower}_BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build")

  else()
    # Updates are disabled when either the global switch or the
    # per-content switch is on.
    option(FETCHCONTENT_UPDATES_DISCONNECTED_${contentNameUpper}
           "Enables UPDATE_DISCONNECTED behavior just for population of ${contentName}")
    set(disconnectUpdates False)
    if(FETCHCONTENT_UPDATES_DISCONNECTED OR
       FETCHCONTENT_UPDATES_DISCONNECTED_${contentNameUpper})
      set(disconnectUpdates True)
    endif()

    # Expands to nothing unless quiet mode is requested.
    unset(quietFlag)
    if(FETCHCONTENT_QUIET)
      set(quietFlag QUIET)
    endif()

    __FetchContent_getSavedDetails(${contentName} contentDetails)
    if("${contentDetails}" STREQUAL "")
      message(FATAL_ERROR "No details have been set for content: ${contentName}")
    endif()

    # The saved details go last so that they can override any of the
    # options set here (including SOURCE_DIR or BINARY_DIR).
    __FetchContent_directPopulate(
      ${contentNameLower}
      ${quietFlag}
      UPDATE_DISCONNECTED ${disconnectUpdates}
      SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-subbuild"
      SOURCE_DIR   "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src"
      BINARY_DIR   "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build"
      ${contentDetails}
    )
  endif()

  __FetchContent_setPopulated(
    ${contentName}
    ${${contentNameLower}_SOURCE_DIR}
    ${${contentNameLower}_BINARY_DIR}
  )

  # The variables returned here must match what FetchContent_GetProperties()
  # sets when it is called with just the content name.
  set(${contentNameLower}_SOURCE_DIR "${${contentNameLower}_SOURCE_DIR}" PARENT_SCOPE)
  set(${contentNameLower}_BINARY_DIR "${${contentNameLower}_BINARY_DIR}" PARENT_SCOPE)
  set(${contentNameLower}_POPULATED True PARENT_SCOPE)

endfunction()
|
||||
@ -0,0 +1,21 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
cmake_minimum_required(VERSION ${CMAKE_VERSION})

# The project and the ExternalProject target share the name
# "<contentName>-populate" so that any error message produced during the
# sub-build makes it obvious which content was being worked on.
project(${contentName}-populate NONE)

include(ExternalProject)

# Only the download/update side of ExternalProject is wanted here, so the
# configure, build, install and test steps are all set to empty commands.
# NOTE(review): the ${...} references look like placeholders substituted
# when this template is instantiated - confirm against the caller.
ExternalProject_Add(${contentName}-populate
  ${ARG_EXTRA}
  SOURCE_DIR        "${ARG_SOURCE_DIR}"
  BINARY_DIR        "${ARG_BINARY_DIR}"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
)
|
||||
@ -0,0 +1,5 @@
|
||||
|
||||
## FetchContent
|
||||
|
||||
`FetchContent.cmake` and `FetchContent/CMakeLists.cmake.in`
|
||||
are copied from `cmake/3.11.0/share/cmake-3.11/Modules`.
|
||||
@ -0,0 +1,120 @@
|
||||
# Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang)
|
||||
|
||||
import glob
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import setuptools
|
||||
from setuptools.command.build_ext import build_ext
|
||||
|
||||
|
||||
def is_for_pypi():
    """Tell whether the KALDI_NATIVE_FBANK_IS_FOR_PYPI variable is set.

    Only the presence of the environment variable matters; its value
    (even an empty string) is ignored.
    """
    return "KALDI_NATIVE_FBANK_IS_FOR_PYPI" in os.environ
|
||||
|
||||
|
||||
def is_macos():
    """Return True when platform.system() reports "Darwin"."""
    system_name = platform.system()
    return system_name == "Darwin"
|
||||
|
||||
|
||||
def is_windows():
    """Return True when platform.system() reports "Windows"."""
    system_name = platform.system()
    return system_name == "Windows"
|
||||
|
||||
|
||||
# Optional wheel support: when the `wheel` package is missing,
# `bdist_wheel` is simply None.
try:
    from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
except ImportError:
    bdist_wheel = None
else:

    class bdist_wheel(_bdist_wheel):
        """bdist_wheel variant that decides whether the wheel is tagged pure."""

        def finalize_options(self):
            super().finalize_options()
            # Pure (name ends with -pyXX-none-any.whl) only for PyPI builds
            # that are not made on macOS; otherwise the wheel carries a
            # platform tag such as -linux_x86_64.whl.
            self.root_is_pure = is_for_pypi() and not is_macos()
|
||||
|
||||
|
||||
def cmake_extension(name, *args, **kwargs) -> setuptools.Extension:
    """Create a setuptools Extension with no sources and language "c++".

    Any "language" passed by the caller is overwritten. The source list is
    empty; in this file the actual build is driven by
    BuildExtension.build_extension.
    """
    kwargs["language"] = "c++"
    return setuptools.Extension(name, [], *args, **kwargs)
|
||||
|
||||
|
||||
class BuildExtension(build_ext):
    """build_ext command that configures, builds and installs via CMake."""

    def build_extension(self, ext: setuptools.extension.Extension):
        """Run CMake (and make on non-Windows) to build and install the project.

        Environment variables read:
          KALDI_NATIVE_FBANK_CMAKE_ARGS: extra arguments for the CMake
            configure step; "-DCMAKE_BUILD_TYPE=Release" is used when it
            is unset or empty.
          KALDI_NATIVE_FBANK_MAKE_ARGS: extra arguments for `make`
            (non-Windows only).
          MAKEFLAGS: only consulted to decide whether to print the
            "fast compilation" hint.

        Args:
          ext: The extension being built. It is not inspected here.

        Raises:
          Exception: If the configure, build or install step exits with a
            non-zero status.
        """
        # Local import: only this method needs shlex.
        import shlex

        def quote(arg) -> str:
            # shlex.quote() produces POSIX-shell quoting; cmd.exe only
            # understands double quotes.
            text = str(arg)
            return f'"{text}"' if is_windows() else shlex.quote(text)

        # e.g. build/temp.linux-x86_64-3.8
        os.makedirs(self.build_temp, exist_ok=True)

        # e.g. build/lib.linux-x86_64-3.8
        os.makedirs(self.build_lib, exist_ok=True)

        install_dir = Path(self.build_lib).resolve() / "kaldi_native_fbank"
        kaldi_native_fbank_dir = Path(__file__).parent.parent.resolve()

        # Quote every path that ends up in a shell command so that
        # directories containing spaces do not split into several arguments.
        build_dir = quote(self.build_temp)
        source_dir = quote(kaldi_native_fbank_dir)

        cmake_args = os.environ.get("KALDI_NATIVE_FBANK_CMAKE_ARGS", "")
        make_args = os.environ.get("KALDI_NATIVE_FBANK_MAKE_ARGS", "")
        system_make_args = os.environ.get("MAKEFLAGS", "")

        if cmake_args == "":
            cmake_args = "-DCMAKE_BUILD_TYPE=Release"

        extra_cmake_args = " " + quote(f"-DCMAKE_INSTALL_PREFIX={install_dir}") + " "
        extra_cmake_args += " -DKALDI_NATIVE_FBANK_BUILD_TESTS=OFF "

        if "PYTHON_EXECUTABLE" not in cmake_args:
            print(f"Setting PYTHON_EXECUTABLE to {sys.executable}")
            cmake_args += " " + quote(f"-DPYTHON_EXECUTABLE={sys.executable}")

        cmake_args += extra_cmake_args

        if is_windows():
            configure_cmd = f"cmake {cmake_args} -B {build_dir} -S {source_dir}"
            install_cmd = (
                f"cmake --build {build_dir} --target install --config Release -- -m"
            )
            print(f"build command is:\n{configure_cmd}\n{install_cmd}")

            if os.system(configure_cmd) != 0:
                raise Exception("Failed to configure kaldi_native_fbank")

            if os.system(install_cmd) != 0:
                raise Exception("Failed to install kaldi_native_fbank")
        else:
            if make_args == "" and system_make_args == "":
                print("For fast compilation, run:")
                print(
                    'export KALDI_NATIVE_FBANK_MAKE_ARGS="-j"; python setup.py install'
                )

            # Chain the steps with && so that a failing `cd` or `cmake`
            # stops the script; with newline-separated commands the shell
            # would carry on and only the status of `make` would be seen.
            build_cmd = " && ".join(
                [
                    f"cd {build_dir}",
                    f"cmake {cmake_args} {source_dir}",
                    f"make {make_args} install",
                ]
            )
            print(f"build command is:\n{build_cmd}")

            ret = os.system(build_cmd)
            if ret != 0:
                raise Exception(
                    "\nBuild kaldi-native-fbank failed. Please check the error message.\n"
                    "You can ask for help by creating an issue on GitHub.\n"
                    "\nClick:\n\thttps://github.com/csukuangfj/kaldi-native-fbank/issues/new\n"  # noqa
                )
|
||||
@ -0,0 +1,57 @@
|
||||
# Downloads googletest release 1.10.0 with FetchContent (verified against
# the SHA256 below) and adds it to the build with EXCLUDE_FROM_ALL.
function(download_googltest)
  if(CMAKE_VERSION VERSION_LESS 3.11)
    # Fall back to the FetchContent module bundled under cmake/Modules.
    # PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps this working
    # when the project is embedded in another build via add_subdirectory(),
    # where CMAKE_SOURCE_DIR refers to the outermost project.
    list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
  endif()

  include(FetchContent)

  set(googletest_URL "https://github.com/google/googletest/archive/release-1.10.0.tar.gz")
  set(googletest_HASH "SHA256=9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb")

  # googletest build options, forced into the cache so they take effect
  # when its CMakeLists.txt is processed below.
  set(BUILD_GMOCK ON CACHE BOOL "" FORCE)
  set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
  set(gtest_disable_pthreads ON CACHE BOOL "" FORCE)
  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

  FetchContent_Declare(googletest
    URL               ${googletest_URL}
    URL_HASH          ${googletest_HASH}
  )

  FetchContent_GetProperties(googletest)
  if(NOT googletest_POPULATED)
    message(STATUS "Downloading googletest from ${googletest_URL}")
    FetchContent_Populate(googletest)
  endif()
  message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}")
  message(STATUS "googletest's binary dir is ${googletest_BINARY_DIR}")

  if(APPLE)
    set(CMAKE_MACOSX_RPATH ON) # to solve the following warning on macOS
  endif()
  #[==[
  -- Generating done
    Policy CMP0042 is not set: MACOSX_RPATH is enabled by default.  Run "cmake
    --help-policy CMP0042" for policy details.  Use the cmake_policy command to
    set the policy and suppress this warning.

    MACOSX_RPATH is not specified for the following targets:

      gmock
      gmock_main
      gtest
      gtest_main

  This warning is for project developers.  Use -Wno-dev to suppress it.
  ]==]

  add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)

  # Make both the googletest and googlemock headers available to anything
  # that links against gtest.
  target_include_directories(gtest
    INTERFACE
      ${googletest_SOURCE_DIR}/googletest/include
      ${googletest_SOURCE_DIR}/googlemock/include
  )
endfunction()

download_googltest()
|
||||
@ -0,0 +1,35 @@
|
||||
# Downloads pybind11 v2.9.2 with FetchContent (verified against the SHA256
# below) and adds it to the build with EXCLUDE_FROM_ALL.
function(download_pybind11)
  if(CMAKE_VERSION VERSION_LESS 3.11)
    # Fall back to the FetchContent module bundled under cmake/Modules.
    # PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps this working
    # when the project is embedded in another build via add_subdirectory(),
    # where CMAKE_SOURCE_DIR refers to the outermost project.
    list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
  endif()

  include(FetchContent)

  set(pybind11_URL  "https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.tar.gz")
  set(pybind11_HASH "SHA256=6bd528c4dbe2276635dc787b6b1f2e5316cf6b49ee3e150264e455a0d68d19c1")

  # If you don't have access to the Internet, please download the archive
  # to your local drive and add its path to the list below. The first
  # candidate that exists replaces the remote URL.
  set(pybind11_local_candidates
    "/star-fj/fangjun/download/github/pybind11-2.9.2.tar.gz"
    "/Users/fangjun/Downloads/pybind11-2.9.2.tar.gz"
    "/tmp/pybind11-2.9.2.tar.gz"
  )
  foreach(candidate IN LISTS pybind11_local_candidates)
    if(EXISTS "${candidate}")
      set(pybind11_URL "file://${candidate}")
      break()
    endif()
  endforeach()

  FetchContent_Declare(pybind11
    URL               ${pybind11_URL}
    URL_HASH          ${pybind11_HASH}
  )

  FetchContent_GetProperties(pybind11)
  if(NOT pybind11_POPULATED)
    message(STATUS "Downloading pybind11 from ${pybind11_URL}")
    FetchContent_Populate(pybind11)
  endif()
  message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}")
  add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL)
endfunction()

download_pybind11()
|
||||
@ -0,0 +1,8 @@
|
||||
# The C++ sources are always built.
add_subdirectory(csrc)

# The Python subdirectory is only added when requested.
if(NOT KALDI_NATIVE_FBANK_BUILD_PYTHON)
  message(STATUS "Disable building Python")
else()
  message(STATUS "Building Python")
  add_subdirectory(python)
endif()
|
||||
@ -0,0 +1,8 @@
|
||||
# Static library named "csrc" built from the sources in this directory.
add_library(csrc
  STATIC
    feature-fbank.cc
    feature-functions.cc
    feature-window.cc
    fftsg.c
    mel-computations.cc
    online-feature.cc
    rfft.cc
)
|
||||
@ -0,0 +1,93 @@
|
||||
|
||||
# Headers are included relative to the project root.
include_directories(${PROJECT_SOURCE_DIR})

set(sources
  feature-fbank.cc
  feature-functions.cc
  feature-window.cc
  fftsg.c
  mel-computations.cc
  online-feature.cc
  rfft.cc
)
# log.cc is only compiled when checks are enabled.
if(KALDI_NATIVE_FBANK_ENABLE_CHECK)
  list(APPEND sources log.cc)
endif()

add_library(kaldi-native-fbank-core ${sources})

if(KALDI_NATIVE_FBANK_ENABLE_CHECK)
  target_compile_definitions(kaldi-native-fbank-core PUBLIC KNF_ENABLE_CHECK=1)

  if(KNF_HAVE_EXECINFO_H)
    target_compile_definitions(kaldi-native-fbank-core PRIVATE KNF_HAVE_EXECINFO_H=1)
  endif()

  if(KNF_HAVE_CXXABI_H)
    target_compile_definitions(kaldi-native-fbank-core PRIVATE KNF_HAVE_CXXABI_H=1)
  endif()

  # We are using std::call_once() in log.h, which requires us to link
  # with -pthread (not applicable on Windows).
  if(NOT WIN32)
    target_link_libraries(kaldi-native-fbank-core -pthread)
  endif()
endif()

if(KALDI_NATIVE_FBANK_BUILD_TESTS)
  add_executable(test-online-fbank test-online-fbank.cc)
  target_link_libraries(test-online-fbank kaldi-native-fbank-core)
endif()
|
||||
|
||||
# kaldi_native_fbank_add_test(<source>)
#
# Builds <source> into an executable named after the file (without its
# extension), links it against the core library and googletest, and
# registers it as the test "Test.<name>".
function(kaldi_native_fbank_add_test source)
  get_filename_component(test_target ${source} NAME_WE)

  add_executable(${test_target} "${source}")
  target_link_libraries(${test_target}
    PRIVATE kaldi-native-fbank-core gtest gtest_main)

  add_test(NAME "Test.${test_target}" COMMAND $<TARGET_FILE:${test_target}>)
endfunction()
|
||||
|
||||
# please sort the source files alphabetically
set(test_srcs
  # test-online-feature.cc
  test-log.cc
  test-rfft.cc
)

# The library is always installed.
install(TARGETS kaldi-native-fbank-core DESTINATION lib)

# Test executables are only registered, and test-online-fbank only
# installed, when tests are enabled.
if(KALDI_NATIVE_FBANK_BUILD_TESTS)
  foreach(source IN LISTS test_srcs)
    kaldi_native_fbank_add_test(${source})
  endforeach()

  install(TARGETS test-online-fbank DESTINATION bin)
endif()
|
||||
|
||||
# Stage the public headers under <build>/include/kaldi-native-fbank/csrc.
# file(MAKE_DIRECTORY) takes only directory names: a DESTINATION keyword
# here would itself be treated as a directory to create.
file(MAKE_DIRECTORY
  ${PROJECT_BINARY_DIR}/include/kaldi-native-fbank/csrc
)

# Collect every header below this directory.
file(GLOB_RECURSE all_headers *.h)

file(COPY
  ${all_headers}
  DESTINATION
    ${PROJECT_BINARY_DIR}/include/kaldi-native-fbank/csrc
)

# Install the same headers alongside the library.
install(FILES ${all_headers}
  DESTINATION include/kaldi-native-fbank/csrc
)
|
||||
@ -0,0 +1,120 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/feature-fbank.cc
|
||||
//
|
||||
#include "kaldi-native-fbank/csrc/feature-fbank.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-functions.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Replaces each of the first n elements of in_out with its square root.
static void Sqrt(float *in_out, int32_t n) {
  float *const last = in_out + n;
  for (float *cur = in_out; cur != last; ++cur) {
    *cur = std::sqrt(*cur);
  }
}
|
||||
|
||||
// Streams the text produced by opts.ToString() and returns the stream.
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
  return os << opts.ToString();
}
|
||||
|
||||
// Stores a copy of the options and sets up the real FFT for the padded
// window size. The log of the energy floor is only computed when the
// floor is strictly positive.
FbankComputer::FbankComputer(const FbankOptions &opts)
    : opts_(opts), rfft_(opts.frame_opts.PaddedWindowSize()) {
  const float energy_floor = opts.energy_floor;
  if (energy_floor > 0.0f) {
    log_energy_floor_ = logf(energy_floor);
  }

  // The filterbank for VTLN warping factor 1.0 is certain to be needed;
  // this call creates and caches it up front.
  GetMelBanks(1.0f);
}
|
||||
|
||||
// Frees every cached MelBanks object; the map owns the pointers it holds.
FbankComputer::~FbankComputer() {
  for (auto &entry : mel_banks_) {
    delete entry.second;
  }
}
|
||||
|
||||
// Returns the MelBanks for the given VTLN warp factor, creating and
// caching it on first use. The returned pointer stays owned by this object.
const MelBanks *FbankComputer::GetMelBanks(float vtln_warp) {
  auto cached = mel_banks_.find(vtln_warp);
  if (cached != mel_banks_.end()) {
    return cached->second;
  }

  MelBanks *created =
      new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp);
  mel_banks_[vtln_warp] = created;
  return created;
}
|
||||
|
||||
void FbankComputer::Compute(float signal_raw_log_energy, float vtln_warp,
|
||||
std::vector<float> *signal_frame, float *feature) {
|
||||
const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
|
||||
|
||||
KNF_CHECK_EQ(signal_frame->size(), opts_.frame_opts.PaddedWindowSize());
|
||||
|
||||
// Compute energy after window function (not the raw one).
|
||||
if (opts_.use_energy && !opts_.raw_energy) {
|
||||
signal_raw_log_energy = std::log(
|
||||
std::max<float>(InnerProduct(signal_frame->data(), signal_frame->data(),
|
||||
signal_frame->size()),
|
||||
std::numeric_limits<float>::epsilon()));
|
||||
}
|
||||
rfft_.Compute(signal_frame->data()); // signal_frame is modified in-place
|
||||
ComputePowerSpectrum(signal_frame);
|
||||
|
||||
// Use magnitude instead of power if requested.
|
||||
if (!opts_.use_power) {
|
||||
Sqrt(signal_frame->data(), signal_frame->size() / 2 + 1);
|
||||
}
|
||||
|
||||
int32_t mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
|
||||
|
||||
// Its length is opts_.mel_opts.num_bins
|
||||
float *mel_energies = feature + mel_offset;
|
||||
|
||||
// Sum with mel filter banks over the power spectrum
|
||||
mel_banks.Compute(signal_frame->data(), mel_energies);
|
||||
|
||||
if (opts_.use_log_fbank) {
|
||||
// Avoid log of zero (which should be prevented anyway by dithering).
|
||||
for (int32_t i = 0; i != opts_.mel_opts.num_bins; ++i) {
|
||||
auto t = std::max(mel_energies[i], std::numeric_limits<float>::epsilon());
|
||||
mel_energies[i] = std::log(t);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy energy as first value (or the last, if htk_compat == true).
|
||||
if (opts_.use_energy) {
|
||||
if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
|
||||
signal_raw_log_energy = log_energy_floor_;
|
||||
}
|
||||
int32_t energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
|
||||
feature[energy_index] = signal_raw_log_energy;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,134 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/feature-fbank.h
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
|
||||
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||
#include "kaldi-native-fbank/csrc/mel-computations.h"
|
||||
#include "kaldi-native-fbank/csrc/rfft.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
struct FbankOptions {
|
||||
FrameExtractionOptions frame_opts;
|
||||
MelBanksOptions mel_opts;
|
||||
// append an extra dimension with energy to the filter banks
|
||||
bool use_energy = false;
|
||||
float energy_floor = 0.0f; // active iff use_energy==true
|
||||
|
||||
// If true, compute log_energy before preemphasis and windowing
|
||||
// If false, compute log_energy after preemphasis ans windowing
|
||||
bool raw_energy = true; // active iff use_energy==true
|
||||
|
||||
// If true, put energy last (if using energy)
|
||||
// If false, put energy first
|
||||
bool htk_compat = false; // active iff use_energy==true
|
||||
|
||||
// if true (default), produce log-filterbank, else linear
|
||||
bool use_log_fbank = true;
|
||||
|
||||
// if true (default), use power in filterbank
|
||||
// analysis, else magnitude.
|
||||
bool use_power = true;
|
||||
|
||||
FbankOptions() { mel_opts.num_bins = 23; }
|
||||
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
os << "frame_opts: \n";
|
||||
os << frame_opts << "\n";
|
||||
os << "\n";
|
||||
|
||||
os << "mel_opts: \n";
|
||||
os << mel_opts << "\n";
|
||||
|
||||
os << "use_energy: " << use_energy << "\n";
|
||||
os << "energy_floor: " << energy_floor << "\n";
|
||||
os << "raw_energy: " << raw_energy << "\n";
|
||||
os << "htk_compat: " << htk_compat << "\n";
|
||||
os << "use_log_fbank: " << use_log_fbank << "\n";
|
||||
os << "use_power: " << use_power << "\n";
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Writes opts.ToString() to os and returns os.
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);
|
||||
|
||||
class FbankComputer {
|
||||
public:
|
||||
using Options = FbankOptions;
|
||||
|
||||
explicit FbankComputer(const FbankOptions &opts);
|
||||
~FbankComputer();
|
||||
|
||||
int32_t Dim() const {
|
||||
return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
|
||||
}
|
||||
|
||||
// if true, compute log_energy_pre_window but after dithering and dc removal
|
||||
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
|
||||
|
||||
const FrameExtractionOptions &GetFrameOptions() const {
|
||||
return opts_.frame_opts;
|
||||
}
|
||||
|
||||
const FbankOptions &GetOptions() const { return opts_; }
|
||||
|
||||
/**
|
||||
Function that computes one frame of features from
|
||||
one frame of signal.
|
||||
|
||||
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
|
||||
prior to windowing and pre-emphasis, or
|
||||
log(numeric_limits<float>::min()), whichever is greater. Must be
|
||||
ignored by this function if this class returns false from
|
||||
this->NeedsRawLogEnergy().
|
||||
@param [in] vtln_warp The VTLN warping factor that the user wants
|
||||
to be applied when computing features for this utterance. Will
|
||||
normally be 1.0, meaning no warping is to be done. The value will
|
||||
be ignored for feature types that don't support VLTN, such as
|
||||
spectrogram features.
|
||||
@param [in] signal_frame One frame of the signal,
|
||||
as extracted using the function ExtractWindow() using the options
|
||||
returned by this->GetFrameOptions(). The function will use the
|
||||
vector as a workspace, which is why it's a non-const pointer.
|
||||
@param [out] feature Pointer to a vector of size this->Dim(), to which
|
||||
the computed feature will be written. It should be pre-allocated.
|
||||
*/
|
||||
void Compute(float signal_raw_log_energy, float vtln_warp,
|
||||
std::vector<float> *signal_frame, float *feature);
|
||||
|
||||
private:
|
||||
const MelBanks *GetMelBanks(float vtln_warp);
|
||||
|
||||
FbankOptions opts_;
|
||||
float log_energy_floor_;
|
||||
std::map<float, MelBanks *> mel_banks_; // float is VTLN coefficient.
|
||||
Rfft rfft_;
|
||||
};
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
|
||||
@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/feature-functions.cc
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-functions.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Converts, in place, a packed half-spectrum into a power spectrum.
//
// On input *complex_fft holds n values laid out as
//   [real0, realN/2, real1, im1, real2, im2, ...].
// On output elements 0 .. n/2 hold real^2 + im^2 for each bin; the
// remaining elements are left as they were. Vectors with fewer than two
// elements carry no packed spectrum and are left untouched.
void ComputePowerSpectrum(std::vector<float> *complex_fft) {
  const int32_t dim = static_cast<int32_t>(complex_fft->size());
  if (dim < 2) {
    // p[0] and p[1] are read below; nothing to do without them.
    return;
  }

  float *p = complex_fft->data();
  const int32_t half_dim = dim / 2;

  // Bin 0 and the last bin are purely real and share the first two slots,
  // so save them before the loop overwrites p[1].
  const float first_energy = p[0] * p[0];
  const float last_energy = p[1] * p[1];  // handle this special case

  // Writing p[i] is safe: it always trails the pair (p[2i], p[2i+1])
  // that is still to be read.
  for (int32_t i = 1; i < half_dim; ++i) {
    const float real = p[i * 2];
    const float im = p[i * 2 + 1];
    p[i] = real * real + im * im;
  }

  p[0] = first_energy;
  p[half_dim] = last_energy;  // Will actually never be used, and anyway
  // if the signal has been bandlimited sensibly this should be zero.
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,38 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/feature-functions.h
|
||||
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
|
||||
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
|
||||
|
||||
#include <vector>
|
||||
namespace knf {
|
||||
|
||||
// ComputePowerSpectrum takes a complex FFT (as produced by the FFT
// functions in csrc/rfft.h) and converts it, in place, into
// a power spectrum.  If the complex FFT is a vector of size n (representing
// half of the complex FFT of a real signal of size n, as described there),
// this function computes in the first (n/2) + 1 elements of it, the
// energies of the fft bins from zero to the Nyquist frequency.  Contents of the
// remaining (n/2) - 1 elements are undefined at output.
|
||||
|
||||
void ComputePowerSpectrum(std::vector<float> *complex_fft);
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
|
||||
@ -0,0 +1,247 @@
|
||||
// kaldi-native-fbank/csrc/feature-window.cc
|
||||
//
|
||||
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/feature-window.cc
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#ifndef M_2PI
|
||||
#define M_2PI 6.283185307179586476925286766559005
|
||||
#endif
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Streams the text produced by opts.ToString() and returns the stream.
std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts) {
  return os << opts.ToString();
}
|
||||
|
||||
// Precomputes the window coefficients, one per sample of the frame, for
// the window type named in opts.window_type. An unknown type is reported
// through KNF_LOG(FATAL).
FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts)
    : window_(opts.WindowSize()) {
  const int32_t frame_length = opts.WindowSize();
  KNF_CHECK_GT(frame_length, 0);

  const auto &type = opts.window_type;
  float *coeffs = window_.data();

  // Angular step such that the last sample corresponds to 2*pi.
  const double a = M_2PI / (frame_length - 1);

  for (int32_t n = 0; n < frame_length; n++) {
    const double i_fl = static_cast<double>(n);

    if (type == "hanning") {
      coeffs[n] = 0.5 - 0.5 * cos(a * i_fl);
    } else if (type == "sine") {
      // Compared with the usual textbook form, note that
      // 0.5 * a = M_PI / (frame_length - 1).
      coeffs[n] = sin(0.5 * a * i_fl);
    } else if (type == "hamming") {
      coeffs[n] = 0.54 - 0.46 * cos(a * i_fl);
    } else if (type == "povey") {
      // like hamming but goes to zero at edges.
      coeffs[n] = pow(0.5 - 0.5 * cos(a * i_fl), 0.85);
    } else if (type == "rectangular") {
      coeffs[n] = 1.0;
    } else if (type == "blackman") {
      coeffs[n] = opts.blackman_coeff - 0.5 * cos(a * i_fl) +
                  (0.5 - opts.blackman_coeff) * cos(2 * a * i_fl);
    } else {
      KNF_LOG(FATAL) << "Invalid window type " << opts.window_type;
    }
  }
}
|
||||
|
||||
void FeatureWindowFunction::Apply(float *wave) const {
|
||||
int32_t window_size = window_.size();
|
||||
const float *p = window_.data();
|
||||
for (int32_t k = 0; k != window_size; ++k) {
|
||||
wave[k] *= p[k];
|
||||
}
|
||||
}
|
||||
|
||||
int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts) {
|
||||
int64_t frame_shift = opts.WindowShift();
|
||||
if (opts.snip_edges) {
|
||||
return frame * frame_shift;
|
||||
} else {
|
||||
int64_t midpoint_of_frame = frame_shift * frame + frame_shift / 2,
|
||||
beginning_of_frame = midpoint_of_frame - opts.WindowSize() / 2;
|
||||
return beginning_of_frame;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
|
||||
bool flush /*= true*/) {
|
||||
int64_t frame_shift = opts.WindowShift();
|
||||
int64_t frame_length = opts.WindowSize();
|
||||
if (opts.snip_edges) {
|
||||
// with --snip-edges=true (the default), we use a HTK-like approach to
|
||||
// determining the number of frames-- all frames have to fit completely into
|
||||
// the waveform, and the first frame begins at sample zero.
|
||||
if (num_samples < frame_length)
|
||||
return 0;
|
||||
else
|
||||
return (1 + ((num_samples - frame_length) / frame_shift));
|
||||
// You can understand the expression above as follows: 'num_samples -
|
||||
// frame_length' is how much room we have to shift the frame within the
|
||||
// waveform; 'frame_shift' is how much we shift it each time; and the ratio
|
||||
// is how many times we can shift it (integer arithmetic rounds down).
|
||||
} else {
|
||||
// if --snip-edges=false, the number of frames is determined by rounding the
|
||||
// (file-length / frame-shift) to the nearest integer. The point of this
|
||||
// formula is to make the number of frames an obvious and predictable
|
||||
// function of the frame shift and signal length, which makes many
|
||||
// segmentation-related questions simpler.
|
||||
//
|
||||
// Because integer division in C++ rounds toward zero, we add (half the
|
||||
// frame-shift minus epsilon) before dividing, to have the effect of
|
||||
// rounding towards the closest integer.
|
||||
int32_t num_frames = (num_samples + (frame_shift / 2)) / frame_shift;
|
||||
|
||||
if (flush) return num_frames;
|
||||
|
||||
// note: 'end' always means the last plus one, i.e. one past the last.
|
||||
int64_t end_sample_of_last_frame =
|
||||
FirstSampleOfFrame(num_frames - 1, opts) + frame_length;
|
||||
|
||||
// the following code is optimized more for clarity than efficiency.
|
||||
// If flush == false, we can't output frames that extend past the end
|
||||
// of the signal.
|
||||
while (num_frames > 0 && end_sample_of_last_frame > num_samples) {
|
||||
num_frames--;
|
||||
end_sample_of_last_frame -= frame_shift;
|
||||
}
|
||||
return num_frames;
|
||||
}
|
||||
}
|
||||
|
||||
void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
|
||||
int32_t f, const FrameExtractionOptions &opts,
|
||||
const FeatureWindowFunction &window_function,
|
||||
std::vector<float> *window,
|
||||
float *log_energy_pre_window /*= nullptr*/) {
|
||||
KNF_CHECK(sample_offset >= 0 && wave.size() != 0);
|
||||
|
||||
int32_t frame_length = opts.WindowSize();
|
||||
int32_t frame_length_padded = opts.PaddedWindowSize();
|
||||
|
||||
int64_t num_samples = sample_offset + wave.size();
|
||||
int64_t start_sample = FirstSampleOfFrame(f, opts);
|
||||
int64_t end_sample = start_sample + frame_length;
|
||||
|
||||
if (opts.snip_edges) {
|
||||
KNF_CHECK(start_sample >= sample_offset && end_sample <= num_samples);
|
||||
} else {
|
||||
KNF_CHECK(sample_offset == 0 || start_sample >= sample_offset);
|
||||
}
|
||||
|
||||
if (window->size() != frame_length_padded) {
|
||||
window->resize(frame_length_padded);
|
||||
}
|
||||
|
||||
// wave_start and wave_end are start and end indexes into 'wave', for the
|
||||
// piece of wave that we're trying to extract.
|
||||
int32_t wave_start = int32_t(start_sample - sample_offset);
|
||||
int32_t wave_end = wave_start + frame_length;
|
||||
|
||||
if (wave_start >= 0 && wave_end <= wave.size()) {
|
||||
// the normal case-- no edge effects to consider.
|
||||
std::copy(wave.begin() + wave_start,
|
||||
wave.begin() + wave_start + frame_length, window->data());
|
||||
} else {
|
||||
// Deal with any end effects by reflection, if needed. This code will only
|
||||
// be reached for about two frames per utterance, so we don't concern
|
||||
// ourselves excessively with efficiency.
|
||||
int32_t wave_dim = wave.size();
|
||||
for (int32_t s = 0; s < frame_length; ++s) {
|
||||
int32_t s_in_wave = s + wave_start;
|
||||
while (s_in_wave < 0 || s_in_wave >= wave_dim) {
|
||||
// reflect around the beginning or end of the wave.
|
||||
// e.g. -1 -> 0, -2 -> 1.
|
||||
// dim -> dim - 1, dim + 1 -> dim - 2.
|
||||
// the code supports repeated reflections, although this
|
||||
// would only be needed in pathological cases.
|
||||
if (s_in_wave < 0)
|
||||
s_in_wave = -s_in_wave - 1;
|
||||
else
|
||||
s_in_wave = 2 * wave_dim - 1 - s_in_wave;
|
||||
}
|
||||
(*window)[s] = wave[s_in_wave];
|
||||
}
|
||||
}
|
||||
|
||||
ProcessWindow(opts, window_function, window->data(), log_energy_pre_window);
|
||||
}
|
||||
|
||||
// Subtracts the arithmetic mean of d[0..n) from every element, in place.
static void RemoveDcOffset(float *d, int32_t n) {
  float *const end = d + n;

  float total = 0;
  for (const float *p = d; p != end; ++p) {
    total += *p;
  }

  const float mean = total / n;

  for (float *p = d; p != end; ++p) {
    *p -= mean;
  }
}
|
||||
|
||||
// Returns sum over i in [0, n) of a[i] * b[i], accumulated in single
// precision in index order.
float InnerProduct(const float *a, const float *b, int32_t n) {
  float acc = 0;
  for (const float *const a_end = a + n; a != a_end; ++a, ++b) {
    acc += *a * *b;
  }
  return acc;
}
|
||||
|
||||
// Applies first-order pre-emphasis in place:
//   d[i] -= preemph_coeff * d[i - 1]   for i = n - 1 down to 1
//   d[0] -= preemph_coeff * d[0]
// A coefficient of exactly 0 leaves the data untouched; otherwise the
// coefficient is checked to lie in [0, 1].
static void Preemphasize(float *d, int32_t n, float preemph_coeff) {
  if (preemph_coeff == 0.0) return;

  KNF_CHECK(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);

  // Walk backwards so that d[i - 1] is still the original sample when read.
  int32_t i = n;
  while (--i > 0) {
    d[i] -= preemph_coeff * d[i - 1];
  }
  // The first sample has no predecessor, so it is emphasized against itself.
  d[0] -= preemph_coeff * d[0];
}
|
||||
|
||||
void ProcessWindow(const FrameExtractionOptions &opts,
|
||||
const FeatureWindowFunction &window_function, float *window,
|
||||
float *log_energy_pre_window /*= nullptr*/) {
|
||||
int32_t frame_length = opts.WindowSize();
|
||||
|
||||
// // TODO(fangjun): Remove dither
|
||||
// KNF_CHECK_EQ(opts.dither, 0);
|
||||
|
||||
// Add dither function
|
||||
// https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-window.cc
|
||||
if (opts.dither!=0) {
|
||||
// kaldi::RandomState rstate;
|
||||
// rstate.seed=0;
|
||||
// for (int32 i = 0; i < frame_length; i++)
|
||||
// window[i] += RandGauss(&rstate) * opts.dither;
|
||||
}
|
||||
|
||||
if (opts.remove_dc_offset) {
|
||||
RemoveDcOffset(window, frame_length);
|
||||
}
|
||||
|
||||
if (log_energy_pre_window != NULL) {
|
||||
float energy = std::max<float>(InnerProduct(window, window, frame_length),
|
||||
std::numeric_limits<float>::epsilon());
|
||||
*log_energy_pre_window = std::log(energy);
|
||||
}
|
||||
|
||||
if (opts.preemph_coeff != 0.0) {
|
||||
Preemphasize(window, frame_length, opts.preemph_coeff);
|
||||
}
|
||||
|
||||
window_function.Apply(window);
|
||||
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,178 @@
|
||||
// kaldi-native-fbank/csrc/feature-window.h
|
||||
//
|
||||
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/feature-window.h
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
|
||||
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/log.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Returns the smallest power of two that is >= n. `n` is checked to be > 0.
// (Bit trick copied from kaldi/src/base/kaldi-math.cc.)
inline int32_t RoundUpToNearestPowerOfTwo(int32_t n) {
  KNF_CHECK_GT(n, 0);
  // Smear the highest set bit of (n - 1) into every lower bit position, then
  // add one to reach the next power of two.
  int32_t v = n - 1;
  for (int32_t shift = 1; shift <= 16; shift <<= 1) {
    v |= v >> shift;
  }
  return v + 1;
}
|
||||
|
||||
struct FrameExtractionOptions {
|
||||
float samp_freq = 16000;
|
||||
float frame_shift_ms = 10.0f; // in milliseconds.
|
||||
float frame_length_ms = 25.0f; // in milliseconds.
|
||||
float dither = 1.0f; // Amount of dithering, 0.0 means no dither.
|
||||
float preemph_coeff = 0.97f; // Preemphasis coefficient.
|
||||
bool remove_dc_offset = true; // Subtract mean of wave before FFT.
|
||||
std::string window_type = "povey"; // e.g. Hamming window
|
||||
// May be "hamming", "rectangular", "povey", "hanning", "sine", "blackman"
|
||||
// "povey" is a window I made to be similar to Hamming but to go to zero at
|
||||
// the edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85) I just don't think the
|
||||
// Hamming window makes sense as a windowing function.
|
||||
bool round_to_power_of_two = true;
|
||||
float blackman_coeff = 0.42f;
|
||||
bool snip_edges = true;
|
||||
// bool allow_downsample = false;
|
||||
// bool allow_upsample = false;
|
||||
|
||||
// Used for streaming feature extraction. It indicates the number
|
||||
// of feature frames to keep in the recycling vector. -1 means to
|
||||
// keep all feature frames.
|
||||
int32_t max_feature_vectors = -1;
|
||||
|
||||
int32_t WindowShift() const {
|
||||
return static_cast<int32_t>(samp_freq * 0.001f * frame_shift_ms);
|
||||
}
|
||||
int32_t WindowSize() const {
|
||||
return static_cast<int32_t>(samp_freq * 0.001f * frame_length_ms);
|
||||
}
|
||||
int32_t PaddedWindowSize() const {
|
||||
return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize())
|
||||
: WindowSize());
|
||||
}
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
#define KNF_PRINT(x) os << #x << ": " << x << "\n"
|
||||
KNF_PRINT(samp_freq);
|
||||
KNF_PRINT(frame_shift_ms);
|
||||
KNF_PRINT(frame_length_ms);
|
||||
KNF_PRINT(dither);
|
||||
KNF_PRINT(preemph_coeff);
|
||||
KNF_PRINT(remove_dc_offset);
|
||||
KNF_PRINT(window_type);
|
||||
KNF_PRINT(round_to_power_of_two);
|
||||
KNF_PRINT(blackman_coeff);
|
||||
KNF_PRINT(snip_edges);
|
||||
// KNF_PRINT(allow_downsample);
|
||||
// KNF_PRINT(allow_upsample);
|
||||
KNF_PRINT(max_feature_vectors);
|
||||
#undef KNF_PRINT
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts);
|
||||
|
||||
class FeatureWindowFunction {
|
||||
public:
|
||||
FeatureWindowFunction() = default;
|
||||
explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
|
||||
/**
|
||||
* @param wave Pointer to a 1-D array of shape [window_size].
|
||||
* It is modified in-place: wave[i] = wave[i] * window_[i].
|
||||
* @param
|
||||
*/
|
||||
void Apply(float *wave) const;
|
||||
|
||||
private:
|
||||
std::vector<float> window_; // of size opts.WindowSize()
|
||||
};
|
||||
|
||||
int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts);
|
||||
|
||||
/**
|
||||
This function returns the number of frames that we can extract from a wave
|
||||
file with the given number of samples in it (assumed to have the same
|
||||
sampling rate as specified in 'opts').
|
||||
|
||||
@param [in] num_samples The number of samples in the wave file.
|
||||
@param [in] opts The frame-extraction options class
|
||||
|
||||
@param [in] flush True if we are asserting that this number of samples
|
||||
is 'all there is', false if we are expecting more data to possibly come in. This
|
||||
only makes a difference to the answer
|
||||
if opts.snip_edges == false. For offline feature extraction you always want
|
||||
flush == true. In an online-decoding context, once you know (or decide) that
|
||||
no more data is coming in, you'd call it with flush == true at the end to
|
||||
flush out any remaining data.
|
||||
*/
|
||||
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
|
||||
bool flush = true);
|
||||
|
||||
/*
|
||||
ExtractWindow() extracts a windowed frame of waveform (possibly with a
|
||||
power-of-two, padded size, depending on the config), including all the
|
||||
processing done by ProcessWindow().
|
||||
|
||||
@param [in] sample_offset If 'wave' is not the entire waveform, but
|
||||
part of it to the left has been discarded, then the
|
||||
number of samples prior to 'wave' that we have
|
||||
already discarded. Set this to zero if you are
|
||||
processing the entire waveform in one piece, or
|
||||
if you get 'no matching function' compilation
|
||||
errors when updating the code.
|
||||
@param [in] wave The waveform
|
||||
@param [in] f The frame index to be extracted, with
|
||||
0 <= f < NumFrames(sample_offset + wave.size(), opts, true)
|
||||
@param [in] opts The options class to be used
|
||||
@param [in] window_function The windowing function, as derived from the
|
||||
options class.
|
||||
@param [out] window The windowed, possibly-padded waveform to be
|
||||
extracted. Will be resized as needed.
|
||||
@param [out] log_energy_pre_window If non-NULL, the log-energy of
|
||||
the signal prior to pre-emphasis and multiplying by
|
||||
the windowing function will be written to here.
|
||||
*/
|
||||
void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
|
||||
int32_t f, const FrameExtractionOptions &opts,
|
||||
const FeatureWindowFunction &window_function,
|
||||
std::vector<float> *window,
|
||||
float *log_energy_pre_window = nullptr);
|
||||
|
||||
/**
|
||||
This function does all the windowing steps after actually
|
||||
extracting the windowed signal: depending on the
|
||||
configuration, it does dithering, dc offset removal,
|
||||
preemphasis, and multiplication by the windowing function.
|
||||
@param [in] opts The options class to be used
|
||||
@param [in] window_function The windowing function-- should have
|
||||
been initialized using 'opts'.
|
||||
@param [in,out] window A vector of size opts.WindowSize(). Note:
|
||||
it will typically be a sub-vector of a larger vector of size
|
||||
opts.PaddedWindowSize(), with the remaining samples zero,
|
||||
as the FFT code is more efficient if it operates on data with
|
||||
power-of-two size.
|
||||
@param [out] log_energy_pre_window If non-NULL, then after dithering and
|
||||
DC offset removal, this function will write to this pointer the log of
|
||||
the total energy (i.e. sum-squared) of the frame.
|
||||
*/
|
||||
void ProcessWindow(const FrameExtractionOptions &opts,
|
||||
const FeatureWindowFunction &window_function, float *window,
|
||||
float *log_energy_pre_window = nullptr);
|
||||
|
||||
// Compute the inner product of two vectors
|
||||
float InnerProduct(const float *a, const float *b, int32_t n);
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,142 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Stack trace related stuff is from kaldi.
|
||||
* Refer to
|
||||
* https://github.com/kaldi-asr/kaldi/blob/master/src/base/kaldi-error.cc
|
||||
*/
|
||||
|
||||
#include "kaldi-native-fbank/csrc/log.h"
|
||||
|
||||
#ifdef KNF_HAVE_EXECINFO_H
|
||||
#include <execinfo.h> // To get stack trace in error messages.
|
||||
#ifdef KNF_HAVE_CXXABI_H
|
||||
#include <cxxabi.h> // For name demangling.
|
||||
// Useful to decode the stack trace, but only used if we have execinfo.h
|
||||
#endif // KNF_HAVE_CXXABI_H
|
||||
#endif // KNF_HAVE_EXECINFO_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <ctime>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Returns the current local time formatted as "yyyy-mm-dd hh:mm:ss".
// Returns an empty string if the time cannot be converted to local time.
//
// std::localtime() returns a pointer to storage shared between callers (and
// may return nullptr), so the re-entrant platform variants are used instead.
std::string GetDateTimeStr() {
  const std::time_t now = std::time(nullptr);
  std::tm local_tm{};
#if defined(_WIN32)
  const bool ok = (localtime_s(&local_tm, &now) == 0);
#else
  const bool ok = (localtime_r(&now, &local_tm) != nullptr);
#endif
  if (!ok) {
    return std::string();
  }

  std::ostringstream os;
  os << std::put_time(&local_tm, "%F %T");  // yyyy-mm-dd hh:mm:ss
  return os.str();
}
|
||||
|
||||
// Locates a (possibly mangled) symbol inside one backtrace line.
//
// The symbol is taken to start at the first '_' that directly follows a ' '
// or '(' and to end at the next ' ' or '+'. On success, [*begin, *end) is the
// symbol's range in `trace_name` and true is returned. If no start is found,
// *begin is set to std::string::npos and false is returned; if a start is
// found but no terminator follows it, false is returned as well.
static bool LocateSymbolRange(const std::string &trace_name, std::size_t *begin,
                              std::size_t *end) {
  *begin = std::string::npos;

  // An underscore at index 0 has no preceding character, so start at 1.
  for (std::size_t pos = trace_name.find('_', 1); pos != std::string::npos;
       pos = trace_name.find('_', pos + 1)) {
    const char prev = trace_name[pos - 1];
    if (prev == ' ' || prev == '(') {
      *begin = pos;
      break;
    }
  }

  if (*begin == std::string::npos) {
    return false;
  }

  *end = trace_name.find_first_of(" +", *begin);
  return *end != std::string::npos;
}
|
||||
|
||||
#ifdef KNF_HAVE_EXECINFO_H
|
||||
// Returns `trace_name` with the mangled symbol it contains (if any) replaced
// by its demangled form. Without cxxabi.h, or when no symbol can be located
// or demangled, the text is returned unchanged.
//
// The following backtrace line formats are targeted:
//
// Linux:
//    ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
//
// Mac:
//    0 server 0x000000010f67614d _ZNK5kaldi13MessageLogger10LogMessageEv + 813
//
// The goal is to extract the name, e.g. '_ZN5kaldi13UnitTestErrorEv', and
// demangle it into a readable name like kaldi::UnitTestError.
static std::string Demangle(const std::string &trace_name) {
#ifdef KNF_HAVE_CXXABI_H
  std::size_t sym_begin = 0;
  std::size_t sym_end = 0;
  if (!LocateSymbolRange(trace_name, &sym_begin, &sym_end)) {
    return trace_name;
  }

  std::string symbol(trace_name, sym_begin, sym_end - sym_begin);

  int status = 0;
  char *readable =
      abi::__cxa_demangle(symbol.c_str(), nullptr, nullptr, &status);
  if (status == 0 && readable != nullptr) {
    symbol.assign(readable);
    free(readable);  // buffer was allocated by __cxa_demangle
  }

  // Stitch prefix + (possibly demangled) symbol + suffix back together.
  std::string result(trace_name, 0, sym_begin);
  result += symbol;
  result.append(trace_name, sym_end, std::string::npos);
  return result;
#else   // KNF_HAVE_CXXABI_H
  return trace_name;
#endif  // KNF_HAVE_CXXABI_H
}
|
||||
#endif // KNF_HAVE_EXECINFO_H
|
||||
|
||||
// Returns a human-readable stack trace of the calling thread, one frame per
// line, preceded by a "[ Stack-Trace: ]" header. Returns an empty string when
// execinfo.h is unavailable or the symbol lookup fails.
std::string GetStackTrace() {
  std::string ans;
#ifdef KNF_HAVE_EXECINFO_H
  constexpr const std::size_t kMaxTraceSize = 50;
  constexpr const std::size_t kMaxTracePrint = 50;  // Must be even.

  void *frames[kMaxTraceSize];
  const std::size_t num_frames = backtrace(frames, kMaxTraceSize);
  char **symbols = backtrace_symbols(frames, num_frames);
  if (symbols == nullptr) return ans;

  // Appends the demangled frames in [first, last) to `ans`.
  const auto append_frames = [&ans, symbols](std::size_t first,
                                             std::size_t last) {
    for (std::size_t i = first; i < last; ++i) {
      ans += Demangle(symbols[i]) + "\n";
    }
  };

  ans += "[ Stack-Trace: ]\n";
  if (num_frames <= kMaxTracePrint) {
    append_frames(0, num_frames);
  } else {
    // Too many frames: print the first and last halves with an ellipsis
    // in between.
    append_frames(0, kMaxTracePrint / 2);
    ans += ".\n.\n.\n";
    append_frames(num_frames - kMaxTracePrint / 2, num_frames);
    if (num_frames == kMaxTraceSize) {
      ans += ".\n.\n.\n";  // Stack was too long, probably a bug.
    }
  }

  // Only the array returned by backtrace_symbols() is freed here, not the
  // individual strings it points to.
  free(symbols);
#endif  // KNF_HAVE_EXECINFO_H
  return ans;
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,383 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// The content in this file is copied/modified from
|
||||
// https://github.com/k2-fsa/k2/blob/master/k2/csrc/log.h
|
||||
#ifndef KALDI_NATIVE_FBANK_CSRC_LOG_H_
|
||||
#define KALDI_NATIVE_FBANK_CSRC_LOG_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <mutex> // NOLINT
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace knf {
|
||||
|
||||
#if KNF_ENABLE_CHECK
|
||||
|
||||
#if defined(NDEBUG)
|
||||
constexpr bool kDisableDebug = true;
|
||||
#else
|
||||
constexpr bool kDisableDebug = false;
|
||||
#endif
|
||||
|
||||
enum class LogLevel {
|
||||
kTrace = 0,
|
||||
kDebug = 1,
|
||||
kInfo = 2,
|
||||
kWarning = 3,
|
||||
kError = 4,
|
||||
kFatal = 5, // print message and abort the program
|
||||
};
|
||||
|
||||
// They are used in KNF_LOG(xxx), so their names
|
||||
// do not follow the google c++ code style
|
||||
//
|
||||
// You can use them in the following way:
|
||||
//
|
||||
// KNF_LOG(TRACE) << "some message";
|
||||
// KNF_LOG(DEBUG) << "some message";
|
||||
#ifndef _MSC_VER
|
||||
constexpr LogLevel TRACE = LogLevel::kTrace;
|
||||
constexpr LogLevel DEBUG = LogLevel::kDebug;
|
||||
constexpr LogLevel INFO = LogLevel::kInfo;
|
||||
constexpr LogLevel WARNING = LogLevel::kWarning;
|
||||
constexpr LogLevel ERROR = LogLevel::kError;
|
||||
constexpr LogLevel FATAL = LogLevel::kFatal;
|
||||
#else
|
||||
#define TRACE LogLevel::kTrace
|
||||
#define DEBUG LogLevel::kDebug
|
||||
#define INFO LogLevel::kInfo
|
||||
#define WARNING LogLevel::kWarning
|
||||
#define ERROR LogLevel::kError
|
||||
#define FATAL LogLevel::kFatal
|
||||
#endif
|
||||
|
||||
std::string GetStackTrace();
|
||||
|
||||
/* Return the current log level.
|
||||
|
||||
|
||||
If the current log level is TRACE, then all logged messages are printed out.
|
||||
|
||||
If the current log level is DEBUG, log messages with "TRACE" level are not
|
||||
shown and all other levels are printed out.
|
||||
|
||||
Similarly, if the current log level is INFO, log message with "TRACE" and
|
||||
"DEBUG" are not shown and all other levels are printed out.
|
||||
|
||||
If it is FATAL, then only FATAL messages are shown.
|
||||
*/
|
||||
// Returns the active log level. It defaults to INFO and is read once, on the
// first call, from the environment variable KNF_LOG_LEVEL (one of TRACE,
// DEBUG, INFO, WARNING, ERROR, FATAL). An unrecognized value keeps the
// default and prints a diagnostic to stderr.
inline LogLevel GetCurrentLogLevel() {
  static LogLevel log_level = INFO;
  static std::once_flag init_flag;
  std::call_once(init_flag, []() {
    const char *env_log_level = std::getenv("KNF_LOG_LEVEL");
    if (env_log_level == nullptr) return;

    const std::string s = env_log_level;
    if (s == "TRACE") {
      log_level = TRACE;
    } else if (s == "DEBUG") {
      log_level = DEBUG;
    } else if (s == "INFO") {
      log_level = INFO;
    } else if (s == "WARNING") {
      log_level = WARNING;
    } else if (s == "ERROR") {
      log_level = ERROR;
    } else if (s == "FATAL") {
      log_level = FATAL;
    } else {
      // Terminate the diagnostic with a newline so that subsequent stderr
      // output does not continue on the same line.
      fprintf(stderr,
              "Unknown KNF_LOG_LEVEL: %s"
              "\nSupported values are: "
              "TRACE, DEBUG, INFO, WARNING, ERROR, FATAL\n",
              s.c_str());
    }
  });
  return log_level;
}
|
||||
|
||||
// Returns true if the environment variable KNF_ABORT is set (to any value).
// The environment is consulted only once, on the first call.
inline bool EnableAbort() {
  // Initialization of a function-local static runs exactly once.
  static const bool enable_abort = (std::getenv("KNF_ABORT") != nullptr);
  return enable_abort;
}
|
||||
|
||||
class Logger {
|
||||
public:
|
||||
Logger(const char *filename, const char *func_name, uint32_t line_num,
|
||||
LogLevel level)
|
||||
: filename_(filename),
|
||||
func_name_(func_name),
|
||||
line_num_(line_num),
|
||||
level_(level) {
|
||||
cur_level_ = GetCurrentLogLevel();
|
||||
fprintf(stderr, "here\n");
|
||||
switch (level) {
|
||||
case TRACE:
|
||||
if (cur_level_ <= TRACE) fprintf(stderr, "[T] ");
|
||||
break;
|
||||
case DEBUG:
|
||||
if (cur_level_ <= DEBUG) fprintf(stderr, "[D] ");
|
||||
break;
|
||||
case INFO:
|
||||
if (cur_level_ <= INFO) fprintf(stderr, "[I] ");
|
||||
break;
|
||||
case WARNING:
|
||||
if (cur_level_ <= WARNING) fprintf(stderr, "[W] ");
|
||||
break;
|
||||
case ERROR:
|
||||
if (cur_level_ <= ERROR) fprintf(stderr, "[E] ");
|
||||
break;
|
||||
case FATAL:
|
||||
if (cur_level_ <= FATAL) fprintf(stderr, "[F] ");
|
||||
break;
|
||||
}
|
||||
|
||||
if (cur_level_ <= level_) {
|
||||
fprintf(stderr, "%s:%u:%s ", filename, line_num, func_name);
|
||||
}
|
||||
}
|
||||
|
||||
~Logger() noexcept(false) {
|
||||
static constexpr const char *kErrMsg = R"(
|
||||
Some bad things happened. Please read the above error messages and stack
|
||||
trace. If you are using Python, the following command may be helpful:
|
||||
|
||||
gdb --args python /path/to/your/code.py
|
||||
|
||||
(You can use `gdb` to debug the code. Please consider compiling
|
||||
a debug version of KNF.).
|
||||
|
||||
If you are unable to fix it, please open an issue at:
|
||||
|
||||
https://github.com/csukuangfj/kaldi-native-fbank/issues/new
|
||||
)";
|
||||
fprintf(stderr, "\n");
|
||||
if (level_ == FATAL) {
|
||||
std::string stack_trace = GetStackTrace();
|
||||
if (!stack_trace.empty()) {
|
||||
fprintf(stderr, "\n\n%s\n", stack_trace.c_str());
|
||||
}
|
||||
|
||||
fflush(nullptr);
|
||||
|
||||
#ifndef __ANDROID_API__
|
||||
if (EnableAbort()) {
|
||||
// NOTE: abort() will terminate the program immediately without
|
||||
// printing the Python stack backtrace.
|
||||
abort();
|
||||
}
|
||||
|
||||
throw std::runtime_error(kErrMsg);
|
||||
#else
|
||||
abort();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
const Logger &operator<<(bool b) const {
|
||||
if (cur_level_ <= level_) {
|
||||
fprintf(stderr, b ? "true" : "false");
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Logger &operator<<(int8_t i) const {
|
||||
if (cur_level_ <= level_) fprintf(stderr, "%d", i);
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Logger &operator<<(const char *s) const {
|
||||
if (cur_level_ <= level_) fprintf(stderr, "%s", s);
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Logger &operator<<(int32_t i) const {
|
||||
if (cur_level_ <= level_) fprintf(stderr, "%d", i);
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Logger &operator<<(uint32_t i) const {
|
||||
if (cur_level_ <= level_) fprintf(stderr, "%u", i);
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Logger &operator<<(uint64_t i) const {
|
||||
if (cur_level_ <= level_)
|
||||
fprintf(stderr, "%llu", (long long unsigned int)i); // NOLINT
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Logger &operator<<(int64_t i) const {
|
||||
if (cur_level_ <= level_)
|
||||
fprintf(stderr, "%lli", (long long int)i); // NOLINT
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Logger &operator<<(float f) const {
|
||||
if (cur_level_ <= level_) fprintf(stderr, "%f", f);
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Logger &operator<<(double d) const {
|
||||
if (cur_level_ <= level_) fprintf(stderr, "%f", d);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const Logger &operator<<(const T &t) const {
|
||||
// require T overloads operator<<
|
||||
std::ostringstream os;
|
||||
os << t;
|
||||
return *this << os.str().c_str();
|
||||
}
|
||||
|
||||
// specialization to fix compile error: `stringstream << nullptr` is ambiguous
|
||||
const Logger &operator<<(const std::nullptr_t &null) const {
|
||||
if (cur_level_ <= level_) *this << "(null)";
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
const char *filename_;
|
||||
const char *func_name_;
|
||||
uint32_t line_num_;
|
||||
LogLevel level_;
|
||||
LogLevel cur_level_;
|
||||
};
|
||||
#endif // KNF_ENABLE_CHECK
|
||||
|
||||
// Helper that turns a streamed Logger expression into `void`, so it can sit
// in the false arm of the ternary used by the KNF_CHECK macros. When checks
// are compiled out it instead acts as a sink that swallows streamed values.
class Voidifier {
 public:
#if KNF_ENABLE_CHECK
  // Lower precedence than <<, so the whole message is streamed first.
  void operator&(const Logger &) const {}
#endif
};
#if !defined(KNF_ENABLE_CHECK)
// No-op: accepts and discards any streamed argument.
template <typename Arg>
const Voidifier &operator<<(const Voidifier &sink, Arg &&) {
  return sink;
}
#endif
|
||||
|
||||
} // namespace knf
|
||||
|
||||
// Compile-time assertion with an empty message.
#define KNF_STATIC_ASSERT(x) static_assert(x, "")

#ifdef KNF_ENABLE_CHECK

// KNF_FUNC expands to the name of the enclosing function; it is used in the
// prefix of every log line.
#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \
    defined(__PRETTY_FUNCTION__)
// for clang and GCC
#define KNF_FUNC __PRETTY_FUNCTION__
#else
// for other compilers
#define KNF_FUNC __func__
#endif

// Logs a FATAL message if `x` is false. Extra context can be streamed:
//
//    KNF_CHECK(ptr != nullptr) << "Some message";
#define KNF_CHECK(x)                                            \
  (x) ? (void)0                                                 \
      : ::knf::Voidifier() &                                    \
            ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
                << "Check failed: " << #x << " "

// WARNING: x and y may be evaluated multiple times, but only when the check
// fails. A failed check is fatal, so the extra evaluation is not expected to
// matter.
//
// CAUTION: the recommended usage is
//
//    auto x = Foo();
//    auto y = Bar();
//    KNF_CHECK_EQ(x, y) << "Some message";
//
// Please avoid
//
//    KNF_CHECK_EQ(Foo(), Bar());
//
// if `Foo()` or `Bar()` has side effects, e.g., changing some local static
// variables or global variables.
#define _KNF_CHECK_OP(x, y, op)                                        \
  ((x)op(y)) ? (void)0                                                 \
             : ::knf::Voidifier() &                                    \
                   ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
                       << "Check failed: " << #x << " " << #op << " " << #y \
                       << " (" << (x) << " vs. " << (y) << ") "

#define KNF_CHECK_EQ(x, y) _KNF_CHECK_OP(x, y, ==)
#define KNF_CHECK_NE(x, y) _KNF_CHECK_OP(x, y, !=)
#define KNF_CHECK_LT(x, y) _KNF_CHECK_OP(x, y, <)
#define KNF_CHECK_LE(x, y) _KNF_CHECK_OP(x, y, <=)
#define KNF_CHECK_GT(x, y) _KNF_CHECK_OP(x, y, >)
#define KNF_CHECK_GE(x, y) _KNF_CHECK_OP(x, y, >=)

// Starts a log message at level `x` (TRACE, DEBUG, INFO, WARNING, ERROR or
// FATAL).
#define KNF_LOG(x) ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::x)

// ------------------------------------------------------------
//       Debug-only variants
// ------------------------------------------------------------
// When NDEBUG is defined, ::knf::kDisableDebug is true and the macros below
// take the (void)0 branch, i.e. they check and log nothing.

#define KNF_DCHECK(x) ::knf::kDisableDebug ? (void)0 : KNF_CHECK(x)

#define KNF_DCHECK_EQ(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_EQ(x, y)

#define KNF_DCHECK_NE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_NE(x, y)

#define KNF_DCHECK_LT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LT(x, y)

#define KNF_DCHECK_LE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LE(x, y)

#define KNF_DCHECK_GT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GT(x, y)

#define KNF_DCHECK_GE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GE(x, y)

#define KNF_DLOG(x) \
  ::knf::kDisableDebug ? (void)0 : ::knf::Voidifier() & KNF_LOG(x)

#else  // KNF_ENABLE_CHECK

// Checks and logging are compiled out: every macro expands to a Voidifier
// temporary, and the macro arguments are not evaluated.

#define KNF_CHECK(x) ::knf::Voidifier()
#define KNF_LOG(x) ::knf::Voidifier()

#define KNF_CHECK_EQ(x, y) ::knf::Voidifier()
#define KNF_CHECK_NE(x, y) ::knf::Voidifier()
#define KNF_CHECK_LT(x, y) ::knf::Voidifier()
#define KNF_CHECK_LE(x, y) ::knf::Voidifier()
#define KNF_CHECK_GT(x, y) ::knf::Voidifier()
#define KNF_CHECK_GE(x, y) ::knf::Voidifier()

#define KNF_DCHECK(x) ::knf::Voidifier()
#define KNF_DLOG(x) ::knf::Voidifier()
#define KNF_DCHECK_EQ(x, y) ::knf::Voidifier()
#define KNF_DCHECK_NE(x, y) ::knf::Voidifier()
#define KNF_DCHECK_LT(x, y) ::knf::Voidifier()
#define KNF_DCHECK_LE(x, y) ::knf::Voidifier()
#define KNF_DCHECK_GT(x, y) ::knf::Voidifier()
#define KNF_DCHECK_GE(x, y) ::knf::Voidifier()

#endif  // KNF_ENABLE_CHECK
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_CSRC_LOG_H_
|
||||
@ -0,0 +1,257 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/mel-computations.cc
|
||||
|
||||
#include "kaldi-native-fbank/csrc/mel-computations.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Writes the textual form of `opts` (as produced by
// MelBanksOptions::ToString()) to `os` and returns the same stream so that
// further insertions can be chained.
std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts) {
  return os << opts.ToString();
}
|
||||
|
||||
float MelBanks::VtlnWarpFreq(
|
||||
float vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN.
|
||||
float vtln_high_cutoff,
|
||||
float low_freq, // upper+lower frequency cutoffs in mel computation
|
||||
float high_freq, float vtln_warp_factor, float freq) {
|
||||
/// This computes a VTLN warping function that is not the same as HTK's one,
|
||||
/// but has similar inputs (this function has the advantage of never producing
|
||||
/// empty bins).
|
||||
|
||||
/// This function computes a warp function F(freq), defined between low_freq
|
||||
/// and high_freq inclusive, with the following properties:
|
||||
/// F(low_freq) == low_freq
|
||||
/// F(high_freq) == high_freq
|
||||
/// The function is continuous and piecewise linear with two inflection
|
||||
/// points.
|
||||
/// The lower inflection point (measured in terms of the unwarped
|
||||
/// frequency) is at frequency l, determined as described below.
|
||||
/// The higher inflection point is at a frequency h, determined as
|
||||
/// described below.
|
||||
/// If l <= f <= h, then F(f) = f/vtln_warp_factor.
|
||||
/// If the higher inflection point (measured in terms of the unwarped
|
||||
/// frequency) is at h, then max(h, F(h)) == vtln_high_cutoff.
|
||||
/// Since (by the last point) F(h) == h/vtln_warp_factor, then
|
||||
/// max(h, h/vtln_warp_factor) == vtln_high_cutoff, so
|
||||
/// h = vtln_high_cutoff / max(1, 1/vtln_warp_factor).
|
||||
/// = vtln_high_cutoff * min(1, vtln_warp_factor).
|
||||
/// If the lower inflection point (measured in terms of the unwarped
|
||||
/// frequency) is at l, then min(l, F(l)) == vtln_low_cutoff
|
||||
/// This implies that l = vtln_low_cutoff / min(1, 1/vtln_warp_factor)
|
||||
/// = vtln_low_cutoff * max(1, vtln_warp_factor)
|
||||
|
||||
if (freq < low_freq || freq > high_freq)
|
||||
return freq; // in case this gets called
|
||||
// for out-of-range frequencies, just return the freq.
|
||||
|
||||
KNF_CHECK_GT(vtln_low_cutoff, low_freq);
|
||||
KNF_CHECK_LT(vtln_high_cutoff, high_freq);
|
||||
|
||||
float one = 1.0f;
|
||||
float l = vtln_low_cutoff * std::max(one, vtln_warp_factor);
|
||||
float h = vtln_high_cutoff * std::min(one, vtln_warp_factor);
|
||||
float scale = 1.0f / vtln_warp_factor;
|
||||
float Fl = scale * l; // F(l);
|
||||
float Fh = scale * h; // F(h);
|
||||
KNF_CHECK(l > low_freq && h < high_freq);
|
||||
// slope of left part of the 3-piece linear function
|
||||
float scale_left = (Fl - low_freq) / (l - low_freq);
|
||||
// [slope of center part is just "scale"]
|
||||
|
||||
// slope of right part of the 3-piece linear function
|
||||
float scale_right = (high_freq - Fh) / (high_freq - h);
|
||||
|
||||
if (freq < l) {
|
||||
return low_freq + scale_left * (freq - low_freq);
|
||||
} else if (freq < h) {
|
||||
return scale * freq;
|
||||
} else { // freq >= h
|
||||
return high_freq + scale_right * (freq - high_freq);
|
||||
}
|
||||
}
|
||||
|
||||
float MelBanks::VtlnWarpMelFreq(
|
||||
float vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN.
|
||||
float vtln_high_cutoff,
|
||||
float low_freq, // upper+lower frequency cutoffs in mel computation
|
||||
float high_freq, float vtln_warp_factor, float mel_freq) {
|
||||
return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, low_freq,
|
||||
high_freq, vtln_warp_factor,
|
||||
InverseMelScale(mel_freq)));
|
||||
}
|
||||
|
||||
// Builds the triangular mel filters.
//
// For every mel bin the constructor stores, in bins_, the index of the first
// FFT bin with a non-zero weight together with the run of weights up to the
// last non-zero one, and records the bin's centre frequency in center_freqs_.
//
// @param opts              Mel options (bin count, frequency range, VTLN
//                          cutoffs, debug and HTK flags).
// @param frame_opts        Supplies the sample rate and padded window size.
// @param vtln_warp_factor  VTLN warp factor; 1.0 disables warping.
MelBanks::MelBanks(const MelBanksOptions &opts,
                   const FrameExtractionOptions &frame_opts,
                   float vtln_warp_factor)
    : htk_mode_(opts.htk_mode) {
  const int32_t num_bins = opts.num_bins;
  if (num_bins < 3) {
    KNF_LOG(FATAL) << "Must have at least 3 mel bins";
  }

  const float sample_freq = frame_opts.samp_freq;
  const int32_t window_length_padded = frame_opts.PaddedWindowSize();
  KNF_CHECK_EQ(window_length_padded % 2, 0);

  const int32_t num_fft_bins = window_length_padded / 2;
  const float nyquist = 0.5f * sample_freq;

  // A non-positive high_freq option is an offset from the Nyquist frequency.
  const float low_freq = opts.low_freq;
  const float high_freq =
      (opts.high_freq > 0.0f) ? opts.high_freq : nyquist + opts.high_freq;

  const bool bad_freq_range = low_freq < 0.0f || low_freq >= nyquist ||
                              high_freq <= 0.0f || high_freq > nyquist ||
                              high_freq <= low_freq;
  if (bad_freq_range) {
    KNF_LOG(FATAL) << "Bad values in options: low-freq " << low_freq
                   << " and high-freq " << high_freq << " vs. nyquist "
                   << nyquist;
  }

  // fft-bin width [think of it as Nyquist-freq / half-window-length]
  const float fft_bin_width = sample_freq / window_length_padded;

  const float mel_low_freq = MelScale(low_freq);
  const float mel_high_freq = MelScale(high_freq);

  debug_ = opts.debug_mel;

  // Divide by num_bins + 1 because of end-effects where the bins spread out
  // to the sides.
  const float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1);

  // A negative vtln_high option is an offset from the Nyquist frequency.
  const float vtln_low = opts.vtln_low;
  float vtln_high = opts.vtln_high;
  if (vtln_high < 0.0f) {
    vtln_high += nyquist;
  }

  const bool apply_vtln = (vtln_warp_factor != 1.0f);
  if (apply_vtln &&
      (vtln_low < 0.0f || vtln_low <= low_freq || vtln_low >= high_freq ||
       vtln_high <= 0.0f || vtln_high >= high_freq || vtln_high <= vtln_low)) {
    KNF_LOG(FATAL) << "Bad values in options: vtln-low " << vtln_low
                   << " and vtln-high " << vtln_high << ", versus "
                   << "low-freq " << low_freq << " and high-freq " << high_freq;
  }

  bins_.resize(num_bins);
  center_freqs_.resize(num_bins);

  for (int32_t bin = 0; bin < num_bins; ++bin) {
    // Left edge, peak and right edge of this triangle on the mel scale.
    float left_mel = mel_low_freq + bin * mel_freq_delta;
    float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta;
    float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;

    if (apply_vtln) {
      left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                 vtln_warp_factor, left_mel);
      center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                   vtln_warp_factor, center_mel);
      right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                  vtln_warp_factor, right_mel);
    }
    center_freqs_[bin] = InverseMelScale(center_mel);

    // this_bin holds one coefficient per FFT bin; it is non-zero only where
    // this mel bin is active.
    std::vector<float> this_bin(num_fft_bins);

    int32_t first_index = -1, last_index = -1;
    for (int32_t i = 0; i < num_fft_bins; ++i) {
      const float bin_freq = fft_bin_width * i;  // centre freq of FFT bin i
      const float mel = MelScale(bin_freq);
      if (!(mel > left_mel && mel < right_mel)) {
        continue;  // outside this triangle
      }

      this_bin[i] = (mel <= center_mel)
                        ? (mel - left_mel) / (center_mel - left_mel)
                        : (right_mel - mel) / (right_mel - center_mel);
      if (first_index == -1) {
        first_index = i;
      }
      last_index = i;
    }
    KNF_CHECK(first_index != -1 && last_index >= first_index &&
              "You may have set num_mel_bins too large.");

    // Keep only the [first_index, last_index] run of weights.
    auto &entry = bins_[bin];
    entry.first = first_index;
    entry.second.assign(this_bin.begin() + first_index,
                        this_bin.begin() + last_index + 1);

    // Replicate a bug in HTK, for testing purposes.
    if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f) {
      entry.second[0] = 0.0;
    }
  }

  if (debug_) {
    std::ostringstream os;
    size_t bin_index = 0;
    for (const auto &entry : bins_) {
      os << "bin " << bin_index << ", offset = " << entry.first << ", vec = ";
      for (auto weight : entry.second) {
        os << weight << ", ";
      }
      os << "\n";
      ++bin_index;
    }
    KNF_LOG(INFO) << os.str();
  }
}
|
||||
|
||||
// "power_spectrum" contains fft energies.
|
||||
void MelBanks::Compute(const float *power_spectrum,
|
||||
float *mel_energies_out) const {
|
||||
int32_t num_bins = bins_.size();
|
||||
|
||||
for (int32_t i = 0; i < num_bins; i++) {
|
||||
int32_t offset = bins_[i].first;
|
||||
const auto &v = bins_[i].second;
|
||||
float energy = 0;
|
||||
for (int32_t k = 0; k != v.size(); ++k) {
|
||||
energy += v[k] * power_spectrum[k + offset];
|
||||
}
|
||||
|
||||
// HTK-like flooring- for testing purposes (we prefer dither)
|
||||
if (htk_mode_ && energy < 1.0) {
|
||||
energy = 1.0;
|
||||
}
|
||||
|
||||
mel_energies_out[i] = energy;
|
||||
|
||||
// The following assert was added due to a problem with OpenBlas that
|
||||
// we had at one point (it was a bug in that library). Just to detect
|
||||
// it early.
|
||||
KNF_CHECK_EQ(energy, energy); // check that energy is not nan
|
||||
}
|
||||
|
||||
if (debug_) {
|
||||
fprintf(stderr, "MEL BANKS:\n");
|
||||
for (int32_t i = 0; i < num_bins; i++)
|
||||
fprintf(stderr, " %f", mel_energies_out[i]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,117 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
// This file is copied/modified from kaldi/src/feat/mel-computations.h
|
||||
#ifndef KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
|
||||
#define KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
|
||||
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Configuration of the mel filter bank.
struct MelBanksOptions {
  // Number of triangular bins, e.g. 25.
  int32_t num_bins = 25;

  // Lower frequency cutoff, e.g. 20.
  float low_freq = 20.0f;

  // Upper frequency cutoff; 0 -> no cutoff, negative -> added to the Nyquist
  // frequency to get the cutoff.
  float high_freq = 0.0f;

  // VTLN lower cutoff of the warping function.
  float vtln_low = 100.0f;

  // VTLN upper cutoff of the warping function: if negative, added to the
  // Nyquist frequency to get the cutoff.
  float vtln_high = -500.0f;

  bool debug_mel = false;

  // htk_mode is a "hidden" config, it does not show up on the command line.
  // Enables more exact compatibility with HTK, for testing purposes.  Affects
  // mel-energy flooring and reproduces a bug in HTK.
  bool htk_mode = false;

  // Returns one "name: value" line per option, in declaration order.
  std::string ToString() const {
    std::ostringstream os;
    os << "num_bins: " << num_bins << "\n"
       << "low_freq: " << low_freq << "\n"
       << "high_freq: " << high_freq << "\n"
       << "vtln_low: " << vtln_low << "\n"
       << "vtln_high: " << vtln_high << "\n"
       << "debug_mel: " << debug_mel << "\n"
       << "htk_mode: " << htk_mode << "\n";
    return os.str();
  }
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts);
|
||||
|
||||
class MelBanks {
|
||||
public:
|
||||
static inline float InverseMelScale(float mel_freq) {
|
||||
return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f);
|
||||
}
|
||||
|
||||
static inline float MelScale(float freq) {
|
||||
return 1127.0f * logf(1.0f + freq / 700.0f);
|
||||
}
|
||||
|
||||
static float VtlnWarpFreq(
|
||||
float vtln_low_cutoff,
|
||||
float vtln_high_cutoff, // discontinuities in warp func
|
||||
float low_freq,
|
||||
float high_freq, // upper+lower frequency cutoffs in
|
||||
// the mel computation
|
||||
float vtln_warp_factor, float freq);
|
||||
|
||||
static float VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff,
|
||||
float low_freq, float high_freq,
|
||||
float vtln_warp_factor, float mel_freq);
|
||||
|
||||
// TODO(fangjun): Remove vtln_warp_factor
|
||||
MelBanks(const MelBanksOptions &opts,
|
||||
const FrameExtractionOptions &frame_opts, float vtln_warp_factor);
|
||||
|
||||
/// Compute Mel energies (note: not log energies).
|
||||
/// At input, "fft_energies" contains the FFT energies (not log).
|
||||
///
|
||||
/// @param fft_energies 1-D array of size num_fft_bins/2+1
|
||||
/// @param mel_energies_out 1-D array of size num_mel_bins
|
||||
void Compute(const float *fft_energies, float *mel_energies_out) const;
|
||||
|
||||
int32_t NumBins() const { return bins_.size(); }
|
||||
|
||||
private:
|
||||
// center frequencies of bins, numbered from 0 ... num_bins-1.
|
||||
// Needed by GetCenterFreqs().
|
||||
std::vector<float> center_freqs_;
|
||||
|
||||
// the "bins_" vector is a vector, one for each bin, of a pair:
|
||||
// (the first nonzero fft-bin), (the vector of weights).
|
||||
std::vector<std::pair<int32_t, std::vector<float>>> bins_;
|
||||
|
||||
// TODO(fangjun): Remove debug_ and htk_mode_
|
||||
bool debug_;
|
||||
bool htk_mode_;
|
||||
};
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
|
||||
@ -0,0 +1,165 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/online-feature.cc
|
||||
|
||||
#include "kaldi-native-fbank/csrc/online-feature.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||
#include "kaldi-native-fbank/csrc/log.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Creates an empty container.  A requested limit of 0 is stored as -1, the
// same value the class uses by default for "do not remove any elements".
RecyclingVector::RecyclingVector(int32_t items_to_hold)
    : items_to_hold_(items_to_hold != 0 ? items_to_hold : -1),
      first_available_index_(0) {}
|
||||
|
||||
const float *RecyclingVector::At(int32_t index) const {
|
||||
if (index < first_available_index_) {
|
||||
KNF_LOG(FATAL) << "Attempted to retrieve feature vector that was "
|
||||
"already removed by the RecyclingVector (index = "
|
||||
<< index << "; "
|
||||
<< "first_available_index = " << first_available_index_
|
||||
<< "; "
|
||||
<< "size = " << Size() << ")";
|
||||
}
|
||||
// 'at' does size checking.
|
||||
return items_.at(index - first_available_index_).data();
|
||||
}
|
||||
|
||||
void RecyclingVector::PushBack(std::vector<float> item) {
|
||||
// Note: -1 is a larger number when treated as unsigned
|
||||
if (items_.size() == static_cast<size_t>(items_to_hold_)) {
|
||||
items_.pop_front();
|
||||
++first_available_index_;
|
||||
}
|
||||
items_.push_back(std::move(item));
|
||||
}
|
||||
|
||||
int32_t RecyclingVector::Size() const {
|
||||
return first_available_index_ + static_cast<int32_t>(items_.size());
|
||||
}
|
||||
|
||||
template <class C>
|
||||
OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
|
||||
const typename C::Options &opts)
|
||||
: computer_(opts),
|
||||
window_function_(computer_.GetFrameOptions()),
|
||||
features_(opts.frame_opts.max_feature_vectors),
|
||||
input_finished_(false),
|
||||
waveform_offset_(0) {
|
||||
// RE the following assert: search for ONLINE_IVECTOR_LIMIT in
|
||||
// online-ivector-feature.cc.
|
||||
// Casting to uint32, an unsigned type, means that -1 would be treated
|
||||
// as `very large`.
|
||||
KNF_CHECK(static_cast<uint32_t>(opts.frame_opts.max_feature_vectors) > 200);
|
||||
}
|
||||
|
||||
template <class C>
|
||||
void OnlineGenericBaseFeature<C>::AcceptWaveform(float sampling_rate,
|
||||
const float *waveform,
|
||||
int32_t n) {
|
||||
if (n == 0) {
|
||||
return; // Nothing to do.
|
||||
}
|
||||
|
||||
if (input_finished_) {
|
||||
KNF_LOG(FATAL) << "AcceptWaveform called after InputFinished() was called.";
|
||||
}
|
||||
|
||||
KNF_CHECK_EQ(sampling_rate, computer_.GetFrameOptions().samp_freq);
|
||||
|
||||
waveform_remainder_.insert(waveform_remainder_.end(), waveform, waveform + n);
|
||||
|
||||
ComputeFeatures();
|
||||
}
|
||||
|
||||
template <class C>
|
||||
void OnlineGenericBaseFeature<C>::InputFinished() {
|
||||
input_finished_ = true;
|
||||
ComputeFeatures();
|
||||
}
|
||||
|
||||
template <class C>
|
||||
void OnlineGenericBaseFeature<C>::ComputeFeatures() {
|
||||
const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
|
||||
|
||||
int64_t num_samples_total = waveform_offset_ + waveform_remainder_.size();
|
||||
|
||||
int32_t num_frames_old = features_.Size();
|
||||
|
||||
int32_t num_frames_new =
|
||||
NumFrames(num_samples_total, frame_opts, input_finished_);
|
||||
|
||||
KNF_CHECK_GE(num_frames_new, num_frames_old);
|
||||
|
||||
// note: this online feature-extraction code does not support VTLN.
|
||||
float vtln_warp = 1.0;
|
||||
|
||||
std::vector<float> window;
|
||||
bool need_raw_log_energy = computer_.NeedRawLogEnergy();
|
||||
|
||||
for (int32_t frame = num_frames_old; frame < num_frames_new; ++frame) {
|
||||
std::fill(window.begin(), window.end(), 0);
|
||||
float raw_log_energy = 0.0;
|
||||
ExtractWindow(waveform_offset_, waveform_remainder_, frame, frame_opts,
|
||||
window_function_, &window,
|
||||
need_raw_log_energy ? &raw_log_energy : nullptr);
|
||||
|
||||
std::vector<float> this_feature(computer_.Dim());
|
||||
|
||||
computer_.Compute(raw_log_energy, vtln_warp, &window, this_feature.data());
|
||||
features_.PushBack(std::move(this_feature));
|
||||
}
|
||||
|
||||
// OK, we will now discard any portion of the signal that will not be
|
||||
// necessary to compute frames in the future.
|
||||
int64_t first_sample_of_next_frame =
|
||||
FirstSampleOfFrame(num_frames_new, frame_opts);
|
||||
|
||||
int32_t samples_to_discard = first_sample_of_next_frame - waveform_offset_;
|
||||
|
||||
if (samples_to_discard > 0) {
|
||||
// discard the leftmost part of the waveform that we no longer need.
|
||||
int32_t new_num_samples =
|
||||
static_cast<int32_t>(waveform_remainder_.size()) - samples_to_discard;
|
||||
|
||||
if (new_num_samples <= 0) {
|
||||
// odd, but we'll try to handle it.
|
||||
waveform_offset_ += waveform_remainder_.size();
|
||||
waveform_remainder_.resize(0);
|
||||
} else {
|
||||
std::vector<float> new_remainder(new_num_samples);
|
||||
|
||||
std::copy(waveform_remainder_.begin() + samples_to_discard,
|
||||
waveform_remainder_.end(), new_remainder.begin());
|
||||
waveform_offset_ += samples_to_discard;
|
||||
|
||||
waveform_remainder_.swap(new_remainder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template class OnlineGenericBaseFeature<FbankComputer>;
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,142 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/online-feature.h
|
||||
#ifndef KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
|
||||
#define KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-fbank.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
/// This class serves as a storage for feature vectors with an option to limit
|
||||
/// the memory usage by removing old elements. The deleted frames indices are
|
||||
/// "remembered" so that regardless of the MAX_ITEMS setting, the user always
|
||||
/// provides the indices as if no deletion was being performed.
|
||||
/// This is useful when processing very long recordings which would otherwise
|
||||
/// cause the memory to eventually blow up when the features are not being
|
||||
/// removed.
|
||||
class RecyclingVector {
 public:
  /// @param items_to_hold  Maximum number of items kept at once.  With the
  ///                       default of -1 no element is ever removed.
  explicit RecyclingVector(int32_t items_to_hold = -1);

  // Not copyable.
  RecyclingVector(const RecyclingVector &) = delete;
  RecyclingVector &operator=(const RecyclingVector &) = delete;

  ~RecyclingVector() = default;

  /// Returns the item stored under `index`, where indices are counted as if
  /// no deletion had ever been performed.
  /// The returned pointer is owned by the RecyclingVector; callers must not
  /// free it.
  const float *At(int32_t index) const;

  /// Appends `item` to the container.
  void PushBack(std::vector<float> item);

  /// Returns the size as if no "recycling" had happened, i.e. the number of
  /// times PushBack() has been called.
  int32_t Size() const;

 private:
  // The items currently held, oldest first.
  std::deque<std::vector<float>> items_;

  // Maximum number of items to hold at once.
  int32_t items_to_hold_;

  // Index, as seen by callers, of the oldest item still in items_.
  int32_t first_available_index_;
};
|
||||
|
||||
/// This is a templated class for online feature extraction;
|
||||
/// it's templated on a class like MfccComputer or PlpComputer
|
||||
/// that does the basic feature extraction.
|
||||
template <class C>
|
||||
class OnlineGenericBaseFeature {
|
||||
public:
|
||||
// Constructor from options class
|
||||
explicit OnlineGenericBaseFeature(const typename C::Options &opts);
|
||||
|
||||
int32_t Dim() const { return computer_.Dim(); }
|
||||
|
||||
float FrameShiftInSeconds() const {
|
||||
return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
|
||||
}
|
||||
|
||||
int32_t NumFramesReady() const { return features_.Size(); }
|
||||
|
||||
// Note: IsLastFrame() will only ever return true if you have called
|
||||
// InputFinished() (and this frame is the last frame).
|
||||
bool IsLastFrame(int32_t frame) const {
|
||||
return input_finished_ && frame == NumFramesReady() - 1;
|
||||
}
|
||||
|
||||
const float *GetFrame(int32_t frame) const { return features_.At(frame); }
|
||||
|
||||
// This would be called from the application, when you get
|
||||
// more wave data. Note: the sampling_rate is only provided so
|
||||
// the code can assert that it matches the sampling rate
|
||||
// expected in the options.
|
||||
//
|
||||
// @param sampling_rate The sampling_rate of the input waveform
|
||||
// @param waveform Pointer to a 1-D array of size n
|
||||
// @param n Number of entries in waveform
|
||||
void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
|
||||
|
||||
// InputFinished() tells the class you won't be providing any
|
||||
// more waveform. This will help flush out the last frame or two
|
||||
// of features, in the case where snip-edges == false; it also
|
||||
// affects the return value of IsLastFrame().
|
||||
void InputFinished();
|
||||
|
||||
private:
|
||||
// This function computes any additional feature frames that it is possible to
|
||||
// compute from 'waveform_remainder_', which at this point may contain more
|
||||
// than just a remainder-sized quantity (because AcceptWaveform() appends to
|
||||
// waveform_remainder_ before calling this function). It adds these feature
|
||||
// frames to features_, and shifts off any now-unneeded samples of input from
|
||||
// waveform_remainder_ while incrementing waveform_offset_ by the same amount.
|
||||
void ComputeFeatures();
|
||||
|
||||
C computer_; // class that does the MFCC or PLP or filterbank computation
|
||||
|
||||
FeatureWindowFunction window_function_;
|
||||
|
||||
// features_ is the Mfcc or Plp or Fbank features that we have already
|
||||
// computed.
|
||||
|
||||
RecyclingVector features_;
|
||||
|
||||
// True if the user has called "InputFinished()"
|
||||
bool input_finished_;
|
||||
|
||||
// waveform_offset_ is the number of samples of waveform that we have
|
||||
// already discarded, i.e. that were prior to 'waveform_remainder_'.
|
||||
int64_t waveform_offset_;
|
||||
|
||||
// waveform_remainder_ is a short piece of waveform that we may need to keep
|
||||
// after extracting all the whole frames we can (whatever length of feature
|
||||
// will be required for the next phase of computation).
|
||||
// It is a 1-D tensor
|
||||
std::vector<float> waveform_remainder_;
|
||||
};
|
||||
|
||||
using OnlineFbank = OnlineGenericBaseFeature<FbankComputer>;
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
|
||||
@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kaldi-native-fbank/csrc/rfft.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/log.h"
|
||||
|
||||
// Real DFT routine; see fftsg.c.  This translation unit is C++ (it defines
// namespaces and classes), so the declaration always needs C linkage and the
// former `#else` branch for C compilers could never be selected.
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
|
||||
|
||||
namespace knf {
|
||||
// Implementation of Rfft on top of rdft() from fftsg.c.  Owns the two work
// arrays that rdft() takes in addition to the data.
class Rfft::RfftImpl {
 public:
  // @param n  Number of points; must be a power of 2 and at least 2.
  //
  // The size is validated before the work arrays are sized, so an invalid n
  // is reported by the checks rather than by an allocation with a
  // nonsensical size.
  explicit RfftImpl(int32_t n) : n_(n) {
    KNF_CHECK_GE(n, 2);
    KNF_CHECK_EQ(n & (n - 1), 0);

    const double root = std::sqrt(static_cast<double>(n / 2));
    ip_.resize(2 + static_cast<size_t>(root));
    w_.resize(n / 2);
  }

  // Single-precision entry point: the n_ input values are widened to double,
  // transformed, and the result is copied back into in_out.
  void Compute(float *in_out) {
    std::vector<double> d(in_out, in_out + n_);

    Compute(d.data());

    std::copy(d.begin(), d.end(), in_out);
  }

  // In-place transform of the n_ values in in_out.
  void Compute(double *in_out) {
    // 1 means forward fft
    rdft(n_, 1, in_out, ip_.data(), w_.data());
  }

 private:
  int32_t n_;                // number of points
  std::vector<int32_t> ip_;  // integer work area handed to rdft()
  std::vector<double> w_;    // double work area handed to rdft()
};
|
||||
|
||||
Rfft::Rfft(int32_t n) : impl_(std::make_unique<RfftImpl>(n)) {}
|
||||
|
||||
Rfft::~Rfft() = default;
|
||||
|
||||
void Rfft::Compute(float *in_out) { impl_->Compute(in_out); }
|
||||
void Rfft::Compute(double *in_out) { impl_->Compute(in_out); }
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_CSRC_RFFT_H_
|
||||
#define KALDI_NATIVE_FBANK_CSRC_RFFT_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace knf {
|
||||
|
||||
// n-point Real discrete Fourier transform
|
||||
// where n is a power of 2. n >= 2
|
||||
//
|
||||
// R[k] = sum_j=0^n-1 in[j]*cos(2*pi*j*k/n), 0<=k<=n/2
|
||||
// I[k] = sum_j=0^n-1 in[j]*sin(2*pi*j*k/n), 0<k<n/2
|
||||
class Rfft {
 public:
  // @param n  Number of fft bins.  It should be a power of 2.
  explicit Rfft(int32_t n);

  ~Rfft();

  /** Computes the transform in place.
   *
   * @param in_out  A 1-D array of size n.
   *                On return:
   *                  in_out[0] = R[0]
   *                  in_out[1] = R[n/2]
   *                  for 0 < k < n/2,
   *                    in_out[2*k] = R[k]
   *                    in_out[2*k+1] = I[k]
   */
  void Compute(float *in_out);

  /// Same as above, for double-precision data.
  void Compute(double *in_out);

 private:
  // The implementation is hidden behind a pointer; RfftImpl is defined in
  // the source file.
  class RfftImpl;
  std::unique_ptr<RfftImpl> impl_;
};
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_CSRC_RFFT_H_
|
||||
@ -0,0 +1,73 @@
|
||||
/**
|
||||
* Copyright 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "kaldi-native-fbank/csrc/log.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
#if KNF_ENABLE_CHECK
|
||||
|
||||
// Emits one message at every severity through KNF_LOG and KNF_DLOG and
// checks that the FATAL severity throws std::runtime_error.
TEST(Log, TestLog) {
  KNF_LOG(TRACE) << "this is a trace message";
  KNF_LOG(DEBUG) << "this is a debug message";
  KNF_LOG(INFO) << "this is an info message";
  KNF_LOG(WARNING) << "this is a warning message";
  KNF_LOG(ERROR) << "this is an error message";

  ASSERT_THROW(KNF_LOG(FATAL) << "This will crash the program",
               std::runtime_error);

  // For debug build. Each message names the severity it is logged at so the
  // output can be matched against the macro that produced it.
  KNF_DLOG(TRACE) << "this is a trace message for debug build";
  KNF_DLOG(DEBUG) << "this is a debug message for debug build";
  KNF_DLOG(INFO) << "this is an info message for debug build";
  KNF_DLOG(ERROR) << "this is an error message for debug build";
  KNF_DLOG(WARNING) << "this is a warning message for debug build";

#if !defined(NDEBUG)
  // Only asserted when NDEBUG is not defined.
  ASSERT_THROW(KNF_DLOG(FATAL) << "this is a fatal message for debug build",
               std::runtime_error);
#endif
}
|
||||
|
||||
// Verifies that passing KNF_CHECK_* / KNF_DCHECK_* comparisons are silent and
// that a failing comparison throws std::runtime_error.
TEST(Log, TestCheck) {
  KNF_CHECK_EQ(1, 1) << "ok";
  KNF_CHECK_LE(1, 3) << "ok";
  KNF_CHECK_LT(1, 2) << "ok";
  KNF_CHECK_GT(2, 1) << "ok";
  KNF_CHECK_GE(2, 1) << "ok";

  ASSERT_THROW(KNF_CHECK_EQ(2, 1) << "bad things happened", std::runtime_error);

  // for debug build
  KNF_DCHECK_EQ(1, 1) << "ok";
  KNF_DCHECK_LE(1, 3) << "ok";
  KNF_DCHECK_LT(1, 2) << "ok";
  KNF_DCHECK_GT(2, 1) << "ok";
  KNF_DCHECK_GE(2, 1) << "ok";

#if !defined(NDEBUG)
  // Exercise the debug-only macro here; the release-mode KNF_CHECK_EQ failure
  // is already covered above.
  // NOTE(review): assumes KNF_DCHECK_EQ throws like KNF_CHECK_EQ when NDEBUG
  // is not defined, mirroring the KNF_DLOG(FATAL) assertion in TestLog.
  ASSERT_THROW(KNF_DCHECK_EQ(2, 1) << "bad things happened",
               std::runtime_error);
#endif
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/online-feature.h"
|
||||
|
||||
int main() {
|
||||
knf::FbankOptions opts;
|
||||
opts.frame_opts.dither = 0;
|
||||
opts.mel_opts.num_bins = 10;
|
||||
|
||||
knf::OnlineFbank fbank(opts);
|
||||
for (int32_t i = 0; i < 1600; ++i) {
|
||||
float s = (i * i - i / 2) / 32767.;
|
||||
fbank.AcceptWaveform(16000, &s, 1);
|
||||
}
|
||||
|
||||
std::ostringstream os;
|
||||
|
||||
int32_t n = fbank.NumFramesReady();
|
||||
for (int32_t i = 0; i != n; ++i) {
|
||||
const float *frame = fbank.GetFrame(i);
|
||||
for (int32_t k = 0; k != opts.mel_opts.num_bins; ++k) {
|
||||
os << frame[k] << ", ";
|
||||
}
|
||||
os << "\n";
|
||||
}
|
||||
|
||||
std::cout << os.str() << "\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "kaldi-native-fbank/csrc/online-feature.h"
|
||||
namespace knf {
|
||||
|
||||
// Pushes N three-element frames into a RecyclingVector constructed with -1
// and checks that every frame can still be read back.
// NOTE(review): -1 presumably means "no limit on retained items" - confirm
// against RecyclingVector's constructor.
TEST(RecyclingVector, TestUnlimited) {
  RecyclingVector v(-1);
  constexpr int32_t N = 100;
  for (int32_t i = 0; i != N; ++i) {
    // Explicit casts: int -> float inside a braced initializer is a narrowing
    // conversion and is rejected by conforming compilers.
    std::unique_ptr<float[]> p(new float[3]{static_cast<float>(i),
                                            static_cast<float>(i + 1),
                                            static_cast<float>(i + 2)});
    v.PushBack(std::move(p));
  }
  ASSERT_EQ(v.Size(), N);

  for (int32_t i = 0; i != N; ++i) {
    const float *t = v.At(i);
    for (int32_t k = 0; k != 3; ++k) {
      EXPECT_EQ(t[k], (i + k));
    }
  }
}
|
||||
|
||||
// Pushes N frames into a RecyclingVector constructed with K. Size() is still
// expected to report N, and the test reads back only the last K frames.
TEST(RecyclingVector, Testlimited) {
  constexpr int32_t K = 3;
  constexpr int32_t N = 10;
  RecyclingVector v(K);
  for (int32_t i = 0; i != N; ++i) {
    // Explicit casts: int -> float inside a braced initializer is a narrowing
    // conversion and is rejected by conforming compilers.
    std::unique_ptr<float[]> p(new float[3]{static_cast<float>(i),
                                            static_cast<float>(i + 1),
                                            static_cast<float>(i + 2)});
    v.PushBack(std::move(p));
  }

  ASSERT_EQ(v.Size(), N);

  // Only indices [N - K, N) are read back.
  for (int32_t i = N - K; i != N; ++i) {
    const float *t = v.At(i);

    for (int32_t k = 0; k != 3; ++k) {
      EXPECT_EQ(t[k], (i + k));
    }
  }
}
|
||||
} // namespace knf
|
||||
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Copyright 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "kaldi-native-fbank/csrc/rfft.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
#if 0
|
||||
>>> import torch
|
||||
>>> a = torch.tensor([1., -1, 3, 8, 20, 6, 0, 2])
|
||||
>>> torch.fft.rfft(a)
|
||||
tensor([ 39.0000+0.0000j, -28.1924-2.2929j, 18.0000+5.0000j, -9.8076+3.7071j,
|
||||
9.0000+0.0000j])
|
||||
#endif
|
||||
|
||||
// Runs an 8-point Rfft on a fixed input ten times with the same Rfft object
// and compares the packed output with the torch.fft.rfft reference listed
// above. The odd slots hold I[k], whose sign is opposite to torch's imaginary
// part, hence the negation in the imaginary-part checks.
TEST(Rfft, TestRfft) {
  knf::Rfft fft(8);
  for (int32_t i = 0; i != 10; ++i) {
    // A plain array is enough here and avoids relying on <vector> being
    // pulled in transitively.
    float d[8] = {1, -1, 3, 8, 20, 6, 0, 2};
    fft.Compute(d);

    // d[0] = R[0], d[1] = R[n/2]; compared with a floating-point tolerance
    // instead of exact equality.
    EXPECT_FLOAT_EQ(d[0], 39.0f);
    EXPECT_FLOAT_EQ(d[1], 9.0f);

    EXPECT_NEAR(d[2], -28.1924, 1e-3);
    EXPECT_NEAR(-d[3], -2.2929, 1e-3);

    EXPECT_NEAR(d[4], 18, 1e-3);
    EXPECT_NEAR(-d[5], 5, 1e-3);

    EXPECT_NEAR(d[6], -9.8076, 1e-3);
    EXPECT_NEAR(-d[7], 3.7071, 1e-3);
  }
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,2 @@
|
||||
# Descend into the two sub-projects of this directory.
# NOTE(review): presumably csrc builds the pybind11 extension module and tests
# registers the Python tests - confirm against the directory layout.
add_subdirectory(csrc)
|
||||
add_subdirectory(tests)
|
||||
@ -0,0 +1,28 @@
|
||||
# Python extension module that wraps kaldi-native-fbank-core with pybind11.
pybind11_add_module(_kaldi_native_fbank
  feature-fbank.cc
  feature-window.cc
  kaldi-native-fbank.cc
  mel-computations.cc
  online-feature.cc
  utils.cc
)

if(APPLE)
  # Ask the interpreter for its site-packages directory so that it can be added
  # to the module's rpath. `distutils` was removed in Python 3.12, so query the
  # standard `sysconfig` module instead of `distutils.sysconfig`.
  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" -c "import sysconfig; print(sysconfig.get_path('purelib'))"
    OUTPUT_STRIP_TRAILING_WHITESPACE
    OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
  )
  message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
endif()

if(NOT WIN32)
  # Also search the lib directory under kaldi_native_fbank, relative to
  # ${kaldi_native_fbank_rpath_origin}, at run time.
  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${kaldi_native_fbank_rpath_origin}/kaldi_native_fbank/lib")
endif()

target_link_libraries(_kaldi_native_fbank PRIVATE kaldi-native-fbank-core)

install(TARGETS _kaldi_native_fbank
  DESTINATION ../
)
|
||||
@ -0,0 +1,57 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-fbank.h"
|
||||
#include "kaldi-native-fbank/python/csrc/utils.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
static void PybindFbankOptions(py::module &m) { // NOLINT
|
||||
using PyClass = FbankOptions;
|
||||
py::class_<PyClass>(m, "FbankOptions")
|
||||
.def(py::init<>())
|
||||
.def_readwrite("frame_opts", &PyClass::frame_opts)
|
||||
.def_readwrite("mel_opts", &PyClass::mel_opts)
|
||||
.def_readwrite("use_energy", &PyClass::use_energy)
|
||||
.def_readwrite("energy_floor", &PyClass::energy_floor)
|
||||
.def_readwrite("raw_energy", &PyClass::raw_energy)
|
||||
.def_readwrite("htk_compat", &PyClass::htk_compat)
|
||||
.def_readwrite("use_log_fbank", &PyClass::use_log_fbank)
|
||||
.def_readwrite("use_power", &PyClass::use_power)
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static(
|
||||
"from_dict",
|
||||
[](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }));
|
||||
}
|
||||
|
||||
void PybindFeatureFbank(py::module &m) { // NOLINT
|
||||
PybindFbankOptions(m);
|
||||
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,30 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
|
||||
#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Registers the Python bindings for FbankOptions on module `m`.
void PybindFeatureFbank(py::module &m); // NOLINT
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
|
||||
@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/feature-window.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||
#include "kaldi-native-fbank/python/csrc/utils.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
static void PybindFrameExtractionOptions(py::module &m) { // NOLINT
|
||||
using PyClass = FrameExtractionOptions;
|
||||
py::class_<PyClass>(m, "FrameExtractionOptions")
|
||||
.def(py::init<>())
|
||||
.def_readwrite("samp_freq", &PyClass::samp_freq)
|
||||
.def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
|
||||
.def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
|
||||
.def_readwrite("dither", &PyClass::dither)
|
||||
.def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
|
||||
.def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset)
|
||||
.def_readwrite("window_type", &PyClass::window_type)
|
||||
.def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two)
|
||||
.def_readwrite("blackman_coeff", &PyClass::blackman_coeff)
|
||||
.def_readwrite("snip_edges", &PyClass::snip_edges)
|
||||
.def_readwrite("max_feature_vectors", &PyClass::max_feature_vectors)
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static("from_dict",
|
||||
[](py::dict dict) -> PyClass {
|
||||
return FrameExtractionOptionsFromDict(dict);
|
||||
})
|
||||
#if 0
|
||||
.def_readwrite("allow_downsample",
|
||||
&PyClass::allow_downsample)
|
||||
.def_readwrite("allow_upsample", &PyClass::allow_upsample)
|
||||
#endif
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass {
|
||||
return FrameExtractionOptionsFromDict(dict);
|
||||
}));
|
||||
}
|
||||
|
||||
void PybindFeatureWindow(py::module &m) { // NOLINT
|
||||
PybindFrameExtractionOptions(m);
|
||||
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,30 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
|
||||
#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Registers the Python bindings for FrameExtractionOptions on module `m`.
void PybindFeatureWindow(py::module &m); // NOLINT
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
|
||||
@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
|
||||
#include "kaldi-native-fbank/python/csrc/feature-window.h"
|
||||
#include "kaldi-native-fbank/python/csrc/mel-computations.h"
|
||||
#include "kaldi-native-fbank/python/csrc/online-feature.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Initializer of the `_kaldi_native_fbank` extension module: sets the module
// docstring and registers every binding group on `m`.
PYBIND11_MODULE(_kaldi_native_fbank, m) {
|
||||
m.doc() = "Python wrapper for kaldi native fbank";
|
||||
// The window and mel option bindings are registered before the fbank options.
// NOTE(review): presumably because FbankOptions exposes frame_opts/mel_opts
// of those types - confirm before reordering.
PybindFeatureWindow(m);
|
||||
PybindMelComputations(m);
|
||||
PybindFeatureFbank(m);
|
||||
|
||||
// Registers the OnlineFbank class.
PybindOnlineFeature(m);
|
||||
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
|
||||
#define KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
|
||||
|
||||
#include "pybind11/numpy.h"
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
// Short alias for the pybind11 namespace.
namespace py = pybind11;
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
|
||||
@ -0,0 +1,58 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/mel-computations.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/mel-computations.h"
|
||||
#include "kaldi-native-fbank/python/csrc/utils.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
static void PybindMelBanksOptions(py::module &m) { // NOLINT
|
||||
using PyClass = MelBanksOptions;
|
||||
py::class_<PyClass>(m, "MelBanksOptions")
|
||||
.def(py::init<>())
|
||||
.def_readwrite("num_bins", &PyClass::num_bins)
|
||||
.def_readwrite("low_freq", &PyClass::low_freq)
|
||||
.def_readwrite("high_freq", &PyClass::high_freq)
|
||||
.def_readwrite("vtln_low", &PyClass::vtln_low)
|
||||
.def_readwrite("vtln_high", &PyClass::vtln_high)
|
||||
.def_readwrite("debug_mel", &PyClass::debug_mel)
|
||||
.def_readwrite("htk_mode", &PyClass::htk_mode)
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static("from_dict",
|
||||
[](py::dict dict) -> PyClass {
|
||||
return MelBanksOptionsFromDict(dict);
|
||||
})
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass {
|
||||
return MelBanksOptionsFromDict(dict);
|
||||
}));
|
||||
}
|
||||
|
||||
void PybindMelComputations(py::module &m) { // NOLINT
|
||||
PybindMelBanksOptions(m);
|
||||
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,30 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
|
||||
#define KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Registers the Python bindings for MelBanksOptions on module `m`.
void PybindMelComputations(py::module &m); // NOLINT
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
|
||||
@ -0,0 +1,68 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/online-feature.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/online-feature.h"
|
||||
namespace knf {
|
||||
|
||||
template <typename C>
|
||||
void PybindOnlineFeatureTpl(py::module &m, // NOLINT
|
||||
const std::string &class_name,
|
||||
const std::string &class_help_doc = "") {
|
||||
using PyClass = OnlineGenericBaseFeature<C>;
|
||||
using Options = typename C::Options;
|
||||
py::class_<PyClass>(m, class_name.c_str(), class_help_doc.c_str())
|
||||
.def(py::init<const Options &>(), py::arg("opts"))
|
||||
.def_property_readonly("dim", &PyClass::Dim)
|
||||
.def_property_readonly("frame_shift_in_seconds",
|
||||
&PyClass::FrameShiftInSeconds)
|
||||
.def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
|
||||
.def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
|
||||
.def(
|
||||
"get_frame",
|
||||
[](py::object obj, int32_t frame) {
|
||||
auto *self = obj.cast<PyClass *>();
|
||||
const float *f = self->GetFrame(frame);
|
||||
return py::array_t<float>({self->Dim()}, // shape
|
||||
{sizeof(float)}, // stride in bytes
|
||||
f, // ptr
|
||||
obj); // it will increase the reference
|
||||
// count of **this** vector
|
||||
},
|
||||
py::arg("frame"))
|
||||
.def(
|
||||
"accept_waveform",
|
||||
[](PyClass &self, float sampling_rate,
|
||||
const std::vector<float> &waveform) {
|
||||
self.AcceptWaveform(sampling_rate, waveform.data(),
|
||||
waveform.size());
|
||||
},
|
||||
py::arg("sampling_rate"), py::arg("waveform"),
|
||||
py::call_guard<py::gil_scoped_release>())
|
||||
.def("input_finished", &PyClass::InputFinished);
|
||||
}
|
||||
|
||||
void PybindOnlineFeature(py::module &m) { // NOLINT
|
||||
PybindOnlineFeatureTpl<FbankComputer>(m, "OnlineFbank");
|
||||
}
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,30 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
|
||||
#define KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
|
||||
|
||||
namespace knf {
|
||||
|
||||
// Registers the Python bindings for the OnlineFbank class on module `m`.
void PybindOnlineFeature(py::module &m); // NOLINT
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
|
||||
@ -0,0 +1,136 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kaldi-native-fbank/python/csrc/utils.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||
|
||||
#define FROM_DICT(type, key) \
|
||||
if (dict.contains(#key)) { \
|
||||
opts.key = py::type(dict[#key]); \
|
||||
}
|
||||
|
||||
#define AS_DICT(key) dict[#key] = opts.key
|
||||
|
||||
namespace knf {
|
||||
|
||||
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict) {
|
||||
FrameExtractionOptions opts;
|
||||
|
||||
FROM_DICT(float_, samp_freq);
|
||||
FROM_DICT(float_, frame_shift_ms);
|
||||
FROM_DICT(float_, frame_length_ms);
|
||||
FROM_DICT(float_, dither);
|
||||
FROM_DICT(float_, preemph_coeff);
|
||||
FROM_DICT(bool_, remove_dc_offset);
|
||||
FROM_DICT(str, window_type);
|
||||
FROM_DICT(bool_, round_to_power_of_two);
|
||||
FROM_DICT(float_, blackman_coeff);
|
||||
FROM_DICT(bool_, snip_edges);
|
||||
FROM_DICT(int_, max_feature_vectors);
|
||||
|
||||
return opts;
|
||||
}
|
||||
|
||||
py::dict AsDict(const FrameExtractionOptions &opts) {
|
||||
py::dict dict;
|
||||
|
||||
AS_DICT(samp_freq);
|
||||
AS_DICT(frame_shift_ms);
|
||||
AS_DICT(frame_length_ms);
|
||||
AS_DICT(dither);
|
||||
AS_DICT(preemph_coeff);
|
||||
AS_DICT(remove_dc_offset);
|
||||
AS_DICT(window_type);
|
||||
AS_DICT(round_to_power_of_two);
|
||||
AS_DICT(blackman_coeff);
|
||||
AS_DICT(snip_edges);
|
||||
AS_DICT(max_feature_vectors);
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
MelBanksOptions MelBanksOptionsFromDict(py::dict dict) {
|
||||
MelBanksOptions opts;
|
||||
|
||||
FROM_DICT(int_, num_bins);
|
||||
FROM_DICT(float_, low_freq);
|
||||
FROM_DICT(float_, high_freq);
|
||||
FROM_DICT(float_, vtln_low);
|
||||
FROM_DICT(float_, vtln_high);
|
||||
FROM_DICT(bool_, debug_mel);
|
||||
FROM_DICT(bool_, htk_mode);
|
||||
|
||||
return opts;
|
||||
}
|
||||
py::dict AsDict(const MelBanksOptions &opts) {
|
||||
py::dict dict;
|
||||
|
||||
AS_DICT(num_bins);
|
||||
AS_DICT(low_freq);
|
||||
AS_DICT(high_freq);
|
||||
AS_DICT(vtln_low);
|
||||
AS_DICT(vtln_high);
|
||||
AS_DICT(debug_mel);
|
||||
AS_DICT(htk_mode);
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
FbankOptions FbankOptionsFromDict(py::dict dict) {
|
||||
FbankOptions opts;
|
||||
|
||||
if (dict.contains("frame_opts")) {
|
||||
opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
|
||||
}
|
||||
|
||||
if (dict.contains("mel_opts")) {
|
||||
opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
|
||||
}
|
||||
|
||||
FROM_DICT(bool_, use_energy);
|
||||
FROM_DICT(float_, energy_floor);
|
||||
FROM_DICT(bool_, raw_energy);
|
||||
FROM_DICT(bool_, htk_compat);
|
||||
FROM_DICT(bool_, use_log_fbank);
|
||||
FROM_DICT(bool_, use_power);
|
||||
|
||||
return opts;
|
||||
}
|
||||
|
||||
py::dict AsDict(const FbankOptions &opts) {
|
||||
py::dict dict;
|
||||
|
||||
dict["frame_opts"] = AsDict(opts.frame_opts);
|
||||
dict["mel_opts"] = AsDict(opts.mel_opts);
|
||||
AS_DICT(use_energy);
|
||||
AS_DICT(energy_floor);
|
||||
AS_DICT(raw_energy);
|
||||
AS_DICT(htk_compat);
|
||||
AS_DICT(use_log_fbank);
|
||||
AS_DICT(use_power);
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
#undef FROM_DICT
|
||||
#undef AS_DICT
|
||||
|
||||
} // namespace knf
|
||||
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
|
||||
#define KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-fbank.h"
|
||||
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||
#include "kaldi-native-fbank/csrc/mel-computations.h"
|
||||
#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
|
||||
|
||||
/*
|
||||
* This file contains code about `from_dict` and
|
||||
* `as_dict` for various options in kaldi-native-fbank.
|
||||
*
|
||||
* Regarding `from_dict`, users don't need to provide
|
||||
* all the fields in the options. If some fields
|
||||
* are not provided, it just uses the default one.
|
||||
*
|
||||
* If the provided dict in `from_dict` is empty,
|
||||
* all fields use their default values.
|
||||
*/
|
||||
|
||||
namespace knf {
|
||||
|
||||
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const FrameExtractionOptions &opts);
|
||||
|
||||
MelBanksOptions MelBanksOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const MelBanksOptions &opts);
|
||||
|
||||
FbankOptions FbankOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const FbankOptions &opts);
|
||||
|
||||
} // namespace knf
|
||||
|
||||
#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
|
||||
@ -0,0 +1,6 @@
|
||||
from _kaldi_native_fbank import (
|
||||
FrameExtractionOptions,
|
||||
MelBanksOptions,
|
||||
OnlineFbank,
|
||||
FbankOptions,
|
||||
)
|
||||
@ -0,0 +1,31 @@
|
||||
# Registers the Python script `source` (relative to the current source
# directory) as a CTest test named "<file stem>_py".
#
# The test is run with ${PYTHON_EXECUTABLE}; PYTHONPATH is extended with the
# parent of the current list directory and with the directory that holds the
# built _kaldi_native_fbank target.
function(kaldi_native_fbank_add_py_test source)
  message(STATUS "source: ${source}")

  get_filename_component(test_name ${source} NAME_WE)
  set(test_name "${test_name}_py")

  get_filename_component(kaldi_native_fbank_path ${CMAKE_CURRENT_LIST_DIR} DIRECTORY)

  add_test(
    NAME ${test_name}
    COMMAND "${PYTHON_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
  )

  set_property(
    TEST ${test_name}
    PROPERTY ENVIRONMENT
      "PYTHONPATH=${kaldi_native_fbank_path}:$<TARGET_FILE_DIR:_kaldi_native_fbank>:$ENV{PYTHONPATH}"
  )
endfunction()
|
||||
|
||||
# Python test scripts to register with CTest.
# please sort the files in alphabetic order
set(py_test_files
  test_fbank_options.py
  test_frame_extraction_options.py
  test_mel_bank_options.py
)

# The Python tests are only registered when tests are enabled for the project.
if(KALDI_NATIVE_FBANK_BUILD_TESTS)
  foreach(source IN LISTS py_test_files)
    kaldi_native_fbank_add_py_test(${source})
  endforeach()
endif()
|
||||
@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
|
||||
import pickle
|
||||
|
||||
import kaldi_native_fbank as knf
|
||||
|
||||
|
||||
def test_default():
    """A default-constructed ``FbankOptions`` carries the expected defaults."""
    opts = knf.FbankOptions()

    # Nested frame-extraction options.
    frame = opts.frame_opts
    assert frame.samp_freq == 16000
    assert frame.frame_shift_ms == 10.0
    assert frame.frame_length_ms == 25.0
    assert frame.dither == 1.0
    assert abs(frame.preemph_coeff - 0.97) < 1e-6
    assert frame.remove_dc_offset is True
    assert frame.window_type == "povey"
    assert frame.round_to_power_of_two is True
    assert abs(frame.blackman_coeff - 0.42) < 1e-6
    assert frame.snip_edges is True

    # Nested mel-bank options.
    mel = opts.mel_opts
    assert mel.num_bins == 23
    assert mel.low_freq == 20
    assert mel.high_freq == 0
    assert mel.vtln_low == 100
    assert mel.vtln_high == -500
    assert mel.debug_mel is False
    assert mel.htk_mode is False

    # Top-level fbank options.
    assert opts.use_energy is False
    assert opts.energy_floor == 0.0
    assert opts.raw_energy is True
    assert opts.htk_compat is False
    assert opts.use_log_fbank is True
    assert opts.use_power is True
|
||||
|
||||
|
||||
def test_set_get():
    """Every top-level field of ``FbankOptions`` can be written and read back."""
    opts = knf.FbankOptions()

    # (field, value) pairs, applied in this order.
    updates = (
        ("use_energy", True),
        ("energy_floor", 1),
        ("raw_energy", False),
        ("htk_compat", True),
        ("use_log_fbank", False),
        ("use_power", False),
    )
    for field, value in updates:
        setattr(opts, field, value)
        observed = getattr(opts, field)
        if isinstance(value, bool):
            # Boolean fields must come back as the bool singletons.
            assert observed is value
        else:
            assert observed == value
|
||||
|
||||
|
||||
def test_set_get_frame_opts():
    """Fields reached through ``opts.frame_opts`` can be written and read back."""
    opts = knf.FbankOptions()

    # (field, value) pairs, applied in this order.
    updates = (
        ("samp_freq", 44100),
        ("frame_shift_ms", 20.5),
        ("frame_length_ms", 1),
        ("dither", 0.5),
        ("preemph_coeff", 0.25),
        ("remove_dc_offset", False),
        ("window_type", "hanning"),
        ("round_to_power_of_two", False),
        ("blackman_coeff", 0.25),
        ("snip_edges", False),
    )
    for field, value in updates:
        # Go through opts.frame_opts on every access, so the write must be
        # visible through the parent object.
        setattr(opts.frame_opts, field, value)
        observed = getattr(opts.frame_opts, field)
        if isinstance(value, bool):
            assert observed is value
        else:
            assert observed == value
|
||||
|
||||
|
||||
def test_set_get_mel_opts():
    """Fields reached through ``opts.mel_opts`` can be written and read back."""
    opts = knf.FbankOptions()

    # (field, value) pairs, applied in this order.
    updates = (
        ("num_bins", 100),
        ("low_freq", 22),
        ("high_freq", 1),
        ("vtln_low", 101),
        ("vtln_high", -100),
        ("debug_mel", True),
        ("htk_mode", True),
    )
    for field, value in updates:
        # Go through opts.mel_opts on every access, so the write must be
        # visible through the parent object.
        setattr(opts.mel_opts, field, value)
        observed = getattr(opts.mel_opts, field)
        if isinstance(value, bool):
            assert observed is value
        else:
            assert observed == value
|
||||
|
||||
|
||||
def test_from_empty_dict():
    """``from_dict({})`` prints the same as a default-constructed object."""
    defaults = knf.FbankOptions()
    from_empty = knf.FbankOptions.from_dict({})

    assert str(from_empty) == str(defaults)
|
||||
|
||||
|
||||
def test_from_dict_partial():
    """``from_dict`` accepts a partial dict, including partial nested dicts."""
    mel_cfg = {"num_bins": 80, "vtln_low": 1}
    cfg = {
        "energy_floor": 10.5,
        "htk_compat": True,
        "mel_opts": mel_cfg,
        "frame_opts": {"window_type": "hanning"},
    }

    opts = knf.FbankOptions.from_dict(cfg)
    assert opts.energy_floor == 10.5
    assert opts.htk_compat is True
    assert opts.mel_opts.num_bins == 80
    assert opts.mel_opts.vtln_low == 1
    assert opts.frame_opts.window_type == "hanning"

    # The nested mel options must match options built from the same sub-dict.
    standalone_mel = knf.MelBanksOptions.from_dict(cfg["mel_opts"])
    assert str(opts.mel_opts) == str(standalone_mel)
|
||||
|
||||
|
||||
def test_from_dict_full_and_as_dict():
    """``as_dict`` reflects modified fields and round-trips through ``from_dict``."""
    opts = knf.FbankOptions()
    opts.htk_compat = True
    opts.mel_opts.num_bins = 80
    opts.frame_opts.samp_freq = 10

    as_dict = opts.as_dict()
    assert as_dict["htk_compat"] is True
    assert as_dict["mel_opts"]["num_bins"] == 80
    assert as_dict["frame_opts"]["samp_freq"] == 10

    # The nested dicts equal what the standalone option classes produce.
    expected_mel = knf.MelBanksOptions()
    expected_mel.num_bins = 80
    assert as_dict["mel_opts"] == expected_mel.as_dict()

    expected_frame = knf.FrameExtractionOptions()
    expected_frame.samp_freq = 10
    assert as_dict["frame_opts"] == expected_frame.as_dict()

    # Round trip: dict -> options prints the same as the source object.
    round_tripped = knf.FbankOptions.from_dict(as_dict)
    assert str(round_tripped) == str(opts)

    # Editing the dict before from_dict is honoured.
    as_dict["htk_compat"] = False
    edited = knf.FbankOptions.from_dict(as_dict)
    assert edited.htk_compat is False
|
||||
|
||||
|
||||
def test_pickle():
    """A modified ``FbankOptions`` survives a pickle round trip."""
    opts = knf.FbankOptions()
    opts.use_energy = True
    opts.use_power = False
    opts.frame_opts.samp_freq = 44100
    opts.mel_opts.num_bins = 100

    restored = pickle.loads(pickle.dumps(opts))

    assert str(opts) == str(restored)
|
||||
|
||||
|
||||
def main():
    """Runs every test in this file, in definition order."""
    for test in (
        test_default,
        test_set_get,
        test_set_get_frame_opts,
        test_set_get_mel_opts,
        test_from_empty_dict,
        test_from_dict_partial,
        test_from_dict_full_and_as_dict,
        test_pickle,
    ):
        test()


if __name__ == "__main__":
    main()
|
||||
@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
import pickle
|
||||
|
||||
import kaldi_native_fbank as knf
|
||||
|
||||
|
||||
def test_default():
    """A default-constructed ``FrameExtractionOptions`` has the expected defaults."""
    opts = knf.FrameExtractionOptions()

    # Fields compared exactly.
    exact = (
        ("samp_freq", 16000),
        ("frame_shift_ms", 10.0),
        ("frame_length_ms", 25.0),
        ("dither", 1.0),
        ("window_type", "povey"),
    )
    for field, expected in exact:
        assert getattr(opts, field) == expected

    # Fields compared with a tolerance.
    assert abs(opts.preemph_coeff - 0.97) < 1e-6
    assert abs(opts.blackman_coeff - 0.42) < 1e-6

    # Boolean flags must be the True singleton.
    for flag in ("remove_dc_offset", "round_to_power_of_two", "snip_edges"):
        assert getattr(opts, flag) is True
|
||||
|
||||
|
||||
def test_set_get():
    """Every field of ``FrameExtractionOptions`` can be written and read back."""
    opts = knf.FrameExtractionOptions()

    # (field, value) pairs, applied in this order.
    updates = (
        ("samp_freq", 44100),
        ("frame_shift_ms", 20.5),
        ("frame_length_ms", 1),
        ("dither", 0.5),
        ("preemph_coeff", 0.25),
        ("remove_dc_offset", False),
        ("window_type", "hanning"),
        ("round_to_power_of_two", False),
        ("blackman_coeff", 0.25),
        ("snip_edges", False),
    )
    for field, value in updates:
        setattr(opts, field, value)
        observed = getattr(opts, field)
        if isinstance(value, bool):
            # Boolean fields must come back as the bool singletons.
            assert observed is value
        else:
            assert observed == value
|
||||
|
||||
|
||||
def test_from_empty_dict():
    """``from_dict({})`` prints the same as a default-constructed object."""
    defaults = knf.FrameExtractionOptions()
    from_empty = knf.FrameExtractionOptions.from_dict({})

    assert str(from_empty) == str(defaults)
|
||||
|
||||
|
||||
def test_from_dict_partial():
    """Only the keys present in the dict differ from the defaults."""
    from_partial = knf.FrameExtractionOptions.from_dict(
        {"samp_freq": 10, "frame_shift_ms": 2}
    )
    printed = str(from_partial)

    reference = knf.FrameExtractionOptions()
    assert printed != str(reference)

    # Matching just one of the two overridden fields is not enough.
    reference.samp_freq = 10
    assert printed != str(reference)

    # Matching both makes the printed forms identical.
    reference.frame_shift_ms = 2
    assert printed == str(reference)

    # Moving one field away again breaks the match.
    reference.frame_shift_ms = 3
    assert printed != str(reference)
|
||||
|
||||
|
||||
def test_from_dict_full_and_as_dict():
    """``as_dict`` mirrors the attributes and round-trips through ``from_dict``."""
    opts = knf.FrameExtractionOptions()
    opts.samp_freq = 20
    opts.frame_length_ms = 100

    as_dict = opts.as_dict()
    # Every dict entry equals the attribute of the same name.
    for key in as_dict:
        assert as_dict[key] == getattr(opts, key)

    round_tripped = knf.FrameExtractionOptions.from_dict(as_dict)
    assert str(round_tripped) == str(opts)

    # Editing the dict before from_dict is honoured.
    as_dict["window_type"] = "hanning"
    edited = knf.FrameExtractionOptions.from_dict(as_dict)
    assert edited.window_type == "hanning"
|
||||
|
||||
|
||||
def test_pickle():
    """A modified ``FrameExtractionOptions`` survives a pickle round trip."""
    opts = knf.FrameExtractionOptions()
    opts.samp_freq = 44100
    opts.dither = 5.5

    restored = pickle.loads(pickle.dumps(opts))

    assert str(opts) == str(restored)
|
||||
|
||||
|
||||
def main():
    """Runs every test in this file, in definition order."""
    for test in (
        test_default,
        test_set_get,
        test_from_empty_dict,
        test_from_dict_partial,
        test_from_dict_full_and_as_dict,
        test_pickle,
    ):
        test()


if __name__ == "__main__":
    main()
|
||||
@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
import pickle
|
||||
|
||||
import kaldi_native_fbank as knf
|
||||
|
||||
|
||||
def test_default():
    """A default-constructed ``MelBanksOptions`` has the expected defaults."""
    opts = knf.MelBanksOptions()

    # Numeric fields.
    numeric = (
        ("num_bins", 25),
        ("low_freq", 20),
        ("high_freq", 0),
        ("vtln_low", 100),
        ("vtln_high", -500),
    )
    for field, expected in numeric:
        assert getattr(opts, field) == expected

    # Boolean flags must be the False singleton.
    for flag in ("debug_mel", "htk_mode"):
        assert getattr(opts, flag) is False
|
||||
|
||||
|
||||
def test_set_get():
    """Every field of ``MelBanksOptions`` can be written and read back."""
    opts = knf.MelBanksOptions()

    # (field, value) pairs, applied in this order.
    updates = (
        ("num_bins", 100),
        ("low_freq", 22),
        ("high_freq", 1),
        ("vtln_low", 101),
        ("vtln_high", -100),
        ("debug_mel", True),
        ("htk_mode", True),
    )
    for field, value in updates:
        setattr(opts, field, value)
        observed = getattr(opts, field)
        if isinstance(value, bool):
            # Boolean fields must come back as the bool singletons.
            assert observed is value
        else:
            assert observed == value
|
||||
|
||||
|
||||
def test_from_empty_dict():
    """``from_dict({})`` prints the same as a default-constructed object."""
    defaults = knf.MelBanksOptions()
    from_empty = knf.MelBanksOptions.from_dict({})

    assert str(from_empty) == str(defaults)
|
||||
|
||||
|
||||
def test_from_dict_partial():
    """Only the keys present in the dict differ from the defaults."""
    from_partial = knf.MelBanksOptions.from_dict({"num_bins": 10, "debug_mel": True})
    printed = str(from_partial)

    reference = knf.MelBanksOptions()
    assert printed != str(reference)

    # Matching just one of the two overridden fields is not enough.
    reference.num_bins = 10
    assert printed != str(reference)

    # Matching both makes the printed forms identical.
    reference.debug_mel = True
    assert printed == str(reference)

    # Reverting one field breaks the match again.
    reference.debug_mel = False
    assert printed != str(reference)
|
||||
|
||||
|
||||
def test_from_dict_full_and_as_dict():
    """``as_dict`` mirrors the attributes and round-trips through ``from_dict``."""
    opts = knf.MelBanksOptions()
    opts.num_bins = 80
    opts.vtln_high = 2

    as_dict = opts.as_dict()
    # Every dict entry equals the attribute of the same name.
    for key in as_dict:
        assert as_dict[key] == getattr(opts, key)

    round_tripped = knf.MelBanksOptions.from_dict(as_dict)
    assert str(round_tripped) == str(opts)

    # Editing the dict before from_dict is honoured.
    as_dict["htk_mode"] = True
    edited = knf.MelBanksOptions.from_dict(as_dict)
    assert edited.htk_mode is True
|
||||
|
||||
|
||||
def test_pickle():
    """A modified ``MelBanksOptions`` survives a pickle round trip."""
    opts = knf.MelBanksOptions()
    opts.num_bins = 100
    opts.low_freq = 22

    restored = pickle.loads(pickle.dumps(opts))

    assert str(opts) == str(restored)
|
||||
|
||||
|
||||
def main():
    """Runs every test in this file, in definition order."""
    for test in (
        test_default,
        test_set_get,
        test_from_empty_dict,
        test_from_dict_partial,
        test_from_dict_full_and_as_dict,
        test_pickle,
    ):
        test()


if __name__ == "__main__":
    main()
|
||||
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
|
||||
# kaldifeat is an optional reference implementation. When it is not
# installed, the comparison is skipped and the script exits successfully.
try:
    import kaldifeat
except ImportError:
    # Catch only a failed import: a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated errors.
    print("Please install kaldifeat first")
    sys.exit(0)
|
||||
|
||||
import kaldi_native_fbank as knf
|
||||
import torch
|
||||
|
||||
|
||||
def main():
    """Feeds the same random waveform to kaldifeat and to kaldi_native_fbank
    and checks that both yield the same number of frames and matching frames
    (absolute tolerance 1e-3)."""
    sampling_rate = 16000
    samples = torch.randn(16000 * 10)

    def configure(opts):
        # Identical settings are applied to both implementations.
        opts.frame_opts.dither = 0
        opts.mel_opts.num_bins = 80
        opts.frame_opts.snip_edges = False
        opts.mel_opts.debug_mel = False
        return opts

    # Reference features from kaldifeat (takes the tensor directly).
    online_fbank = kaldifeat.OnlineFbank(configure(kaldifeat.FbankOptions()))
    online_fbank.accept_waveform(sampling_rate, samples)

    # Features from kaldi_native_fbank (takes a plain Python list).
    fbank = knf.OnlineFbank(configure(knf.FbankOptions()))
    fbank.accept_waveform(sampling_rate, samples.tolist())

    assert online_fbank.num_frames_ready == fbank.num_frames_ready

    for i in range(fbank.num_frames_ready):
        expected = online_fbank.get_frame(i)
        actual = torch.from_numpy(fbank.get_frame(i))
        # On failure, report the frame index and the largest absolute error.
        assert torch.allclose(expected, actual, atol=1e-3), (
            i,
            (expected - actual).abs().max(),
        )


if __name__ == "__main__":
    # Fixed seed so that the random waveform is reproducible.
    torch.manual_seed(20220825)
    main()
    print("success")
|
||||
126
funasr/runtime/onnxruntime/kaldi-native-fbank/scripts/check_style_cpplint.sh
Executable file
126
funasr/runtime/onnxruntime/kaldi-native-fbank/scripts/check_style_cpplint.sh
Executable file
@ -0,0 +1,126 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2020 Mobvoi Inc. (authors: Fangjun Kuang)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# (1) To check files of the last commit
|
||||
# ./scripts/check_style_cpplint.sh
|
||||
#
|
||||
# (2) To check changed files not committed yet
|
||||
# ./scripts/check_style_cpplint.sh 1
|
||||
#
|
||||
# (3) To check all files in the project
|
||||
# ./scripts/check_style_cpplint.sh 2
|
||||
|
||||
|
||||
# Version of cpplint that is downloaded into the build directory.
cpplint_version="1.5.4"

# Resolve the directory of this script and the project root above it.
# All expansions are quoted so that a checkout path containing spaces works.
cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
kaldi_native_fbank_dir=$(cd "$cur_dir/.." && pwd)

build_dir=$kaldi_native_fbank_dir/build
mkdir -p "$build_dir"

cpplint_src=$build_dir/cpplint-${cpplint_version}/cpplint.py

# Download and unpack cpplint once; later runs reuse the unpacked directory.
if [ ! -d "$build_dir/cpplint-${cpplint_version}" ]; then
  pushd "$build_dir"
  if command -v wget &> /dev/null; then
    wget https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz
  elif command -v curl &> /dev/null; then
    curl -O -SL https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz
  else
    echo "Please install wget or curl to download cpplint"
    exit 1
  fi
  tar xf ${cpplint_version}.tar.gz
  rm ${cpplint_version}.tar.gz

  # cpplint will report the following error for: __host__ __device__ (
  #
  #     Extra space before ( in function call  [whitespace/parens] [4]
  #
  # the following patch disables the above error
  sed -i "3490i\ not Search(r'__host__ __device__\\\s+\\\(', fncall) and" "$cpplint_src"
  popd
fi

# Load the shared helper functions (this script calls ok and abort).
source "$kaldi_native_fbank_dir/scripts/utils.sh"
|
||||
|
||||
# Prints "true" when the file name in $1 ends in .cc, .h or .cu,
# and "false" otherwise.
function is_source_code_file() {
  if [[ "$1" == *.cc || "$1" == *.h || "$1" == *.cu ]]; then
    echo true
  else
    echo false
  fi
}
|
||||
|
||||
# Runs cpplint on the file given as $1. On a cpplint failure, abort is
# called with the file name.
# Both paths are quoted so that spaces in them do not split the arguments.
function check_style() {
  python3 "$cpplint_src" "$1" || abort "$1"
}
|
||||
|
||||
# Prints, on a single line, the names of the files that differ from HEAD^1.
function check_last_commit() {
  files=$(git diff --name-only --diff-filter=ACDMRUXB HEAD^1)
  # Left unquoted on purpose: joins the names with single spaces.
  echo $files
}
|
||||
|
||||
# Prints, on a single line, the tracked files that `git status` reports as
# changed.
function check_current_dir() {
  # A status line with four fields means a file has been renamed; its last
  # field is printed. Otherwise the second field is printed.
  files=$(git status -s -uno --porcelain | awk '{ print (NF == 4 ? $NF : $2) }')

  # Left unquoted on purpose: joins the names with single spaces.
  echo $files
}
|
||||
|
||||
# Selects the files to check according to $1 and runs the style check on
# each existing C++ source file among them.
#
#   1          files changed in the working tree
#   2          all *.h, *.cc and *.cu files under kaldi-native-fbank/
#   otherwise  files touched by the last commit
function do_check() {
  case "$1" in
    1)
      echo "Check changed files"
      files=$(check_current_dir)
      ;;
    2)
      echo "Check all files"
      files=$(find $kaldi_native_fbank_dir/kaldi-native-fbank -name "*.h" -o -name "*.cc" -o -name "*.cu")
      ;;
    *)
      echo "Check last commit"
      files=$(check_last_commit)
      ;;
  esac

  for f in $files; do
    # Skip everything that is not a C++ source file.
    if [[ "$(is_source_code_file $f)" != true ]]; then
      continue
    fi
    # Listed files may no longer exist on disk; only existing ones are linted.
    [[ -f $f ]] && check_style $f
  done
}
|
||||
|
||||
# Entry point: runs the check selected by the first argument, then prints
# the success message.
function main() {
  do_check "$@"

  ok "Great! Style check passed!"
}

# File names are handled relative to the project root, so switch there first.
# Stop instead of checking from the wrong directory if that fails.
cd "$kaldi_native_fbank_dir" || exit 1

main "$@"
|
||||
@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
|
||||
default='\033[0m'
|
||||
bold='\033[1m'
|
||||
red='\033[31m'
|
||||
green='\033[32m'
|
||||
|
||||
# Prints "[OK] <message>" with the tag in bold green.
# The message is passed as a printf argument, not as part of the format
# string, so a '%' in it is printed literally.
function ok() {
  printf "${bold}${green}[OK]${default} %s\n" "$1"
}
|
||||
|
||||
# Prints "[FAILED] <message>" with the tag in bold red.
# The message is passed as a printf argument, not as part of the format
# string, so a '%' in it is printed literally.
function error() {
  printf "${bold}${red}[FAILED]${default} %s\n" "$1"
}
|
||||
|
||||
# Prints "[FAILED] <message>" with the tag in bold red, then exits with
# status 1.
# The message is passed as a printf argument, not as part of the format
# string, so a '%' in it (for example in a file name) is printed literally.
function abort() {
  printf "${bold}${red}[FAILED]${default} %s\n" "$1"
  exit 1
}
|
||||
64
funasr/runtime/onnxruntime/kaldi-native-fbank/setup.py
Normal file
64
funasr/runtime/onnxruntime/kaldi-native-fbank/setup.py
Normal file
@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang)
|
||||
|
||||
import re
|
||||
|
||||
import setuptools
|
||||
|
||||
from cmake.cmake_extension import BuildExtension, bdist_wheel, cmake_extension
|
||||
|
||||
|
||||
def read_long_description():
    """Returns the full text of ``README.md`` in the current directory,
    decoded as UTF-8."""
    with open("README.md", encoding="utf8") as readme_file:
        return readme_file.read()
|
||||
|
||||
|
||||
def get_package_version():
    """Extracts the package version from ``CMakeLists.txt``.

    Looks in ``CMakeLists.txt`` in the current directory for a line of the
    form ``set(KALDI_NATIVE_FBANK_VERSION "x.y.z")``.

    Returns:
      The version string with the surrounding double quotes removed.

    Raises:
      RuntimeError: if no such line is found.
    """
    with open("CMakeLists.txt", encoding="utf8") as f:
        content = f.read()

    match = re.search(r"set\(KALDI_NATIVE_FBANK_VERSION (.*)\)", content)
    if match is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise RuntimeError(
            "Cannot find set(KALDI_NATIVE_FBANK_VERSION ...) in CMakeLists.txt"
        )

    return match.group(1).strip('"')
|
||||
|
||||
|
||||
package_name = "kaldi-native-fbank"

# The package's __init__.py gets a __version__ line for the duration of the
# setup() call only; the line is stripped again afterwards.
_init_py = "kaldi-native-fbank/python/kaldi_native_fbank/__init__.py"

with open(_init_py, "a") as f:
    f.write(f"__version__ = '{get_package_version()}'\n")

try:
    setuptools.setup(
        name=package_name,
        version=get_package_version(),
        author="Fangjun Kuang",
        author_email="csukuangfj@gmail.com",
        package_dir={"kaldi_native_fbank": "kaldi-native-fbank/python/kaldi_native_fbank"},
        packages=["kaldi_native_fbank"],
        url="https://github.com/csukuangfj/kaldi-native-fbank",
        long_description=read_long_description(),
        long_description_content_type="text/markdown",
        ext_modules=[cmake_extension("_kaldi_native_fbank")],
        cmdclass={"build_ext": BuildExtension, "bdist_wheel": bdist_wheel},
        zip_safe=False,
        classifiers=[
            "Programming Language :: C++",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3",
            "Topic :: Scientific/Engineering :: Artificial Intelligence",
        ],
        python_requires=">=3.6.0",
        license="Apache licensed, as found in the LICENSE file",
    )
finally:
    # remove the line __version__ from kaldi-native-fbank/python/kaldi_native_fbank/__init__.py
    # This runs even when setup() fails or exits, so the source tree is
    # never left with the temporary line appended.
    with open(_init_py, "r") as f:
        lines = f.readlines()

    with open(_init_py, "w") as f:
        for line in lines:
            if "__version__" in line:
                # skip __version__ = "x.x.x"
                continue
            f.write(line)
|
||||
@ -0,0 +1,11 @@
|
||||
# gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf.tar.xz
|
||||
|
||||
Go to <https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads/8-3-2019-03> to download the toolchain.
|
||||
|
||||
```bash
|
||||
mkdir /ceph-fj/fangjun/software
|
||||
cd /ceph-fj/fangjun/software
|
||||
tar xvf /path/to/gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf.tar.xz
|
||||
|
||||
export PATH=/ceph-fj/fangjun/software/gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf/bin:$PATH
|
||||
```
|
||||
@ -0,0 +1,17 @@
|
||||
# Copied from https://github.com/Tencent/ncnn/blob/master/toolchains/arm-linux-gnueabihf.toolchain.cmake
#
# Toolchain file for cross compiling with the arm-linux-gnueabihf GCC
# toolchain, which must be reachable through PATH.

# Target platform
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)

# Cross compilers
set(CMAKE_C_COMPILER   "arm-linux-gnueabihf-gcc")
set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")

# Search mode for find_program / find_library / find_path style lookups
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

# Compiler flags: ARMv7-A, hard-float ABI, NEON
set(CMAKE_C_FLAGS   "-march=armv7-a -mfloat-abi=hard -mfpu=neon")
set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon")

# cache flags
set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}"   CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
|
||||
@ -22,7 +22,7 @@ if(WIN32)
|
||||
target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
|
||||
else()
|
||||
|
||||
set(EXTRA_LIBS fftw3f pthread yaml-cpp)
|
||||
set(EXTRA_LIBS fftw3f pthread yaml-cpp csrc)
|
||||
target_include_directories(funasr PUBLIC "/usr/local/opt/fftw/include")
|
||||
target_link_directories(funasr PUBLIC "/usr/local/opt/fftw/lib")
|
||||
|
||||
@ -34,7 +34,7 @@ else()
|
||||
|
||||
target_include_directories(funasr PUBLIC ${FFTW3F_INCLUDE_DIR})
|
||||
target_link_directories(funasr PUBLIC ${FFTW3F_LIBRARY_DIR})
|
||||
include_directories(${ONNXRUNTIME_DIR}/include)
|
||||
include_directories(${ONNXRUNTIME_DIR}/include)
|
||||
endif()
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR}/include)
|
||||
|
||||
@ -1,396 +0,0 @@
|
||||
|
||||
#include "precomp.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Stores the processing mode. Other members of this class branch on it:
// insert() picks the analysis window from it and global_cmvn() picks the
// normalisation from it.
FeatureExtract::FeatureExtract(int mode) : mode(mode) {}

// Nothing to release explicitly.
FeatureExtract::~FeatureExtract() {}
|
||||
|
||||
void FeatureExtract::reset()
|
||||
{
|
||||
speech.reset();
|
||||
fqueue.reset();
|
||||
}
|
||||
|
||||
int FeatureExtract::size()
|
||||
{
|
||||
return fqueue.size();
|
||||
}
|
||||
|
||||
// Appends `len` samples from `din` to the speech buffer and turns every
// complete analysis window into an 80-value mel frame that is pushed to
// the feature queue.
//
//   plan  FFTW real-to-complex plan used for each frame
//   din   input samples
//   len   number of samples in `din`
//   flag  when equal to S_END, frames that start within the last 560
//         samples of the buffer are tagged S_END; all other frames are
//         tagged S_MIDDLE
void FeatureExtract::insert(fftwf_plan plan, float *din, int len, int flag)
{
    // Scratch buffers for one frame. The input is zeroed once; only its
    // first window_size entries are rewritten per frame.
    float *frame_in = (float *)fftwf_malloc(sizeof(float) * fft_size);
    fftwf_complex *frame_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
    memset(frame_in, 0, sizeof(float) * fft_size);

    // Mode 3 uses the second window table, every other mode the first.
    const float *window = (mode == 3) ? (const float *)&window_hamm_hex
                                      : (const float *)&window_hex;

    speech.load(din, len);

    if (mode == 0 || mode == 2 || mode == 3) {
        // Re-initialise the queue with the number of frames about to be produced.
        int num_frames = (speech.size() - window_size) / window_shift + 1;
        fqueue.reinit(num_frames);
    }

    float mel_frame[80];
    int start = 0;
    while (start <= speech.size() - window_size) {
        // Mean of the window, subtracted from every sample below.
        float frame_mean = 0;
        for (int k = 0; k < window_size; k++) {
            frame_mean += speech[start + k];
        }
        frame_mean = frame_mean / window_size;

        // Pre-emphasis (coefficient 0.97) followed by windowing.
        float prev = (float)speech[start] - frame_mean;
        for (int k = 0; k < window_size; k++) {
            float cur = (float)speech[start + k] - frame_mean;
            frame_in[k] = window[k] * (cur - 0.97 * prev);
            prev = cur;
        }

        fftwf_execute_dft_r2c(plan, frame_in, frame_out);
        melspect((float *)frame_out, mel_frame);

        int frame_flag = S_MIDDLE;
        if (flag == S_END && start > speech.size() - 560)
            frame_flag = S_END;

        fqueue.push(mel_frame, frame_flag);

        start = start + window_shift;
    }

    // Hand the first unprocessed offset to the speech buffer.
    speech.update(start);

    fftwf_free(frame_in);
    fftwf_free(frame_out);
}
|
||||
|
||||
// Pops the next feature tensor from the queue into `dout`.
// Returns false, leaving `dout` untouched, when the queue is empty.
bool FeatureExtract::fetch(Tensor<float> *&dout)
{
    if (fqueue.size() < 1)
        return false;

    dout = fqueue.pop();
    return true;
}
|
||||
|
||||
void FeatureExtract::global_cmvn(float *din)
|
||||
{
|
||||
const float *std;
|
||||
const float *mean;
|
||||
|
||||
if (mode < 2) {
|
||||
if (mode == 0) {
|
||||
std = (const float *)global_cmvn_std_hex;
|
||||
mean = (const float *)global_cmvn_mean_hex;
|
||||
} else {
|
||||
std = (const float *)global_cmvn_std_online_hex;
|
||||
mean = (const float *)global_cmvn_mean_online_hex;
|
||||
}
|
||||
|
||||
int i;
|
||||
for (i = 0; i < 80; i++) {
|
||||
float tmp = din[i] < 1e-7 ? 1e-7 : din[i];
|
||||
tmp = log(tmp);
|
||||
din[i] = (tmp - mean[i]) / std[i];
|
||||
}
|
||||
} else {
|
||||
int i;
|
||||
|
||||
int val = 0x34000000;
|
||||
float min_resol = *((float *)&val);
|
||||
|
||||
for (i = 0; i < 80; i++) {
|
||||
float tmp = din[i] < min_resol ? min_resol : din[i];
|
||||
din[i] = log(tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureExtract::melspect(float *din, float *dout)
|
||||
{
|
||||
float fftmag[256];
|
||||
const float *melcoe = (const float *)melcoe_hex;
|
||||
int i;
|
||||
for (i = 0; i < 256; i++) {
|
||||
float real = din[2 * i];
|
||||
float imag = din[2 * i + 1];
|
||||
fftmag[i] = real * real + imag * imag;
|
||||
}
|
||||
dout[0] = melcoe[0] * fftmag[1] + melcoe[1] * fftmag[2];
|
||||
dout[1] = melcoe[2] * fftmag[2];
|
||||
dout[2] = melcoe[3] * fftmag[3];
|
||||
dout[3] = melcoe[4] * fftmag[3] + melcoe[5] * fftmag[4];
|
||||
dout[4] = melcoe[6] * fftmag[4] + melcoe[7] * fftmag[5];
|
||||
dout[5] = melcoe[8] * fftmag[5] + melcoe[9] * fftmag[6];
|
||||
dout[6] = melcoe[10] * fftmag[6] + melcoe[11] * fftmag[7];
|
||||
dout[7] = melcoe[12] * fftmag[7];
|
||||
dout[8] = melcoe[13] * fftmag[8];
|
||||
dout[9] = melcoe[14] * fftmag[8] + melcoe[15] * fftmag[9];
|
||||
dout[10] = melcoe[16] * fftmag[9] + melcoe[17] * fftmag[10];
|
||||
dout[11] = melcoe[18] * fftmag[10] + melcoe[19] * fftmag[11];
|
||||
dout[12] = melcoe[20] * fftmag[11] + melcoe[21] * fftmag[12] +
|
||||
melcoe[22] * fftmag[13];
|
||||
dout[13] = melcoe[23] * fftmag[12] + melcoe[24] * fftmag[13] +
|
||||
melcoe[25] * fftmag[14];
|
||||
dout[14] = melcoe[26] * fftmag[14] + melcoe[27] * fftmag[15];
|
||||
dout[15] = melcoe[28] * fftmag[15] + melcoe[29] * fftmag[16];
|
||||
dout[16] = melcoe[30] * fftmag[16] + melcoe[31] * fftmag[17];
|
||||
dout[17] = melcoe[32] * fftmag[17] + melcoe[33] * fftmag[18];
|
||||
dout[18] = melcoe[34] * fftmag[18] + melcoe[35] * fftmag[19] +
|
||||
melcoe[36] * fftmag[20];
|
||||
dout[19] = melcoe[37] * fftmag[19] + melcoe[38] * fftmag[20] +
|
||||
melcoe[39] * fftmag[21];
|
||||
dout[20] = melcoe[40] * fftmag[21] + melcoe[41] * fftmag[22];
|
||||
dout[21] = melcoe[42] * fftmag[22] + melcoe[43] * fftmag[23] +
|
||||
melcoe[44] * fftmag[24];
|
||||
dout[22] = melcoe[45] * fftmag[23] + melcoe[46] * fftmag[24] +
|
||||
melcoe[47] * fftmag[25];
|
||||
dout[23] = melcoe[48] * fftmag[25] + melcoe[49] * fftmag[26] +
|
||||
melcoe[50] * fftmag[27];
|
||||
dout[24] = melcoe[51] * fftmag[26] + melcoe[52] * fftmag[27] +
|
||||
melcoe[53] * fftmag[28];
|
||||
dout[25] = melcoe[54] * fftmag[28] + melcoe[55] * fftmag[29] +
|
||||
melcoe[56] * fftmag[30];
|
||||
dout[26] = melcoe[57] * fftmag[29] + melcoe[58] * fftmag[30] +
|
||||
melcoe[59] * fftmag[31] + melcoe[60] * fftmag[32];
|
||||
dout[27] = melcoe[61] * fftmag[31] + melcoe[62] * fftmag[32] +
|
||||
melcoe[63] * fftmag[33];
|
||||
dout[28] = melcoe[64] * fftmag[33] + melcoe[65] * fftmag[34] +
|
||||
melcoe[66] * fftmag[35];
|
||||
dout[29] = melcoe[67] * fftmag[34] + melcoe[68] * fftmag[35] +
|
||||
melcoe[69] * fftmag[36] + melcoe[70] * fftmag[37];
|
||||
dout[30] = melcoe[71] * fftmag[36] + melcoe[72] * fftmag[37] +
|
||||
melcoe[73] * fftmag[38] + melcoe[74] * fftmag[39];
|
||||
dout[31] = melcoe[75] * fftmag[38] + melcoe[76] * fftmag[39] +
|
||||
melcoe[77] * fftmag[40] + melcoe[78] * fftmag[41];
|
||||
dout[32] = melcoe[79] * fftmag[40] + melcoe[80] * fftmag[41] +
|
||||
melcoe[81] * fftmag[42] + melcoe[82] * fftmag[43];
|
||||
dout[33] = melcoe[83] * fftmag[42] + melcoe[84] * fftmag[43] +
|
||||
melcoe[85] * fftmag[44] + melcoe[86] * fftmag[45];
|
||||
dout[34] = melcoe[87] * fftmag[44] + melcoe[88] * fftmag[45] +
|
||||
melcoe[89] * fftmag[46] + melcoe[90] * fftmag[47];
|
||||
dout[35] = melcoe[91] * fftmag[46] + melcoe[92] * fftmag[47] +
|
||||
melcoe[93] * fftmag[48] + melcoe[94] * fftmag[49];
|
||||
dout[36] = melcoe[95] * fftmag[48] + melcoe[96] * fftmag[49] +
|
||||
melcoe[97] * fftmag[50] + melcoe[98] * fftmag[51];
|
||||
dout[37] = melcoe[99] * fftmag[50] + melcoe[100] * fftmag[51] +
|
||||
melcoe[101] * fftmag[52] + melcoe[102] * fftmag[53] +
|
||||
melcoe[103] * fftmag[54];
|
||||
dout[38] = melcoe[104] * fftmag[52] + melcoe[105] * fftmag[53] +
|
||||
melcoe[106] * fftmag[54] + melcoe[107] * fftmag[55] +
|
||||
melcoe[108] * fftmag[56];
|
||||
dout[39] = melcoe[109] * fftmag[55] + melcoe[110] * fftmag[56] +
|
||||
melcoe[111] * fftmag[57] + melcoe[112] * fftmag[58];
|
||||
dout[40] = melcoe[113] * fftmag[57] + melcoe[114] * fftmag[58] +
|
||||
melcoe[115] * fftmag[59] + melcoe[116] * fftmag[60] +
|
||||
melcoe[117] * fftmag[61];
|
||||
dout[41] = melcoe[118] * fftmag[59] + melcoe[119] * fftmag[60] +
|
||||
melcoe[120] * fftmag[61] + melcoe[121] * fftmag[62] +
|
||||
melcoe[122] * fftmag[63] + melcoe[123] * fftmag[64];
|
||||
dout[42] = melcoe[124] * fftmag[62] + melcoe[125] * fftmag[63] +
|
||||
melcoe[126] * fftmag[64] + melcoe[127] * fftmag[65] +
|
||||
melcoe[128] * fftmag[66];
|
||||
dout[43] = melcoe[129] * fftmag[65] + melcoe[130] * fftmag[66] +
|
||||
melcoe[131] * fftmag[67] + melcoe[132] * fftmag[68] +
|
||||
melcoe[133] * fftmag[69];
|
||||
dout[44] = melcoe[134] * fftmag[67] + melcoe[135] * fftmag[68] +
|
||||
melcoe[136] * fftmag[69] + melcoe[137] * fftmag[70] +
|
||||
melcoe[138] * fftmag[71] + melcoe[139] * fftmag[72];
|
||||
dout[45] = melcoe[140] * fftmag[70] + melcoe[141] * fftmag[71] +
|
||||
melcoe[142] * fftmag[72] + melcoe[143] * fftmag[73] +
|
||||
melcoe[144] * fftmag[74] + melcoe[145] * fftmag[75];
|
||||
dout[46] = melcoe[146] * fftmag[73] + melcoe[147] * fftmag[74] +
|
||||
melcoe[148] * fftmag[75] + melcoe[149] * fftmag[76] +
|
||||
melcoe[150] * fftmag[77] + melcoe[151] * fftmag[78];
|
||||
dout[47] = melcoe[152] * fftmag[76] + melcoe[153] * fftmag[77] +
|
||||
melcoe[154] * fftmag[78] + melcoe[155] * fftmag[79] +
|
||||
melcoe[156] * fftmag[80] + melcoe[157] * fftmag[81];
|
||||
dout[48] = melcoe[158] * fftmag[79] + melcoe[159] * fftmag[80] +
|
||||
melcoe[160] * fftmag[81] + melcoe[161] * fftmag[82] +
|
||||
melcoe[162] * fftmag[83] + melcoe[163] * fftmag[84];
|
||||
dout[49] = melcoe[164] * fftmag[82] + melcoe[165] * fftmag[83] +
|
||||
melcoe[166] * fftmag[84] + melcoe[167] * fftmag[85] +
|
||||
melcoe[168] * fftmag[86] + melcoe[169] * fftmag[87] +
|
||||
melcoe[170] * fftmag[88];
|
||||
dout[50] = melcoe[171] * fftmag[85] + melcoe[172] * fftmag[86] +
|
||||
melcoe[173] * fftmag[87] + melcoe[174] * fftmag[88] +
|
||||
melcoe[175] * fftmag[89] + melcoe[176] * fftmag[90] +
|
||||
melcoe[177] * fftmag[91];
|
||||
dout[51] = melcoe[178] * fftmag[89] + melcoe[179] * fftmag[90] +
|
||||
melcoe[180] * fftmag[91] + melcoe[181] * fftmag[92] +
|
||||
melcoe[182] * fftmag[93] + melcoe[183] * fftmag[94] +
|
||||
melcoe[184] * fftmag[95];
|
||||
dout[52] = melcoe[185] * fftmag[92] + melcoe[186] * fftmag[93] +
|
||||
melcoe[187] * fftmag[94] + melcoe[188] * fftmag[95] +
|
||||
melcoe[189] * fftmag[96] + melcoe[190] * fftmag[97] +
|
||||
melcoe[191] * fftmag[98];
|
||||
dout[53] = melcoe[192] * fftmag[96] + melcoe[193] * fftmag[97] +
|
||||
melcoe[194] * fftmag[98] + melcoe[195] * fftmag[99] +
|
||||
melcoe[196] * fftmag[100] + melcoe[197] * fftmag[101] +
|
||||
melcoe[198] * fftmag[102];
|
||||
dout[54] = melcoe[199] * fftmag[99] + melcoe[200] * fftmag[100] +
|
||||
melcoe[201] * fftmag[101] + melcoe[202] * fftmag[102] +
|
||||
melcoe[203] * fftmag[103] + melcoe[204] * fftmag[104] +
|
||||
melcoe[205] * fftmag[105] + melcoe[206] * fftmag[106];
|
||||
dout[55] = melcoe[207] * fftmag[103] + melcoe[208] * fftmag[104] +
|
||||
melcoe[209] * fftmag[105] + melcoe[210] * fftmag[106] +
|
||||
melcoe[211] * fftmag[107] + melcoe[212] * fftmag[108] +
|
||||
melcoe[213] * fftmag[109] + melcoe[214] * fftmag[110];
|
||||
dout[56] = melcoe[215] * fftmag[107] + melcoe[216] * fftmag[108] +
|
||||
melcoe[217] * fftmag[109] + melcoe[218] * fftmag[110] +
|
||||
melcoe[219] * fftmag[111] + melcoe[220] * fftmag[112] +
|
||||
melcoe[221] * fftmag[113] + melcoe[222] * fftmag[114];
|
||||
dout[57] = melcoe[223] * fftmag[111] + melcoe[224] * fftmag[112] +
|
||||
melcoe[225] * fftmag[113] + melcoe[226] * fftmag[114] +
|
||||
melcoe[227] * fftmag[115] + melcoe[228] * fftmag[116] +
|
||||
melcoe[229] * fftmag[117] + melcoe[230] * fftmag[118] +
|
||||
melcoe[231] * fftmag[119];
|
||||
dout[58] = melcoe[232] * fftmag[115] + melcoe[233] * fftmag[116] +
|
||||
melcoe[234] * fftmag[117] + melcoe[235] * fftmag[118] +
|
||||
melcoe[236] * fftmag[119] + melcoe[237] * fftmag[120] +
|
||||
melcoe[238] * fftmag[121] + melcoe[239] * fftmag[122] +
|
||||
melcoe[240] * fftmag[123];
|
||||
dout[59] = melcoe[241] * fftmag[120] + melcoe[242] * fftmag[121] +
|
||||
melcoe[243] * fftmag[122] + melcoe[244] * fftmag[123] +
|
||||
melcoe[245] * fftmag[124] + melcoe[246] * fftmag[125] +
|
||||
melcoe[247] * fftmag[126] + melcoe[248] * fftmag[127] +
|
||||
melcoe[249] * fftmag[128];
|
||||
dout[60] = melcoe[250] * fftmag[124] + melcoe[251] * fftmag[125] +
|
||||
melcoe[252] * fftmag[126] + melcoe[253] * fftmag[127] +
|
||||
melcoe[254] * fftmag[128] + melcoe[255] * fftmag[129] +
|
||||
melcoe[256] * fftmag[130] + melcoe[257] * fftmag[131] +
|
||||
melcoe[258] * fftmag[132];
|
||||
dout[61] = melcoe[259] * fftmag[129] + melcoe[260] * fftmag[130] +
|
||||
melcoe[261] * fftmag[131] + melcoe[262] * fftmag[132] +
|
||||
melcoe[263] * fftmag[133] + melcoe[264] * fftmag[134] +
|
||||
melcoe[265] * fftmag[135] + melcoe[266] * fftmag[136] +
|
||||
melcoe[267] * fftmag[137];
|
||||
dout[62] = melcoe[268] * fftmag[133] + melcoe[269] * fftmag[134] +
|
||||
melcoe[270] * fftmag[135] + melcoe[271] * fftmag[136] +
|
||||
melcoe[272] * fftmag[137] + melcoe[273] * fftmag[138] +
|
||||
melcoe[274] * fftmag[139] + melcoe[275] * fftmag[140] +
|
||||
melcoe[276] * fftmag[141] + melcoe[277] * fftmag[142];
|
||||
dout[63] = melcoe[278] * fftmag[138] + melcoe[279] * fftmag[139] +
|
||||
melcoe[280] * fftmag[140] + melcoe[281] * fftmag[141] +
|
||||
melcoe[282] * fftmag[142] + melcoe[283] * fftmag[143] +
|
||||
melcoe[284] * fftmag[144] + melcoe[285] * fftmag[145] +
|
||||
melcoe[286] * fftmag[146] + melcoe[287] * fftmag[147];
|
||||
dout[64] = melcoe[288] * fftmag[143] + melcoe[289] * fftmag[144] +
|
||||
melcoe[290] * fftmag[145] + melcoe[291] * fftmag[146] +
|
||||
melcoe[292] * fftmag[147] + melcoe[293] * fftmag[148] +
|
||||
melcoe[294] * fftmag[149] + melcoe[295] * fftmag[150] +
|
||||
melcoe[296] * fftmag[151] + melcoe[297] * fftmag[152] +
|
||||
melcoe[298] * fftmag[153];
|
||||
dout[65] = melcoe[299] * fftmag[148] + melcoe[300] * fftmag[149] +
|
||||
melcoe[301] * fftmag[150] + melcoe[302] * fftmag[151] +
|
||||
melcoe[303] * fftmag[152] + melcoe[304] * fftmag[153] +
|
||||
melcoe[305] * fftmag[154] + melcoe[306] * fftmag[155] +
|
||||
melcoe[307] * fftmag[156] + melcoe[308] * fftmag[157] +
|
||||
melcoe[309] * fftmag[158];
|
||||
dout[66] = melcoe[310] * fftmag[154] + melcoe[311] * fftmag[155] +
|
||||
melcoe[312] * fftmag[156] + melcoe[313] * fftmag[157] +
|
||||
melcoe[314] * fftmag[158] + melcoe[315] * fftmag[159] +
|
||||
melcoe[316] * fftmag[160] + melcoe[317] * fftmag[161] +
|
||||
melcoe[318] * fftmag[162] + melcoe[319] * fftmag[163] +
|
||||
melcoe[320] * fftmag[164];
|
||||
dout[67] = melcoe[321] * fftmag[159] + melcoe[322] * fftmag[160] +
|
||||
melcoe[323] * fftmag[161] + melcoe[324] * fftmag[162] +
|
||||
melcoe[325] * fftmag[163] + melcoe[326] * fftmag[164] +
|
||||
melcoe[327] * fftmag[165] + melcoe[328] * fftmag[166] +
|
||||
melcoe[329] * fftmag[167] + melcoe[330] * fftmag[168] +
|
||||
melcoe[331] * fftmag[169] + melcoe[332] * fftmag[170];
|
||||
dout[68] = melcoe[333] * fftmag[165] + melcoe[334] * fftmag[166] +
|
||||
melcoe[335] * fftmag[167] + melcoe[336] * fftmag[168] +
|
||||
melcoe[337] * fftmag[169] + melcoe[338] * fftmag[170] +
|
||||
melcoe[339] * fftmag[171] + melcoe[340] * fftmag[172] +
|
||||
melcoe[341] * fftmag[173] + melcoe[342] * fftmag[174] +
|
||||
melcoe[343] * fftmag[175] + melcoe[344] * fftmag[176];
|
||||
dout[69] = melcoe[345] * fftmag[171] + melcoe[346] * fftmag[172] +
|
||||
melcoe[347] * fftmag[173] + melcoe[348] * fftmag[174] +
|
||||
melcoe[349] * fftmag[175] + melcoe[350] * fftmag[176] +
|
||||
melcoe[351] * fftmag[177] + melcoe[352] * fftmag[178] +
|
||||
melcoe[353] * fftmag[179] + melcoe[354] * fftmag[180] +
|
||||
melcoe[355] * fftmag[181] + melcoe[356] * fftmag[182];
|
||||
dout[70] = melcoe[357] * fftmag[177] + melcoe[358] * fftmag[178] +
|
||||
melcoe[359] * fftmag[179] + melcoe[360] * fftmag[180] +
|
||||
melcoe[361] * fftmag[181] + melcoe[362] * fftmag[182] +
|
||||
melcoe[363] * fftmag[183] + melcoe[364] * fftmag[184] +
|
||||
melcoe[365] * fftmag[185] + melcoe[366] * fftmag[186] +
|
||||
melcoe[367] * fftmag[187] + melcoe[368] * fftmag[188];
|
||||
dout[71] = melcoe[369] * fftmag[183] + melcoe[370] * fftmag[184] +
|
||||
melcoe[371] * fftmag[185] + melcoe[372] * fftmag[186] +
|
||||
melcoe[373] * fftmag[187] + melcoe[374] * fftmag[188] +
|
||||
melcoe[375] * fftmag[189] + melcoe[376] * fftmag[190] +
|
||||
melcoe[377] * fftmag[191] + melcoe[378] * fftmag[192] +
|
||||
melcoe[379] * fftmag[193] + melcoe[380] * fftmag[194] +
|
||||
melcoe[381] * fftmag[195];
|
||||
dout[72] = melcoe[382] * fftmag[189] + melcoe[383] * fftmag[190] +
|
||||
melcoe[384] * fftmag[191] + melcoe[385] * fftmag[192] +
|
||||
melcoe[386] * fftmag[193] + melcoe[387] * fftmag[194] +
|
||||
melcoe[388] * fftmag[195] + melcoe[389] * fftmag[196] +
|
||||
melcoe[390] * fftmag[197] + melcoe[391] * fftmag[198] +
|
||||
melcoe[392] * fftmag[199] + melcoe[393] * fftmag[200] +
|
||||
melcoe[394] * fftmag[201] + melcoe[395] * fftmag[202];
|
||||
dout[73] = melcoe[396] * fftmag[196] + melcoe[397] * fftmag[197] +
|
||||
melcoe[398] * fftmag[198] + melcoe[399] * fftmag[199] +
|
||||
melcoe[400] * fftmag[200] + melcoe[401] * fftmag[201] +
|
||||
melcoe[402] * fftmag[202] + melcoe[403] * fftmag[203] +
|
||||
melcoe[404] * fftmag[204] + melcoe[405] * fftmag[205] +
|
||||
melcoe[406] * fftmag[206] + melcoe[407] * fftmag[207] +
|
||||
melcoe[408] * fftmag[208] + melcoe[409] * fftmag[209];
|
||||
dout[74] = melcoe[410] * fftmag[203] + melcoe[411] * fftmag[204] +
|
||||
melcoe[412] * fftmag[205] + melcoe[413] * fftmag[206] +
|
||||
melcoe[414] * fftmag[207] + melcoe[415] * fftmag[208] +
|
||||
melcoe[416] * fftmag[209] + melcoe[417] * fftmag[210] +
|
||||
melcoe[418] * fftmag[211] + melcoe[419] * fftmag[212] +
|
||||
melcoe[420] * fftmag[213] + melcoe[421] * fftmag[214] +
|
||||
melcoe[422] * fftmag[215] + melcoe[423] * fftmag[216];
|
||||
dout[75] = melcoe[424] * fftmag[210] + melcoe[425] * fftmag[211] +
|
||||
melcoe[426] * fftmag[212] + melcoe[427] * fftmag[213] +
|
||||
melcoe[428] * fftmag[214] + melcoe[429] * fftmag[215] +
|
||||
melcoe[430] * fftmag[216] + melcoe[431] * fftmag[217] +
|
||||
melcoe[432] * fftmag[218] + melcoe[433] * fftmag[219] +
|
||||
melcoe[434] * fftmag[220] + melcoe[435] * fftmag[221] +
|
||||
melcoe[436] * fftmag[222] + melcoe[437] * fftmag[223];
|
||||
dout[76] = melcoe[438] * fftmag[217] + melcoe[439] * fftmag[218] +
|
||||
melcoe[440] * fftmag[219] + melcoe[441] * fftmag[220] +
|
||||
melcoe[442] * fftmag[221] + melcoe[443] * fftmag[222] +
|
||||
melcoe[444] * fftmag[223] + melcoe[445] * fftmag[224] +
|
||||
melcoe[446] * fftmag[225] + melcoe[447] * fftmag[226] +
|
||||
melcoe[448] * fftmag[227] + melcoe[449] * fftmag[228] +
|
||||
melcoe[450] * fftmag[229] + melcoe[451] * fftmag[230] +
|
||||
melcoe[452] * fftmag[231];
|
||||
dout[77] = melcoe[453] * fftmag[224] + melcoe[454] * fftmag[225] +
|
||||
melcoe[455] * fftmag[226] + melcoe[456] * fftmag[227] +
|
||||
melcoe[457] * fftmag[228] + melcoe[458] * fftmag[229] +
|
||||
melcoe[459] * fftmag[230] + melcoe[460] * fftmag[231] +
|
||||
melcoe[461] * fftmag[232] + melcoe[462] * fftmag[233] +
|
||||
melcoe[463] * fftmag[234] + melcoe[464] * fftmag[235] +
|
||||
melcoe[465] * fftmag[236] + melcoe[466] * fftmag[237] +
|
||||
melcoe[467] * fftmag[238] + melcoe[468] * fftmag[239];
|
||||
dout[78] = melcoe[469] * fftmag[232] + melcoe[470] * fftmag[233] +
|
||||
melcoe[471] * fftmag[234] + melcoe[472] * fftmag[235] +
|
||||
melcoe[473] * fftmag[236] + melcoe[474] * fftmag[237] +
|
||||
melcoe[475] * fftmag[238] + melcoe[476] * fftmag[239] +
|
||||
melcoe[477] * fftmag[240] + melcoe[478] * fftmag[241] +
|
||||
melcoe[479] * fftmag[242] + melcoe[480] * fftmag[243] +
|
||||
melcoe[481] * fftmag[244] + melcoe[482] * fftmag[245] +
|
||||
melcoe[483] * fftmag[246] + melcoe[484] * fftmag[247];
|
||||
dout[79] = melcoe[485] * fftmag[240] + melcoe[486] * fftmag[241] +
|
||||
melcoe[487] * fftmag[242] + melcoe[488] * fftmag[243] +
|
||||
melcoe[489] * fftmag[244] + melcoe[490] * fftmag[245] +
|
||||
melcoe[491] * fftmag[246] + melcoe[492] * fftmag[247] +
|
||||
melcoe[493] * fftmag[248] + melcoe[494] * fftmag[249] +
|
||||
melcoe[495] * fftmag[250] + melcoe[496] * fftmag[251] +
|
||||
melcoe[497] * fftmag[252] + melcoe[498] * fftmag[253] +
|
||||
melcoe[499] * fftmag[254] + melcoe[500] * fftmag[255];
|
||||
global_cmvn(dout);
|
||||
}
|
||||
@ -1,35 +0,0 @@
|
||||
|
||||
#ifndef FEATUREEXTRACT_H
|
||||
#define FEATUREEXTRACT_H
|
||||
|
||||
#include <fftw3.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "FeatureQueue.h"
|
||||
#include "SpeechWrap.h"
|
||||
#include "Tensor.h"
|
||||
|
||||
// Feature front end: windows buffered speech, runs an FFT per window,
// reduces it to 80 mel values (melspect) and queues the result for fetch().
class FeatureExtract {
|
||||
private:
|
||||
// Buffered input samples; insert() indexes it and calls speech.update().
SpeechWrap speech;
|
||||
// Finished feature frames: pushed by insert(), popped by fetch().
FeatureQueue fqueue;
|
||||
// Selects post-processing in global_cmvn(): 0 and 1 pick a built-in
// mean/std table, any value >= 2 applies the log only.
int mode;
|
||||
// NOTE(review): presumably the FFT length in samples; its use is not
// visible in this part of the file - confirm.
int fft_size = 512;
|
||||
// Number of samples per analysis window (loop bound in insert()).
int window_size = 400;
|
||||
// NOTE(review): presumably the hop between windows in samples - confirm.
int window_shift = 160;
|
||||
|
||||
//void fftw_init();
|
||||
// Computes 80 mel energies from interleaved FFT output, then calls
// global_cmvn() on them.
void melspect(float *din, float *dout);
|
||||
// In-place log (and, for mode 0/1, mean/std normalisation) of 80 values.
void global_cmvn(float *din);
|
||||
|
||||
public:
|
||||
FeatureExtract(int mode);
|
||||
~FeatureExtract();
|
||||
int size();
|
||||
//int status();
|
||||
void reset();
|
||||
// Processes input speech with the given FFTW plan and pushes feature
// frames to the queue.
void insert(fftwf_plan plan, float *din, int len, int flag);
|
||||
// Pops one queued tensor into dout; returns false when the queue is empty.
bool fetch(Tensor<float> *&dout);
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -122,7 +122,6 @@ extern "C" {
|
||||
FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
|
||||
pResult->snippet_time = audio.get_time_len();
|
||||
while (audio.fetch(buff, len, flag) > 0) {
|
||||
//pRecogObj->reset();
|
||||
string msg = pRecogObj->forward(buff, len, flag);
|
||||
pResult->msg+= msg;
|
||||
nStep++;
|
||||
|
||||
@ -18,10 +18,16 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize)
|
||||
cmvn_path = pathAppend(path, "am.mvn");
|
||||
config_path = pathAppend(path, "config.yaml");
|
||||
|
||||
fft_input = (float *)fftwf_malloc(sizeof(float) * fft_size);
|
||||
fft_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
|
||||
memset(fft_input, 0, sizeof(float) * fft_size);
|
||||
plan = fftwf_plan_dft_r2c_1d(fft_size, fft_input, fft_out, FFTW_ESTIMATE);
|
||||
// knf options
|
||||
fbank_opts.frame_opts.dither = 0;
|
||||
fbank_opts.mel_opts.num_bins = 80;
|
||||
fbank_opts.frame_opts.samp_freq = model_sample_rate;
|
||||
fbank_opts.frame_opts.window_type = "hamming";
|
||||
fbank_opts.frame_opts.frame_shift_ms = 10;
|
||||
fbank_opts.frame_opts.frame_length_ms = 25;
|
||||
fbank_opts.energy_floor = 0;
|
||||
fbank_opts.mel_opts.debug_mel = false;
|
||||
//fbank_ = std::make_unique<knf::OnlineFbank>(fbank_opts);
|
||||
|
||||
//sessionOptions.SetInterOpNumThreads(1);
|
||||
sessionOptions.SetIntraOpNumThreads(nNumThread);
|
||||
@ -57,38 +63,28 @@ ModelImp::~ModelImp()
|
||||
{
|
||||
if(vocab)
|
||||
delete vocab;
|
||||
fftwf_free(fft_input);
|
||||
fftwf_free(fft_out);
|
||||
fftwf_destroy_plan(plan);
|
||||
fftwf_cleanup();
|
||||
}
|
||||
|
||||
// Currently a no-op: the body is empty.
void ModelImp::reset()
|
||||
{
|
||||
}
|
||||
|
||||
void ModelImp::apply_lfr(Tensor<float>*& din)
|
||||
{
|
||||
int mm = din->size[2];
|
||||
int ll = ceil(mm / 6.0);
|
||||
Tensor<float>* tmp = new Tensor<float>(ll, 560);
|
||||
int out_offset = 0;
|
||||
for (int i = 0; i < ll; i++) {
|
||||
for (int j = 0; j < 7; j++) {
|
||||
int idx = i * 6 + j - 3;
|
||||
if (idx < 0) {
|
||||
idx = 0;
|
||||
}
|
||||
if (idx >= mm) {
|
||||
idx = mm - 1;
|
||||
}
|
||||
memcpy(tmp->buff + out_offset, din->buff + idx * 80,
|
||||
sizeof(float) * 80);
|
||||
out_offset += 80;
|
||||
}
|
||||
vector<float> ModelImp::FbankKaldi(float sample_rate, const float* waves, int len) {
|
||||
knf::OnlineFbank fbank_(fbank_opts);
|
||||
fbank_.AcceptWaveform(sample_rate, waves, len);
|
||||
//fbank_->InputFinished();
|
||||
int32_t frames = fbank_.NumFramesReady();
|
||||
int32_t feature_dim = fbank_opts.mel_opts.num_bins;
|
||||
vector<float> features(frames * feature_dim);
|
||||
float *p = features.data();
|
||||
|
||||
for (int32_t i = 0; i != frames; ++i) {
|
||||
const float *f = fbank_.GetFrame(i);
|
||||
std::copy(f, f + feature_dim, p);
|
||||
p += feature_dim;
|
||||
}
|
||||
delete din;
|
||||
din = tmp;
|
||||
|
||||
return features;
|
||||
}
|
||||
|
||||
void ModelImp::load_cmvn(const char *filename)
|
||||
@ -124,24 +120,6 @@ void ModelImp::load_cmvn(const char *filename)
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the loaded mean/variance tables to a feature tensor in place.
//
// din   tensor whose buffer is treated as size[2] rows of size[3] values;
//       every element x in column c becomes (x + means_list[c]) * vars_list[c].
void ModelImp::apply_cmvn(Tensor<float>* din)
{
    const float* const shift = means_list.data();
    const float* const gain = vars_list.data();

    const int rows = din->size[2];
    const int cols = din->size[3];

    for (int r = 0; r < rows; r++) {
        const int base = r * cols;
        for (int c = 0; c < cols; c++) {
            din->buff[base + c] = (din->buff[base + c] + shift[c]) * gain[c];
        }
    }
}
|
||||
|
||||
string ModelImp::greedy_search(float * in, int nLen )
|
||||
{
|
||||
vector<int> hyps;
|
||||
@ -156,16 +134,115 @@ string ModelImp::greedy_search(float * in, int nLen )
|
||||
return vocab->vector2stringV2(hyps);
|
||||
}
|
||||
|
||||
// Stack consecutive feature frames into wider frames.
//
// in      flat array of frames, each fbank_opts.mel_opts.num_bins floats.
// return  flat array of output frames. Each output frame is
//         lfr_window_size consecutive input frames concatenated, and
//         successive output frames start lfr_window_shift input frames apart.
//
// An input with no complete frame yields an empty result. An input shorter
// than one window yields a single output frame whose missing tail is filled
// by repeating the last available input frame. Without that padding the
// truncating division below would still report one output frame and the copy
// would read past the end of `in`.
vector<float> ModelImp::ApplyLFR(const std::vector<float> &in)
{
    const int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
    const int32_t in_num_frames = in.size() / in_feat_dim;
    const int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    if (in_num_frames <= 0) {
        return std::vector<float>();
    }

    if (in_num_frames < lfr_window_size) {
        // Short input: copy what exists, then repeat the last frame.
        std::vector<float> padded(out_feat_dim);
        const float *last_frame = in.data() + (in_num_frames - 1) * in_feat_dim;
        float *dst = std::copy(in.data(),
                               in.data() + in_num_frames * in_feat_dim,
                               padded.data());
        for (int32_t i = in_num_frames; i != lfr_window_size; ++i) {
            dst = std::copy(last_frame, last_frame + in_feat_dim, dst);
        }
        return padded;
    }

    const int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    std::vector<float> out(out_num_frames * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
        // One output frame is a contiguous run of lfr_window_size input frames.
        std::copy(p_in, p_in + out_feat_dim, p_out);

        p_out += out_feat_dim;
        p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
}
|
||||
|
||||
void ModelImp::ApplyCMVN(std::vector<float> *v)
|
||||
{
|
||||
int32_t dim = means_list.size();
|
||||
int32_t num_frames = v->size() / dim;
|
||||
|
||||
float *p = v->data();
|
||||
|
||||
for (int32_t i = 0; i != num_frames; ++i) {
|
||||
for (int32_t k = 0; k != dim; ++k) {
|
||||
p[k] = (p[k] + means_list[k]) * vars_list[k];
|
||||
}
|
||||
|
||||
p += dim;
|
||||
}
|
||||
}
|
||||
|
||||
// void ParaformerOnnxAsrModel::ForwardFunc(
|
||||
// const std::vector<std::vector<float>>& chunk_feats,
|
||||
// std::vector<std::vector<float>>* out_prob) {
|
||||
// Ort::MemoryInfo memory_info =
|
||||
// Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
|
||||
// // 1. Prepare onnx required data, splice cached_feature_ and chunk_feats
|
||||
// // chunk
|
||||
// // int num_frames = cached_feature_.size() + chunk_feats.size();
|
||||
// int num_frames = chunk_feats.size();
|
||||
// const int feature_dim = chunk_feats[0].size();
|
||||
|
||||
// // 2. Generate 2 input nodes tensor
|
||||
// // speech node { batch,frame number,feature dim }
|
||||
// const int64_t paraformer_feats_shape[3] = {1, num_frames, feature_dim};
|
||||
// std::vector<float> paraformer_feats;
|
||||
// for (const auto & chunk_feat : chunk_feats) {
|
||||
// paraformer_feats.insert(paraformer_feats.end(), chunk_feat.begin(), chunk_feat.end());
|
||||
// }
|
||||
// Ort::Value paraformer_feats_ort = Ort::Value::CreateTensor<float>(
|
||||
// memory_info, paraformer_feats.data(), paraformer_feats.size(), paraformer_feats_shape, 3);
|
||||
// // speech_lengths node {speech length,}
|
||||
// const int64_t paraformer_length_shape[1] = {1};
|
||||
// std::vector<int32_t> paraformer_length;
|
||||
// paraformer_length.emplace_back(num_frames);
|
||||
// Ort::Value paraformer_length_ort = Ort::Value::CreateTensor<int32_t>(
|
||||
// memory_info, paraformer_length.data(), paraformer_length.size(), paraformer_length_shape, 1);
|
||||
|
||||
// // 3. Put nodes into onnx input vector
|
||||
// std::vector<Ort::Value> paraformer_inputs;
|
||||
// paraformer_inputs.emplace_back(std::move(paraformer_feats_ort));
|
||||
// paraformer_inputs.emplace_back(std::move(paraformer_length_ort));
|
||||
|
||||
// // 4. Onnx infer
|
||||
// std::vector<Ort::Value> paraformer_ort_outputs;
|
||||
// try{
|
||||
// VLOG(3) << "Start infer";
|
||||
// paraformer_ort_outputs = paraformer_session_->Run(
|
||||
// Ort::RunOptions{nullptr}, paraformer_in_names_.data(), paraformer_inputs.data(),
|
||||
// paraformer_inputs.size(), paraformer_out_names_.data(), paraformer_out_names_.size());
|
||||
// }catch (std::exception const& e) {
|
||||
// // Catch "Non-zero status code returned error",usually because there is no asr result.
|
||||
// // Need funasr to resolve.
|
||||
// LOG(ERROR) << e.what();
|
||||
// return;
|
||||
// }
|
||||
|
||||
// // 5. Change infer result to output shapes
|
||||
// float* logp_data = paraformer_ort_outputs[0].GetTensorMutableData<float>();
|
||||
// auto type_info = paraformer_ort_outputs[0].GetTensorTypeAndShapeInfo();
|
||||
|
||||
// int num_outputs = type_info.GetShape()[1];
|
||||
// int output_dim = type_info.GetShape()[2];
|
||||
// out_prob->resize(num_outputs);
|
||||
// for (int i = 0; i < num_outputs; i++) {
|
||||
// (*out_prob)[i].resize(output_dim);
|
||||
// memcpy((*out_prob)[i].data(), logp_data + i * output_dim,
|
||||
// sizeof(float) * output_dim);
|
||||
// }
|
||||
// }
|
||||
|
||||
string ModelImp::forward(float* din, int len, int flag)
|
||||
{
|
||||
Tensor<float>* in;
|
||||
FeatureExtract* fe = new FeatureExtract(3);
|
||||
fe->reset();
|
||||
fe->insert(plan, din, len, flag);
|
||||
fe->fetch(in);
|
||||
apply_lfr(in);
|
||||
apply_cmvn(in);
|
||||
Ort::RunOptions run_option;
|
||||
|
||||
int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
|
||||
std::vector<float> wav_feats = FbankKaldi(model_sample_rate, din, len);
|
||||
wav_feats = ApplyLFR(wav_feats);
|
||||
ApplyCMVN(&wav_feats);
|
||||
|
||||
int32_t feat_dim = lfr_window_size*in_feat_dim;
|
||||
int32_t num_frames = wav_feats.size() / feat_dim;
|
||||
|
||||
#ifdef _WIN_X86
|
||||
Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
|
||||
@ -173,29 +250,26 @@ string ModelImp::forward(float* din, int len, int flag)
|
||||
Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
||||
#endif
|
||||
|
||||
std::array<int64_t, 3> input_shape_{ in->size[0],in->size[2],in->size[3] };
|
||||
const int64_t input_shape_[3] = {1, num_frames, feat_dim};
|
||||
Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
|
||||
in->buff,
|
||||
in->buff_size,
|
||||
input_shape_.data(),
|
||||
input_shape_.size());
|
||||
wav_feats.data(),
|
||||
wav_feats.size(),
|
||||
input_shape_,
|
||||
3);
|
||||
|
||||
std::vector<int32_t> feats_len{ in->size[2] };
|
||||
std::vector<int64_t> feats_len_dim{ 1 };
|
||||
Ort::Value onnx_feats_len = Ort::Value::CreateTensor(
|
||||
m_memoryInfo,
|
||||
feats_len.data(),
|
||||
feats_len.size() * sizeof(int32_t),
|
||||
feats_len_dim.data(),
|
||||
feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
|
||||
const int64_t paraformer_length_shape[1] = {1};
|
||||
std::vector<int32_t> paraformer_length;
|
||||
paraformer_length.emplace_back(num_frames);
|
||||
Ort::Value onnx_feats_len = Ort::Value::CreateTensor<int32_t>(
|
||||
m_memoryInfo, paraformer_length.data(), paraformer_length.size(), paraformer_length_shape, 1);
|
||||
|
||||
std::vector<Ort::Value> input_onnx;
|
||||
input_onnx.emplace_back(std::move(onnx_feats));
|
||||
input_onnx.emplace_back(std::move(onnx_feats_len));
|
||||
|
||||
string result;
|
||||
try {
|
||||
|
||||
auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
|
||||
auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), input_onnx.size(), m_szOutputNames.data(), m_szOutputNames.size());
|
||||
std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||
|
||||
int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
|
||||
@ -208,15 +282,6 @@ string ModelImp::forward(float* din, int len, int flag)
|
||||
result = "";
|
||||
}
|
||||
|
||||
if(in){
|
||||
delete in;
|
||||
in = nullptr;
|
||||
}
|
||||
if(fe){
|
||||
delete fe;
|
||||
fe = nullptr;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -4,23 +4,26 @@
|
||||
#ifndef PARAFORMER_MODELIMP_H
|
||||
#define PARAFORMER_MODELIMP_H
|
||||
|
||||
#include "kaldi-native-fbank/csrc/feature-fbank.h"
|
||||
#include "kaldi-native-fbank/csrc/online-feature.h"
|
||||
|
||||
namespace paraformer {
|
||||
|
||||
class ModelImp : public Model {
|
||||
private:
|
||||
int fft_size=512;
|
||||
float *fft_input;
|
||||
fftwf_complex *fft_out;
|
||||
fftwf_plan plan;
|
||||
//std::unique_ptr<knf::OnlineFbank> fbank_;
|
||||
knf::FbankOptions fbank_opts;
|
||||
|
||||
Vocab* vocab;
|
||||
vector<float> means_list;
|
||||
vector<float> vars_list;
|
||||
const float scale = 22.6274169979695;
|
||||
int32_t lfr_window_size = 7;
|
||||
int32_t lfr_window_shift = 6;
|
||||
|
||||
void apply_lfr(Tensor<float>*& din);
|
||||
void apply_cmvn(Tensor<float>* din);
|
||||
void load_cmvn(const char *filename);
|
||||
vector<float> ApplyLFR(const vector<float> &in);
|
||||
void ApplyCMVN(vector<float> *v);
|
||||
|
||||
string greedy_search( float* in, int nLen);
|
||||
|
||||
@ -36,6 +39,7 @@ namespace paraformer {
|
||||
ModelImp(const char* path, int nNumThread=0, bool quantize=false);
|
||||
~ModelImp();
|
||||
void reset();
|
||||
vector<float> FbankKaldi(float sample_rate, const float* waves, int len);
|
||||
string forward_chunk(float* din, int len, int flag);
|
||||
string forward(float* din, int len, int flag);
|
||||
string rescoring();
|
||||
|
||||
@ -40,7 +40,6 @@ using namespace std;
|
||||
#include "Tensor.h"
|
||||
#include "util.h"
|
||||
#include "CommonStruct.h"
|
||||
#include "FeatureExtract.h"
|
||||
#include "FeatureQueue.h"
|
||||
#include "SpeechWrap.h"
|
||||
#include <Audio.h>
|
||||
|
||||
@ -14,7 +14,6 @@ using namespace std;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
if (argc < 4)
|
||||
{
|
||||
printf("Usage: %s /path/to/model_dir /path/to/wav/file quantize(true or false) \n", argv[0]);
|
||||
@ -22,7 +21,7 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
int nThreadNum = 4;
|
||||
int nThreadNum = 1;
|
||||
// is quantize
|
||||
bool quantize = false;
|
||||
istringstream(argv[3]) >> boolalpha >> quantize;
|
||||
|
||||
@ -11,14 +11,77 @@
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
using namespace std;
|
||||
|
||||
// Index of the next wav_list entry to process; each runReg worker claims one
// entry with fetch_add.
std::atomic<int> index(0);
|
||||
// Serialises the merge of per-thread totals at the end of runReg.
std::mutex mtx;
|
||||
|
||||
// Worker body for the multi-threaded recognition benchmark.
//
// AsrHanlde     recogniser handle shared by all workers.
// wav_list      paths of the audio files to recognise.
// total_length  accumulator for the summed snippet time of all results;
//               updated under `mtx`.
// total_time    receives the largest per-thread recognition time in
//               microseconds; updated under `mtx`.
// core_id       only used by the disabled CPU-affinity code below.
//
// Each worker runs one untimed warm-up recognition, then repeatedly claims
// the next file index from the shared atomic `index` until the list is
// exhausted.
void runReg(FUNASR_HANDLE AsrHanlde, vector<string> wav_list,
            float* total_length, long* total_time, int core_id) {

    (void)core_id;

    // cpu_set_t cpuset;
    // CPU_ZERO(&cpuset);
    // CPU_SET(core_id, &cpuset);
    // if(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) < 0){
    //     perror("pthread_setaffinity_np");
    // }

    struct timeval start, end;
    float n_total_length = 0.0f;
    long n_total_time = 0;

    // warm up (skipped for an empty list; the result is released, not leaked)
    if (!wav_list.empty()) {
        FUNASR_RESULT warm = FunASRRecogFile(AsrHanlde, wav_list[0].c_str(), RASR_NONE, NULL);
        if (warm) {
            FunASRFreeResult(warm);
        }
    }

    const int wav_count = static_cast<int>(wav_list.size());

    while (true) {
        // Use the atomic counter to fetch the next index and increment it.
        int i = index.fetch_add(1);
        if (i >= wav_count) {
            break;
        }

        gettimeofday(&start, NULL);
        FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[i].c_str(), RASR_NONE, NULL);

        gettimeofday(&end, NULL);
        long seconds = (end.tv_sec - start.tv_sec);
        long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
        n_total_time += taking_micros;

        if(Result){
            string msg = FunASRGetResult(Result, 0);

            // std::thread::id is not an integer and must not go through
            // printf's varargs; render it to text first and keep the output
            // in a single printf call.
            std::ostringstream tid;
            tid << this_thread::get_id();
            printf("Thread: %s Result: %s \n", tid.str().c_str(), msg.c_str());

            float snippet_time = FunASRGetRetSnippetTime(Result);
            n_total_length += snippet_time;
            FunASRFreeResult(Result);
        }else{
            cout <<"No return data!";
        }
    }

    {
        // Merge this thread's totals into the shared accumulators.
        lock_guard<mutex> guard(mtx);
        *total_length += n_total_length;
        if(*total_time < n_total_time){
            *total_time = n_total_time;
        }
    }
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
if (argc < 4)
|
||||
if (argc < 5)
|
||||
{
|
||||
printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) \n", argv[0]);
|
||||
printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) nThreadNum \n", argv[0]);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
@ -42,12 +105,14 @@ int main(int argc, char *argv[])
|
||||
// model init
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
int nThreadNum = 1;
|
||||
// is quantize
|
||||
bool quantize = false;
|
||||
istringstream(argv[3]) >> boolalpha >> quantize;
|
||||
// thread num
|
||||
int nThreadNum = 1;
|
||||
nThreadNum = atoi(argv[4]);
|
||||
|
||||
FUNASR_HANDLE AsrHanlde=FunASRInit(argv[1], nThreadNum, quantize);
|
||||
FUNASR_HANDLE AsrHanlde=FunASRInit(argv[1], 1, quantize);
|
||||
if (!AsrHanlde)
|
||||
{
|
||||
printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
|
||||
@ -58,36 +123,19 @@ int main(int argc, char *argv[])
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
|
||||
|
||||
// warm up
|
||||
for (size_t i = 0; i < 30; i++)
|
||||
// 多线程测试
|
||||
float total_length = 0.0f;
|
||||
long total_time = 0;
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
for (int i = 0; i < nThreadNum; i++)
|
||||
{
|
||||
FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[0].c_str(), RASR_NONE, NULL);
|
||||
threads.emplace_back(thread(runReg, AsrHanlde, wav_list, &total_length, &total_time, i));
|
||||
}
|
||||
|
||||
// forward
|
||||
float snippet_time = 0.0f;
|
||||
float total_length = 0.0f;
|
||||
long total_time = 0.0f;
|
||||
|
||||
for (size_t i = 0; i < wav_list.size(); i++)
|
||||
for (auto& thread : threads)
|
||||
{
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[i].c_str(), RASR_NONE, NULL);
|
||||
gettimeofday(&end, NULL);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
total_time += taking_micros;
|
||||
|
||||
if(Result){
|
||||
string msg = FunASRGetResult(Result, 0);
|
||||
printf("Result: %s \n", msg.c_str());
|
||||
|
||||
snippet_time = FunASRGetRetSnippetTime(Result);
|
||||
total_length += snippet_time;
|
||||
FunASRFreeResult(Result);
|
||||
}else{
|
||||
cout <<"No return data!";
|
||||
}
|
||||
thread.join();
|
||||
}
|
||||
|
||||
printf("total_time_wav %ld ms.\n", (long)(total_length * 1000));
|
||||
|
||||
Loading…
Reference in New Issue
Block a user