mirror of
https://github.com/openai/whisper.git
synced 2025-09-15 15:18:35 +08:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0d2f624c0 | ||
|
|
db7fbc75fe | ||
|
|
31243bad24 | ||
|
|
1f8fc975d3 | ||
|
|
679ae1d141 | ||
|
|
f50c4f264e | ||
|
|
86899243e9 | ||
|
|
5dff4db81a | ||
|
|
dd985ac4b9 | ||
|
|
e1e6aa60ff | ||
|
|
e6a5fc0ff0 | ||
|
|
13907bed90 | ||
|
|
517a43ecd1 | ||
|
|
dd4d010d2c | ||
|
|
26a7cacc83 | ||
|
|
6c1d8f1ea1 | ||
|
|
90db0de189 | ||
|
|
fc5ded7d90 | ||
|
|
173ff7dd1d | ||
|
|
271445b2f2 | ||
|
|
5979f03701 | ||
|
|
cdb8147962 |
13
.github/dependabot.yml
vendored
Normal file
13
.github/dependabot.yml
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
# Keep GitHub Actions up to date with GitHub's Dependabot...
|
||||
# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot
|
||||
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: github-actions
|
||||
directory: /
|
||||
groups:
|
||||
github-actions:
|
||||
patterns:
|
||||
- "*" # Group all Actions updates into a single larger pull request
|
||||
schedule:
|
||||
interval: weekly
|
||||
12
.github/workflows/python-publish.yml
vendored
12
.github/workflows/python-publish.yml
vendored
@ -8,23 +8,23 @@ jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions-ecosystem/action-regex-match@v2
|
||||
id: regex-match
|
||||
with:
|
||||
text: ${{ github.event.head_commit.message }}
|
||||
regex: '^Release ([^ ]+)'
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.8'
|
||||
python-version: '3.12'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install setuptools wheel twine
|
||||
pip install setuptools wheel twine build
|
||||
- name: Release
|
||||
if: ${{ steps.regex-match.outputs.match != '' }}
|
||||
uses: softprops/action-gh-release@v1
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: v${{ steps.regex-match.outputs.group1 }}
|
||||
- name: Build and publish
|
||||
@ -33,5 +33,5 @@ jobs:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: |
|
||||
python setup.py sdist
|
||||
python -m build --sdist
|
||||
twine upload dist/*
|
||||
|
||||
24
.github/workflows/test.yml
vendored
24
.github/workflows/test.yml
vendored
@ -11,19 +11,19 @@ jobs:
|
||||
pre-commit:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch base branch
|
||||
run: git fetch origin ${{ github.base_ref }}
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.8"
|
||||
python-version: "3.9"
|
||||
architecture: x64
|
||||
- name: Get pip cache dir
|
||||
id: pip-cache
|
||||
run: |
|
||||
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
|
||||
- name: pip/pre-commit cache
|
||||
uses: actions/cache@v3
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
${{ steps.pip-cache.outputs.dir }}
|
||||
@ -33,15 +33,19 @@ jobs:
|
||||
${{ runner.os }}-pip-pre-commit
|
||||
- name: pre-commit
|
||||
run: |
|
||||
pip install -U pre-commit
|
||||
pip install --upgrade pre-commit
|
||||
pre-commit install --install-hooks
|
||||
pre-commit run --all-files
|
||||
whisper-test:
|
||||
needs: pre-commit
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- python-version: '3.8'
|
||||
pytorch-version: 1.10.1
|
||||
numpy-requirement: "'numpy<2'"
|
||||
- python-version: '3.8'
|
||||
pytorch-version: 1.13.1
|
||||
numpy-requirement: "'numpy<2'"
|
||||
@ -60,10 +64,16 @@ jobs:
|
||||
- python-version: '3.12'
|
||||
pytorch-version: 2.4.1
|
||||
numpy-requirement: "'numpy'"
|
||||
- python-version: '3.12'
|
||||
pytorch-version: 2.5.1
|
||||
numpy-requirement: "'numpy'"
|
||||
- python-version: '3.13'
|
||||
pytorch-version: 2.5.1
|
||||
numpy-requirement: "'numpy'"
|
||||
steps:
|
||||
- uses: conda-incubator/setup-miniconda@v2
|
||||
- uses: conda-incubator/setup-miniconda@v3
|
||||
- run: conda install -n test ffmpeg python=${{ matrix.python-version }}
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- run: echo "$CONDA/envs/test/bin" >> $GITHUB_PATH
|
||||
- run: pip3 install .["dev"] ${{ matrix.numpy-requirement }} torch==${{ matrix.pytorch-version }}+cpu --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple
|
||||
- run: pytest --durations=0 -vv -k 'not test_transcribe or test_transcribe[tiny] or test_transcribe[tiny.en]' -m 'not requires_cuda'
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.0.1
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-json
|
||||
- id: end-of-file-fixer
|
||||
@ -11,17 +11,17 @@ repos:
|
||||
- id: check-added-large-files
|
||||
args: [--maxkb=4096]
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 23.7.0
|
||||
rev: 25.1.0
|
||||
hooks:
|
||||
- id: black
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.12.0
|
||||
rev: 6.0.0
|
||||
hooks:
|
||||
- id: isort
|
||||
name: isort (python)
|
||||
args: ["--profile", "black", "-l", "88", "--trailing-comma", "--multi-line", "3"]
|
||||
- repo: https://github.com/pycqa/flake8.git
|
||||
rev: 6.0.0
|
||||
rev: 7.1.1
|
||||
hooks:
|
||||
- id: flake8
|
||||
types: [python]
|
||||
|
||||
21
CHANGELOG.md
21
CHANGELOG.md
@ -1,5 +1,26 @@
|
||||
# CHANGELOG
|
||||
|
||||
## [v20250625](https://github.com/openai/whisper/releases/tag/v20250625)
|
||||
|
||||
* Fix: Update torch.load to use weights_only=True to prevent security w… ([#2451](https://github.com/openai/whisper/pull/2451))
|
||||
* Fix: Ensure DTW cost tensor is on the same device as input tensor ([#2561](https://github.com/openai/whisper/pull/2561))
|
||||
* docs: updated README to specify translation model limitation ([#2547](https://github.com/openai/whisper/pull/2547))
|
||||
* Fixed triton kernel update to support latest triton versions ([#2588](https://github.com/openai/whisper/pull/2588))
|
||||
* Fix: GitHub display errors for Jupyter notebooks ([#2589](https://github.com/openai/whisper/pull/2589))
|
||||
* Bump the github-actions group with 3 updates ([#2592](https://github.com/openai/whisper/pull/2592))
|
||||
* Keep GitHub Actions up to date with GitHub's Dependabot ([#2486](https://github.com/openai/whisper/pull/2486))
|
||||
* pre-commit: Upgrade black v25.1.0 and isort v6.0.0 ([#2514](https://github.com/openai/whisper/pull/2514))
|
||||
* GitHub Actions: Add Python 3.13 to the testing ([#2487](https://github.com/openai/whisper/pull/2487))
|
||||
* PEP 621: Migrate from setup.py to pyproject.toml ([#2435](https://github.com/openai/whisper/pull/2435))
|
||||
* pre-commit autoupdate && pre-commit run --all-files ([#2484](https://github.com/openai/whisper/pull/2484))
|
||||
* Upgrade GitHub Actions ([#2430](https://github.com/openai/whisper/pull/2430))
|
||||
* Bugfix: Illogical "Avoid computing higher temperatures on no_speech" ([#1903](https://github.com/openai/whisper/pull/1903))
|
||||
* Updating README and doc strings to reflect that n_mels can now be 128 ([#2049](https://github.com/openai/whisper/pull/2049))
|
||||
* fix typo data/README.md ([#2433](https://github.com/openai/whisper/pull/2433))
|
||||
* Update README.md ([#2379](https://github.com/openai/whisper/pull/2379))
|
||||
* Add option to carry initial_prompt with the sliding window ([#2343](https://github.com/openai/whisper/pull/2343))
|
||||
* more pytorch versions in tests ([#2408](https://github.com/openai/whisper/pull/2408))
|
||||
|
||||
## [v20240930](https://github.com/openai/whisper/releases/tag/v20240930)
|
||||
|
||||
* allowing numpy 2 in tests ([#2362](https://github.com/openai/whisper/pull/2362))
|
||||
|
||||
22
README.md
22
README.md
@ -77,25 +77,35 @@ Whisper's performance varies widely depending on the language. The figure below
|
||||
|
||||

|
||||
|
||||
|
||||
|
||||
## Command-line usage
|
||||
|
||||
The following command will transcribe speech in audio files, using the `turbo` model:
|
||||
|
||||
```bash
|
||||
whisper audio.flac audio.mp3 audio.wav --model turbo
|
||||
```
|
||||
|
||||
The default setting (which selects the `small` model) works well for transcribing English. To transcribe an audio file containing non-English speech, you can specify the language using the `--language` option:
|
||||
The default setting (which selects the `turbo` model) works well for transcribing English. However, **the `turbo` model is not trained for translation tasks**. If you need to **translate non-English speech into English**, use one of the **multilingual models** (`tiny`, `base`, `small`, `medium`, `large`) instead of `turbo`.
|
||||
|
||||
For example, to transcribe an audio file containing non-English speech, you can specify the language:
|
||||
|
||||
```bash
|
||||
whisper japanese.wav --language Japanese
|
||||
```
|
||||
|
||||
Adding `--task translate` will translate the speech into English:
|
||||
To **translate** speech into English, use:
|
||||
|
||||
whisper japanese.wav --language Japanese --task translate
|
||||
```bash
|
||||
whisper japanese.wav --model medium --language Japanese --task translate
|
||||
```
|
||||
|
||||
> **Note:** The `turbo` model will return the original language even if `--task translate` is specified. Use `medium` or `large` for the best translation results.
|
||||
|
||||
Run the following to view all available options:
|
||||
|
||||
```bash
|
||||
whisper --help
|
||||
```
|
||||
|
||||
See [tokenizer.py](https://github.com/openai/whisper/blob/main/whisper/tokenizer.py) for the list of all available languages.
|
||||
|
||||
@ -126,7 +136,7 @@ audio = whisper.load_audio("audio.mp3")
|
||||
audio = whisper.pad_or_trim(audio)
|
||||
|
||||
# make log-Mel spectrogram and move to the same device as the model
|
||||
mel = whisper.log_mel_spectrogram(audio).to(model.device)
|
||||
mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)
|
||||
|
||||
# detect the spoken language
|
||||
_, probs = model.detect_language(mel)
|
||||
|
||||
@ -45,7 +45,7 @@ We downloaded the [CHiME-5 dataset](https://spandh.dcs.shef.ac.uk//chime_challen
|
||||
|
||||
### AMI-IHM, AMI-SDM1
|
||||
|
||||
We preprocessed the [AMI Corpus](https://groups.inf.ed.ac.uk/ami/corpus/overview.shtml) by following the stage 0 ad 2 of the [s5b recipe](https://github.com/kaldi-asr/kaldi/tree/master/egs/ami/s5b).
|
||||
We preprocessed the [AMI Corpus](https://groups.inf.ed.ac.uk/ami/corpus/overview.shtml) by following the stage 0 and 2 of the [s5b recipe](https://github.com/kaldi-asr/kaldi/tree/master/egs/ami/s5b).
|
||||
|
||||
|
||||
## Long-form English-only datasets
|
||||
|
||||
3
notebooks/LibriSpeech.ipynb
generated
3
notebooks/LibriSpeech.ipynb
generated
@ -949,7 +949,8 @@
|
||||
"style": "IPY_MODEL_039b53f2702c4179af7e0548018d0588",
|
||||
"value": " 164/164 [05:08<00:00, 1.86s/it]"
|
||||
}
|
||||
}
|
||||
},
|
||||
"state": {}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
3
notebooks/Multilingual_ASR.ipynb
generated
3
notebooks/Multilingual_ASR.ipynb
generated
@ -4219,7 +4219,8 @@
|
||||
"_view_name": "StyleView",
|
||||
"description_width": ""
|
||||
}
|
||||
}
|
||||
},
|
||||
"state": {}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@ -1,3 +1,50 @@
|
||||
[build-system]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
requires = [ "setuptools>=61.2" ]
|
||||
|
||||
[project]
|
||||
name = "openai-whisper"
|
||||
description = "Robust Speech Recognition via Large-Scale Weak Supervision"
|
||||
readme.content-type = "text/markdown"
|
||||
readme.file = "README.md"
|
||||
license = { text = "MIT" }
|
||||
authors = [ { name = "OpenAI" } ]
|
||||
requires-python = ">=3.8"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
]
|
||||
dynamic = [ "version" ]
|
||||
dependencies = [
|
||||
"more-itertools",
|
||||
"numba",
|
||||
"numpy",
|
||||
"tiktoken",
|
||||
"torch",
|
||||
"tqdm",
|
||||
"triton>=2; (platform_machine=='x86_64' and sys_platform=='linux') or sys_platform=='linux2'",
|
||||
]
|
||||
optional-dependencies.dev = [ "black", "flake8", "isort", "pytest", "scipy" ]
|
||||
urls = { Homepage = "https://github.com/openai/whisper" }
|
||||
scripts.whisper = "whisper.transcribe:cli"
|
||||
|
||||
[tool.setuptools]
|
||||
py-modules = [ "whisper" ]
|
||||
include-package-data = true
|
||||
|
||||
[tool.setuptools.dynamic]
|
||||
version = { attr = "whisper.version.__version__" }
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
exclude = [ "tests*" ]
|
||||
namespaces = false
|
||||
|
||||
[tool.black]
|
||||
|
||||
[tool.isort]
|
||||
@ -5,4 +52,3 @@ profile = "black"
|
||||
include_trailing_comma = true
|
||||
line_length = 88
|
||||
multi_line_output = 3
|
||||
|
||||
|
||||
42
setup.py
42
setup.py
@ -1,42 +0,0 @@
|
||||
import platform
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pkg_resources
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
|
||||
def read_version(fname="whisper/version.py"):
|
||||
exec(compile(open(fname, encoding="utf-8").read(), fname, "exec"))
|
||||
return locals()["__version__"]
|
||||
|
||||
|
||||
requirements = []
|
||||
if sys.platform.startswith("linux") and platform.machine() == "x86_64":
|
||||
requirements.append("triton>=2.0.0")
|
||||
|
||||
setup(
|
||||
name="openai-whisper",
|
||||
py_modules=["whisper"],
|
||||
version=read_version(),
|
||||
description="Robust Speech Recognition via Large-Scale Weak Supervision",
|
||||
long_description=open("README.md", encoding="utf-8").read(),
|
||||
long_description_content_type="text/markdown",
|
||||
readme="README.md",
|
||||
python_requires=">=3.8",
|
||||
author="OpenAI",
|
||||
url="https://github.com/openai/whisper",
|
||||
license="MIT",
|
||||
packages=find_packages(exclude=["tests*"]),
|
||||
install_requires=[
|
||||
str(r)
|
||||
for r in pkg_resources.parse_requirements(
|
||||
Path(__file__).with_name("requirements.txt").open()
|
||||
)
|
||||
],
|
||||
entry_points={
|
||||
"console_scripts": ["whisper=whisper.transcribe:cli"],
|
||||
},
|
||||
include_package_data=True,
|
||||
extras_require={"dev": ["pytest", "scipy", "black", "flake8", "isort"]},
|
||||
)
|
||||
@ -147,7 +147,8 @@ def load_model(
|
||||
with (
|
||||
io.BytesIO(checkpoint_file) if in_memory else open(checkpoint_file, "rb")
|
||||
) as fp:
|
||||
checkpoint = torch.load(fp, map_location=device)
|
||||
kwargs = {"weights_only": True} if torch.__version__ >= "1.13" else {}
|
||||
checkpoint = torch.load(fp, map_location=device, **kwargs)
|
||||
del checkpoint_file
|
||||
|
||||
dims = ModelDimensions(**checkpoint["dims"])
|
||||
|
||||
@ -122,7 +122,7 @@ def log_mel_spectrogram(
|
||||
The path to an audio file, or a NumPy array or Tensor containing the audio waveform sampled at 16 kHz
|
||||
|
||||
n_mels: int
|
||||
The number of Mel-frequency filters, only 80 is supported
|
||||
The number of Mel-frequency filters, only 80 and 128 are supported
|
||||
|
||||
padding: int
|
||||
Number of zero samples to pad to the right
|
||||
@ -132,7 +132,7 @@ def log_mel_spectrogram(
|
||||
|
||||
Returns
|
||||
-------
|
||||
torch.Tensor, shape = (80, n_frames)
|
||||
torch.Tensor, shape = (n_mels, n_frames)
|
||||
A Tensor that contains the Mel spectrogram
|
||||
"""
|
||||
if not torch.is_tensor(audio):
|
||||
|
||||
@ -30,15 +30,19 @@ def remove_symbols_and_diacritics(s: str, keep=""):
|
||||
and drop any diacritics (category 'Mn' and some manual mappings)
|
||||
"""
|
||||
return "".join(
|
||||
(
|
||||
c
|
||||
if c in keep
|
||||
else ADDITIONAL_DIACRITICS[c]
|
||||
else (
|
||||
ADDITIONAL_DIACRITICS[c]
|
||||
if c in ADDITIONAL_DIACRITICS
|
||||
else ""
|
||||
else (
|
||||
""
|
||||
if unicodedata.category(c) == "Mn"
|
||||
else " "
|
||||
if unicodedata.category(c)[0] in "MSP"
|
||||
else c
|
||||
else " " if unicodedata.category(c)[0] in "MSP" else c
|
||||
)
|
||||
)
|
||||
)
|
||||
for c in unicodedata.normalize("NFKD", s)
|
||||
)
|
||||
|
||||
|
||||
@ -117,7 +117,7 @@ def dtw_cuda(x, BLOCK_SIZE=1024):
|
||||
x_skew = x_skew.T.contiguous()
|
||||
cost = torch.ones(N + M + 2, M + 2) * np.inf
|
||||
cost[0, 0] = 0
|
||||
cost = cost.cuda()
|
||||
cost = cost.to(x.device)
|
||||
trace = torch.zeros_like(cost, dtype=torch.int32)
|
||||
|
||||
dtw_kernel[(1,)](
|
||||
|
||||
@ -46,6 +46,7 @@ def transcribe(
|
||||
no_speech_threshold: Optional[float] = 0.6,
|
||||
condition_on_previous_text: bool = True,
|
||||
initial_prompt: Optional[str] = None,
|
||||
carry_initial_prompt: bool = False,
|
||||
word_timestamps: bool = False,
|
||||
prepend_punctuations: str = "\"'“¿([{-",
|
||||
append_punctuations: str = "\"'.。,,!!??::”)]}、",
|
||||
@ -102,6 +103,11 @@ def transcribe(
|
||||
"prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
|
||||
to make it more likely to predict those words correctly.
|
||||
|
||||
carry_initial_prompt: bool
|
||||
If carry_initial_prompt is True, `initial_prompt` is prepended to the prompt of each internal
|
||||
`decode()` call. If there is not enough context space at the start of the prompt, it is
|
||||
left-sliced to make space.
|
||||
|
||||
decode_options: dict
|
||||
Keyword arguments to construct `DecodingOptions` instances
|
||||
|
||||
@ -208,6 +214,8 @@ def transcribe(
|
||||
if (
|
||||
no_speech_threshold is not None
|
||||
and decode_result.no_speech_prob > no_speech_threshold
|
||||
and logprob_threshold is not None
|
||||
and decode_result.avg_logprob < logprob_threshold
|
||||
):
|
||||
needs_fallback = False # silence
|
||||
if not needs_fallback:
|
||||
@ -227,9 +235,11 @@ def transcribe(
|
||||
all_segments = []
|
||||
prompt_reset_since = 0
|
||||
|
||||
remaining_prompt_length = model.dims.n_text_ctx // 2 - 1
|
||||
if initial_prompt is not None:
|
||||
initial_prompt_tokens = tokenizer.encode(" " + initial_prompt.strip())
|
||||
all_tokens.extend(initial_prompt_tokens)
|
||||
remaining_prompt_length -= len(initial_prompt_tokens)
|
||||
else:
|
||||
initial_prompt_tokens = []
|
||||
|
||||
@ -275,7 +285,13 @@ def transcribe(
|
||||
segment_duration = segment_size * HOP_LENGTH / SAMPLE_RATE
|
||||
mel_segment = pad_or_trim(mel_segment, N_FRAMES).to(model.device).to(dtype)
|
||||
|
||||
if carry_initial_prompt:
|
||||
nignored = max(len(initial_prompt_tokens), prompt_reset_since)
|
||||
remaining_prompt = all_tokens[nignored:][-remaining_prompt_length:]
|
||||
decode_options["prompt"] = initial_prompt_tokens + remaining_prompt
|
||||
else:
|
||||
decode_options["prompt"] = all_tokens[prompt_reset_since:]
|
||||
|
||||
result: DecodingResult = decode_with_fallback(mel_segment)
|
||||
tokens = torch.tensor(result.tokens)
|
||||
|
||||
@ -529,6 +545,8 @@ def cli():
|
||||
|
||||
parser.add_argument("--suppress_tokens", type=str, default="-1", help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations")
|
||||
parser.add_argument("--initial_prompt", type=str, default=None, help="optional text to provide as a prompt for the first window.")
|
||||
parser.add_argument("--carry_initial_prompt", type=str2bool, default=False, help="if True, prepend initial_prompt to every internal decode() call. May reduce the effectiveness of condition_on_previous_text")
|
||||
|
||||
parser.add_argument("--condition_on_previous_text", type=str2bool, default=True, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop")
|
||||
parser.add_argument("--fp16", type=str2bool, default=True, help="whether to perform inference in fp16; True by default")
|
||||
|
||||
|
||||
@ -60,7 +60,7 @@ def median_kernel(filter_width: int):
|
||||
tl.store(y_ptr + offsets, MIDDLE_ROW_HERE, mask=mask) # noqa: F821
|
||||
|
||||
kernel = triton.JITFunction(kernel.fn)
|
||||
kernel.src = kernel.src.replace(
|
||||
new_kernel = kernel.src.replace(
|
||||
" LOAD_ALL_ROWS_HERE",
|
||||
"\n".join(
|
||||
[
|
||||
@ -69,7 +69,8 @@ def median_kernel(filter_width: int):
|
||||
]
|
||||
),
|
||||
)
|
||||
kernel.src = kernel.src.replace(
|
||||
|
||||
new_kernel = new_kernel.replace(
|
||||
" BUBBLESORT_HERE",
|
||||
"\n\n".join(
|
||||
[
|
||||
@ -90,7 +91,14 @@ def median_kernel(filter_width: int):
|
||||
]
|
||||
),
|
||||
)
|
||||
kernel.src = kernel.src.replace("MIDDLE_ROW_HERE", f"row{filter_width // 2}")
|
||||
|
||||
new_kernel = new_kernel.replace("MIDDLE_ROW_HERE", f"row{filter_width // 2}")
|
||||
|
||||
if hasattr(kernel, "_unsafe_update_src") is True:
|
||||
kernel._unsafe_update_src(new_kernel)
|
||||
kernel.hash = None
|
||||
else:
|
||||
kernel.src = new_kernel
|
||||
|
||||
return kernel
|
||||
|
||||
|
||||
@ -209,9 +209,11 @@ class SubtitlesWriter(ResultWriter):
|
||||
|
||||
yield start, end, "".join(
|
||||
[
|
||||
(
|
||||
re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
|
||||
if j == i
|
||||
else word
|
||||
)
|
||||
for j, word in enumerate(all_words)
|
||||
]
|
||||
)
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "20240930"
|
||||
__version__ = "20250625"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user