mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
there is a better way to do that! (#950)
This commit is contained in:
parent
990fc40219
commit
e8a23d4331
1 changed file with 3 additions and 4 deletions
|
|
@@ -106,6 +106,7 @@ class Whisper:
|
|||
|
||||
# TODO: this is tragic. remove this
|
||||
import functools
|
||||
import itertools
|
||||
import torch
|
||||
import torchaudio
|
||||
import librosa
|
||||
|
|
@@ -158,10 +159,8 @@ def get_encoding(n_vocab_in):
|
|||
"<|notimestamps|>",
|
||||
*[f"<|{i * 0.02:.2f}|>" for i in range(1501)],
|
||||
]
|
||||
special_tokens = {}
|
||||
for token in specials:
|
||||
special_tokens[token] = n_vocab
|
||||
n_vocab += 1
|
||||
special_tokens = dict(zip(specials, itertools.count(n_vocab)))
|
||||
n_vocab += len(specials)
|
||||
assert n_vocab == n_vocab_in
|
||||
import tiktoken
|
||||
return tiktoken.Encoding(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue