datasets isn't a feature + filter docstrings (#4228)

* datasets isn't a feature

* filter docstrings in sz
This commit is contained in:
George Hotz 2024-04-19 16:16:10 +04:00 committed by GitHub
commit cd88afc98b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 6 additions and 3 deletions

View file

@ -6,7 +6,7 @@ from tinygrad.helpers import DEBUG
# *****
# 0. Load mnist on the device
from tinygrad.features.datasets import mnist
from tinygrad.nn.datasets import mnist
X_train, Y_train, _, _ = mnist()
X_train = X_train.float()
X_train -= X_train.mean()

View file

@ -2,7 +2,7 @@
from typing import List, Callable
from tinygrad import Tensor, TinyJit, nn, GlobalCounters
from tinygrad.helpers import getenv, colored
from tinygrad.features.datasets import mnist
from tinygrad.nn.datasets import mnist
from tqdm import trange
class Model:

5
sz.py
View file

@ -7,6 +7,9 @@ from tabulate import tabulate
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
def is_docstring(t: tokenize.TokenInfo) -> bool:
  """Return True if token `t` looks like a docstring.

  A token qualifies when it is a STRING token whose text opens with three
  double quotes and whose source line, ignoring surrounding whitespace, also
  begins with three double quotes. Strings delimited by three single quotes
  are not matched.

  Args:
    t: a token as produced by `tokenize.generate_tokens`.

  Returns:
    True when both the token text and its source line start with the
    triple-double-quote delimiter, False otherwise.
  """
  # Cheap rejection first: anything that is not a `"""`-opened STRING token.
  if t.type != token.STRING or not t.string.startswith('"""'):
    return False
  # The line itself must start with the quotes, so a `"""` string that appears
  # after other code on the line (e.g. `x = """..."""`) is not a docstring.
  return t.line.strip().startswith('"""')
def gen_stats(base_path="."):
table = []
for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
@ -16,7 +19,7 @@ def gen_stats(base_path="."):
filepath = os.path.join(path, name)
relfilepath = os.path.relpath(filepath, base_path)
with tokenize.open(filepath) as file_:
tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST and not is_docstring(t)]
token_count, line_count = len(tokens), len(set([x for t in tokens for x in range(t.start[0], t.end[0]+1)]))
table.append([relfilepath, line_count, token_count/line_count])
return table