datasets isn't a feature + filter docstrings (#4228)

* datasets isn't a feature
* filter docstrings in sz
2026-06-24 02:14:17 +00:00 · 2024-04-19 16:16:10 +04:00 · 2024-04-19 16:16:10 +04:00 · cd88afc98b
commit cd88afc98b
parent b9570d6100
4 changed files with 6 additions and 3 deletions
--- a/docs-legacy/abstractions3.py
+++ b/docs-legacy/abstractions3.py
@ -6,7 +6,7 @@ from tinygrad.helpers import DEBUG
 # *****
 # 0. Load mnist on the device

-from tinygrad.features.datasets import mnist
+from tinygrad.nn.datasets import mnist
 X_train, Y_train, _, _ = mnist()
 X_train = X_train.float()
 X_train -= X_train.mean()
--- a/examples/beautiful_mnist.py
+++ b/examples/beautiful_mnist.py
@ -2,7 +2,7 @@
 from typing import List, Callable
 from tinygrad import Tensor, TinyJit, nn, GlobalCounters
 from tinygrad.helpers import getenv, colored
-from tinygrad.features.datasets import mnist
+from tinygrad.nn.datasets import mnist
 from tqdm import trange

 class Model:
--- a/sz.py
+++ b/sz.py
@ -7,6 +7,9 @@ from tabulate import tabulate

 TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]

+def is_docstring(t):
+  return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
+
 def gen_stats(base_path="."):
  table = []
  for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
@ -16,7 +19,7 @@ def gen_stats(base_path="."):
      filepath = os.path.join(path, name)
      relfilepath = os.path.relpath(filepath, base_path)
      with tokenize.open(filepath) as file_:
-        tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
+        tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST and not is_docstring(t)]
        token_count, line_count = len(tokens), len(set([x for t in tokens for x in range(t.start[0], t.end[0]+1)]))
        table.append([relfilepath, line_count, token_count/line_count])
  return table
--- a/tinygrad/features/datasets.py
+++ b/tinygrad/features/datasets.py