faster and better convnet

2026-06-24 02:14:17 +00:00 · 2020-10-25 13:48:44 -07:00 · 2020-10-25 13:48:44 -07:00 · 8fcada8071
commit 8fcada8071
parent 1255903519
2 changed files with 19 additions and 14 deletions
--- a/test/test_mnist.py
+++ b/test/test_mnist.py
@@ -22,17 +22,20 @@ class TinyBobNet:
 # create a model with a conv layer
 class TinyConvNet:
  def __init__(self):
-    conv = 5
-    chans = 16
-    self.c1 = Tensor(layer_init_uniform(chans,1,conv,conv))
-    self.l1 = Tensor(layer_init_uniform(((28-conv+1)**2)*chans, 128))
-    self.l2 = Tensor(layer_init_uniform(128, 10))
+    # https://keras.io/examples/vision/mnist_convnet/
+    conv = 3
+    #inter_chan, out_chan = 32, 64
+    inter_chan, out_chan = 8, 16   # for speed
+    self.c1 = Tensor(layer_init_uniform(inter_chan,1,conv,conv))
+    self.c2 = Tensor(layer_init_uniform(out_chan,inter_chan,conv,conv))
+    self.l1 = Tensor(layer_init_uniform(out_chan*5*5, 10))

  def forward(self, x):
    x.data = x.data.reshape((-1, 1, 28, 28)) # hacks
-    x = x.conv2d(self.c1).relu()
+    x = x.conv2d(self.c1).relu().maxpool2x2()
+    x = x.conv2d(self.c2).relu().maxpool2x2()
    x = x.reshape(Tensor(np.array((x.shape[0], -1))))
-    return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
+    return x.dot(self.l1).logsoftmax()

 def train(model, optim, steps, BS=128):
  losses, accuracies = [], []
@@ -77,7 +80,7 @@ class TestMNIST(unittest.TestCase):
  def test_conv(self):
    np.random.seed(1337)
    model = TinyConvNet()
-    optimizer = optim.Adam([model.c1, model.l1, model.l2], lr=0.001)
+    optimizer = optim.Adam([model.c1, model.c2, model.l1], lr=0.001)
    train(model, optimizer, steps=400)
    evaluate(model)
    
--- a/tinygrad/ops.py
+++ b/tinygrad/ops.py
@@ -174,23 +174,25 @@ register('conv2d', FastConv2D)
 class MaxPool2x2(Function):
  @staticmethod
  def forward(ctx, x):
+    my, mx = (x.shape[2]//2)*2, (x.shape[3]//2)*2
    stack = []
+    xup = x[:, :, :my, :mx]
    for Y in range(2):
      for X in range(2):
-        stack.append(x[:, :, Y::2, X::2][None])
+        stack.append(xup[:, :, Y::2, X::2][None])
    stack = np.concatenate(stack, axis=0)
    idxs = np.argmax(stack, axis=0)
-    ctx.save_for_backward(idxs)
+    ctx.save_for_backward(idxs, x.shape)
    return np.max(stack, axis=0)

  @staticmethod
  def backward(ctx, grad_output):
-    idxs, = ctx.saved_tensors
-    s = grad_output.shape
-    ret = np.zeros((s[0], s[1], s[2]*2, s[3]*2), dtype=grad_output.dtype)
+    idxs,s = ctx.saved_tensors
+    my, mx = (s[2]//2)*2, (s[3]//2)*2
+    ret = np.zeros(s, dtype=grad_output.dtype)
    for Y in range(2):
      for X in range(2):
-        ret[:, :, Y::2, X::2] = grad_output * (idxs == (Y*2+X))
+        ret[:, :, Y:my:2, X:mx:2] = grad_output * (idxs == (Y*2+X))
    return ret
 register('maxpool2x2', MaxPool2x2)