linear.py
from tinygrad.tensor import Tensor as tn
from tinygrad.helpers import getenv
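
# NOTE: tinygrad's getenv() reads environment variables as ints (default 0), so the
# DEBUG prints in this file are enabled by running e.g. `DEBUG=1 python linear.py`.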

class Linear:
    def __init__(self, input_size, output_size):
        # Initialize weights and bias
        self.weights = tn.randn(input_size, output_size)
        self.bias = tn.randn(1, output_size)
        if getenv("DEBUG") > 0:
            print(
                f"Initialized Linear layer with weights: {self.weights.shape} and bias: {self.bias.shape}"
            )
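
    # NOTE: randn gives a plain standard-normal init; tinygrad's own nn.Linear uses
    # a scaled Kaiming-uniform init instead, which behaves better in deeper nets.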
    def forward(self, inputs):
        # Perform dot product of inputs and weights
        weighted_sum = tn.dot(inputs, self.weights)
        if getenv("DEBUG") > 0:
            print(f"\t-- {inputs.shape} * {self.weights.shape} = {weighted_sum.shape}")
        # Add the bias
        output = weighted_sum + self.bias
        if getenv("DEBUG") > 0:
            print(f"\t-- {weighted_sum.shape} + {self.bias.shape} = {output.shape}")
        return output
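
# NOTE: tinygrad is lazy: forward() only builds a graph of ops; nothing actually runs
# on the device until a value is realized (e.g. by the .numpy() call below).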
if __name__ == "__main__":
    # Create a Linear layer
    linear = Linear(5, 1)
    # Create some random inputs
    inputs = tn.randn(1, 5)
    # Forward pass
    output = linear.forward(inputs)
    if getenv("DEBUG") > 0:
        print(f"Output shape: {output.shape}, Output: {output.numpy()}")
    from tinygrad.device import Device, Compiled
    from tinygrad.realize import create_schedule
    from tinygrad.ops import LoadOps
    from tinygrad.codegen.linearizer import Linearizer
    from tinygrad.features.search import time_linearizer, bufs_from_lin, get_linearizer_actions
    from tinygrad.shape.symbolic import sym_infer
    from tinygrad.helpers import ansilen
    from tinygrad.features.graph import graph_uops

    # `seen` collects buffers that have already been scheduled; scheduling a dummy
    # forward pass first keeps the weight-initialization kernels out of the timing
    # run below.
    seen = set()
    create_schedule([linear.forward(tn.empty(1, 5)).lazydata], seen)
    # the device we are optimizing for
    device: Compiled = Device[Device.DEFAULT]
    if getenv("DEBUG") > 0:
        print(f"optimizing for {Device.DEFAULT}")

    # Build the schedule for a fresh forward pass; buffers already in `seen` are skipped
    out = linear.forward(tn.randn(1, 5))
    sched = create_schedule([out.lazydata], seen)
    # Keep only compute kernels, dropping schedule items that just load/copy buffers
    sched = [x for x in sched if x.ast.op not in LoadOps]

    total_tm = 0
    running_gflops = 0
    for i, kernel in enumerate(sched):
        # Allocate real buffers for benchmarking and build the Linearizer for this kernel
        rawbufs = bufs_from_lin(Linearizer(kernel.ast))
        lin = Linearizer(kernel.ast, device.compiler.linearizer_opts)
        # print(get_linearizer_actions(lin))
        # lin.hand_coded_optimizations()
        uops = lin.linearize().uops
        graph_uops(uops)  # visualize the linearized uops
        # Compile and run the kernel cnt times, taking the best wall time in seconds
        tm = time_linearizer(lin, rawbufs, allow_test_size=False, cnt=10)
        gflops = (
            sym_infer(lin.info.flops, {k: k.min for k in lin.ast.vars()}) * 1e-9 / tm
        )
        total_tm += tm
        running_gflops += gflops * tm
        print(
            f"*** {total_tm*1000:7.2f} ms : kernel{i:2d} takes {tm*1000:7.2f} ms, {gflops:6.0f} GFLOPS"
        )
    # running_gflops/total_tm is the time-weighted average GFLOPS across all kernels
    print(
        f"******* total {total_tm*1000:.2f} ms, {running_gflops/total_tm:6.0f} GFLOPS"
    )
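
# Run with e.g.:
#   DEBUG=1 python linear.py
# to see the shape traces followed by the per-kernel timing summary.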