forked from modular/mojo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmandelbrot.mojo
85 lines (67 loc) · 2.62 KB
/
mandelbrot.mojo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from benchmark import Benchmark
from complex import ComplexSIMD, ComplexFloat64
from math import iota
from python import Python
from runtime.llcl import num_cores, Runtime
from algorithm import parallelize, vectorize
from tensor import Tensor
from utils.index import Index
# Element type used for every SIMD vector and for the output tensor.
alias float_type = DType.float64
# Vector width handed to `vectorize`, obtained from `simdwidthof` for `float_type`.
alias simd_width = simdwidthof[float_type]()
# Output grid dimensions: the tensor is `height` rows by `width` columns.
alias width = 960
alias height = 960
# Upper bound on the number of kernel iterations per pixel.
alias MAX_ITERS = 200
# Bounds of the sampled region of the complex plane:
# columns map onto [min_x, max_x) and rows onto [min_y, max_y).
alias min_x = -2.0
alias max_x = 0.6
alias min_y = -1.5
alias max_y = 1.5
fn mandelbrot_kernel_SIMD[
    simd_width: Int
](c: ComplexSIMD[float_type, simd_width]) -> SIMD[float_type, simd_width]:
    """Run the Mandelbrot iteration on `simd_width` points simultaneously.

    Args:
        c: The complex points to evaluate, one per SIMD lane.

    Returns:
        For each lane, how many iterations (at most `MAX_ITERS`) passed the
        `squared_norm() <= 4` test.
    """
    var counts = SIMD[float_type, simd_width](0)
    var zn = ComplexSIMD[float_type, simd_width](0, 0)
    var still_bounded: SIMD[DType.bool, simd_width] = True

    var step = 0
    # Stop early once no lane passed the bound test on the previous step.
    while step < MAX_ITERS and still_bounded.reduce_or():
        still_bounded = zn.squared_norm() <= 4
        # Only lanes that passed the test this step get their counter bumped.
        let bumped = counts + 1
        counts = still_bounded.select(bumped, counts)
        zn = zn.squared_add(c)
        step += 1

    return counts
fn main():
    """Compute the Mandelbrot image twice and print benchmark timings.

    The `height` x `width` image is first computed row by row using SIMD
    vectorization only, then again with the rows handed to `parallelize`.
    Both timings and their ratio are printed.
    """
    let t = Tensor[float_type](height, width)

    @parameter
    fn worker(row: Int):
        """Fill one row of the output tensor."""
        # Step between neighbouring pixels along each axis.
        let scale_x = (max_x - min_x) / width
        let scale_y = (max_y - min_y) / height

        @parameter
        fn compute_vector[simd_width: Int](col: Int):
            """Each time we operate on a `simd_width` vector of pixels."""
            # `iota` yields 0, 1, 2, ... so each lane gets its own column.
            let cx = min_x + (col + iota[float_type, simd_width]()) * scale_x
            let cy = min_y + row * scale_y
            let c = ComplexSIMD[float_type, simd_width](cx, cy)
            # Row-major flat offset of the first pixel in this vector.
            t.data().simd_store[simd_width](
                row * width + col, mandelbrot_kernel_SIMD[simd_width](c)
            )

        # Vectorize the call to compute_vector, where each call gets a chunk
        # of pixels.
        vectorize[simd_width, compute_vector](width)

    @parameter
    fn bench[simd_width: Int]():
        for row in range(height):
            worker(row)

    # Dividing by 1e6 matches the "ms" label below; presumably `run` reports
    # nanoseconds -- TODO confirm against the Benchmark documentation.
    let vectorized_ms = Benchmark().run[bench[simd_width]]() / 1e6
    print("Number of hardware cores:", num_cores())
    print("Vectorized:", vectorized_ms, "ms")

    # Parallelized
    with Runtime() as rt:

        @parameter
        fn bench_parallel[simd_width: Int]():
            parallelize[worker](rt, height, 5 * num_cores())

        # Uses the module-level `simd_width` (derived from `float_type`), the
        # same value the vectorized benchmark uses, so both stay consistent.
        let parallelized_ms = Benchmark().run[
            bench_parallel[simd_width]
        ]() / 1e6
        print("Parallelized:", parallelized_ms, "ms")
        print("Parallel speedup:", vectorized_ms / parallelized_ms)

    _ = t  # Make sure tensor isn't destroyed before benchmark is finished