forked from modular/mojo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmandelbrot.mojo
109 lines (92 loc) · 3.53 KB
/
mandelbrot.mojo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# ===----------------------------------------------------------------------=== #
# Copyright (c) 2023, Modular Inc. All rights reserved.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions:
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #
# RUN: %mojo -debug-level full %s | FileCheck %s
import benchmark
from complex import ComplexSIMD, ComplexFloat64
from math import iota
from python import Python
from runtime.llcl import Runtime
from sys.info import num_logical_cores
from algorithm import parallelize, vectorize
from tensor import Tensor
from utils.index import Index
from python import Python
# Element types for the computation: escape counts are stored as int64,
# coordinates computed in float64.
alias float_type = DType.float64
alias int_type = DType.int64
# Pixels processed per inner call: two hardware SIMD vectors' worth,
# to give the compiler more independent work per iteration.
alias simd_width = 2 * simdwidthof[float_type]()
# Output image dimensions in pixels.
alias width = 960
alias height = 960
# Iteration cap: points that have not escaped by then count as inside the set.
alias MAX_ITERS = 200
# Complex-plane window mapped onto the image (classic Mandelbrot view).
alias min_x = -2.0
alias max_x = 0.6
alias min_y = -1.5
alias max_y = 1.5
fn mandelbrot_kernel_SIMD[
    simd_width: Int
](c: ComplexSIMD[float_type, simd_width]) -> SIMD[int_type, simd_width]:
    """Count Mandelbrot escape iterations for a vector of points at once.

    Iterates z -> z*z + c independently in every SIMD lane until either all
    lanes have escaped (|z|^2 > 4) or MAX_ITERS is reached, and returns the
    per-lane iteration counts.
    """
    var re = c.re
    var im = c.im
    var zx = SIMD[float_type, simd_width](0)
    var zy = SIMD[float_type, simd_width](0)
    var zy2 = SIMD[float_type, simd_width](0)
    var counts = SIMD[int_type, simd_width](0)
    # Lane mask: True while that lane has not yet escaped.
    var active: SIMD[DType.bool, simd_width] = True
    for _ in range(MAX_ITERS):
        # Early exit once every lane in the vector has escaped.
        if not active.reduce_or():
            break
        # One z -> z*z + c step, phrased as fused multiply-adds.
        zy2 = zy * zy
        zy = zx.fma(zy + zy, im)  # new zy = 2*zx*zy + c.im
        # Escape test |z|^2 <= 4 on the pre-update zx and zy (zy2 is old zy^2).
        active = zx.fma(zx, zy2) <= 4
        zx = zx.fma(zx, re - zy2)  # new zx = zx*zx - zy*zy + c.re
        # Only lanes still active keep accumulating iterations.
        counts = active.select(counts + 1, counts)
    return counts
fn main() raises:
    # Escape-count image: one int64 per pixel, row-major height x width.
    var t = Tensor[int_type](height, width)

    @parameter
    fn worker(row: Int):
        # Pixel-to-complex-plane scale factors for this image window.
        var scale_x = (max_x - min_x) / width
        var scale_y = (max_y - min_y) / height

        # Capture the scales by value so the nested closure stays register-friendly.
        @__copy_capture(scale_x, scale_y)
        @parameter
        fn compute_vector[simd_width: Int](col: Int):
            """Each time we operate on a `simd_width` vector of pixels."""
            # Map columns [col, col+simd_width) and this row into the complex plane.
            var cx = min_x + (col + iota[float_type, simd_width]()) * scale_x
            var cy = min_y + row * scale_y
            var c = ComplexSIMD[float_type, simd_width](cx, cy)
            # Store the whole vector of escape counts directly into the tensor.
            t.data().simd_store[simd_width](
                row * width + col, mandelbrot_kernel_SIMD[simd_width](c)
            )

        # Vectorize the call to compute_vector where call gets a chunk of pixels.
        vectorize[compute_vector, simd_width, width]()

    # Single-threaded baseline: vectorized inner loop, rows processed serially.
    @parameter
    fn bench[simd_width: Int]():
        for row in range(height):
            worker(row)

    var vectorized = benchmark.run[bench[simd_width]](
        max_runtime_secs=0.5
    ).mean()
    print("Number of threads:", num_logical_cores())
    print("Vectorized:", vectorized, "s")

    # NOTE(review): `rt` is bound but never used below — presumably kept so the
    # runtime outlives the parallel benchmark; confirm it is still required.
    with Runtime(num_logical_cores()) as rt:
        # Parallelized
        @parameter
        fn bench_parallel[simd_width: Int]():
            # Second argument is the worker count: one task per row here —
            # NOTE(review): confirm `height` workers (vs num_logical_cores) is intended.
            parallelize[worker](height, height)

        var parallelized = benchmark.run[bench_parallel[simd_width]](
            max_runtime_secs=0.5
        ).mean()
        print("Parallelized:", parallelized, "s")

        # CHECK: Parallel speedup
        print("Parallel speedup:", vectorized / parallelized)

    _ = t  # Make sure tensor isn't destroyed before benchmark is finished