forked from modular/mojo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmandelbrot.mojo
111 lines (91 loc) · 3.52 KB
/
mandelbrot.mojo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# ===----------------------------------------------------------------------=== #
# Copyright (c) 2023, Modular Inc. All rights reserved.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions:
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #
# RUN: %mojo -debug-level full %s | FileCheck %s
from math import iota
from sys.info import num_logical_cores
import benchmark
from algorithm import parallelize, vectorize
from complex import ComplexFloat64, ComplexSIMD
from python import Python
from runtime.llcl import Runtime
from tensor import Tensor
from utils.index import Index
alias float_type = DType.float64
alias int_type = DType.int64
alias simd_width = 2 * simdwidthof[float_type]()
alias width = 960
alias height = 960
alias MAX_ITERS = 200
alias min_x = -2.0
alias max_x = 0.6
alias min_y = -1.5
alias max_y = 1.5
fn mandelbrot_kernel_SIMD[
simd_width: Int
](c: ComplexSIMD[float_type, simd_width]) -> SIMD[int_type, simd_width]:
"""A vectorized implementation of the inner mandelbrot computation."""
var cx = c.re
var cy = c.im
var x = SIMD[float_type, simd_width](0)
var y = SIMD[float_type, simd_width](0)
var y2 = SIMD[float_type, simd_width](0)
var iters = SIMD[int_type, simd_width](0)
var t: SIMD[DType.bool, simd_width] = True
for i in range(MAX_ITERS):
if not t.reduce_or():
break
y2 = y * y
y = x.fma(y + y, cy)
t = x.fma(x, y2) <= 4
x = x.fma(x, cx - y2)
iters = t.select(iters + 1, iters)
return iters
fn main() raises:
var t = Tensor[int_type](height, width)
@parameter
fn worker(row: Int):
var scale_x = (max_x - min_x) / width
var scale_y = (max_y - min_y) / height
@__copy_capture(scale_x, scale_y)
@parameter
fn compute_vector[simd_width: Int](col: Int):
"""Each time we operate on a `simd_width` vector of pixels."""
var cx = min_x + (col + iota[float_type, simd_width]()) * scale_x
var cy = min_y + row * scale_y
var c = ComplexSIMD[float_type, simd_width](cx, cy)
t.data().store[width=simd_width](
row * width + col, mandelbrot_kernel_SIMD[simd_width](c)
)
# Vectorize the call to compute_vector where call gets a chunk of pixels.
vectorize[compute_vector, simd_width, size=width]()
@parameter
fn bench[simd_width: Int]():
for row in range(height):
worker(row)
var vectorized = benchmark.run[bench[simd_width]](
max_runtime_secs=0.5
).mean()
print("Number of threads:", num_logical_cores())
print("Vectorized:", vectorized, "s")
with Runtime(num_logical_cores()) as rt:
# Parallelized
@parameter
fn bench_parallel[simd_width: Int]():
parallelize[worker](height, height)
var parallelized = benchmark.run[bench_parallel[simd_width]](
max_runtime_secs=0.5
).mean()
print("Parallelized:", parallelized, "s")
# CHECK: Parallel speedup
print("Parallel speedup:", vectorized / parallelized)
_ = t # Make sure tensor isn't destroyed before benchmark is finished