Mandelbrot and matmul improvements (modular#557)
jackos authored Sep 5, 2023
1 parent bdc57ad commit c30473a
Showing 3 changed files with 174 additions and 249 deletions.
examples/mandelbrot.mojo: 165 changes (19 additions, 146 deletions)
@@ -20,58 +20,6 @@ alias min_y = -1.5
 alias max_y = 1.5


-# Compute the number of steps to escape.
-def mandelbrot_kernel(c: ComplexFloat64) -> Int:
-    z = c
-    for i in range(MAX_ITERS):
-        z = z * z + c
-        if z.squared_norm() > 4:
-            return i
-    return MAX_ITERS
-
-
-def compute_mandelbrot() -> Tensor[float_type]:
-    # create a matrix. Each element of the matrix corresponds to a pixel
-    m = Tensor[float_type](width, height)
-
-    dx = (max_x - min_x) / width
-    dy = (max_y - min_y) / height
-
-    y = min_y
-    for j in range(height):
-        x = min_x
-        for i in range(width):
-            m[Index(i, j)] = mandelbrot_kernel(ComplexFloat64(x, y))
-            x += dx
-        y += dy
-    return m
-
-
-def show_plot(tensor: Tensor[float_type]):
-    np = Python.import_module("numpy")
-    plt = Python.import_module("matplotlib.pyplot")
-    colors = Python.import_module("matplotlib.colors")
-
-    numpy_array = np.zeros((height, width), np.float32)
-
-    for col in range(width):
-        for row in range(height):
-            numpy_array.itemset((row, col), tensor[col, row])
-
-    fig = plt.figure(1, [10, 10 * height // width], 64)
-    ax = fig.add_axes([0.0, 0.0, 1.0, 1.0], False, 1)
-    light = colors.LightSource(315, 10, 0, 1, 1, 0)
-
-    image = light.shade(
-        numpy_array, plt.cm.hot, colors.PowerNorm(0.3), "hsv", 0, 0, 1.5
-    )
-
-    plt.imshow(image)
-    plt.axis("off")
-    plt.savefig("out.png")
-    plt.show()
-

 fn mandelbrot_kernel_SIMD[
     simd_width: Int
 ](c: ComplexSIMD[float_type, simd_width]) -> SIMD[float_type, simd_width]:
@@ -90,123 +38,48 @@ fn mandelbrot_kernel_SIMD[
     return iters


-fn vectorized():
-    let m = Tensor[float_type](width, height)
+fn main():
+    let t = Tensor[float_type](height, width)

     @parameter
-    fn worker(col: Int):
+    fn worker(row: Int):
         let scale_x = (max_x - min_x) / width
         let scale_y = (max_y - min_y) / height

         @parameter
-        fn compute_vector[simd_width: Int](row: Int):
+        fn compute_vector[simd_width: Int](col: Int):
             """Each time we oeprate on a `simd_width` vector of pixels."""
-            let cy = min_y + (row + iota[float_type, simd_width]()) * scale_y
-            let cx = min_x + col * scale_x
+            let cx = min_x + (col + iota[float_type, simd_width]()) * scale_x
+            let cy = min_y + row * scale_y
             let c = ComplexSIMD[float_type, simd_width](cx, cy)
-            m._ptr.simd_store[simd_width](
-                col * height + row, mandelbrot_kernel_SIMD[simd_width](c)
+            t.data().simd_store[simd_width](
+                row * width + col, mandelbrot_kernel_SIMD[simd_width](c)
             )

         # Vectorize the call to compute_vector where call gets a chunk of pixels.
         vectorize[simd_width, compute_vector](width)

-    @parameter
-    fn bench[simd_width: Int]():
-        for col in range(width):
-            worker(col)
-
-    let vectorized = Benchmark().run[bench[simd_width]]() / 1e6
-    print("Vectorized", ":", vectorized, "ms")
-
-    try:
-        _ = show_plot(m)
-    except e:
-        print("failed to show plot:", e.value)
-
-
-fn parallelized():
-    let m = Tensor[float_type](width, height)
-
-    @parameter
-    fn worker(col: Int):
-        let scale_x = (max_x - min_x) / width
-        let scale_y = (max_y - min_y) / height
-
-        @parameter
-        fn compute_vector[simd_width: Int](row: Int):
-            """Each time we oeprate on a `simd_width` vector of pixels."""
-            let cy = min_y + (row + iota[float_type, simd_width]()) * scale_y
-            let cx = min_x + col * scale_x
-            let c = ComplexSIMD[float_type, simd_width](cx, cy)
-            m._ptr.simd_store[simd_width](
-                col * height + row, mandelbrot_kernel_SIMD[simd_width](c)
-            )
-
-        # Vectorize the call to compute_vector where call gets a chunk of pixels.
-        vectorize[simd_width, compute_vector](width)
-
-    with Runtime() as rt:
-
-        @parameter
-        fn bench_parallel[simd_width: Int]():
-            parallelize[worker](rt, width, 5 * num_cores())
-
-        alias simd_width = simdwidthof[DType.float64]()
-        let parallelized = Benchmark().run[bench_parallel[simd_width]]() / 1e6
-        print("Parallelized:", parallelized, "ms")
-
-    try:
-        _ = show_plot(m)
-    except e:
-        print("failed to show plot:", e.value)
-
-
-fn compare():
-    let m = Tensor[float_type](width, height)
-
-    @parameter
-    fn worker(col: Int):
-        let scale_x = (max_x - min_x) / width
-        let scale_y = (max_y - min_y) / height
-
-        @parameter
-        fn compute_vector[simd_width: Int](row: Int):
-            """Each time we oeprate on a `simd_width` vector of pixels."""
-            let cy = min_y + (row + iota[float_type, simd_width]()) * scale_y
-            let cx = min_x + col * scale_x
-            let c = ComplexSIMD[float_type, simd_width](cx, cy)
-            m._ptr.simd_store[simd_width](
-                col * height + row, mandelbrot_kernel_SIMD[simd_width](c)
-            )
-
-        # Vectorize the call to compute_vector where call gets a chunk of pixels.
-        vectorize[simd_width, compute_vector](width)

     # Vectorized
     @parameter
     fn bench[simd_width: Int]():
-        for col in range(width):
-            worker(col)
+        for row in range(height):
+            worker(row)

-    let vectorized = Benchmark().run[bench[simd_width]]() / 1e6
+    let vectorized_ms = Benchmark().run[bench[simd_width]]() / 1e6
     print("Number of hardware cores:", num_cores())
-    print("Vectorized:", vectorized, "ms")
+    print("Vectorized:", vectorized_ms, "ms")

     # Parallelized
     with Runtime() as rt:

         @parameter
         fn bench_parallel[simd_width: Int]():
-            parallelize[worker](rt, width, 5 * num_cores())
+            parallelize[worker](rt, height, 5 * num_cores())

         alias simd_width = simdwidthof[DType.float64]()
-        let parallelized = Benchmark().run[bench_parallel[simd_width]]() / 1e6
-        print("Parallelized:", parallelized, "ms")
-        print("Parallel speedup:", vectorized / parallelized)
+        let parallelized_ms = Benchmark().run[
+            bench_parallel[simd_width]
+        ]() / 1e6
+        print("Parallelized:", parallelized_ms, "ms")
+        print("Parallel speedup:", vectorized_ms / parallelized_ms)

-    _ = m
-
-
-fn main() raises:
-    compare()
+    _ = t # Make sure tensor isn't destroyed before benchmark is finished
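
After this change the single main() routine parallelizes over rows and vectorizes across columns, writing each pixel at the row-major offset row * width + col into a Tensor[float_type](height, width). A minimal sketch of that offset calculation follows; flat_index is a hypothetical helper name for illustration and is not part of the commit:

# Sketch only. `flat_index` illustrates the offset used by the new
# `t.data().simd_store[simd_width](row * width + col, ...)` call.
fn flat_index(row: Int, col: Int, width: Int) -> Int:
    # Row-major layout: pixels of one row sit next to each other, so a SIMD
    # store starting at (row, col) fills columns col .. col + simd_width - 1.
    return row * width + col

For example, with a hypothetical width of 960, pixel (row 2, col 5) maps to flat offset 2 * 960 + 5 = 1925.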
