-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexchangedecode.cu
55 lines (42 loc) · 1.32 KB
/
exchangedecode.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#include <stdint.h>
#include "stencil.h"
#include "float3.h"
#include "exchange.h"
// see exchange.go
// Kernel: for every cell, adds up the aLUT2d entries selected by the cell's own
// region paired with the region of each nearest neighbour, and stores that sum
// in dst.
//
// Launch layout: one thread per cell. The x/y/z global thread coordinates are
// used directly as cell coordinates; threads outside Nx x Ny x Nz return
// without touching memory.
//
//   dst       output, written at the cell's linear index idx(ix, iy, iz)
//   aLUT2d    look-up table, addressed through symidx(regionA, regionB)
//   regions   region number of every cell, read at the same linear indices
//   wx,wy,wz  accepted but never read inside this kernel
//   Nx,Ny,Nz  grid dimensions used for the bounds check
//   PBC       boundary setting: the clamp helpers clamp or wrap a neighbour
//             coordinate according to it
//
// The four in-plane neighbours always contribute; the two neighbours along z
// are added only when Nz != 1. The stored value is a plain sum -- it is not
// divided by the number of neighbours.
extern "C" __global__ void
exchangedecode(float* __restrict__ dst, float* __restrict__ aLUT2d, uint8_t* __restrict__ regions,
               float wx, float wy, float wz, int Nx, int Ny, int Nz, uint8_t PBC) {

    const int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const int iz = blockIdx.z * blockDim.z + threadIdx.z;

    // The launch grid may overhang the simulation grid; surplus threads leave.
    const bool insideGrid = (ix < Nx) && (iy < Ny) && (iz < Nz);
    if (!insideGrid) {
        return;
    }

    // Linear index and region of the cell this thread owns.
    const int cell = idx(ix, iy, iz);
    const uint8_t regionHere = regions[cell];

    uint8_t regionThere;   // region of the neighbour currently being visited
    float total = 0.0f;    // running sum of look-up-table entries

    // x axis: left neighbour, then right neighbour.
    // The clamp helpers clamp or wrap the coordinate according to PBC.
    regionThere = regions[idx(lclampx(ix-1), iy, iz)];
    total += aLUT2d[symidx(regionHere, regionThere)];

    regionThere = regions[idx(hclampx(ix+1), iy, iz)];
    total += aLUT2d[symidx(regionHere, regionThere)];

    // y axis: back neighbour, then front neighbour.
    regionThere = regions[idx(ix, lclampy(iy-1), iz)];
    total += aLUT2d[symidx(regionHere, regionThere)];

    regionThere = regions[idx(ix, hclampy(iy+1), iz)];
    total += aLUT2d[symidx(regionHere, regionThere)];

    // z axis: skipped for a single-layer (Nz == 1) grid.
    if (Nz != 1) {
        // bottom neighbour, then top neighbour
        regionThere = regions[idx(ix, iy, lclampz(iz-1))];
        total += aLUT2d[symidx(regionHere, regionThere)];

        regionThere = regions[idx(ix, iy, hclampz(iz+1))];
        total += aLUT2d[symidx(regionHere, regionThere)];
    }

    dst[cell] = total;
}