forked from mikex86/LibreCuda
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwrite_float.ptx
72 lines (59 loc) · 1.44 KB
/
write_float.ptx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-33961263
// Cuda compilation tools, release 12.4, V12.4.99
// Based on NVVM 7.0.1
//
// PTX ISA version this module is written against.
.version 8.4
// Target architecture the module is generated for.
.target sm_80
// Addresses in this module are 64 bits wide.
.address_size 64
// .globl write_float_ptr
//
// write_float_ptr
//   param_0 (.u64): address that receives the float
//   param_1 (.u64): address the float is read from
//
// Reads a single f32 from the address in param_1 and stores it at the
// address in param_0. Both addresses are passed through cvta.to.global
// before being dereferenced with global-space loads/stores.
.visible .entry write_float_ptr(
    .param .u64 write_float_ptr_param_0,
    .param .u64 write_float_ptr_param_1
)
{
    .reg .f32   %value;
    .reg .b64   %src_generic, %src_global;
    .reg .b64   %dst_generic, %dst_global;

    // Source side: fetch the address argument, convert it, read the float.
    ld.param.u64        %src_generic, [write_float_ptr_param_1];
    cvta.to.global.u64  %src_global, %src_generic;
    ld.global.f32       %value, [%src_global];

    // Destination side: fetch the address argument, convert it, write the float.
    ld.param.u64        %dst_generic, [write_float_ptr_param_0];
    cvta.to.global.u64  %dst_global, %dst_generic;
    st.global.f32       [%dst_global], %value;

    ret;
}
// .globl write_float_value
//
// write_float_value
//   param_0 (.u64): address that receives the float
//   param_1 (.f32): value to store
//
// Stores the f32 passed directly as a kernel argument at the address in
// param_0, after converting that address with cvta.to.global.
.visible .entry write_float_value(
    .param .u64 write_float_value_param_0,
    .param .f32 write_float_value_param_1
)
{
    .reg .f32   %value;
    .reg .b64   %dst_generic, %dst_global;

    // The value comes straight from the parameter space; no memory read needed.
    ld.param.f32        %value, [write_float_value_param_1];

    // Resolve the destination address and write the value out.
    ld.param.u64        %dst_generic, [write_float_value_param_0];
    cvta.to.global.u64  %dst_global, %dst_generic;
    st.global.f32       [%dst_global], %value;

    ret;
}
// .globl write_float_sum
//
// write_float_sum
//   param_0 (.u64): address that receives the result
//   param_1 (.u16): 16-bit integer addend
//   param_2 (.f32): float addend
//
// Converts the 16-bit argument to f32, adds the float argument to it, and
// stores the f32 sum at the address in param_0.
.visible .entry write_float_sum(
    .param .u64 write_float_sum_param_0,
    .param .u16 write_float_sum_param_1,
    .param .f32 write_float_sum_param_2
)
{
    .reg .b16   %int_arg;
    .reg .f32   %float_arg, %int_as_float, %sum;
    .reg .b64   %dst_generic, %dst_global;

    // Integer addend: the parameter is declared .u16, but the conversion
    // below reads the 16 bits as a signed value (s16), rounding to nearest.
    ld.param.u16        %int_arg, [write_float_sum_param_1];
    cvt.rn.f32.s16      %int_as_float, %int_arg;

    // Float addend, then the sum (converted integer is the first operand).
    ld.param.f32        %float_arg, [write_float_sum_param_2];
    add.f32             %sum, %int_as_float, %float_arg;

    // Resolve the destination address and write the sum out.
    ld.param.u64        %dst_generic, [write_float_sum_param_0];
    cvta.to.global.u64  %dst_global, %dst_generic;
    st.global.f32       [%dst_global], %sum;

    ret;
}