-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Correctness issue in canonicalize-loops #431
Comments
The `ToRem` rewrite applied to this IR seems to be wrong:
* Pattern {anonymous}::{anonymous}::ToRem : 'arith.addi -> ()' {
Trying to match "{anonymous}::{anonymous}::ToRem"
** Insert : 'arith.remui'(0x5661ebcabd50)
** Replace : 'arith.addi'(0x5661ebcabc40)
** Modified: 'arith.index_cast'(0x5661ebcac330)
** Erase : 'arith.addi'(0x5661ebcabc40)
"{anonymous}::{anonymous}::ToRem" result 1
} -> success : pattern applied successfully
// *** IR Dump After Pattern Application ***
// IR as printed after the ToRem pattern replaced an arith.addi with an
// arith.remui (see the pattern log above). Comments below mark the values
// involved in that rewrite; the IR itself is unchanged.
func.func private @"##call__Z28gpu__mask_immersed_field_xy_16CompilerMetadataI10StaticSizeI9_180__85_E12DynamicCheckvv7NDRangeILi2ES0_I7_12__6_ES0_I8_16__16_EvvEE5FieldI4Face6Centervvvv11OffsetArrayI7Float64Li3E13CuTracedArrayISC_Li3ELi1E13_194__187__1_EESC_vvvE5TupleIS9_SA_vE20ImmersedBoundaryGridISC_8Periodic14RightConnected7Bounded28OrthogonalSphericalShellGridISC_SK_SL_SM_28StaticVerticalDiscretizationISB_ISC_Li1ESD_ISC_Li1ELi1E5_35__EESB_ISC_Li1ESD_ISC_Li1ELi1E5_34__EESQ_SS_ESB_ISC_Li2ESD_ISC_Li2ELi1E10_194__187_EE8TripolarI5Int64SX_SX_EvE16GridFittedBottomIS8_ISA_SA_vvvvSF_SC_vvvE23CenterImmersedConditionEvvvESC_SX_#802$par0"(%arg0: memref<1x187x194xf64, 1>, %arg1: memref<34xf64, 1>, %arg2: memref<1x187x194xf64, 1>) {
%c51_i64 = arith.constant 51 : i64
%c-1_i64 = arith.constant -1 : i64
%c1 = arith.constant 1 : index
%c12_i32 = arith.constant 12 : i32
%c-12_i64 = arith.constant -12 : i64
%c16_i16 = arith.constant 16 : i16
%c0_i64 = arith.constant 0 : i64
%c16_i64 = arith.constant 16 : i64
%c194_i64 = arith.constant 194 : i64
%c6_i64 = arith.constant 6 : i64
%c5_i64 = arith.constant 5 : i64
%cst = arith.constant 0.000000e+00 : f64
%0 = "enzymexla.memref2pointer"(%arg0) : (memref<1x187x194xf64, 1>) -> !llvm.ptr<1>
%1 = "enzymexla.memref2pointer"(%arg1) : (memref<34xf64, 1>) -> !llvm.ptr<1>
%2 = "enzymexla.memref2pointer"(%arg2) : (memref<1x187x194xf64, 1>) -> !llvm.ptr<1>
// %arg3 ranges over [0, 72) and %arg4 over [0, 256).
affine.parallel (%arg3, %arg4) = (0, 0) to (72, 256) {
// %3 = %arg3 + 1, %4 = %arg4 + 1.
%3 = arith.addi %arg3, %c1 : index
%4 = arith.addi %arg4, %c1 : index
%5 = arith.index_castui %3 : index to i64
%c0 = arith.constant 0 : index
// %6 = %arg3 + 0, i.e. the same value as %arg3 (not %3).
%6 = arith.addi %arg3, %c0 : index
%7 = arith.index_castui %6 : index to i64
%8 = arith.index_castui %6 : index to i32
%c12 = arith.constant 12 : index
// %9 = %arg3 / 12 (unsigned); the dividend is %6, not %3.
%9 = arith.divui %6, %c12 : index
%10 = arith.index_castui %9 : index to i32
%11 = arith.index_castui %9 : index to i64
%c-12 = arith.constant -12 : index
// %12 = -12 * (%arg3 / 12). In this dump its only use is the cast to %13,
// and %13 itself has no users.
%12 = arith.muli %9, %c-12 : index
%13 = arith.index_cast %12 : index to i64
// The only arith.remui in this dump, so this is the op inserted by ToRem:
// %14 = (%arg3 + 1) mod 12.
// NOTE(review): presumably this replaced an addi of %3 and %12, i.e.
// (%arg3 + 1) - 12 * (%arg3 / 12) = (%arg3 mod 12) + 1. The two disagree
// when %arg3 mod 12 == 11 (12 vs. 0), e.g. %arg3 = 11 -- confirm against the
// pre-rewrite IR, which is not shown here.
%14 = arith.remui %3, %c12 : index
%15 = arith.index_cast %14 : index to i64
%16 = arith.index_castui %4 : index to i64
%c16 = arith.constant 16 : index
%17 = arith.divui %arg4, %c16 : index
%18 = arith.index_castui %17 : index to i64
// %23 = (%arg4 + 1) + 16 * (%15 - %arg4 / 16 - 1); %15 is the cast of the
// rewritten %14, so the remui result feeds the load/store offsets below.
%19 = arith.subi %c0_i64, %18 : i64
%20 = arith.addi %19, %15 : i64
%21 = arith.addi %20, %c-1_i64 : i64
%22 = arith.muli %21, %c16_i64 : i64
%23 = arith.addi %16, %22 : i64
// %25 = 16 * (%arg3 / 12) + %arg4 / 16.
%24 = arith.muli %11, %c16_i64 : i64
%25 = arith.addi %24, %18 : i64
affine.if affine_set<(d0, d1) : (d0 floordiv 16 + (d1 floordiv 12) * 16 + 1 >= 0, -(d0 floordiv 16) - (d1 floordiv 12) * 16 + 84 >= 0, d0 mod 16 + (d1 mod 12) * 16 >= 0, -(d0 mod 16) - (d1 mod 12) * 16 + 179 >= 0)>(%arg4, %arg3) {
// %27 = element 26 of %arg1 (loaded through pointer %1).
%26 = llvm.getelementptr inbounds %1[26] : (!llvm.ptr<1>) -> !llvm.ptr<1>, f64
%27 = llvm.load %26 {alignment = 16 : i64, tbaa = [#llvm.tbaa_tag<base_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, access_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, offset = 0>]} : !llvm.ptr<1> -> f64
// %31 = (%25 + 51) * 194 + %23 + 6: linear f64 offset used for both %2 and %0.
%28 = arith.addi %25, %c51_i64 : i64
%29 = arith.muli %28, %c194_i64 : i64
%30 = arith.addi %29, %23 : i64
%31 = arith.addi %30, %c6_i64 : i64
%32 = llvm.getelementptr inbounds %2[%31] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f64
%33 = llvm.load %32 {alignment = 8 : i64, tbaa = [#llvm.tbaa_tag<base_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, access_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, offset = 0>]} : !llvm.ptr<1> -> f64
%34 = arith.cmpf ole, %27, %33 {fastmathFlags = #llvm.fastmath<none>} : f64
// %36 = %31 - 1: the element immediately before %31 in %2.
%35 = arith.addi %29, %23 : i64
%36 = arith.addi %35, %c5_i64 : i64
%37 = llvm.getelementptr inbounds %2[%36] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f64
%38 = llvm.load %37 {alignment = 8 : i64, tbaa = [#llvm.tbaa_tag<base_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, access_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, offset = 0>]} : !llvm.ptr<1> -> f64
%39 = arith.cmpf ole, %27, %38 {fastmathFlags = #llvm.fastmath<none>} : f64
// Store 0.0 to %0[%31] if either comparison holds; otherwise write back the
// value just loaded from the same address.
%40 = arith.ori %34, %39 : i1
%41 = llvm.getelementptr inbounds %0[%31] : (!llvm.ptr<1>, i64) -> !llvm.ptr<1>, f64
%42 = llvm.load %41 {alignment = 8 : i64, tbaa = [#llvm.tbaa_tag<base_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, access_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, offset = 0>]} : !llvm.ptr<1> -> f64
%43 = arith.select %40, %cst, %42 : f64
llvm.store %43, %41 {alignment = 8 : i64, tbaa = [#llvm.tbaa_tag<base_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, access_type = <id = "custom_tbaa_addrspace(1)", members = {<#llvm.tbaa_root<id = "custom_tbaa">, 0>}>, offset = 0>]} : f64, !llvm.ptr<1>
}
}
return
} |
Found the bug.
Cross-reference: #432
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The text was updated successfully, but these errors were encountered: