|
| 1 | +; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s |
| 2 | + |
| 3 | +; ScalarEvolution should be able to fold away the sign-extensions |
| 4 | +; on this loop with a primary induction variable incremented with |
| 5 | +; an nsw add of 2 (this test is derived from the nsw-offset.ll test, but uses an
| 6 | +; assume instead of a preheader conditional branch to guard the loop). |
| 7 | + |
| 8 | +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" |
| 9 | + |
| 10 | +define void @foo(i32 %no, double* nocapture %d, double* nocapture %q) nounwind {
| 11 | +entry:
| 12 | + %n = and i32 %no, 4294967294
| 13 | + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
| 14 | + tail call void @llvm.assume(i1 %0)
| 15 | + br label %bb.nph
| 16 | +
| 17 | +bb.nph: ; preds = %entry
| 18 | + br label %bb
| 19 | +
| 20 | +bb: ; preds = %bb.nph, %bb1
| 21 | + %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=5]
| 22 | +
| 23 | +; CHECK: %1 = sext i32 %i.01 to i64
| 24 | +; CHECK: --> {0,+,2}<nuw><nsw><%bb>
| 25 | + %1 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
| 26 | +
| 27 | +; CHECK: %2 = getelementptr inbounds double* %d, i64 %1
| 28 | +; CHECK: --> {%d,+,16}<nsw><%bb>
| 29 | + %2 = getelementptr inbounds double* %d, i64 %1 ; <double*> [#uses=1]
| 30 | +
| 31 | + %3 = load double* %2, align 8 ; <double> [#uses=1]
| 32 | + %4 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
| 33 | + %5 = getelementptr inbounds double* %q, i64 %4 ; <double*> [#uses=1]
| 34 | + %6 = load double* %5, align 8 ; <double> [#uses=1]
| 35 | + %7 = or i32 %i.01, 1 ; <i32> [#uses=1]
| 36 | +
| 37 | +; CHECK: %8 = sext i32 %7 to i64
| 38 | +; CHECK: --> {1,+,2}<nuw><nsw><%bb>
| 39 | + %8 = sext i32 %7 to i64 ; <i64> [#uses=1]
| 40 | +
| 41 | +; CHECK: %9 = getelementptr inbounds double* %q, i64 %8
| 42 | +; CHECK: {(8 + %q),+,16}<nsw><%bb>
| 43 | + %9 = getelementptr inbounds double* %q, i64 %8 ; <double*> [#uses=1]
| 44 | +
| 45 | +; Artificially repeat the above three instructions, this time computing
| 46 | +; i+1 with 'add nsw' instead of 'or'; SCEV should fold both to {1,+,2}.
| 47 | + %t7 = add nsw i32 %i.01, 1 ; <i32> [#uses=1]
| 48 | +
| 49 | +; CHECK: %t8 = sext i32 %t7 to i64
| 50 | +; CHECK: --> {1,+,2}<nuw><nsw><%bb>
| 51 | + %t8 = sext i32 %t7 to i64 ; <i64> [#uses=1]
| 52 | +
| 53 | +; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8
| 54 | +; CHECK: {(8 + %q),+,16}<nsw><%bb>
| 55 | + %t9 = getelementptr inbounds double* %q, i64 %t8 ; <double*> [#uses=1]
| 56 | +
| 57 | + %10 = load double* %9, align 8 ; <double> [#uses=1]
| 58 | + %11 = fadd double %6, %10 ; <double> [#uses=1]
| 59 | + %12 = fadd double %11, 3.200000e+00 ; <double> [#uses=1]
| 60 | + %13 = fmul double %3, %12 ; <double> [#uses=1]
| 61 | + %14 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
| 62 | + %15 = getelementptr inbounds double* %d, i64 %14 ; <double*> [#uses=1]
| 63 | + store double %13, double* %15, align 8
| 64 | + %16 = add nsw i32 %i.01, 2 ; <i32> [#uses=2]
| 65 | + br label %bb1
| 66 | +
| 67 | +bb1: ; preds = %bb
| 68 | + %17 = icmp slt i32 %16, %n ; <i1> [#uses=1]
| 69 | + br i1 %17, label %bb, label %bb1.return_crit_edge
| 70 | +
| 71 | +bb1.return_crit_edge: ; preds = %bb1
| 72 | + br label %return
| 73 | +
| 74 | +return: ; preds = %bb1.return_crit_edge, %entry
| 75 | + ret void
| 76 | +}
| 77 | + |
| 78 | +declare void @llvm.assume(i1) nounwind |
| 79 | + |
| 80 | +; Note: Without the preheader assume, there is an 'smax' in the |
| 81 | +; backedge-taken count expression: |
| 82 | +; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))) /u 2) |
| 83 | +; CHECK: Loop %bb: max backedge-taken count is 1073741822 |