-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[analyzer] Improve constraint inferring on concrete div/mod #65448
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
1. Reduce constraint on modulo with small concrete range when assigned; 2. Improve constraint inferring on modulo over concrete value; 3. Improve constraint inferring on division over concrete value. Fixes #54377
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1338,6 +1338,9 @@ class SymbolicRangeInferrer | |
RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op, | ||
RangeSet RHS, QualType T); | ||
|
||
RangeSet handleConcreteModulo(Range LHS, llvm::APSInt Modulo, QualType T); | ||
RangeSet handleRangeModulo(Range LHS, Range RHS, QualType T); | ||
|
||
//===----------------------------------------------------------------------===// | ||
// Ranges and operators | ||
//===----------------------------------------------------------------------===// | ||
|
@@ -1771,6 +1774,14 @@ template <> | |
RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS, | ||
Range RHS, | ||
QualType T) { | ||
if (const llvm::APSInt *ModOrNull = RHS.getConcreteValue()) | ||
return handleConcreteModulo(LHS, *ModOrNull, T); | ||
|
||
return handleRangeModulo(LHS, RHS, T); | ||
} | ||
|
||
RangeSet SymbolicRangeInferrer::handleRangeModulo(Range LHS, Range RHS, | ||
QualType T) { | ||
llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue(); | ||
|
||
Range ConservativeRange = getSymmetricalRange(RHS, T); | ||
|
@@ -1824,6 +1835,101 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS, | |
return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)}; | ||
} | ||
|
||
RangeSet SymbolicRangeInferrer::handleConcreteModulo(Range LHS, | ||
llvm::APSInt Modulo, | ||
QualType T) { | ||
APSIntType ResultType = ValueFactory.getAPSIntType(T); | ||
llvm::APSInt Zero = ResultType.getZeroValue(); | ||
llvm::APSInt One = ResultType.getValue(1); | ||
|
||
if (Modulo == Zero) | ||
return RangeFactory.getEmptySet(); | ||
|
||
// Quick path, this also avoids unary '-' overflow on MinValue modulo. | ||
if (Modulo == ValueFactory.getMinValue(T) || | ||
Modulo == ValueFactory.getMaxValue(T)) | ||
return RangeFactory.getRangeSet(LHS); | ||
|
||
// Normalize to positive modulo since a % N == a % -N | ||
if (Modulo < 0) | ||
Modulo = -Modulo; | ||
|
||
auto ComputeModuloN = [&](llvm::APSInt From, llvm::APSInt To, | ||
llvm::APSInt N) -> RangeSet { | ||
assert(N > Zero && "Positive N expected!"); | ||
bool NonNegative = From >= Zero; | ||
assert(NonNegative == (To >= Zero) && "Signedness mismatch!"); | ||
|
||
if (From > To) | ||
return RangeFactory.getEmptySet(); | ||
|
||
llvm::APSInt N1 = N - One; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. APSInt cannot wrap, right? So if N was the minimum representable on the same number of bits, it would extend the number of bits used for the representation, which might be different from the bitwidth of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a problem if N is INT_MIN like value, fixed with quick return on such values. |
||
|
||
// spans [0, N - 1] if NonNegative or [-(N - 1), 0] otherwise. | ||
if ((To - From) / N > Zero) | ||
return {RangeFactory, ValueFactory.getValue(NonNegative ? Zero : -N1), | ||
ValueFactory.getValue(NonNegative ? N1 : Zero)}; | ||
|
||
llvm::APSInt Min = From % N; | ||
llvm::APSInt Max = To % N; | ||
|
||
if (Min < Max) // [Min, Max] | ||
return {RangeFactory, ValueFactory.getValue(Min), | ||
ValueFactory.getValue(Max)}; | ||
|
||
// [0, Max] U [Min, N - 1] if NonNegative, or | ||
// [-(N - 1), Max] U [Min, 0] otherwise. | ||
const llvm::APSInt &Min1 = ValueFactory.getValue(NonNegative ? Zero : -N1); | ||
const llvm::APSInt &Max1 = ValueFactory.getValue(Max); | ||
RangeSet RS1{RangeFactory, Min1, Max1}; | ||
|
||
const llvm::APSInt &Min2 = ValueFactory.getValue(Min); | ||
const llvm::APSInt &Max2 = ValueFactory.getValue(NonNegative ? N1 : Zero); | ||
RangeSet RS2{RangeFactory, Min2, Max2}; | ||
|
||
return RangeFactory.unite(RS1, RS2); | ||
}; | ||
|
||
if (!LHS.Includes(Zero)) | ||
return ComputeModuloN(LHS.From(), LHS.To(), Modulo); | ||
|
||
// split over [From, -1] U [0, To] for easy handling. | ||
return RangeFactory.unite(ComputeModuloN(LHS.From(), -One, Modulo), | ||
ComputeModuloN(Zero, LHS.To(), Modulo)); | ||
} | ||
|
||
template <> | ||
RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Div>(Range LHS, | ||
Range RHS, | ||
QualType T) { | ||
const llvm::APSInt *DividerOrNull = RHS.getConcreteValue(); | ||
if (!DividerOrNull) | ||
return infer(T); | ||
|
||
const llvm::APSInt &D = *DividerOrNull; | ||
|
||
APSIntType ResultType = ValueFactory.getAPSIntType(T); | ||
llvm::APSInt Zero = ResultType.getZeroValue(); | ||
llvm::APSInt One = ResultType.getValue(1); | ||
if (D == Zero) | ||
return RangeFactory.getEmptySet(); | ||
|
||
if (D == -One) { // might overflow | ||
if (LHS.To().isMinSignedValue()) | ||
return {RangeFactory, LHS.To(), LHS.To()}; | ||
else if (LHS.From().isMinSignedValue()) { | ||
const llvm::APSInt &From = ValueFactory.getValue((LHS.From() + One) / D); | ||
const llvm::APSInt &To = ValueFactory.getValue(LHS.To() / D); | ||
RangeSet RS{RangeFactory, To, From}; | ||
return RangeFactory.unite(RS, LHS.From()); | ||
} | ||
} | ||
|
||
const llvm::APSInt &From = ValueFactory.getValue(LHS.From() / D); | ||
const llvm::APSInt &To = ValueFactory.getValue(LHS.To() / D); | ||
return {RangeFactory, D < Zero ? To : From, D < Zero ? From : To}; | ||
} | ||
|
||
RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS, | ||
BinaryOperator::Opcode Op, | ||
RangeSet RHS, QualType T) { | ||
|
@@ -1842,6 +1948,8 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS, | |
return VisitBinaryOperator<BO_And>(LHS, RHS, T); | ||
case BO_Rem: | ||
return VisitBinaryOperator<BO_Rem>(LHS, RHS, T); | ||
case BO_Div: | ||
return VisitBinaryOperator<BO_Div>(LHS, RHS, T); | ||
default: | ||
return infer(T); | ||
} | ||
|
@@ -2073,13 +2181,62 @@ class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> { | |
if (Sym->getOpcode() != BO_Rem) | ||
return true; | ||
// a % b != 0 implies that a != 0. | ||
// Z3 verification: | ||
// (declare-const a Int) | ||
// (declare-const m Int) | ||
// (assert (not (= m 0))) | ||
// (assert (not (= (mod a m) 0))) | ||
// (assert (= a 0)) | ||
// (check-sat) | ||
// ; unsat | ||
if (!Constraint.containsZero()) { | ||
SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS()); | ||
if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) { | ||
State = State->assume(*NonLocSymSVal, true); | ||
if (!State) | ||
return false; | ||
} | ||
} else if (const auto *SIE = dyn_cast<SymIntExpr>(Sym); | ||
SIE && Constraint.encodesFalseRange()) { | ||
// a % m == 0 && a in [x, y] && y - x < m implies that | ||
// a = (y < 0 ? x : y) / m * m which is a 'ConcreteInt' | ||
// where x and m are 'ConcreteInt'. | ||
// | ||
// Z3 verification: | ||
// (declare-const a Int) | ||
// (declare-const m Int) | ||
// (declare-const x Int) | ||
// (declare-const y Int) | ||
// (assert (= (mod a m) 0)) | ||
// (assert (< (- y x) m)) | ||
// (assert (and (>= a x) (<= a y))) | ||
// (assert (not (= a (* (div y m) m)))) | ||
// (check-sat) | ||
// ; unsat | ||
BasicValueFactory &ValueFactory = RangeFactory.getValueFactory(); | ||
APSIntType ResultType = ValueFactory.getAPSIntType(SIE->getType()); | ||
llvm::APSInt N = SIE->getRHS(); | ||
if (N < 0) | ||
N = -N; | ||
N = ResultType.convert(N); | ||
|
||
SymbolRef Sym = SIE->getLHS(); | ||
RangeSet RS = SymbolicRangeInferrer::inferRange(RangeFactory, State, Sym); | ||
if (RS.size() == 1) { | ||
Range R = *RS.begin(); | ||
llvm::APSInt Distance = ResultType.convert(R.To()).extend(64) - | ||
ResultType.convert(R.From()).extend(64); | ||
if (Distance < 0) // overflows | ||
return true; | ||
|
||
if (Distance < N.extend(64)) { | ||
const llvm::APSInt &Point = ValueFactory.getValue( | ||
(ResultType.convert(R.To() > 0 ? R.To() : R.From())) / N * N); | ||
State = assign(Sym, {RangeFactory, Point, Point}); | ||
if (!State) | ||
return false; | ||
} | ||
} | ||
} | ||
return true; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// RUN: %clang_analyze_cc1 %s \ | ||
// RUN: -analyzer-checker=core \ | ||
// RUN: -analyzer-checker=debug.ExprInspection \ | ||
// RUN: -verify | ||
|
||
void clang_analyzer_eval(int); | ||
|
||
void div_infer_positive_divider(int x) { | ||
if (x < 1 || x > 5) | ||
return; | ||
|
||
int i = x / 2; | ||
clang_analyzer_eval(i >= 0 && i <= 2); // expected-warning{{TRUE}} | ||
} | ||
|
||
void div_infer_negative_divider_positive_range(int x) { | ||
if (x < 1 || x > 2) | ||
return; | ||
|
||
int i = x / -2; | ||
clang_analyzer_eval(i >= -1 && i <= 0); // expected-warning{{TRUE}} | ||
} | ||
|
||
void div_infer_negative_divider_negative_range(int x) { | ||
if (x < -2 || x > 0) | ||
return; | ||
|
||
int i = x / -2; | ||
clang_analyzer_eval(i >= 0 && i <= 1); // expected-warning{{TRUE}} | ||
} | ||
|
||
void div_infer_positive_divider_positive_range(int x) { | ||
if (x < 0 || x > 5) | ||
return; | ||
|
||
int i = x / 6; | ||
clang_analyzer_eval(i == 0); // expected-warning{{TRUE}} | ||
} | ||
|
||
#define LONG_MAX 0x7fffffffffffffffLL | ||
#define LONG_MIN (-LONG_MAX - 1LL) | ||
|
||
void div_infer_overflow_long(long long x) { | ||
if (x > LONG_MIN + 1) | ||
return; | ||
|
||
// x in [LONG_MIN, LONG_MIN + 1] | ||
clang_analyzer_eval(x >= LONG_MIN && x <= LONG_MIN + 1); // expected-warning{{TRUE}} | ||
long long i = x / -1; | ||
clang_analyzer_eval(i == LONG_MIN || i == LONG_MAX); // expected-warning{{TRUE}} | ||
} | ||
|
||
#define INT_MAX 0x7fffffff | ||
#define INT_MIN (-INT_MAX - 1) | ||
|
||
void div_infer_overflow_int(int x) { | ||
if (x > INT_MIN + 1) | ||
return; | ||
|
||
// x in [INT_MIN, INT_MIN + 1] | ||
clang_analyzer_eval(x >= INT_MIN && x <= INT_MIN + 1); // expected-warning{{TRUE}} | ||
int i = x / -1; | ||
clang_analyzer_eval(i == INT_MIN || i == INT_MAX); // expected-warning{{TRUE}} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we swap the sign of the RHS, we should also swap it on the LHS:
X % Y <=> -X % -Y
right?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually
X % Y == X % -Y
, notX % Y == -X % Y
orX % Y == -X % -Y
. Sign of Y is not significant.