Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[analyzer] Improve constraint inferring on concrete div/mod #65448

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
[analyzer] Improve constraint inferring on concrete div/mod
1. Reduce constraint on modulo with small concrete range when assigned;
2. Improve constraint inferring on modulo over concrete value;
3. Improve constraint inferring on division over concrete value.

Fixes #54377
  • Loading branch information
danix800 committed Sep 15, 2023
commit 111de0e99482cf8c8c206286bb9a4f40149992c6
157 changes: 157 additions & 0 deletions clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,9 @@ class SymbolicRangeInferrer
RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
RangeSet RHS, QualType T);

RangeSet handleConcreteModulo(Range LHS, llvm::APSInt Modulo, QualType T);
RangeSet handleRangeModulo(Range LHS, Range RHS, QualType T);

//===----------------------------------------------------------------------===//
// Ranges and operators
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1771,6 +1774,14 @@ template <>
RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
Range RHS,
QualType T) {
if (const llvm::APSInt *ModOrNull = RHS.getConcreteValue())
return handleConcreteModulo(LHS, *ModOrNull, T);

return handleRangeModulo(LHS, RHS, T);
}

RangeSet SymbolicRangeInferrer::handleRangeModulo(Range LHS, Range RHS,
QualType T) {
llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();

Range ConservativeRange = getSymmetricalRange(RHS, T);
Expand Down Expand Up @@ -1824,6 +1835,101 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
}

RangeSet SymbolicRangeInferrer::handleConcreteModulo(Range LHS,
llvm::APSInt Modulo,
QualType T) {
APSIntType ResultType = ValueFactory.getAPSIntType(T);
llvm::APSInt Zero = ResultType.getZeroValue();
llvm::APSInt One = ResultType.getValue(1);

if (Modulo == Zero)
return RangeFactory.getEmptySet();

// Quick path, this also avoids unary '-' overflow on MinValue modulo.
if (Modulo == ValueFactory.getMinValue(T) ||
Modulo == ValueFactory.getMaxValue(T))
return RangeFactory.getRangeSet(LHS);

// Normalize to positive modulo since a % N == a % -N
if (Modulo < 0)
Modulo = -Modulo;
Comment on lines +1854 to +1855
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we swap the sign of the RHS, we should also swap it on the LHS: X % Y <=> -X % -Y right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually X % Y == X % -Y, not X % Y == -X % Y or X % Y == -X % -Y. Sign of Y is not significant.


auto ComputeModuloN = [&](llvm::APSInt From, llvm::APSInt To,
llvm::APSInt N) -> RangeSet {
assert(N > Zero && "Positive N expected!");
bool NonNegative = From >= Zero;
assert(NonNegative == (To >= Zero) && "Signedness mismatch!");

if (From > To)
return RangeFactory.getEmptySet();

llvm::APSInt N1 = N - One;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

APSInt cannot wrap, right? So if N was the minimum representable on the same number of bits, it would extend the number of bits used for the representation, which might be different from the bitwidth of Zero and friends.
This could lead to constructing Ranges with APSInts of different widths, which is unexpected.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a problem if N is INT_MIN like value, fixed with quick return on such values.


// spans [0, N - 1] if NonNegative or [-(N - 1), 0] otherwise.
if ((To - From) / N > Zero)
return {RangeFactory, ValueFactory.getValue(NonNegative ? Zero : -N1),
ValueFactory.getValue(NonNegative ? N1 : Zero)};

llvm::APSInt Min = From % N;
llvm::APSInt Max = To % N;

if (Min < Max) // [Min, Max]
return {RangeFactory, ValueFactory.getValue(Min),
ValueFactory.getValue(Max)};

// [0, Max] U [Min, N - 1] if NonNegative, or
// [-(N - 1), Max] U [Min, 0] otherwise.
const llvm::APSInt &Min1 = ValueFactory.getValue(NonNegative ? Zero : -N1);
const llvm::APSInt &Max1 = ValueFactory.getValue(Max);
RangeSet RS1{RangeFactory, Min1, Max1};

const llvm::APSInt &Min2 = ValueFactory.getValue(Min);
const llvm::APSInt &Max2 = ValueFactory.getValue(NonNegative ? N1 : Zero);
RangeSet RS2{RangeFactory, Min2, Max2};

return RangeFactory.unite(RS1, RS2);
};

if (!LHS.Includes(Zero))
return ComputeModuloN(LHS.From(), LHS.To(), Modulo);

// split over [From, -1] U [0, To] for easy handling.
return RangeFactory.unite(ComputeModuloN(LHS.From(), -One, Modulo),
ComputeModuloN(Zero, LHS.To(), Modulo));
}

template <>
RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Div>(Range LHS,
Range RHS,
QualType T) {
const llvm::APSInt *DividerOrNull = RHS.getConcreteValue();
if (!DividerOrNull)
return infer(T);

const llvm::APSInt &D = *DividerOrNull;

APSIntType ResultType = ValueFactory.getAPSIntType(T);
llvm::APSInt Zero = ResultType.getZeroValue();
llvm::APSInt One = ResultType.getValue(1);
if (D == Zero)
return RangeFactory.getEmptySet();

if (D == -One) { // might overflow
if (LHS.To().isMinSignedValue())
return {RangeFactory, LHS.To(), LHS.To()};
else if (LHS.From().isMinSignedValue()) {
const llvm::APSInt &From = ValueFactory.getValue((LHS.From() + One) / D);
const llvm::APSInt &To = ValueFactory.getValue(LHS.To() / D);
RangeSet RS{RangeFactory, To, From};
return RangeFactory.unite(RS, LHS.From());
}
}

const llvm::APSInt &From = ValueFactory.getValue(LHS.From() / D);
const llvm::APSInt &To = ValueFactory.getValue(LHS.To() / D);
return {RangeFactory, D < Zero ? To : From, D < Zero ? From : To};
}

RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
BinaryOperator::Opcode Op,
RangeSet RHS, QualType T) {
Expand All @@ -1842,6 +1948,8 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
return VisitBinaryOperator<BO_And>(LHS, RHS, T);
case BO_Rem:
return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
case BO_Div:
return VisitBinaryOperator<BO_Div>(LHS, RHS, T);
default:
return infer(T);
}
Expand Down Expand Up @@ -2073,13 +2181,62 @@ class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
if (Sym->getOpcode() != BO_Rem)
return true;
// a % b != 0 implies that a != 0.
// Z3 verification:
// (declare-const a Int)
// (declare-const m Int)
// (assert (not (= m 0)))
// (assert (not (= (mod a m) 0)))
// (assert (= a 0))
// (check-sat)
// ; unsat
if (!Constraint.containsZero()) {
SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
State = State->assume(*NonLocSymSVal, true);
if (!State)
return false;
}
} else if (const auto *SIE = dyn_cast<SymIntExpr>(Sym);
SIE && Constraint.encodesFalseRange()) {
// a % m == 0 && a in [x, y] && y - x < m implies that
// a = (y < 0 ? x : y) / m * m which is a 'ConcreteInt'
// where x and m are 'ConcreteInt'.
//
// Z3 verification:
// (declare-const a Int)
// (declare-const m Int)
// (declare-const x Int)
// (declare-const y Int)
// (assert (= (mod a m) 0))
// (assert (< (- y x) m))
// (assert (and (>= a x) (<= a y)))
// (assert (not (= a (* (div y m) m))))
// (check-sat)
// ; unsat
BasicValueFactory &ValueFactory = RangeFactory.getValueFactory();
APSIntType ResultType = ValueFactory.getAPSIntType(SIE->getType());
llvm::APSInt N = SIE->getRHS();
if (N < 0)
N = -N;
N = ResultType.convert(N);

SymbolRef Sym = SIE->getLHS();
RangeSet RS = SymbolicRangeInferrer::inferRange(RangeFactory, State, Sym);
if (RS.size() == 1) {
Range R = *RS.begin();
llvm::APSInt Distance = ResultType.convert(R.To()).extend(64) -
ResultType.convert(R.From()).extend(64);
if (Distance < 0) // overflows
return true;

if (Distance < N.extend(64)) {
const llvm::APSInt &Point = ValueFactory.getValue(
(ResultType.convert(R.To() > 0 ? R.To() : R.From())) / N * N);
State = assign(Sym, {RangeFactory, Point, Point});
if (!State)
return false;
}
}
}
return true;
}
Expand Down
108 changes: 108 additions & 0 deletions clang/test/Analysis/constraint-assignor.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

void clang_analyzer_warnIfReached(void);
void clang_analyzer_eval(int);
void clang_analyzer_dump(int);

void rem_constant_rhs_ne_zero(int x, int y) {
if (x % 3 == 0) // x % 3 != 0 -> x != 0
Expand Down Expand Up @@ -82,3 +83,110 @@ void remainder_with_adjustment_of_composit_lhs(int x, int y) {
clang_analyzer_eval(x + y != -1); // expected-warning{{TRUE}}
(void)(x * y); // keep the constraints alive.
}

void remainder_infeasible_positive_range(int x) {
if (x <= 2 || x >= 5)
return;
if (x % 5 != 0)
return;
clang_analyzer_warnIfReached(); // no-warning
(void)x; // keep the constraints alive.
}

void remainder_infeasible_negative_range(int x) {
if (x <= -14 || x >= -1)
return;
if (x % 15 != 0)
return;
clang_analyzer_warnIfReached(); // no-warning
(void)x; // keep the constraints alive.
}

void remainder_within_modulo_positive_range_unsigned_1(unsigned x) {
if (x <= 2 || x > 6)
return;
if (x % 5 != 0)
return;
clang_analyzer_dump(x); // expected-warning{{5 S32b}}
(void)x; // keep the constraints alive.
}

void remainder_within_modulo_positive_range_unsigned_2(unsigned char x) {
if (x < 252 || x > 254)
return;
if (x % 5 != 0)
return;
clang_analyzer_dump(x); // no-warning
(void)x; // keep the constraints alive.
}

void remainder_within_modulo_positive_range_unsigned_3(unsigned x) {
if (x < 4294967289 || x > 4294967294)
return;
if (x % 10 != 0)
return;
clang_analyzer_eval(x == 4294967290); // expected-warning{{TRUE}}
(void)x; // keep the constraints alive.
}

void remainder_within_modulo_positive_range(int x) {
if (x <= 2 || x > 6)
return;
if (x % 5 != 0)
return;
clang_analyzer_dump(x); // expected-warning{{5 S32b}}
(void)x; // keep the constraints alive.
}

void remainder_within_modulo_range_spans_zero(int x) {
if (x <= -2 || x > 2)
return;
if (x % 5 != 0)
return;
clang_analyzer_dump(x); // expected-warning{{0 S32b}}
(void)x; // keep the constraints alive.
}

void remainder_within_modulo_negative_range(int x) {
if (x <= -7 || x > -2)
return;
if (x % 5 != 0)
return;
clang_analyzer_dump(x); // expected-warning{{-5 S32b}}
(void)x; // keep the constraints alive.
}

void remainder_within_modulo_range_neg_mod(int x) {
if (x <= 2 || x > 6)
return;
if (x % -5 != 0)
return;
clang_analyzer_dump(x); // expected-warning{{5 S32b}}
(void)x; // keep the constraints alive.
}

#define LONG_MAX 0x7fffffffffffffffLL
#define LONG_MIN (-LONG_MAX - 1LL)

void remainder_within_modulo_distance_overflow(long long x) {
if (x < LONG_MIN + 1 || x > LONG_MAX - 1)
return;

if (x % 10 != 0)
return;
clang_analyzer_dump(x); // expected-warning{{reg_$0<long long x>}}
(void)x; // keep the constraints alive.
}

#define CHAR_MAX 0x7f
#define CHAR_MIN (-CHAR_MAX - 1)

void remainder_within_modulo_not_overflow(char x) {
if (x < CHAR_MIN + 1 || x > CHAR_MAX - 1)
return;

if (x % (CHAR_MAX * 2) != 0)
return;
clang_analyzer_dump(x); // expected-warning{{0 S32b}}
(void)x; // keep the constraints alive.
}
64 changes: 64 additions & 0 deletions clang/test/Analysis/constraint-infer-div.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// RUN: %clang_analyze_cc1 %s \
// RUN: -analyzer-checker=core \
// RUN: -analyzer-checker=debug.ExprInspection \
// RUN: -verify

void clang_analyzer_eval(int);

void div_infer_positive_divider(int x) {
if (x < 1 || x > 5)
return;

int i = x / 2;
clang_analyzer_eval(i >= 0 && i <= 2); // expected-warning{{TRUE}}
}

void div_infer_negative_divider_positive_range(int x) {
if (x < 1 || x > 2)
return;

int i = x / -2;
clang_analyzer_eval(i >= -1 && i <= 0); // expected-warning{{TRUE}}
}

void div_infer_negative_divider_negative_range(int x) {
if (x < -2 || x > 0)
return;

int i = x / -2;
clang_analyzer_eval(i >= 0 && i <= 1); // expected-warning{{TRUE}}
}

void div_infer_positive_divider_positive_range(int x) {
if (x < 0 || x > 5)
return;

int i = x / 6;
clang_analyzer_eval(i == 0); // expected-warning{{TRUE}}
}

#define LONG_MAX 0x7fffffffffffffffLL
#define LONG_MIN (-LONG_MAX - 1LL)

void div_infer_overflow_long(long long x) {
if (x > LONG_MIN + 1)
return;

// x in [LONG_MIN, LONG_MIN + 1]
clang_analyzer_eval(x >= LONG_MIN && x <= LONG_MIN + 1); // expected-warning{{TRUE}}
long long i = x / -1;
clang_analyzer_eval(i == LONG_MIN || i == LONG_MAX); // expected-warning{{TRUE}}
}

#define INT_MAX 0x7fffffff
#define INT_MIN (-INT_MAX - 1)

void div_infer_overflow_int(int x) {
if (x > INT_MIN + 1)
return;

// x in [INT_MIN, INT_MIN + 1]
clang_analyzer_eval(x >= INT_MIN && x <= INT_MIN + 1); // expected-warning{{TRUE}}
int i = x / -1;
clang_analyzer_eval(i == INT_MIN || i == INT_MAX); // expected-warning{{TRUE}}
}
Loading