Skip to content

Rust: Add LiteralExpr sub classes #19475

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/** Provides sub classes of literal expressions. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does the Ext suffix in the filename indicate? I can't find any existing files with that suffix.

My first thought was it meant that these could be _ext_ended, but that's not the case.

Copy link
Contributor

@geoffw0 geoffw0 May 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it just means the file is an extension to the functionality provided in LiteralExpr.qll.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed.


private import internal.LiteralExprImpl

/** A character literal expression, for example `'x'`. */
final class CharLiteralExpr = Impl::CharLiteralExpr;

/** A string literal expression, for example `"Hello, world!"`. */
final class StringLiteralExpr = Impl::StringLiteralExpr;

/** A number literal expression. */
final class NumberLiteralExpr = Impl::NumberLiteralExpr;

/** An integer literal expression, for example `42`. */
final class IntegerLiteralExpr = Impl::IntegerLiteralExpr;

/** A floating-point literal expression, for example `42.0`. */
final class FloatLiteralExpr = Impl::FloatLiteralExpr;

/** A Boolean literal expression: `true` or `false`. */
final class BooleanLiteralExpr = Impl::BooleanLiteralExpr;
178 changes: 178 additions & 0 deletions rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,182 @@ module Impl {
)
}
}

/**
 * A [character literal][1], that is, a literal expression whose text is
 * enclosed in single quotes. For example:
 *
 * ```rust
 * 'x';
 * ```
 *
 * [1]: https://doc.rust-lang.org/reference/tokens.html#character-literals
 */
class CharLiteralExpr extends LiteralExpr {
  CharLiteralExpr() {
    exists(string text | text = this.getTextValue() | text.regexpMatch("'.*'"))
  }

  override string getAPrimaryQlClass() { result = "CharLiteralExpr" }
}

/**
 * A [string literal][1]. For example:
 *
 * ```rust
 * "Hello, world!";
 * ```
 *
 * Raw string literals such as `r#"..."#` are included, as are literals whose
 * text spans several source lines.
 *
 * [1]: https://doc.rust-lang.org/reference/tokens.html#string-literals
 */
class StringLiteralExpr extends LiteralExpr {
  StringLiteralExpr() {
    // `(?s)` makes `.` match line terminators as well. Without it, a string
    // literal whose text contains a line break would fail the whole-string
    // match and would not be classified as a string literal.
    this.getTextValue().regexpMatch("(?s)r?#*\".*\"#*")
  }

  override string getAPrimaryQlClass() { result = "StringLiteralExpr" }
}

/**
 * A number literal.
 *
 * This class is abstract; in this file it is extended by `IntegerLiteralExpr`
 * and `FloatLiteralExpr`.
 */
abstract class NumberLiteralExpr extends LiteralExpr { }

// https://doc.rust-lang.org/reference/tokens.html#integer-literals
private module IntegerLiteralRegexs {
  /** Gets `s` wrapped in a non-capturing group. */
  bindingset[s]
  string paren(string s) { result = "(?:" + s + ")" }

  /** Gets a regex for a single decimal digit. */
  string decDigit() { result = "[0-9]" }

  /** Gets a regex for a single binary digit. */
  string binDigit() { result = "[01]" }

  /** Gets a regex for a single octal digit. */
  string octDigit() { result = "[0-7]" }

  /** Gets a regex for a single hexadecimal digit. */
  string hexDigit() { result = "[0-9a-fA-F]" }

  /** Gets a regex for zero or more occurrences of `digit` or `_`. */
  bindingset[digit]
  private string digitsOrUnderscores(string digit) { result = paren(digit + "|_") + "*" }

  /**
   * Gets a regex for `prefix` followed by digits and underscores, of which
   * at least one must be a `digit`.
   */
  bindingset[prefix, digit]
  private string prefixedLiteral(string prefix, string digit) {
    result = prefix + digitsOrUnderscores(digit) + digit + digitsOrUnderscores(digit)
  }

  /** Gets a regex for a decimal literal: a digit followed by digits and underscores. */
  string decLiteral() { result = decDigit() + digitsOrUnderscores(decDigit()) }

  /** Gets a regex for a binary literal (`0b...`). */
  string binLiteral() { result = prefixedLiteral("0b", binDigit()) }

  /** Gets a regex for an octal literal (`0o...`). */
  string octLiteral() { result = prefixedLiteral("0o", octDigit()) }

  /** Gets a regex for a hexadecimal literal (`0x...`). */
  string hexLiteral() { result = prefixedLiteral("0x", hexDigit()) }

  /** Gets the alternation of the permitted integer type suffixes. */
  private string suffix() { result = "u8|i8|u16|i16|u32|i32|u64|i64|u128|i128|usize|isize" }

  /**
   * Gets a regex for an integer literal in any of the four bases, followed by
   * an optional type suffix. The suffix is the only capturing group (group 1).
   */
  string integerLiteral() {
    exists(string anyBase |
      anyBase =
        paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" +
          paren(hexLiteral())
    |
      result = paren(anyBase) + "(" + suffix() + ")?"
    )
  }
}

/**
 * An [integer literal][1] in decimal, binary, octal or hexadecimal notation,
 * optionally followed by an integer type suffix. For example:
 *
 * ```rust
 * 42;
 * ```
 *
 * [1]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
 */
class IntegerLiteralExpr extends NumberLiteralExpr {
  IntegerLiteralExpr() {
    exists(string pattern | pattern = IntegerLiteralRegexs::integerLiteral() |
      this.getTextValue().regexpMatch(pattern)
    )
  }

  /**
   * Gets the type suffix of this integer literal, if it has one.
   *
   * For example, the suffix of `42u8` is `u8`.
   */
  string getSuffix() {
    // Group 1 is the only capturing group in the integer literal regex.
    result = this.getTextValue().regexpCapture(IntegerLiteralRegexs::integerLiteral(), 1)
  }

  override string getAPrimaryQlClass() { result = "IntegerLiteralExpr" }
}

// https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
private module FloatLiteralRegexs {
  private import IntegerLiteralRegexs

  /** Gets the alternation of the permitted floating-point type suffixes. */
  private string suffix() { result = "f32|f64" }

  /**
   * Gets a regex for an exponent: `e` or `E`, an optional sign, then digits
   * and underscores containing at least one digit.
   */
  string exponent() {
    exists(string digit, string filler |
      digit = decDigit() and
      filler = paren(digit + "|_") + "*"
    |
      result = "(?:e|E)(?:\\+|-)?" + filler + digit + filler
    )
  }

  /**
   * Gets a regex for the form `<dec>.<dec>` with an optional suffix, which is
   * the only capturing group.
   */
  string floatLiteralSuffix1() {
    exists(string dec | dec = decLiteral() |
      result = dec + "\\." + dec + "(" + suffix() + ")?"
    )
  }

  /**
   * Gets a regex for the form `<dec>[.<dec>]<exponent>` with an optional
   * suffix, which is the only capturing group.
   */
  string floatLiteralSuffix2() {
    exists(string dec | dec = decLiteral() |
      result = dec + paren("\\." + dec) + "?" + paren(exponent()) + "(" + suffix() + ")?"
    )
  }

  /**
   * Gets a regex for an integer-shaped literal followed by a mandatory
   * floating-point suffix, which is the only capturing group.
   */
  string integerSuffixLiteral() {
    exists(string anyBase |
      anyBase =
        paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" +
          paren(hexLiteral())
    |
      result = paren(anyBase) + "(" + suffix() + ")"
    )
  }

  /**
   * Gets a regex for a floating-point literal in one of three forms:
   * `<dec>.`, `<dec>.<dec>[suffix]`, or `<dec>[.<dec>]<exponent>[suffix]`.
   */
  string floatLiteral() {
    exists(string trailingDot | trailingDot = decLiteral() + "\\." |
      result =
        paren(trailingDot) + "|" + paren(floatLiteralSuffix1()) + "|" +
          paren(floatLiteralSuffix2())
    )
  }
}

/**
* A [floating-point literal][1]. For example:
*
* ```rust
* 42.0;
* ```
*
* [1]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
*/
class FloatLiteralExpr extends NumberLiteralExpr {
FloatLiteralExpr() {
this.getTextValue()
.regexpMatch(IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteral()) + "|" +
IntegerLiteralRegexs::paren(FloatLiteralRegexs::integerSuffixLiteral())) and
// E.g. `0x01_f32` is an integer, not a float
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused by the cases 0x01_f32 and 0x01_e3. I understand they're supposed to be treated as integers, but I'm not sure why this is so (in the Rust language), and I'm not sure how it happens (in your QL).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is exactly the restriction on this line that makes it an integer only; otherwise it would be considered a float as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see how IntegerLiteralExpr accepts 0x01_f32 when its suffix() function doesn't include f32.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0x01_f32 is just a HEX number, underscores are allowed as separators.

not this instanceof IntegerLiteralExpr
}

/**
* Get the suffix of this floating-point literal, if any.
*
* For example, `42.0f32` has the suffix `f32`.
*/
string getSuffix() {
exists(string s, string reg |
reg =
IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteralSuffix1()) + "|" +
IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteralSuffix2()) + "|" +
IntegerLiteralRegexs::paren(FloatLiteralRegexs::integerSuffixLiteral()) and
s = this.getTextValue() and
result = s.regexpCapture(reg, [1, 2, 3])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does [1, 2, 3] not end up mixing things up? Do they all three always correspond to the suffix for all the three disjuncts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They each correspond to a suffix capturing group for one of the disjuncts, so all should be good.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for being dense, but if 3 is the right capture group for one of the disjuncts why won't trying 1 or 2 give the wrong thing for that disjunct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying 1 for, say, the third disjunct will simply not give a result, since capture group 1 does not exist in that disjunct; it exists in the first disjunct.

)
}

override string getAPrimaryQlClass() { result = "FloatLiteralExpr" }
}

/**
* A Boolean literal. Either `true` or `false`.
*/
class BooleanLiteralExpr extends LiteralExpr {
BooleanLiteralExpr() { this.getTextValue() = ["false", "true"] }

override string getAPrimaryQlClass() { result = "BooleanLiteralExpr" }
}
}
1 change: 1 addition & 0 deletions rust/ql/lib/rust.qll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import codeql.rust.elements
import codeql.Locations
import codeql.files.FileSystem
import codeql.rust.elements.AssignmentOperation
import codeql.rust.elements.LiteralExprExt
import codeql.rust.elements.LogicalOperation
import codeql.rust.elements.AsyncBlockExpr
import codeql.rust.elements.Variable
Expand Down
45 changes: 45 additions & 0 deletions rust/ql/test/extractor-tests/literal/literal.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
charLiteral
| literal.rs:2:5:2:7 | 'a' |
| literal.rs:3:5:3:7 | 'b' |
| literal.rs:4:5:4:8 | '\\'' |
| literal.rs:5:5:5:8 | '\\n' |
| literal.rs:6:5:6:15 | '\\u{1F600}' |
stringLiteral
| literal.rs:11:5:11:9 | "foo" |
| literal.rs:12:5:12:10 | r"foo" |
| literal.rs:13:5:13:13 | "\\"foo\\"" |
| literal.rs:14:5:14:14 | r#""foo""# |
| literal.rs:16:5:16:18 | "foo #\\"# bar" |
| literal.rs:17:5:17:22 | r##"foo #"# bar"## |
| literal.rs:19:5:19:10 | "\\x52" |
| literal.rs:20:5:20:7 | "R" |
| literal.rs:21:5:21:8 | r"R" |
| literal.rs:22:5:22:11 | "\\\\x52" |
| literal.rs:23:5:23:11 | r"\\x52" |
integerLiteral
| literal.rs:28:5:28:7 | 123 | |
| literal.rs:29:5:29:10 | 123i32 | i32 |
| literal.rs:30:5:30:10 | 123u32 | u32 |
| literal.rs:31:5:31:11 | 123_u32 | u32 |
| literal.rs:33:5:33:8 | 0xff | |
| literal.rs:34:5:34:11 | 0xff_u8 | u8 |
| literal.rs:35:5:35:12 | 0x01_f32 | |
| literal.rs:36:5:36:11 | 0x01_e3 | |
| literal.rs:38:5:38:8 | 0o70 | |
| literal.rs:39:5:39:12 | 0o70_i16 | i16 |
| literal.rs:41:5:41:25 | 0b1111_1111_1001_0000 | |
| literal.rs:42:5:42:28 | 0b1111_1111_1001_0000i64 | i64 |
| literal.rs:43:5:43:15 | 0b________1 | |
| literal.rs:45:5:45:10 | 0usize | usize |
| literal.rs:48:5:49:10 | 128_i8 | i8 |
| literal.rs:50:5:51:10 | 256_u8 | u8 |
floatLiteral
| literal.rs:56:5:56:8 | 5f32 | f32 |
| literal.rs:58:5:58:12 | 123.0f64 | f64 |
| literal.rs:59:5:59:10 | 0.1f64 | f64 |
| literal.rs:60:5:60:10 | 0.1f32 | f32 |
| literal.rs:61:5:61:14 | 12E+99_f64 | f64 |
| literal.rs:62:18:62:19 | 2. | |
booleanLiteral
| literal.rs:66:5:66:8 | true |
| literal.rs:67:5:67:9 | false |
13 changes: 13 additions & 0 deletions rust/ql/test/extractor-tests/literal/literal.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import rust

// Selects every character literal expression.
query predicate charLiteral(CharLiteralExpr e) { any() }

// Selects every string literal expression.
query predicate stringLiteral(StringLiteralExpr e) { any() }

// Selects every integer literal together with its suffix. Using `concat` keeps
// a row for literals without a suffix (the suffix column is then empty).
query predicate integerLiteral(IntegerLiteralExpr e, string suffix) {
suffix = concat(e.getSuffix())
}

// Selects every floating-point literal together with its suffix (empty if none).
query predicate floatLiteral(FloatLiteralExpr e, string suffix) { suffix = concat(e.getSuffix()) }

// Selects every Boolean literal expression.
query predicate booleanLiteral(BooleanLiteralExpr e) { any() }
68 changes: 68 additions & 0 deletions rust/ql/test/extractor-tests/literal/literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
fn char_literals() { // Fixture: literal.expected records these line numbers; do not add or move lines.
'a';
'b';
'\''; // escaped single quote
'\n'; // newline escape
'\u{1F600}'; // Unicode escape
}

fn string_literals() { // Fixture: literal.expected records these line numbers; do not add or move lines.
// from https://doc.rust-lang.org/reference/tokens.html#string-literals
"foo";
r"foo"; // foo
"\"foo\""; // "foo"
r#""foo""#; // "foo"

"foo #\"# bar"; // foo #"# bar
r##"foo #"# bar"##; // foo #"# bar

"\x52"; // R
"R"; // R
r"R"; // R
"\\x52"; // \x52
r"\x52"; // \x52
}

fn integer_literals() { // Fixture: literal.expected records these line numbers; do not add or move lines.
// from https://doc.rust-lang.org/reference/tokens.html#integer-literals
123;
123i32;
123u32;
123_u32; // underscore separator before the suffix

0xff;
0xff_u8;
0x01_f32; // integer 7986, not floating-point 1.0
0x01_e3; // integer 483, not floating-point 1000.0

0o70;
0o70_i16;

0b1111_1111_1001_0000;
0b1111_1111_1001_0000i64;
0b________1; // underscores may precede the first digit after the prefix

0usize;

// These are too big for their type, but are accepted as literal expressions.
#[allow(overflowing_literals)]
128_i8;
#[allow(overflowing_literals)]
256_u8;
}

fn float_literals() { // Fixture: literal.expected records these line numbers; do not add or move lines.
// This is an integer literal, accepted as a floating-point literal expression.
5f32;

123.0f64;
0.1f64;
0.1f32;
12E+99_f64; // signed exponent, then underscore and suffix
let x: f64 = 2.; // trailing dot with no fractional digits
}

fn boolean_literals() { // Fixture: literal.expected records these line numbers; do not add or move lines.
true;
false;
}