Skip to content

Commit 17fc5a2

Browse files
committed
Implemented hexadecimal, octal and binary literals. Also use bigint in the lexer to store big integer literals.
1 parent 0a72409 commit 17fc5a2

File tree

10 files changed

+218
-47
lines changed

10 files changed

+218
-47
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

parser/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,6 @@ lalrpop="0.15.1"
1111
lalrpop-util="0.15.1"
1212
log="0.4.1"
1313
regex="0.2.2"
14+
num-bigint = "0.2"
15+
num-traits = "0.2"
1416

parser/src/ast.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
//! Roughly equivalent to this: https://docs.python.org/3/library/ast.html
44
55
pub use super::lexer::Location;
6+
use num_bigint::BigInt;
67
/*
78
#[derive(Debug)]
89
@@ -305,7 +306,7 @@ pub enum Comparison {
305306

306307
#[derive(Debug, PartialEq)]
307308
pub enum Number {
308-
Integer { value: i32 },
309+
Integer { value: BigInt },
309310
Float { value: f64 },
310311
Complex { real: f64, imag: f64 },
311312
}

parser/src/lexer.rs

Lines changed: 153 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
//! code is translated into separate tokens.
33
44
pub use super::token::Tok;
5+
use num_bigint::BigInt;
6+
use num_traits::Num;
57
use std::collections::HashMap;
68
use std::str::FromStr;
79

@@ -280,10 +282,56 @@ where
280282
}
281283

282284
fn lex_number(&mut self) -> Spanned<Tok> {
285+
let start_pos = self.get_pos();
286+
if self.chr0 == Some('0') {
287+
if self.chr1 == Some('x') || self.chr1 == Some('X') {
288+
// Hex!
289+
self.next_char();
290+
self.next_char();
291+
self.lex_number_radix(start_pos, 16)
292+
} else if self.chr1 == Some('o') || self.chr1 == Some('O') {
293+
// Octal style!
294+
self.next_char();
295+
self.next_char();
296+
self.lex_number_radix(start_pos, 8)
297+
} else if self.chr1 == Some('b') || self.chr1 == Some('B') {
298+
// Binary!
299+
self.next_char();
300+
self.next_char();
301+
self.lex_number_radix(start_pos, 2)
302+
} else {
303+
self.lex_normal_number()
304+
}
305+
} else {
306+
self.lex_normal_number()
307+
}
308+
}
309+
310+
fn lex_number_radix(&mut self, start_pos: Location, radix: u32) -> Spanned<Tok> {
283311
let mut value_text = String::new();
284312

313+
loop {
314+
if self.is_number(radix) {
315+
value_text.push(self.next_char().unwrap());
316+
} else if self.chr0 == Some('_') {
317+
self.next_char();
318+
} else {
319+
break;
320+
}
321+
}
322+
323+
let end_pos = self.get_pos();
324+
let value = BigInt::from_str_radix(&value_text, radix).unwrap();
325+
Ok((start_pos, Tok::Int { value: value }, end_pos))
326+
}
327+
328+
fn lex_normal_number(&mut self) -> Spanned<Tok> {
285329
let start_pos = self.get_pos();
286-
while self.is_number() {
330+
331+
let mut value_text = String::new();
332+
333+
// Normal number:
334+
while self.is_number(10) {
287335
value_text.push(self.next_char().unwrap());
288336
}
289337

@@ -292,7 +340,7 @@ where
292340
// Take '.':
293341
if self.chr0 == Some('.') {
294342
value_text.push(self.next_char().unwrap());
295-
while self.is_number() {
343+
while self.is_number(10) {
296344
value_text.push(self.next_char().unwrap());
297345
}
298346
}
@@ -306,18 +354,47 @@ where
306354
value_text.push(self.next_char().unwrap());
307355
}
308356

309-
while self.is_number() {
357+
while self.is_number(10) {
310358
value_text.push(self.next_char().unwrap());
311359
}
312360
}
313361

314-
let end_pos = self.get_pos();
315362
let value = f64::from_str(&value_text).unwrap();
316-
Ok((start_pos, Tok::Float { value: value }, end_pos))
363+
// Parse trailing 'j':
364+
if self.chr0 == Some('j') {
365+
self.next_char();
366+
let end_pos = self.get_pos();
367+
Ok((
368+
start_pos,
369+
Tok::Complex {
370+
real: 0.0,
371+
imag: value,
372+
},
373+
end_pos,
374+
))
375+
} else {
376+
let end_pos = self.get_pos();
377+
Ok((start_pos, Tok::Float { value: value }, end_pos))
378+
}
317379
} else {
318-
let end_pos = self.get_pos();
319-
let value = i32::from_str(&value_text).unwrap();
320-
Ok((start_pos, Tok::Int { value: value }, end_pos))
380+
// Parse trailing 'j':
381+
if self.chr0 == Some('j') {
382+
self.next_char();
383+
let end_pos = self.get_pos();
384+
let imag = f64::from_str(&value_text).unwrap();
385+
Ok((
386+
start_pos,
387+
Tok::Complex {
388+
real: 0.0,
389+
imag: imag,
390+
},
391+
end_pos,
392+
))
393+
} else {
394+
let end_pos = self.get_pos();
395+
let value = value_text.parse::<BigInt>().unwrap();
396+
Ok((start_pos, Tok::Int { value: value }, end_pos))
397+
}
321398
}
322399
}
323400

@@ -445,10 +522,25 @@ where
445522
}
446523
}
447524

448-
fn is_number(&self) -> bool {
449-
match self.chr0 {
450-
Some('0'...'9') => return true,
451-
_ => return false,
525+
fn is_number(&self, radix: u32) -> bool {
526+
match radix {
527+
2 => match self.chr0 {
528+
Some('0'...'1') => return true,
529+
_ => return false,
530+
},
531+
8 => match self.chr0 {
532+
Some('0'...'7') => return true,
533+
_ => return false,
534+
},
535+
10 => match self.chr0 {
536+
Some('0'...'9') => return true,
537+
_ => return false,
538+
},
539+
16 => match self.chr0 {
540+
Some('0'...'9') | Some('a'...'f') | Some('A'...'F') => return true,
541+
_ => return false,
542+
},
543+
x => unimplemented!("Radix not implemented: {}", x),
452544
}
453545
}
454546

@@ -920,6 +1012,7 @@ where
9201012
#[cfg(test)]
9211013
mod tests {
9221014
use super::{make_tokenizer, NewlineHandler, Tok};
1015+
use num_bigint::BigInt;
9231016
use std::iter::FromIterator;
9241017
use std::iter::Iterator;
9251018

@@ -959,14 +1052,46 @@ mod tests {
9591052
);
9601053
}
9611054

1055+
#[test]
1056+
fn test_numbers() {
1057+
let source = String::from("0x2f 0b1101 0 123 0.2 2j 2.2j");
1058+
let tokens = lex_source(&source);
1059+
assert_eq!(
1060+
tokens,
1061+
vec![
1062+
Tok::Int {
1063+
value: BigInt::from(47),
1064+
},
1065+
Tok::Int {
1066+
value: BigInt::from(13),
1067+
},
1068+
Tok::Int {
1069+
value: BigInt::from(0),
1070+
},
1071+
Tok::Int {
1072+
value: BigInt::from(123),
1073+
},
1074+
Tok::Float { value: 0.2 },
1075+
Tok::Complex {
1076+
real: 0.0,
1077+
imag: 2.0,
1078+
},
1079+
Tok::Complex {
1080+
real: 0.0,
1081+
imag: 2.2,
1082+
},
1083+
]
1084+
);
1085+
}
1086+
9621087
macro_rules! test_line_comment {
9631088
($($name:ident: $eol:expr,)*) => {
9641089
$(
9651090
#[test]
9661091
fn $name() {
9671092
let source = String::from(format!(r"99232 # {}", $eol));
9681093
let tokens = lex_source(&source);
969-
assert_eq!(tokens, vec![Tok::Int { value: 99232 }]);
1094+
assert_eq!(tokens, vec![Tok::Int { value: BigInt::from(99232) }]);
9701095
}
9711096
)*
9721097
}
@@ -989,9 +1114,9 @@ mod tests {
9891114
assert_eq!(
9901115
tokens,
9911116
vec![
992-
Tok::Int { value: 123 },
1117+
Tok::Int { value: BigInt::from(123) },
9931118
Tok::Newline,
994-
Tok::Int { value: 456 },
1119+
Tok::Int { value: BigInt::from(456) },
9951120
]
9961121
)
9971122
}
@@ -1016,11 +1141,17 @@ mod tests {
10161141
name: String::from("avariable"),
10171142
},
10181143
Tok::Equal,
1019-
Tok::Int { value: 99 },
1144+
Tok::Int {
1145+
value: BigInt::from(99)
1146+
},
10201147
Tok::Plus,
1021-
Tok::Int { value: 2 },
1148+
Tok::Int {
1149+
value: BigInt::from(2)
1150+
},
10221151
Tok::Minus,
1023-
Tok::Int { value: 0 },
1152+
Tok::Int {
1153+
value: BigInt::from(0)
1154+
},
10241155
]
10251156
);
10261157
}
@@ -1045,7 +1176,7 @@ mod tests {
10451176
Tok::Newline,
10461177
Tok::Indent,
10471178
Tok::Return,
1048-
Tok::Int { value: 99 },
1179+
Tok::Int { value: BigInt::from(99) },
10491180
Tok::Newline,
10501181
Tok::Dedent,
10511182
]
@@ -1088,7 +1219,7 @@ mod tests {
10881219
Tok::Newline,
10891220
Tok::Indent,
10901221
Tok::Return,
1091-
Tok::Int { value: 99 },
1222+
Tok::Int { value: BigInt::from(99) },
10921223
Tok::Newline,
10931224
Tok::Dedent,
10941225
Tok::Dedent,
@@ -1120,9 +1251,9 @@ mod tests {
11201251
},
11211252
Tok::Equal,
11221253
Tok::Lsqb,
1123-
Tok::Int { value: 1 },
1254+
Tok::Int { value: BigInt::from(1) },
11241255
Tok::Comma,
1125-
Tok::Int { value: 2 },
1256+
Tok::Int { value: BigInt::from(2) },
11261257
Tok::Rsqb,
11271258
Tok::Newline,
11281259
]

parser/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#[macro_use]
22
extern crate log;
33

4+
extern crate num_bigint;
5+
extern crate num_traits;
6+
47
pub mod ast;
58
pub mod lexer;
69
pub mod parser;

0 commit comments

Comments
 (0)