lex: Handle the char variable initialization #5

Merged
merged 1 commit on Aug 13, 2024
8 changes: 4 additions & 4 deletions src/lexer/mod.rs
@@ -54,13 +54,13 @@ impl Iterator for Lexer {
Ok(transition) => transition,
Err(err) => {
self.errors.push(err.clone());
match err {
return match err {
LexerError::UnexpectedToken(token) => {
error!("Unexpected token: {}", token);
// TODO: return a transition to continue lexing (for error recovery)
return None;
None
}
}
};
}
};
let (state, transition_kind) = transition.into_parts();
@@ -133,7 +133,7 @@ mod tests {
#[test]
fn identifier() {
let fs_files = collect_fs_files("./testdata/identifier", true);
assert_eq!(fs_files.len(), 17);
assert_eq!(fs_files.len(), 18);

for path in fs_files {
info!("file -> {:?}", path);
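The first hunk above is a stylistic refactor: the statement-style `match` with a `return` buried inside the arm becomes a single `return match` expression. A minimal standalone sketch of the same shape, using a hypothetical `LexError`/`ToyLexer` rather than the project's types:

```rust
// Toy mirror of the control flow above: on an error, record it, then
// return the value of the match expression itself. A future recovery
// arm could yield Some(..) to keep the iterator going.
#[derive(Clone, Debug)]
enum LexError {
    UnexpectedToken(String),
}

struct ToyLexer {
    errors: Vec<LexError>,
    steps: Vec<Result<char, LexError>>,
}

impl Iterator for ToyLexer {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        let step = self.steps.pop()?;
        let c = match step {
            Ok(c) => c,
            Err(err) => {
                self.errors.push(err.clone());
                return match err {
                    LexError::UnexpectedToken(token) => {
                        eprintln!("Unexpected token: {}", token);
                        None // a recovery arm could return Some(..) instead
                    }
                };
            }
        };
        Some(c)
    }
}

fn main() {
    let mut lexer = ToyLexer {
        errors: vec![],
        steps: vec![Err(LexError::UnexpectedToken("@".into()))],
    };
    assert_eq!(lexer.next(), None);
    assert_eq!(lexer.errors.len(), 1);
}
```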
20 changes: 18 additions & 2 deletions src/lexer/states.rs
@@ -3,6 +3,7 @@ use super::Lexer;
use super::LexerError;
use crate::lexer::token::Token;
use crate::lexer::token::TokenKind;
use crate::lexer::token::TokenKind::TokenTick;
use std::fmt::Debug;

pub trait State: Debug {
@@ -137,7 +138,7 @@ pub struct StateWord;
impl State for StateWord {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some(c) if c.is_alphanumeric() || c.eq(&'_') => Ok(Lexer::proceed(
Some(c) if c.is_alphabetic() || c.eq(&'_') => Ok(Lexer::proceed(
Box::new(StateWord),
TransitionKind::AdvanceOffset,
)),
@@ -162,14 +163,29 @@ pub struct StateSymbol;

impl StateSymbol {
fn is_symbol(c: char) -> bool {
matches!(c, ':' | '=' | '\n')
matches!(c, ':' | '=' | '\n' | '\'')
}
}

impl State for StateSymbol {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some('\n') => {
let lexeme = cursor.source().content()[cursor.index()..cursor.offset()].to_string();
let token_kind = TokenKind::from(&lexeme);
// NOTE: if a '\n' is found while scanning another "symbol" token, the previous token was mangled and only the '\n' is emitted;
// right now only TokenTick needs handling, since it can appear at the end of a line, but this can be extended to other symbols
if token_kind == TokenTick {
return Ok(Lexer::proceed(
Box::new(StateStart),
TransitionKind::EmitToken(Token::new(
token_kind,
lexeme,
cursor.location().clone(),
)),
));
}

let transition = Lexer::proceed(
Box::new(StateStart),
TransitionKind::EmitToken(Token::new(
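The flush rule the NOTE describes can be summarized with a standalone sketch. The helper `flush_before_newline` is hypothetical (not the project's API), and the emitted token kinds are shown as plain strings: when a newline terminates a pending symbol lexeme, a lone tick is emitted before the newline, while any other partial symbol is dropped and only the newline survives.

```rust
// Hypothetical, simplified model of the flush rule described in the
// NOTE above.
fn flush_before_newline(pending: &str) -> Vec<&'static str> {
    match pending {
        // A tick can legitimately end a line (e.g. `... = 'a'`), so it
        // is emitted before the newline token.
        "'" => vec!["TokenTick", "TokenNewLine"],
        // Any other partially scanned symbol is considered mangled and
        // only the newline is emitted.
        _ => vec!["TokenNewLine"],
    }
}

fn main() {
    assert_eq!(flush_before_newline("'"), ["TokenTick", "TokenNewLine"]);
    assert_eq!(flush_before_newline(":"), ["TokenNewLine"]);
}
```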
15 changes: 11 additions & 4 deletions src/lexer/token.rs
@@ -5,6 +5,7 @@ use std::path::{Path, PathBuf};
const KEYWORD_INT: &str = "int";
const KEYWORD_FLOAT: &str = "float";
const KEYWORD_BOOL: &str = "bool";
const KEYWORD_CHAR: &str = "char";
const KEYWORD_BOOL_TRUE: &str = "true";
const KEYWORD_BOOL_FALSE: &str = "false";
const SEPARATOR_COLON: &str = ":";
@@ -15,6 +16,7 @@ pub enum Literal {
Int(i64),
Float(f64),
Bool(bool),
Char(char),
}

#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
@@ -25,6 +27,7 @@ pub enum TokenKind {
TokenNewLine, // \n
TokenColon, // :
TokenAssign, // =
TokenTick, // '
TokenEOF, // End of file
}

@@ -34,6 +37,7 @@ impl TokenKind {
KEYWORD_INT => Some(TokenKind::TokenKeyword),
KEYWORD_FLOAT => Some(TokenKind::TokenKeyword),
KEYWORD_BOOL => Some(TokenKind::TokenKeyword),
KEYWORD_CHAR => Some(TokenKind::TokenKeyword),
KEYWORD_BOOL_TRUE => Some(TokenKind::TokenLiteral(Literal::Bool(true))),
KEYWORD_BOOL_FALSE => Some(TokenKind::TokenLiteral(Literal::Bool(false))),
_ => None,
@@ -42,9 +46,10 @@

fn match_number(lexeme: &str) -> Option<TokenKind> {
if lexeme.chars().all(char::is_numeric) {
return Some(TokenKind::TokenLiteral(Literal::Int(
lexeme.parse().unwrap(),
)));
return match lexeme.parse() {
Ok(value) => Some(TokenKind::TokenLiteral(Literal::Int(value))),
Err(_) => None,
};
}

if lexeme.contains('.') {
@@ -60,6 +65,7 @@ impl TokenKind {
match lexeme {
SEPARATOR_COLON => Some(TokenKind::TokenColon),
SEPARATOR_ASSIGN => Some(TokenKind::TokenAssign),
"'" => Some(TokenKind::TokenTick),
_ => None,
}
}
@@ -86,7 +92,6 @@ impl From<&String> for TokenKind {
TokenKind::TokenIdentifier
}
}

/// The location of a token in the source code in a human-readable format
#[derive(Debug, Clone, Eq, PartialEq, Deserialize, Serialize)]
pub struct TokenLocation {
@@ -231,6 +236,7 @@ impl std::fmt::Display for Literal {
Literal::Int(value) => write!(f, "Int({})", value),
Literal::Float(value) => write!(f, "Float({})", value),
Literal::Bool(value) => write!(f, "Bool({})", value),
Literal::Char(value) => write!(f, "Char({})", value),
}
}
}
@@ -243,6 +249,7 @@ impl std::fmt::Display for TokenKind {
TokenKind::TokenNewLine => write!(f, "TokenNewLine"),
TokenKind::TokenColon => write!(f, "TokenColon"),
TokenKind::TokenAssign => write!(f, "TokenAssign"),
TokenKind::TokenTick => write!(f, "TokenTick"),
TokenKind::TokenEOF => write!(f, "TokenEOF"),
}
}
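The `match_number` change is the substantive fix in this file: an all-numeric lexeme can still overflow `i64`, in which case the old `.parse().unwrap()` would panic where the new code returns `None`. A standalone illustration (a sketch named `match_int`, not the project's code):

```rust
// Why the unwrap was unsafe: `char::is_numeric` accepts this lexeme,
// but it exceeds i64::MAX, so parse() fails; .ok() yields None instead
// of panicking.
fn match_int(lexeme: &str) -> Option<i64> {
    if lexeme.chars().all(char::is_numeric) {
        return lexeme.parse().ok();
    }
    None
}

fn main() {
    assert_eq!(match_int("42"), Some(42));
    assert_eq!(match_int("99999999999999999999"), None); // > i64::MAX
}
```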
1 change: 1 addition & 0 deletions testdata/identifier/id_char_assign.fs
@@ -0,0 +1 @@
my_char: char = 'a'
11 changes: 11 additions & 0 deletions testdata/identifier/id_char_assign.tokens
@@ -0,0 +1,11 @@
[
{"kind": "TokenIdentifier","lexeme": "my_char","location": {"file_path": "","line": 0,"column_start": 0,"column_end": 7}},
{"kind": "TokenColon","lexeme": ":","location": {"file_path": "","line": 0,"column_start": 7,"column_end": 8}},
{"kind": "TokenKeyword","lexeme": "char","location": {"file_path": "","line": 0,"column_start": 9,"column_end": 13}},
{"kind": "TokenAssign","lexeme": "=","location": {"file_path": "","line": 0,"column_start": 14,"column_end": 15}},
{"kind": "TokenTick","lexeme": "'","location": {"file_path": "","line": 0,"column_start": 16,"column_end": 17}},
{"kind": "TokenIdentifier","lexeme": "a","location": {"file_path": "","line": 0,"column_start": 17,"column_end": 18}},
{"kind": "TokenTick","lexeme": "'","location": {"file_path": "","line": 0,"column_start": 18,"column_end": 19}},
{"kind": "TokenNewLine","lexeme": "\\n","location": {"file_path": "","line": 0,"column_start": 19,"column_end": 19}},
{"kind": "TokenEOF","lexeme": "","location": {"file_path": "","line": 1,"column_start": 0,"column_end": 0}}
]