
Commit a8d4de2

make_tokenizer -> lex to integrate terms
We don't distinguish a scanner or tokenizer from a lexer.
1 parent b5829b5 · commit a8d4de2
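
In practice the rename is a drop-in change: call sites swap make_tokenizer for lex (and make_tokenizer_located for lex_located) with the same arguments. A minimal sketch of the renamed entry point, adapted from the doc example updated in this commit; the main wrapper and the final println! are added here for illustration only:

use rustpython_parser::lexer::lex;
use rustpython_parser::mode::Mode;

fn main() {
    let source = "x = 'RustPython'";
    // `lex` (formerly `make_tokenizer`) returns an iterator of `LexResult`,
    // i.e. Result<(Location, Tok, Location), LexicalError>.
    let tokens = lex(source, Mode::Module)
        .map(|tok| tok.expect("Failed to lex"))
        .collect::<Vec<_>>();
    println!("{:?}", tokens);
}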

File tree

4 files changed: +27 −27 lines

parser/src/lexer.rs
parser/src/lib.rs
parser/src/parser.rs
parser/src/soft_keywords.rs


parser/src/lexer.rs

Lines changed: 15 additions & 15 deletions

@@ -4,20 +4,20 @@
 //! governing what is and is not a valid token are defined in the Python reference
 //! guide section on [Lexical analysis].
 //!
-//! The primary function in this module is [`make_tokenizer`], which takes a string slice
+//! The primary function in this module is [`lex`], which takes a string slice
 //! and returns an iterator over the tokens in the source code. The tokens are currently returned
 //! as a `Result<Spanned, LexicalError>`, where [`Spanned`] is a tuple containing the
 //! start and end [`Location`] and a [`Tok`] denoting the token.
 //!
 //! # Example
 //!
 //! ```
-//! use rustpython_parser::lexer::{make_tokenizer, Tok};
+//! use rustpython_parser::lexer::{lex, Tok};
 //! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::token::StringKind;
 //!
 //! let source = "x = 'RustPython'";
-//! let tokens = make_tokenizer(source, Mode::Module)
+//! let tokens = lex(source, Mode::Module)
 //!     .map(|tok| tok.expect("Failed to lex"))
 //!     .collect::<Vec<_>>();
 //!
@@ -195,29 +195,29 @@ pub type Spanned = (Location, Tok, Location);
 /// The result of lexing a token.
 pub type LexResult = Result<Spanned, LexicalError>;
 
-/// Create a new tokenizer from a source string.
+/// Create a new lexer from a source string.
 ///
 /// # Examples
 ///
 /// ```
 /// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::lexer::{make_tokenizer};
+/// use rustpython_parser::lexer::{lex};
 ///
 /// let source = "def hello(): return 'world'";
-/// let tokenizer = make_tokenizer(source, Mode::Module);
+/// let lexer = lex(source, Mode::Module);
 ///
-/// for token in tokenizer {
+/// for token in lexer {
 ///     println!("{:?}", token);
 /// }
 /// ```
 #[inline]
-pub fn make_tokenizer(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
-    make_tokenizer_located(source, mode, Location::default())
+pub fn lex(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
+    lex_located(source, mode, Location::default())
 }
 
-/// Create a new tokenizer from a source string, starting at a given location.
-/// You probably want to use [`make_tokenizer`] instead.
-pub fn make_tokenizer_located(
+/// Create a new lexer from a source string, starting at a given location.
+/// You probably want to use [`lex`] instead.
+pub fn lex_located(
     source: &str,
     mode: Mode,
     start_location: Location,
@@ -230,7 +230,7 @@ where
     T: Iterator<Item = char>,
 {
     /// Create a new lexer from T and a starting location. You probably want to use
-    /// [`make_tokenizer`] instead.
+    /// [`lex`] instead.
     pub fn new(input: T, start: Location) -> Self {
         let mut lxr = Lexer {
             at_begin_of_line: true,
@@ -1320,7 +1320,7 @@ impl std::fmt::Display for LexicalErrorType {
 
 #[cfg(test)]
 mod tests {
-    use super::{make_tokenizer, StringKind, Tok};
+    use super::{lex, StringKind, Tok};
     use crate::mode::Mode;
     use num_bigint::BigInt;
 
@@ -1329,7 +1329,7 @@ mod tests {
     const UNIX_EOL: &str = "\n";
 
     pub fn lex_source(source: &str) -> Vec<Tok> {
-        let lexer = make_tokenizer(source, Mode::Module);
+        let lexer = lex(source, Mode::Module);
         lexer.map(|x| x.unwrap().1).collect()
     }

parser/src/lib.rs

Lines changed: 4 additions & 4 deletions

@@ -67,28 +67,28 @@
 //!
 //! ```
 //! use rustpython_parser::mode::Mode;
-//! use rustpython_parser::lexer::make_tokenizer;
+//! use rustpython_parser::lexer::lex;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!    return bool(i & 1)
 //! "#;
-//! let mut tokens = make_tokenizer(python_source, Mode::Module);
+//! let mut tokens = lex(python_source, Mode::Module);
 //! assert!(tokens.all(|t| t.is_ok()));
 //! ```
 //!
 //! These tokens can be directly fed into the parser to generate an AST:
 //!
 //! ```
-//! use rustpython_parser::lexer::make_tokenizer;
+//! use rustpython_parser::lexer::lex;
 //! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::parser::parse_tokens;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!    return bool(i & 1)
 //! "#;
-//! let tokens = make_tokenizer(python_source, Mode::Module);
+//! let tokens = lex(python_source, Mode::Module);
 //! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
 //!
 //! assert!(ast.is_ok());

parser/src/parser.rs

Lines changed: 6 additions & 6 deletions

@@ -164,7 +164,7 @@ pub fn parse_located(
     source_path: &str,
     location: Location,
 ) -> Result<ast::Mod, ParseError> {
-    let lxr = lexer::make_tokenizer_located(source, mode, location);
+    let lxr = lexer::lex_located(source, mode, location);
     parse_tokens(lxr, mode, source_path)
 }
 
@@ -175,14 +175,14 @@ pub fn parse_located(
 /// # Example
 ///
 /// As an example, instead of parsing a string, we can parse a list of tokens after we generate
-/// them using the [`lexer::make_tokenizer`] function:
+/// them using the [`lexer::lex`] function:
 ///
 /// ```
-/// use rustpython_parser::lexer::make_tokenizer;
+/// use rustpython_parser::lexer::lex;
 /// use rustpython_parser::mode::Mode;
 /// use rustpython_parser::parser::parse_tokens;
 ///
-/// let expr = parse_tokens(make_tokenizer("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
+/// let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
 /// ```
 pub fn parse_tokens(
@@ -191,11 +191,11 @@ pub fn parse_tokens(
     source_path: &str,
 ) -> Result<ast::Mod, ParseError> {
     let marker_token = (Default::default(), mode.to_marker(), Default::default());
-    let tokenizer = iter::once(Ok(marker_token))
+    let lexer = iter::once(Ok(marker_token))
         .chain(lxr)
         .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
     python::TopParser::new()
-        .parse(tokenizer.into_iter())
+        .parse(lexer.into_iter())
         .map_err(|e| parse_error_from_lalrpop(e, source_path))
 }

parser/src/soft_keywords.rs

Lines changed: 2 additions & 2 deletions

@@ -27,9 +27,9 @@ impl<I> SoftKeywordTransformer<I>
 where
     I: Iterator<Item = LexResult>,
 {
-    pub fn new(tokenizer: I, mode: Mode) -> Self {
+    pub fn new(lexer: I, mode: Mode) -> Self {
         Self {
-            underlying: tokenizer.multipeek(),
+            underlying: lexer.multipeek(),
             start_of_line: matches!(mode, Mode::Interactive | Mode::Module),
         }
     }
