@@ -13,6 +13,7 @@ use std::collections::HashMap;
13
13
use std:: str:: FromStr ;
14
14
use unic_emoji_char:: is_emoji_presentation;
15
15
use unicode_xid:: UnicodeXID ;
16
+ use wtf8;
16
17
17
18
#[ derive( Clone , Copy , PartialEq , Debug ) ]
18
19
struct IndentationLevel {
@@ -67,6 +68,7 @@ pub struct LexicalError {
67
68
#[ derive( Debug ) ]
68
69
pub enum LexicalErrorType {
69
70
StringError ,
71
+ UnicodeError ,
70
72
NestingError ,
71
73
UnrecognizedToken { tok : char } ,
72
74
OtherError ( String ) ,
@@ -456,6 +458,27 @@ where
456
458
}
457
459
}
458
460
461
+ fn unicode_literal ( & mut self , literal_number : usize ) -> Result < char , LexicalError > {
462
+ let mut p: u32 = 0u32 ;
463
+ let unicode_error = Err ( LexicalError {
464
+ error : LexicalErrorType :: UnicodeError ,
465
+ location : self . get_pos ( ) ,
466
+ } ) ;
467
+ for i in 1 ..=literal_number {
468
+ match self . next_char ( ) {
469
+ Some ( c) => match c. to_digit ( 16 ) {
470
+ Some ( d) => p += d << ( literal_number - i) * 4 ,
471
+ None => return unicode_error,
472
+ } ,
473
+ None => return unicode_error,
474
+ }
475
+ }
476
+ match wtf8:: CodePoint :: from_u32 ( p) {
477
+ Some ( cp) => return Ok ( cp. to_char_lossy ( ) ) ,
478
+ None => return unicode_error,
479
+ }
480
+ }
481
+
459
482
fn lex_string (
460
483
& mut self ,
461
484
is_bytes : bool ,
@@ -513,6 +536,9 @@ where
513
536
Some ( 't' ) => {
514
537
string_content. push ( '\t' ) ;
515
538
}
539
+ Some ( 'u' ) => string_content. push ( self . unicode_literal ( 4 ) ?) ,
540
+ Some ( 'U' ) => string_content. push ( self . unicode_literal ( 8 ) ?) ,
541
+ Some ( 'x' ) if !is_bytes => string_content. push ( self . unicode_literal ( 2 ) ?) ,
516
542
Some ( 'v' ) => string_content. push ( '\x0b' ) ,
517
543
Some ( c) => {
518
544
string_content. push ( '\\' ) ;
0 commit comments