Skip to content
This repository was archived by the owner on Mar 22, 2024. It is now read-only.

Commit b4f0b05

Browse files
authored
Merge pull request #6 from qingshi163/zerowidth
fix zerowidth search
2 parents 73abbac + df8453d commit b4f0b05

File tree

3 files changed

+85
-34
lines changed

3 files changed

+85
-34
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "sre-engine"
3-
version = "0.1.1"
3+
version = "0.1.2"
44
authors = ["Kangzhi Shi <[email protected]>", "RustPython Team"]
55
description = "A low-level implementation of Python's SRE regex engine"
66
repository = "https://github.com/RustPython/sre-engine"

src/engine.rs

Lines changed: 69 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,9 @@ pub struct State<'a> {
2323
repeat_stack: Vec<RepeatContext>,
2424
pub string_position: usize,
2525
popped_context: Option<MatchContext>,
26-
pub has_matched: Option<bool>,
26+
pub has_matched: bool,
2727
pub match_all: bool,
28+
pub must_advance: bool,
2829
}
2930

3031
impl<'a> State<'a> {
@@ -50,8 +51,9 @@ impl<'a> State<'a> {
5051
marks: Vec::new(),
5152
string_position: start,
5253
popped_context: None,
53-
has_matched: None,
54+
has_matched: false,
5455
match_all: false,
56+
must_advance: false,
5557
}
5658
}
5759

@@ -63,7 +65,7 @@ impl<'a> State<'a> {
6365
self.marks.clear();
6466
self.string_position = self.start;
6567
self.popped_context = None;
66-
self.has_matched = None;
68+
self.has_matched = false;
6769
}
6870

6971
fn set_mark(&mut self, mark_nr: usize, position: usize) {
@@ -100,17 +102,7 @@ impl<'a> State<'a> {
100102
self.marks_stack.pop();
101103
}
102104

103-
pub fn pymatch(mut self) -> Self {
104-
let ctx = MatchContext {
105-
string_position: self.start,
106-
string_offset: self.string.offset(0, self.start),
107-
code_position: 0,
108-
has_matched: None,
109-
toplevel: true,
110-
};
111-
self.context_stack.push(ctx);
112-
113-
let mut dispatcher = OpcodeDispatcher::new();
105+
fn _match(mut self, dispatcher: &mut OpcodeDispatcher) -> Self {
114106
let mut has_matched = None;
115107

116108
loop {
@@ -127,21 +119,58 @@ impl<'a> State<'a> {
127119
}
128120
}
129121

130-
self.has_matched = has_matched;
122+
self.has_matched = has_matched == Some(true);
131123
self
132124
}
133125

126+
pub fn pymatch(mut self) -> Self {
127+
let ctx = MatchContext {
128+
string_position: self.start,
129+
string_offset: self.string.offset(0, self.start),
130+
code_position: 0,
131+
has_matched: None,
132+
toplevel: true,
133+
};
134+
self.context_stack.push(ctx);
135+
136+
let mut dispatcher = OpcodeDispatcher::new();
137+
138+
self._match(&mut dispatcher)
139+
}
140+
134141
pub fn search(mut self) -> Self {
135142
// TODO: optimize by op info and skip prefix
136-
while self.start <= self.end {
137-
self.match_all = false;
138-
self = self.pymatch();
139143

140-
if self.has_matched == Some(true) {
141-
return self;
142-
}
144+
if self.start > self.end {
145+
return self;
146+
}
147+
148+
let mut dispatcher = OpcodeDispatcher::new();
149+
150+
let ctx = MatchContext {
151+
string_position: self.start,
152+
string_offset: self.string.offset(0, self.start),
153+
code_position: 0,
154+
has_matched: None,
155+
toplevel: true,
156+
};
157+
self.context_stack.push(ctx);
158+
self = self._match(&mut dispatcher);
159+
160+
self.must_advance = false;
161+
while !self.has_matched && self.start < self.end {
143162
self.start += 1;
144163
self.reset();
164+
dispatcher.clear();
165+
let ctx = MatchContext {
166+
string_position: self.start,
167+
string_offset: self.string.offset(0, self.start),
168+
code_position: 0,
169+
has_matched: None,
170+
toplevel: false,
171+
};
172+
self.context_stack.push(ctx);
173+
self = self._match(&mut dispatcher);
145174
}
146175

147176
self
@@ -310,6 +339,18 @@ trait MatchContextDrive {
310339
.string
311340
.back_offset(self.ctx().string_offset, skip_count);
312341
}
342+
fn can_success(&self) -> bool {
343+
if !self.ctx().toplevel {
344+
return true;
345+
}
346+
if self.state().match_all && !self.at_end() {
347+
return false;
348+
}
349+
if self.state().must_advance && self.ctx().string_position == self.state().start {
350+
return false;
351+
}
352+
true
353+
}
313354
}
314355

315356
struct StackDrive<'a> {
@@ -429,6 +470,9 @@ impl OpcodeDispatcher {
429470
executing_contexts: HashMap::new(),
430471
}
431472
}
473+
fn clear(&mut self) {
474+
self.executing_contexts.clear();
475+
}
432476
// Returns True if the current context matches, False if it doesn't and
433477
// None if matching is not finished, ie must be resumed after child
434478
// contexts have been matched.
@@ -470,11 +514,9 @@ impl OpcodeDispatcher {
470514
drive.ctx_mut().has_matched = Some(false);
471515
}),
472516
SreOpcode::SUCCESS => once(|drive| {
473-
if drive.ctx().toplevel && drive.state.match_all && !drive.at_end() {
474-
drive.ctx_mut().has_matched = Some(false);
475-
} else {
517+
drive.ctx_mut().has_matched = Some(drive.can_success());
518+
if drive.ctx().has_matched == Some(true) {
476519
drive.state.string_position = drive.ctx().string_position;
477-
drive.ctx_mut().has_matched = Some(true);
478520
}
479521
}),
480522
SreOpcode::ANY => once(|drive| {
@@ -1152,9 +1194,7 @@ impl OpcodeExecutor for OpMinRepeatOne {
11521194
};
11531195

11541196
let next_code = drive.peek_code(drive.peek_code(1) as usize + 1);
1155-
if next_code == SreOpcode::SUCCESS as u32
1156-
&& !(drive.ctx().toplevel && drive.state.match_all && !drive.at_end())
1157-
{
1197+
if next_code == SreOpcode::SUCCESS as u32 && drive.can_success() {
11581198
// tail is empty. we're finished
11591199
drive.state.string_position = drive.ctx().string_position;
11601200
drive.ctx_mut().has_matched = Some(true);
@@ -1455,8 +1495,7 @@ impl OpcodeExecutor for OpRepeatOne {
14551495
}
14561496

14571497
let next_code = drive.peek_code(drive.peek_code(1) as usize + 1);
1458-
if next_code == SreOpcode::SUCCESS as u32 && drive.at_end() && !drive.ctx().toplevel
1459-
{
1498+
if next_code == SreOpcode::SUCCESS as u32 && drive.can_success() {
14601499
// tail is empty. we're finished
14611500
drive.state.string_position = drive.ctx().string_position;
14621501
drive.ctx_mut().has_matched = Some(true);

tests/tests.rs

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ fn test_2427() {
2424
// END GENERATED
2525
let mut state = lookbehind.state("x", 0..usize::MAX);
2626
state = state.pymatch();
27-
assert!(state.has_matched == Some(true));
27+
assert!(state.has_matched);
2828
}
2929

3030
#[test]
@@ -35,7 +35,7 @@ fn test_assert() {
3535
// END GENERATED
3636
let mut state = positive_lookbehind.state("abcdef", 0..usize::MAX);
3737
state = state.search();
38-
assert!(state.has_matched == Some(true));
38+
assert!(state.has_matched);
3939
}
4040

4141
#[test]
@@ -46,5 +46,17 @@ fn test_string_boundaries() {
4646
// END GENERATED
4747
let mut state = big_b.state("", 0..usize::MAX);
4848
state = state.search();
49-
assert!(state.has_matched == None)
49+
assert!(!state.has_matched);
50+
}
51+
52+
#[test]
53+
fn test_zerowidth() {
54+
// pattern p = re.compile(r'\b|:+')
55+
// START GENERATED by generate_tests.py
56+
#[rustfmt::skip] let p = Pattern { code: &[15, 4, 0, 0, 4294967295, 7, 5, 6, 10, 16, 12, 10, 25, 6, 1, 4294967295, 17, 58, 1, 16, 2, 0, 1], flags: SreFlag::from_bits_truncate(32) };
57+
// END GENERATED
58+
let mut state = p.state("a:", 0..usize::MAX);
59+
state.must_advance = true;
60+
state = state.search();
61+
assert!(state.string_position == 1);
5062
}

0 commit comments

Comments
 (0)