Skip to content

Commit

Permalink
format generated source code; semi&comp modifier
Browse files Browse the repository at this point in the history
  • Loading branch information
ShawnJian committed Aug 30, 2023
1 parent a4c93a7 commit a1f19f4
Show file tree
Hide file tree
Showing 11 changed files with 152 additions and 58 deletions.
4 changes: 3 additions & 1 deletion bootstrap/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,7 @@ license = "MIT/Apache-2.0"
rust-version = "1.56"

[dependencies]
pest_generator = "2.1.1" # Use the crates-io version, which (should be) known-good
#pest_generator = "2.1.1" # Use the crates-io version, which (should be) known-good
pest_generator = { path = "../generator" }
quote = "1.0"
rust-format = "0.3.4"
5 changes: 3 additions & 2 deletions bootstrap/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ extern crate pest_generator;

use pest_generator::derive_parser;
use std::{fs::File, io::prelude::*, path::Path};
use rust_format::{Formatter, RustFmt};

fn main() {
let pest = Path::new(concat!(
Expand All @@ -25,6 +26,6 @@ fn main() {
};

let mut file = File::create(rs).unwrap();

writeln!(file, "pub struct PestParser;\n{}", derived,).unwrap();
let formatted = RustFmt::default().format_str(derived.to_string(),).unwrap();
writeln!(file, "pub struct PestParser;\n{}", formatted,).unwrap();
}
41 changes: 29 additions & 12 deletions generator/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream {

fn generate_rule(rule: OptimizedRule) -> TokenStream {
let name = format_ident!("r#{}", rule.name);
let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic {
let expr = if rule.ty.has(RuleType::Atomic) || rule.ty.has(RuleType::CompoundAtomic) {
generate_expr_atomic(rule.expr)
} else if rule.name == "WHITESPACE" || rule.name == "COMMENT" {
let atomic = generate_expr_atomic(rule.expr);
Expand All @@ -276,24 +276,26 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream {

let box_ty = box_type();

match rule.ty {
RuleType::Normal => quote! {
if rule.ty.has(RuleType::Normal) {
return quote! {
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
state.rule(Rule::#name, |state| {
#expr
})
}
},
RuleType::Silent => quote! {
}
} else if rule.ty.has(RuleType::Silent) {
return quote! {
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
#expr
}
},
RuleType::Atomic => quote! {
}
} else if rule.ty.has(RuleType::Atomic) {
return quote! {
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
Expand All @@ -303,8 +305,9 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream {
})
})
}
},
RuleType::CompoundAtomic => quote! {
}
} else if rule.ty.has(RuleType::CompoundAtomic) {
return quote! {
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
Expand All @@ -314,8 +317,21 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream {
})
})
}
},
RuleType::NonAtomic => quote! {
}
} else if rule.ty.has(RuleType::SemiAtomic) {
return quote! {
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
state.atomic(::pest::Atomicity::SemiAtomic, |state| {
state.rule(Rule::#name, |state| {
#expr
})
})
}
}
} else if rule.ty.has(RuleType::NonAtomic) {
return quote! {
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> {
Expand All @@ -325,8 +341,9 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream {
})
})
}
},
}
}
return expr;
}

fn generate_skip(rules: &[OptimizedRule]) -> TokenStream {
Expand Down
1 change: 1 addition & 0 deletions meta/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ rust-version = "1.56"
[dependencies]
pest = { path = "../pest", version = "2.6.0" }
once_cell = "1.8.0"
num_enum = "0.5.1"

[build-dependencies]
sha2 = { version = "0.10", default-features = false }
45 changes: 39 additions & 6 deletions meta/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@

//! Types for the pest's abstract syntax tree.
use std::ops;
use std::convert::TryFrom;
use num_enum::TryFromPrimitive;

/// A grammar rule
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Rule {
Expand All @@ -21,31 +25,60 @@ pub struct Rule {
}

/// All possible rule types
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[derive(Clone, Copy, Debug, Eq, PartialEq, TryFromPrimitive)]
#[repr(u32)]
pub enum RuleType {
/// The normal rule type
Normal,
Normal = 0b00000,
/// Silent rules are just like normal rules
/// — when run, they function the same way —
/// except they do not produce pairs or tokens.
/// If a rule is silent, it will never appear in a parse result.
/// (their syntax is `_{ ... }`)
Silent,
Silent = 0b00001,
/// Atomic rules prevent implicit whitespace: inside an atomic rule,
/// the tilde ~ means "immediately followed by",
/// and repetition operators (asterisk * and plus sign +)
/// have no implicit separation. In addition, all other rules
/// called from an atomic rule are also treated as atomic.
/// In an atomic rule, interior matching rules are silent.
/// (their syntax is `@{ ... }`)
Atomic,
Atomic = 0b00010,
/// Compound atomic rules are similar to atomic rules,
/// but they produce inner tokens as normal.
/// (their syntax is `${ ... }`)
CompoundAtomic,
CompoundAtomic = 0b00100,
/// Non-atomic rules cancel the effect of atomic rules.
/// (their syntax is `!{ ... }`)
NonAtomic,
SemiAtomic = 0b01000,
/// Separator rules are similar to normal rules
/// — when run, they function the same way —
/// except they produce SEPARATOR tokens,
/// even if these tokens are already specified in WHITESPACE.
/// (their syntax is `#{ ... }`)
NonAtomic = 0b10000,
}

impl RuleType {
    /// Reports whether the modifier `ty` is enabled in this rule type.
    ///
    /// `RuleType::Normal` is the all-zero pattern, so it can never be found
    /// by a bit test; it is only reported as present when `self` is itself
    /// `RuleType::Normal`. Every other query is true exactly when `self` and
    /// `ty` share at least one set bit.
    pub fn has(self, ty: RuleType) -> bool {
        match (self, ty) {
            // Both sides carry no modifier bits at all.
            (RuleType::Normal, RuleType::Normal) => true,
            // Otherwise compare the discriminants bit by bit.
            _ => (self as u32) & (ty as u32) != 0,
        }
    }
}

impl ops::BitAnd<RuleType> for RuleType {
    type Output = Self;

    /// Intersects the bit patterns of two rule types and maps the result
    /// back onto a `RuleType` variant.
    fn bitand(self, rhs: RuleType) -> RuleType {
        let common_bits = (self as u32) & (rhs as u32);
        // Each variant is either zero or a single bit, so the intersection
        // of two variants is again zero or that same single bit and always
        // names a variant; the conversion therefore cannot fail here.
        RuleType::try_from(common_bits).unwrap()
    }
}

impl ops::BitOrAssign<RuleType> for RuleType {
    /// Merges the modifier `rhs` into `self` by OR-ing the two bit patterns.
    ///
    /// The previous implementation combined the operands with `&`. Because
    /// `RuleType::Normal` is the all-zero pattern, `Normal |= x` always
    /// produced `Normal`, silently discarding the modifier.
    ///
    /// # Panics
    ///
    /// Panics if the union of the two bit patterns does not correspond to a
    /// declared `RuleType` variant (for example when OR-ing two different
    /// non-`Normal` variants), since the enum has no variant for multi-bit
    /// combinations.
    fn bitor_assign(&mut self, rhs: RuleType) {
        let merged_bits = (*self as u32) | (rhs as u32);
        *self = RuleType::try_from(merged_bits)
            .expect("combined rule modifiers do not map to a single RuleType variant");
    }
}

/// All possible rule expressions
Expand Down
12 changes: 8 additions & 4 deletions meta/src/grammar.pest
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,24 @@ modifier = _{
| atomic_modifier
| compound_atomic_modifier
| non_atomic_modifier
| semi_atomic_modifier
}

/// Silent rule prefix.
/// Silent rule prefix. - explicit WHITESPACE and COMMENT, no self name
silent_modifier = { "_" }

/// Atomic rule prefix.
/// Atomic rule prefix. - explicit WHITESPACE and COMMENT, no children's names
atomic_modifier = { "@" }

/// Compound atomic rule prefix.
/// Compound atomic rule prefix. - explicit WHITESPACE and COMMENT, have all names
compound_atomic_modifier = { "$" }

/// Non-atomic rule prefix.
/// Non-atomic rule prefix. - children back to normal.
non_atomic_modifier = { "!" }

/// Compound-separating rule prefix. - explicit SEPARATOR
semi_atomic_modifier = { "#" }

/// A tag label.
tag_id = @{ "#" ~ ("_" | alpha) ~ ("_" | alpha_num)* }

Expand Down
2 changes: 1 addition & 1 deletion meta/src/optimizer/concatenator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub fn concatenate(rule: Rule) -> Rule {
name,
ty,
expr: expr.map_bottom_up(|expr| {
if ty == RuleType::Atomic {
if rule.ty.has(RuleType::Atomic) {
match expr {
Expr::Seq(lhs, rhs) => match (*lhs, *rhs) {
(Expr::Str(lhs), Expr::Str(rhs)) => Expr::Str(lhs + &rhs),
Expand Down
2 changes: 1 addition & 1 deletion meta/src/optimizer/skipper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub fn skip(rule: Rule) -> Rule {
Rule {
name,
ty,
expr: if ty == RuleType::Atomic {
expr: if ty.has(RuleType::Atomic) {
expr.map_top_down(|expr| {
if let Expr::Rep(expr) = expr.clone() {
if let Expr::Seq(lhs, rhs) = *expr {
Expand Down
14 changes: 8 additions & 6 deletions meta/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ pub fn rename_meta_rule(rule: &Rule) -> String {
Rule::atomic_modifier => "`@`".to_owned(),
Rule::compound_atomic_modifier => "`$`".to_owned(),
Rule::non_atomic_modifier => "`!`".to_owned(),
Rule::semi_atomic_modifier => "`#`".to_owned(),
Rule::opening_brace => "`{`".to_owned(),
Rule::closing_brace => "`}`".to_owned(),
Rule::opening_brack => "`[`".to_owned(),
Expand Down Expand Up @@ -270,17 +271,18 @@ fn consume_rules_with_spans(

pairs.next().unwrap(); // assignment_operator

let ty = if pairs.peek().unwrap().as_rule() != Rule::opening_brace {
match pairs.next().unwrap().as_rule() {
let mut ty = RuleType::Normal;
while pairs.peek().unwrap().as_rule() != Rule::opening_brace {
let t = match pairs.next().unwrap().as_rule() {
Rule::silent_modifier => RuleType::Silent,
Rule::atomic_modifier => RuleType::Atomic,
Rule::compound_atomic_modifier => RuleType::CompoundAtomic,
Rule::non_atomic_modifier => RuleType::NonAtomic,
Rule::semi_atomic_modifier => RuleType::SemiAtomic,
_ => unreachable!(),
}
} else {
RuleType::Normal
};
};
ty |= t;
}

pairs.next().unwrap(); // opening_brace

Expand Down
2 changes: 2 additions & 0 deletions pest/src/parser_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ pub enum Atomicity {
Atomic,
/// The same as atomic, but inner tokens are produced as normal.
CompoundAtomic,
/// implicit separator is enabled
SemiAtomic,
/// implicit whitespace is enabled
NonAtomic,
}
Expand Down
82 changes: 57 additions & 25 deletions vm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,47 +127,79 @@ impl Vm {

if let Some(rule) = self.rules.get(rule) {
if rule.name == "WHITESPACE" || rule.name == "COMMENT" {
match rule.ty {
RuleType::Normal => state.rule(&rule.name, |state| {
if rule.ty.has(RuleType::Normal) {
return state.rule(&rule.name, |state| {
state.atomic(Atomicity::Atomic, |state| {
self.parse_expr(&rule.expr, state)
})
}),
RuleType::Silent => state.atomic(Atomicity::Atomic, |state| {
self.parse_expr(&rule.expr, state)
}),
RuleType::Atomic => state.rule(&rule.name, |state| {
});
} else if rule.ty.has(RuleType::Silent) {
if rule.ty.has(RuleType::CompoundAtomic) {
return state.atomic(Atomicity::CompoundAtomic, |state| {
self.parse_expr(&rule.expr, state)
})
}
if rule.ty.has(RuleType::SemiAtomic) {
return state.atomic(Atomicity::SemiAtomic, |state| {
self.parse_expr(&rule.expr, state)
})
}
} else if rule.ty.has(RuleType::Atomic) {
return state.rule(&rule.name, |state| {
state.atomic(Atomicity::Atomic, |state| {
self.parse_expr(&rule.expr, state)
})
}),
RuleType::CompoundAtomic => state.atomic(Atomicity::CompoundAtomic, |state| {
})
} else if rule.ty.has(RuleType::CompoundAtomic) {
return state.atomic(Atomicity::CompoundAtomic, |state| {
state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
})
} else if rule.ty.has(RuleType::SemiAtomic) {
return state.atomic(Atomicity::SemiAtomic, |state| {
state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
}),
RuleType::NonAtomic => state.atomic(Atomicity::Atomic, |state| {
})
} else if rule.ty.has(RuleType::NonAtomic) {
return state.atomic(Atomicity::Atomic, |state| {
state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
}),
})
}
return state.atomic(Atomicity::Atomic, |state| {
self.parse_expr(&rule.expr, state)
});
} else {
match rule.ty {
RuleType::Normal => {
state.rule(&rule.name, move |state| self.parse_expr(&rule.expr, state))
}
RuleType::Silent => self.parse_expr(&rule.expr, state),
RuleType::Atomic => state.rule(&rule.name, move |state| {
state.atomic(Atomicity::Atomic, move |state| {
if rule.ty.has(RuleType::Normal) {
return state.rule(&rule.name, move |state| self.parse_expr(&rule.expr, state));
} else if rule.ty.has(RuleType::Silent) {
if rule.ty.has(RuleType::CompoundAtomic) {
return state.atomic(Atomicity::CompoundAtomic, |state| {
self.parse_expr(&rule.expr, state)
})
}),
RuleType::CompoundAtomic => {
state.atomic(Atomicity::CompoundAtomic, move |state| {
state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
}
if rule.ty.has(RuleType::SemiAtomic) {
return state.atomic(Atomicity::SemiAtomic, |state| {
self.parse_expr(&rule.expr, state)
})
}
RuleType::NonAtomic => state.atomic(Atomicity::NonAtomic, move |state| {
} else if rule.ty.has(RuleType::Atomic) {
return state.rule(&rule.name, move |state| {
state.atomic(Atomicity::Atomic, move |state| {
self.parse_expr(&rule.expr, state)
})
})
} else if rule.ty.has(RuleType::CompoundAtomic) {
return state.atomic(Atomicity::CompoundAtomic, move |state| {
state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
})
} else if rule.ty.has(RuleType::SemiAtomic) {
return state.atomic(Atomicity::SemiAtomic, |state| {
state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
}),
})
} else if rule.ty.has(RuleType::NonAtomic) {
return state.atomic(Atomicity::NonAtomic, move |state| {
state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
})
}
return self.parse_expr(&rule.expr, state);
}
} else {
if let Some(property) = unicode::by_name(rule) {
Expand Down

0 comments on commit a1f19f4

Please sign in to comment.