Skip to content

Commit

Permalink
test byte matching in regexp
Browse files Browse the repository at this point in the history
  • Loading branch information
Geal committed Mar 14, 2016
1 parent 82c5c7b commit 41f87f8
Show file tree
Hide file tree
Showing 2 changed files with 318 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ regexp_macros = ["regexp", "lazy_static"]
stream = []

[dependencies.regex]
version = "^0.1.41"
version = "^0.1.56"
optional = true

[dependencies.lazy_static]
Expand Down
318 changes: 317 additions & 1 deletion src/regexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ macro_rules! regex (
);
);

/// Declares a `lazy_static!` static named `$name` that holds the byte-oriented
/// regex built from `$pattern`.
///
/// An invalid pattern makes the `unwrap()` on `Regex::new` panic.
#[doc(hidden)]
#[macro_export]
macro_rules! regex_bytes {
  ($name:ident, $pattern:expr) => {
    lazy_static! {
      static ref $name: ::regex::bytes::Regex = ::regex::bytes::Regex::new($pattern).unwrap();
    }
  };
}


/// `re_match!(regexp) => &[T] -> IResult<&[T], &[T]>`
/// Returns the whole input if a match is found
Expand Down Expand Up @@ -45,7 +55,45 @@ macro_rules! re_match_static (
$crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpMatch))
}
}
)
);

/// `re_bytes_match!(regexp) => &[T] -> IResult<&[T], &[T]>`
/// When the regular expression matches, yields the entire input as the result
/// and an empty slice as the remaining input
///
/// The regular expression is compiled each time the parser runs.
///
/// requires the `regexp` feature
#[macro_export]
macro_rules! re_bytes_match {
  ($i:expr, $re:expr) => {{
    use $crate::InputLength;
    let pattern = ::regex::bytes::Regex::new($re).unwrap();
    if !pattern.is_match($i) {
      $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpMatch))
    } else {
      // slicing from the input length leaves an empty remainder
      $crate::IResult::Done(&$i[$i.input_len()..], $i)
    }
  }};
}

#[cfg(feature = "regexp_macros")]
/// `re_bytes_match_static!(regexp) => &[T] -> IResult<&[T], &[T]>`
/// When the regular expression matches, yields the entire input as the result
/// and an empty slice as the remaining input. The regular expression lives in a
/// static declared through `regex_bytes!`
///
/// requires the `regexp_macros` feature
#[macro_export]
macro_rules! re_bytes_match_static {
  ($i:expr, $re:expr) => {{
    use $crate::InputLength;
    regex_bytes!(RE, $re);
    if !RE.is_match($i) {
      $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpMatch))
    } else {
      // slicing from the input length leaves an empty remainder
      $crate::IResult::Done(&$i[$i.input_len()..], $i)
    }
  }};
}

Expand All @@ -68,7 +116,7 @@ macro_rules! re_find (
);

#[cfg(feature = "regexp_macros")]
/// `re_find!(regexp) => &[T] -> IResult<&[T], &[T]>`
/// `re_find_static!(regexp) => &[T] -> IResult<&[T], &[T]>`
/// Returns the first match. Regular expression calculated at compile time
///
/// requires the `regexp_macros` feature
Expand All @@ -87,6 +135,44 @@ macro_rules! re_find_static (
)
);

/// `re_bytes_find!(regexp) => &[T] -> IResult<&[T], &[T]>`
/// Yields the bytes of the first match; the remaining input starts right after
/// the end of that match
///
/// The regular expression is compiled each time the parser runs.
///
/// requires the `regexp` feature
#[macro_export]
macro_rules! re_bytes_find {
  ($i:expr, $re:expr) => {{
    let pattern = ::regex::bytes::Regex::new($re).unwrap();
    match pattern.find($i) {
      Some((start, stop)) => $crate::IResult::Done(&$i[stop..], &$i[start..stop]),
      None => $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpFind)),
    }
  }};
}

#[cfg(feature = "regexp_macros")]
/// `re_bytes_find_static!(regexp) => &[T] -> IResult<&[T], &[T]>`
/// Returns the first match; the remaining input starts right after the end of
/// that match. The regular expression lives in a static declared through
/// `regex_bytes!`
///
/// requires the `regexp_macros` feature
#[macro_export]
macro_rules! re_bytes_find_static {
  ($i:expr, $re:expr) => {{
    regex_bytes!(RE, $re);
    match RE.find($i) {
      Some((begin, end)) => $crate::IResult::Done(&$i[end..], &$i[begin..end]),
      None => $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpFind)),
    }
  }};
}

/// `re_matches!(regexp) => &[T] -> IResult<&[T], Vec<&[T]>>`
/// Returns all the matched parts
///
Expand Down Expand Up @@ -134,6 +220,53 @@ macro_rules! re_matches_static (
)
);

/// `re_bytes_matches!(regexp) => &[T] -> IResult<&[T], Vec<&[T]>>`
/// Collects every match found in the input; the remaining input starts right
/// after the end of the last match. Fails with `RegexpMatches` when there is no
/// match at all
///
/// The regular expression is compiled each time the parser runs.
///
/// requires the `regexp` feature
#[macro_export]
macro_rules! re_bytes_matches {
  ($i:expr, $re:expr) => {{
    let pattern = ::regex::bytes::Regex::new($re).unwrap();
    let mut found: Vec<&[u8]> = Vec::new();
    // end index of the most recent match, i.e. where the remaining input begins
    let mut consumed = 0;
    for (begin, end) in pattern.find_iter($i) {
      found.push(&$i[begin..end]);
      consumed = end;
    }
    if found.is_empty() {
      $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpMatches))
    } else {
      $crate::IResult::Done(&$i[consumed..], found)
    }
  }};
}

#[cfg(feature = "regexp_macros")]
/// `re_bytes_matches_static!(regexp) => &[T] -> IResult<&[T], Vec<&[T]>>`
/// Collects every match found in the input; the remaining input starts right
/// after the end of the last match. Fails with `RegexpMatches` when there is no
/// match at all. The regular expression lives in a static declared through
/// `regex_bytes!`
///
/// requires the `regexp_macros` feature
#[macro_export]
macro_rules! re_bytes_matches_static {
  ($i:expr, $re:expr) => {{
    regex_bytes!(RE, $re);
    let mut found: Vec<&[u8]> = Vec::new();
    // end index of the most recent match, i.e. where the remaining input begins
    let mut consumed = 0;
    for (begin, end) in RE.find_iter($i) {
      found.push(&$i[begin..end]);
      consumed = end;
    }
    if found.is_empty() {
      $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpMatches))
    } else {
      $crate::IResult::Done(&$i[consumed..], found)
    }
  }};
}

/// `re_capture!(regexp) => &[T] -> IResult<&[T], Vec<&[T]>>`
/// Returns the first capture group
///
Expand Down Expand Up @@ -181,6 +314,53 @@ macro_rules! re_capture_static (
)
);

/// `re_bytes_capture!(regexp) => &[T] -> IResult<&[T], Vec<&[T]>>`
/// Returns the capture groups of the first match: the whole match first,
/// followed by every group that participated in it. Groups that did not
/// participate are skipped. Fails with `RegexpCapture` when nothing matches
///
/// The remaining input starts right after the end of the whole match.
/// The regular expression is compiled each time the parser runs.
///
/// requires the `regexp` feature
#[macro_export]
macro_rules! re_bytes_capture (
  ($i:expr, $re:expr) => (
    {
      let re = ::regex::bytes::Regex::new($re).unwrap();
      if let Some(c) = re.captures($i) {
        let mut v: Vec<&[u8]> = Vec::new();
        let mut offset = 0;
        for (begin, end) in c.iter_pos().filter_map(|span| span) {
          // Group 0 (the whole match) comes first and always participates.
          // Its end, not the end of the last group, marks what was consumed:
          // with `(\d+)-end` the last group stops before the match does.
          if v.is_empty() {
            offset = end;
          }
          v.push(&$i[begin..end]);
        }
        $crate::IResult::Done(&$i[offset..], v)
      } else {
        $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpCapture))
      }
    }
  )
);

#[cfg(feature = "regexp_macros")]
/// `re_bytes_capture_static!(regexp) => &[T] -> IResult<&[T], Vec<&[T]>>`
/// Returns the capture groups of the first match: the whole match first,
/// followed by every group that participated in it. Groups that did not
/// participate are skipped. Fails with `RegexpCapture` when nothing matches.
/// The regular expression lives in a static declared through `regex_bytes!`
///
/// The remaining input starts right after the end of the whole match.
///
/// requires the `regexp_macros` feature
#[macro_export]
macro_rules! re_bytes_capture_static (
  ($i:expr, $re:expr) => (
    {
      regex_bytes!(RE, $re);
      if let Some(c) = RE.captures($i) {
        let mut v: Vec<&[u8]> = Vec::new();
        let mut offset = 0;
        for (begin, end) in c.iter_pos().filter_map(|span| span) {
          // Group 0 (the whole match) comes first and always participates.
          // Its end, not the end of the last group, marks what was consumed:
          // with `(\d+)-end` the last group stops before the match does.
          if v.is_empty() {
            offset = end;
          }
          v.push(&$i[begin..end]);
        }
        $crate::IResult::Done(&$i[offset..], v)
      } else {
        $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpCapture))
      }
    }
  )
);

/// `re_captures!(regexp) => &[T] -> IResult<&[T], Vec<Vec<&[T]>>>`
/// Returns all the capture groups
///
Expand Down Expand Up @@ -228,6 +408,52 @@ macro_rules! re_captures_static (
)
);

/// `re_bytes_captures!(regexp) => &[T] -> IResult<&[T], Vec<Vec<&[T]>>>`
/// Returns the capture groups of every match. Each inner `Vec` holds the whole
/// match first, followed by every group that participated in it. Groups that
/// did not participate are skipped. Fails with `RegexpCapture` when nothing
/// matches
///
/// The remaining input starts right after the end of the last whole match.
/// The regular expression is compiled each time the parser runs.
///
/// requires the `regexp` feature
#[macro_export]
macro_rules! re_bytes_captures (
  ($i:expr, $re:expr) => (
    {
      let re = ::regex::bytes::Regex::new($re).unwrap();
      let mut v: Vec<Vec<&[u8]>> = Vec::new();
      let mut offset = 0;
      for c in re.captures_iter($i) {
        let mut groups: Vec<&[u8]> = Vec::new();
        for (begin, end) in c.iter_pos().filter_map(|span| span) {
          // Group 0 (the whole match) comes first and always participates.
          // Its end, not the end of the last group, marks what was consumed,
          // so after the loop `offset` is the end of the last whole match.
          if groups.is_empty() {
            offset = end;
          }
          groups.push(&$i[begin..end]);
        }
        v.push(groups);
      }
      if !v.is_empty() {
        $crate::IResult::Done(&$i[offset..], v)
      } else {
        $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpCapture))
      }
    }
  )
);

#[cfg(feature = "regexp_macros")]
/// `re_bytes_captures_static!(regexp) => &[T] -> IResult<&[T], Vec<Vec<&[T]>>>`
/// Returns the capture groups of every match. Each inner `Vec` holds the whole
/// match first, followed by every group that participated in it. Groups that
/// did not participate are skipped. Fails with `RegexpCapture` when nothing
/// matches. The regular expression lives in a static declared through
/// `regex_bytes!`
///
/// The remaining input starts right after the end of the last whole match.
///
/// requires the `regexp_macros` feature
#[macro_export]
macro_rules! re_bytes_captures_static (
  ($i:expr, $re:expr) => (
    {
      regex_bytes!(RE, $re);
      let mut v: Vec<Vec<&[u8]>> = Vec::new();
      let mut offset = 0;
      for c in RE.captures_iter($i) {
        let mut groups: Vec<&[u8]> = Vec::new();
        for (begin, end) in c.iter_pos().filter_map(|span| span) {
          // Group 0 (the whole match) comes first and always participates.
          // Its end, not the end of the last group, marks what was consumed,
          // so after the loop `offset` is the end of the last whole match.
          if groups.is_empty() {
            offset = end;
          }
          groups.push(&$i[begin..end]);
        }
        v.push(groups);
      }
      if !v.is_empty() {
        $crate::IResult::Done(&$i[offset..], v)
      } else {
        $crate::IResult::Error($crate::Err::Code($crate::ErrorKind::RegexpCapture))
      }
    }
  )
);
#[cfg(test)]
mod tests {
use internal::IResult::*;
Expand Down Expand Up @@ -325,4 +551,94 @@ mod tests {
]));
}

#[test]
fn re_bytes_match() {
  // `re_bytes_match!` hands back the whole input on success, leaving nothing behind.
  named!(rm, re_bytes_match!(r"^\d{4}-\d{2}-\d{2}"));

  let nothing_left = &b""[..];
  let exact_date = &b"2015-09-07"[..];
  let date_with_tail = &b"2015-09-07blah"[..];

  assert_eq!(rm(exact_date), Done(nothing_left, exact_date));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpMatch)));
  // bytes after the date are still part of the returned output
  assert_eq!(rm(date_with_tail), Done(nothing_left, date_with_tail));
}

#[cfg(feature = "regexp_macros")]
#[test]
fn re_bytes_match_static() {
  // Same expectations as `re_bytes_match`, using the static-regex macro.
  named!(rm, re_bytes_match_static!(r"^\d{4}-\d{2}-\d{2}"));

  let nothing_left = &b""[..];
  let exact_date = &b"2015-09-07"[..];
  let date_with_tail = &b"2015-09-07blah"[..];

  assert_eq!(rm(exact_date), Done(nothing_left, exact_date));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpMatch)));
  // bytes after the date are still part of the returned output
  assert_eq!(rm(date_with_tail), Done(nothing_left, date_with_tail));
}

#[test]
fn re_bytes_find() {
  // `re_bytes_find!` returns only the matched bytes and leaves the rest as remaining input.
  named!(rm, re_bytes_find!(r"^\d{4}-\d{2}-\d{2}"));

  let date = &b"2015-09-07"[..];

  assert_eq!(rm(date), Done(&b""[..], date));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpFind)));
  // the trailing "blah" is not consumed
  assert_eq!(rm(&b"2015-09-07blah"[..]), Done(&b"blah"[..], date));
}

#[cfg(feature = "regexp_macros")]
#[test]
fn re_bytes_find_static() {
  // Same expectations as `re_bytes_find`, using the static-regex macro.
  named!(rm, re_bytes_find_static!(r"^\d{4}-\d{2}-\d{2}"));

  let date = &b"2015-09-07"[..];

  assert_eq!(rm(date), Done(&b""[..], date));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpFind)));
  // the trailing "blah" is not consumed
  assert_eq!(rm(&b"2015-09-07blah"[..]), Done(&b"blah"[..], date));
}

#[test]
fn re_bytes_matches() {
  // `re_bytes_matches!` gathers every match; the remainder follows the last one.
  named!(rm<Vec<&[u8]> >, re_bytes_matches!(r"\d{4}-\d{2}-\d{2}"));

  let first_date = &b"2015-09-07"[..];
  let second_date = &b"2015-09-09"[..];

  assert_eq!(rm(first_date), Done(&b""[..], vec![first_date]));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpMatches)));
  // text before and between the dates is skipped, text after the last date remains
  assert_eq!(
    rm(&b"aaa2015-09-07blah2015-09-09pouet"[..]),
    Done(&b"pouet"[..], vec![first_date, second_date])
  );
}

#[cfg(feature = "regexp_macros")]
#[test]
fn re_bytes_matches_static() {
  // Same expectations as `re_bytes_matches`, using the static-regex macro.
  named!(rm<Vec<&[u8]> >, re_bytes_matches_static!(r"\d{4}-\d{2}-\d{2}"));

  let first_date = &b"2015-09-07"[..];
  let second_date = &b"2015-09-09"[..];

  assert_eq!(rm(first_date), Done(&b""[..], vec![first_date]));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpMatches)));
  // text before and between the dates is skipped, text after the last date remains
  assert_eq!(
    rm(&b"aaa2015-09-07blah2015-09-09pouet"[..]),
    Done(&b"pouet"[..], vec![first_date, second_date])
  );
}

#[test]
fn re_bytes_capture() {
  // `re_bytes_capture!` reports the whole first match followed by its capture groups.
  named!(rm<Vec<&[u8]> >, re_bytes_capture!(r"([:alpha:]+)\s+((\d+).(\d+).(\d+))"));

  // whole match, name, full version, then the three version components
  let nom_groups = vec![&b"nom 0.3.11"[..], &b"nom"[..], &b"0.3.11"[..], &b"0"[..], &b"3"[..], &b"11"[..]];

  assert_eq!(rm(&b"blah nom 0.3.11pouet"[..]), Done(&b"pouet"[..], nom_groups.clone()));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpCapture)));
  // only the first match is captured; the second one stays in the remaining input
  assert_eq!(rm(&b"hello nom 0.3.11 world regex 0.1.41"[..]), Done(&b" world regex 0.1.41"[..], nom_groups));
}

#[cfg(feature = "regexp_macros")]
#[test]
fn re_bytes_capture_static() {
  // Same expectations as `re_bytes_capture`, using the static-regex macro.
  named!(rm< Vec<&[u8]> >, re_bytes_capture_static!(r"([:alpha:]+)\s+((\d+).(\d+).(\d+))"));

  // whole match, name, full version, then the three version components
  let nom_groups = vec![&b"nom 0.3.11"[..], &b"nom"[..], &b"0.3.11"[..], &b"0"[..], &b"3"[..], &b"11"[..]];

  assert_eq!(rm(&b"blah nom 0.3.11pouet"[..]), Done(&b"pouet"[..], nom_groups.clone()));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpCapture)));
  // only the first match is captured; the second one stays in the remaining input
  assert_eq!(rm(&b"hello nom 0.3.11 world regex 0.1.41"[..]), Done(&b" world regex 0.1.41"[..], nom_groups));
}

#[test]
fn re_bytes_captures() {
  // `re_bytes_captures!` reports one group list per match found in the input.
  named!(rm< Vec<Vec<&[u8]>> >, re_bytes_captures!(r"([:alpha:]+)\s+((\d+).(\d+).(\d+))"));

  // each list: whole match, name, full version, then the three version components
  let nom_groups = vec![&b"nom 0.3.11"[..], &b"nom"[..], &b"0.3.11"[..], &b"0"[..], &b"3"[..], &b"11"[..]];
  let regex_groups = vec![&b"regex 0.1.41"[..], &b"regex"[..], &b"0.1.41"[..], &b"0"[..], &b"1"[..], &b"41"[..]];

  assert_eq!(rm(&b"blah nom 0.3.11pouet"[..]), Done(&b"pouet"[..], vec![nom_groups.clone()]));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpCapture)));
  // both matches are captured; the remainder follows the last one
  assert_eq!(
    rm(&b"hello nom 0.3.11 world regex 0.1.41 aaa"[..]),
    Done(&b" aaa"[..], vec![nom_groups, regex_groups])
  );
}

#[cfg(feature = "regexp_macros")]
#[test]
fn re_bytes_captures_static() {
  // Same expectations as `re_bytes_captures`, using the static-regex macro.
  named!(rm< Vec<Vec<&[u8]>> >, re_bytes_captures_static!(r"([:alpha:]+)\s+((\d+).(\d+).(\d+))"));

  // each list: whole match, name, full version, then the three version components
  let nom_groups = vec![&b"nom 0.3.11"[..], &b"nom"[..], &b"0.3.11"[..], &b"0"[..], &b"3"[..], &b"11"[..]];
  let regex_groups = vec![&b"regex 0.1.41"[..], &b"regex"[..], &b"0.1.41"[..], &b"0"[..], &b"1"[..], &b"41"[..]];

  assert_eq!(rm(&b"blah nom 0.3.11pouet"[..]), Done(&b"pouet"[..], vec![nom_groups.clone()]));
  assert_eq!(rm(&b"blah"[..]), Error(Code(ErrorKind::RegexpCapture)));
  // both matches are captured; the remainder follows the last one
  assert_eq!(
    rm(&b"hello nom 0.3.11 world regex 0.1.41 aaa"[..]),
    Done(&b" aaa"[..], vec![nom_groups, regex_groups])
  );
}
}

0 comments on commit 41f87f8

Please sign in to comment.