Skip to content

Commit

Permalink
feat: use build script and replace String with &str
Browse files Browse the repository at this point in the history
  • Loading branch information
swanandx committed Aug 9, 2022
1 parent ec74e3e commit 5c7ad31
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 40 deletions.
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ assert_cmd = "2.0"
assert_fs = "1.0"
predicates = "2.1"

[build-dependencies]
uneval = "0.2.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
fancy-regex = "0.10"

[profile.release]
lto = "fat"
panic = "abort"
Expand Down
52 changes: 52 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use std::{env, fs, path::Path};

use fancy_regex::Regex;
use serde::Deserialize;

/// Build-time mirror of one entry of `src/data/regex.json`.
///
/// `main` deserializes the JSON into a `Vec<Data>`, fills in `boundaryless`,
/// and then writes the `Debug` representation of that vector out as Rust
/// source. The struct name, field names and field order below therefore end
/// up verbatim in the generated `regex_data.rs`.
// The fields are never read directly in this script, only through the derived
// `Debug` impl; presumably that is why the dead-code lint is silenced here.
#[allow(dead_code)]
#[derive(Deserialize, Debug)]
struct Data {
// Read from the JSON key "Name".
#[serde(rename(deserialize = "Name"))]
name: String,
// Read from the JSON key "Regex"; the pattern as written in the data file.
#[serde(rename(deserialize = "Regex"))]
regex: String,
// Not present in the JSON: left at its default (empty string) by serde and
// computed in `main` from `regex` by stripping `^` / `$` anchors.
#[serde(skip_deserializing)]
boundaryless: String,
// No rename: read from the JSON key "plural_name" as-is.
plural_name: bool,
// Read from the JSON key "Description".
// NOTE(review): `&'static str` fields borrow straight from the JSON text
// (which is `'static` because `main` loads it with `include_str!`).
// serde_json can only hand out a borrowed `&str` when the JSON string
// contains no escape sequences - confirm the data never needs them.
#[serde(rename(deserialize = "Description"))]
description: Option<&'static str>,
// Read from the JSON key "Rarity".
#[serde(rename(deserialize = "Rarity"))]
rarity: f32,
// Read from the JSON key "URL".
#[serde(rename(deserialize = "URL"))]
url: Option<&'static str>,
// Read from the JSON key "Tags". Printed by `Debug` as `tags: [...]`, which
// `main` rewrites to `tags: &[...]` in the generated source.
#[serde(rename(deserialize = "Tags"))]
tags: Vec<&'static str>,
}

/// Build script entry point: turns `src/data/regex.json` into Rust source.
///
/// Steps:
/// 1. Parse the JSON (embedded with `include_str!`) into a `Vec<Data>`.
/// 2. For every entry, derive `boundaryless` from `regex` by removing a `^`
///    and a `$` anchor that is not escaped and not inside a character class.
/// 3. Write `const DATA: [Data; N] = [...]` to `$OUT_DIR/regex_data.rs`, using
///    the `Debug` output of the vector as the array literal.
///
/// # Panics
///
/// Panics (failing the build) if the JSON cannot be parsed, if `OUT_DIR` is
/// not set, or if the output file cannot be written.
fn main() {
    // The generated file depends only on these two inputs. Without these
    // lines Cargo re-runs the script whenever any file in the package changes.
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rerun-if-changed=src/data/regex.json");

    let mut data: Vec<Data> = serde_json::from_str(include_str!("./src/data/regex.json"))
        .expect("src/data/regex.json does not match the expected schema");

    // Compile the two anchor-matching patterns once instead of once per entry.
    // Each matches an anchor that is not preceded by a backslash and is not
    // followed by a closing `]` of a character class it would sit inside.
    let start_anchor = Regex::new(r"(?<!\\)\^(?![^\[\]]*(?<!\\)\])")
        .expect("can't compile for boundaryless");
    let end_anchor = Regex::new(r"(?<!\\)\$(?![^\[\]]*(?<!\\)\])")
        .expect("can't compile for boundaryless");

    for d in &mut data {
        // `replace` removes only the first match, same as the original code.
        let without_start = start_anchor.replace(&d.regex, "");
        d.boundaryless = end_anchor.replace(&without_start, "").into_owned();
    }

    let mut out_data_str = format!("{:?}", data);

    // we want reference to [], i.e. &[]
    out_data_str = out_data_str.replace("tags: [", "tags: &[");

    let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR is set by cargo for build scripts");
    let dest_path = Path::new(&out_dir).join("regex_data.rs");
    fs::write(
        &dest_path,
        format!("const DATA: [Data; {}] = {};", data.len(), out_data_str),
    )
    .expect("failed to write regex_data.rs into OUT_DIR");
}
36 changes: 10 additions & 26 deletions src/identifier/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ use regex::Regex;
use crate::Data;
use crate::Match;

static DATA: Lazy<Vec<Data>> = Lazy::new(load_data);
static REGEX_DATA: Lazy<Vec<RegexData>> = Lazy::new(build_regexes);
static BOUNDARYLESS_REGEX_DATA: Lazy<Vec<RegexData>> = Lazy::new(build_boundaryless_regexes);

// this is DATA
include!(concat!(env!("OUT_DIR"), "/regex_data.rs"));

struct RegexData {
compiled_regex: Regex,
data: Data,
Expand Down Expand Up @@ -299,24 +301,15 @@ fn read_file_to_strings(filename: &str) -> Vec<String> {
printable_text
}

fn load_data() -> Vec<Data> {
// include_str! will include the data in binary
// so we don't have to keep track of JSON file all the time after compiling the binary
const DATA: &str = include_str!("../data/regex.json");
serde_json::from_str::<Vec<Data>>(DATA).expect("Failed to parse JSON")
}

fn build_regexes() -> Vec<RegexData> {
let mut regexes: Vec<RegexData> = Vec::new();

for data in DATA.iter() {
// Some regex from pywhat's regex.json might not work with fancy_regex
// So we are just considering the ones which are valid.
let result = Regex::new(&data.regex);
let result = Regex::new(data.regex); //call .unwrap() here if you want to see which regexes fail
if let Ok(result) = result {
regexes.push(RegexData::new(result, data.to_owned()))
} else {
println!("Data: {}\n{result:#?}", data.name);
}
}
regexes
Expand All @@ -325,21 +318,12 @@ fn build_regexes() -> Vec<RegexData> {
fn build_boundaryless_regexes() -> Vec<RegexData> {
let mut regexes: Vec<RegexData> = Vec::new();

for mut data in DATA.iter().cloned() {
data.regex = Regex::new(r"(?<!\\)\^(?![^\[\]]*(?<!\\)\])")
.expect("can't compile for boundaryless")
.replace(&data.regex, "")
.to_string();
data.regex = Regex::new(r"(?<!\\)\$(?![^\[\]]*(?<!\\)\])")
.expect("can't compile for boundaryless")
.replace(&data.regex, "")
.to_string();
for data in DATA.iter() {
// Some regex from pywhat's regex.json might not work with fancy_regex
// So we are just considering the ones which are valid.
if let Ok(result) = Regex::new(&data.regex) {
regexes.push(RegexData::new(result, data))
} else {
panic!("Can't compile {data:#?}");
let result = Regex::new(data.boundaryless); //call .unwrap() here if you want to see which regexes fail
if let Ok(result) = result {
regexes.push(RegexData::new(result, data.to_owned()))
}
}
regexes
Expand All @@ -356,14 +340,14 @@ fn is_valid_filter(configs: &Identifier, regex_data: &RegexData) -> bool {
if configs
.tags
.iter()
.any(|y| !regex_data.data.tags.contains(y))
.any(|y| !regex_data.data.tags.iter().any(|x| x == y))
{
return false;
}
if configs
.exclude_tags
.iter()
.any(|y| regex_data.data.tags.contains(y))
.any(|y| regex_data.data.tags.iter().any(|x| x == y))
{
return false;
}
Expand Down
21 changes: 8 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
pub mod identifier;
pub use self::identifier::Identifier;

use serde::{Deserialize, Serialize};
use serde::Serialize;

#[cfg(not(target_arch = "wasm32"))]
#[cfg(feature = "cli")]
Expand All @@ -48,21 +48,16 @@ pub use self::output::PrintMode;

// TODO: Try not to use String
/// structure holding one regex entry; the entries are generated from the JSON file at build time.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[derive(Serialize, Debug, Clone)]
pub struct Data {
#[serde(rename = "Name")]
pub name: String,
#[serde(rename = "Regex")]
pub regex: String,
pub name: &'static str,
pub regex: &'static str,
boundaryless: &'static str,
pub plural_name: bool,
#[serde(rename = "Description")]
pub description: Option<String>,
#[serde(rename = "Rarity")]
pub description: Option<&'static str>,
pub rarity: f32,
#[serde(rename = "URL")]
pub url: Option<String>,
#[serde(rename = "Tags")]
pub tags: Vec<String>,
pub url: Option<&'static str>,
pub tags: &'static [&'static str],
}

/// structure containing the text and its possible identification.
Expand Down
2 changes: 1 addition & 1 deletion src/output/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ fn pretty_print(result: &[Match], output_format: PrintMode) {
result.iter().for_each(|item| {
let description = match (&item.data.description, &item.data.url) {
(Some(des), Some(url)) => format!("{des}\n Check URL: {url}{}", &item.text),
(Some(des), None) => des.to_owned(),
(Some(des), None) => des.to_string(),
(None, Some(url)) => format!("URL:\n {url}{}", &item.text),
(None, None) => "None".to_owned(),
};
Expand Down

0 comments on commit 5c7ad31

Please sign in to comment.