|
| 1 | +use crate::format::{parse_number, parse_precision}; |
| 2 | +/// Implementation of Printf-Style string formatting |
| 3 | +/// [https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting] |
| 4 | +use num_bigint::{BigInt, Sign}; |
| 5 | +use num_traits::Signed; |
| 6 | +use std::cmp; |
| 7 | +use std::str::FromStr; |
| 8 | + |
/// The kinds of failure that can occur while parsing a printf-style format string.
#[derive(Debug, PartialEq)]
pub enum CFormatErrorType {
    /// A mapping key was opened with `(` but never closed with `)`.
    UnmatchedKeyParentheses,
    /// A conversion specifier was expected, but the text does not start with `%`.
    MissingModuloSign,
    /// A `%` inside literal text is followed by a character other than `%`.
    UnescapedModuloSignInLiteral,
    /// The conversion type character (carried in the variant) is not recognised.
    UnsupportedFormatChar(char),
    /// The format string ended in the middle of a specifier or a `%` escape.
    IncompleteFormat,
    /// NOTE(review): not produced by the parsers in this file; presumably
    /// reserved for conversions that are not supported yet.
    Unimplemented,
}
| 18 | + |
| 19 | +// also contains how many chars the parsing function consumed |
| 20 | +type ParsingError = (CFormatErrorType, usize); |
| 21 | + |
| 22 | +pub struct CFormatError { |
| 23 | + pub typ: CFormatErrorType, |
| 24 | + pub index: usize, |
| 25 | +} |
| 26 | + |
/// How a value is turned into text before a string conversion is applied.
#[derive(Debug, PartialEq)]
pub enum CFormatPreconversor {
    /// Selected by the `r` conversion character.
    Repr,
    /// Selected by the `s` conversion character.
    Str,
    /// Selected by the `a` conversion character.
    Ascii,
}
| 33 | + |
/// Letter case requested by a conversion character (e.g. `x` versus `X`).
#[derive(Debug, PartialEq)]
pub enum CFormatCase {
    /// The conversion character was lowercase.
    Lowercase,
    /// The conversion character was uppercase.
    Uppercase,
}
| 39 | + |
| 40 | +#[derive(Debug, PartialEq)] |
| 41 | +pub enum CNumberType { |
| 42 | + Decimal, |
| 43 | + Octal, |
| 44 | + Hex(CFormatCase), |
| 45 | +} |
| 46 | + |
| 47 | +#[derive(Debug, PartialEq)] |
| 48 | +pub enum CFloatType { |
| 49 | + Exponent(CFormatCase), |
| 50 | + PointDecimal, |
| 51 | + General(CFormatCase), |
| 52 | +} |
| 53 | + |
| 54 | +#[derive(Debug, PartialEq)] |
| 55 | +pub enum CFormatType { |
| 56 | + Number(CNumberType), |
| 57 | + Float(CFloatType), |
| 58 | + Character, |
| 59 | + String(CFormatPreconversor), |
| 60 | +} |
| 61 | + |
| 62 | +bitflags! { |
| 63 | + pub struct CConversionFlags: u32 { |
| 64 | + const ALTERNATE_FORM = 0b0000_0001; |
| 65 | + const ZERO_PAD = 0b0000_0010; |
| 66 | + const LEFT_ADJUST = 0b0000_0100; |
| 67 | + const BLANK_SIGN = 0b0000_1000; |
| 68 | + const SIGN_CHAR = 0b0001_0000; |
| 69 | + } |
| 70 | +} |
| 71 | + |
| 72 | +#[derive(Debug, PartialEq)] |
| 73 | +pub struct CFormatSpec { |
| 74 | + pub mapping_key: Option<String>, |
| 75 | + pub flags: CConversionFlags, |
| 76 | + pub min_field_width: Option<usize>, |
| 77 | + pub precision: Option<usize>, |
| 78 | + pub format_type: CFormatType, |
| 79 | + pub format_char: char, |
| 80 | +} |
| 81 | + |
| 82 | +#[derive(Debug, PartialEq)] |
| 83 | +pub enum CFormatPart { |
| 84 | + Literal(String), |
| 85 | + Spec(CFormatSpec), |
| 86 | +} |
| 87 | + |
| 88 | +#[derive(Debug, PartialEq)] |
| 89 | +pub struct CFormatString { |
| 90 | + pub format_parts: Vec<(usize, CFormatPart)>, |
| 91 | +} |
| 92 | + |
| 93 | +impl FromStr for CFormatString { |
| 94 | + type Err = CFormatError; |
| 95 | + |
| 96 | + fn from_str(text: &str) -> Result<Self, Self::Err> { |
| 97 | + let mut cur_text: &str = text; |
| 98 | + let mut index = 0; |
| 99 | + let mut parts: Vec<(usize, CFormatPart)> = Vec::new(); |
| 100 | + while !cur_text.is_empty() { |
| 101 | + cur_text = parse_literal(cur_text) |
| 102 | + .or_else(|_| parse_specifier(cur_text)) |
| 103 | + .map(|(format_part, new_text, consumed)| { |
| 104 | + parts.push((index, format_part)); |
| 105 | + index = index + consumed; |
| 106 | + new_text |
| 107 | + }) |
| 108 | + .map_err(|(e, consumed)| { |
| 109 | + CFormatError { |
| 110 | + typ: e, |
| 111 | + index: index + consumed, |
| 112 | + } |
| 113 | + })?; |
| 114 | + } |
| 115 | + |
| 116 | + Ok(CFormatString { |
| 117 | + format_parts: parts, |
| 118 | + }) |
| 119 | + } |
| 120 | +} |
| 121 | + |
| 122 | +fn parse_literal_single(text: &str) -> Result<(char, &str), CFormatErrorType> { |
| 123 | + let mut chars = text.chars(); |
| 124 | + // TODO get rid of the unwrap |
| 125 | + let first_char = chars.next().unwrap(); |
| 126 | + if first_char == '%' { |
| 127 | + // if we see a %, it has to be escaped |
| 128 | + match chars.next() { |
| 129 | + Some(next_char) => { |
| 130 | + if next_char != first_char { |
| 131 | + Err(CFormatErrorType::UnescapedModuloSignInLiteral) |
| 132 | + } else { |
| 133 | + Ok((first_char, chars.as_str())) |
| 134 | + } |
| 135 | + } |
| 136 | + None => Err(CFormatErrorType::IncompleteFormat), |
| 137 | + } |
| 138 | + } else { |
| 139 | + Ok((first_char, chars.as_str())) |
| 140 | + } |
| 141 | +} |
| 142 | + |
| 143 | +fn parse_literal(text: &str) -> Result<(CFormatPart, &str, usize), ParsingError> { |
| 144 | + let mut cur_text = text; |
| 145 | + let mut result_string = String::new(); |
| 146 | + let mut consumed = 0; |
| 147 | + while !cur_text.is_empty() { |
| 148 | + match parse_literal_single(cur_text) { |
| 149 | + Ok((next_char, remaining)) => { |
| 150 | + result_string.push(next_char); |
| 151 | + consumed = consumed + 1; |
| 152 | + cur_text = remaining; |
| 153 | + } |
| 154 | + Err(err) => { |
| 155 | + if !result_string.is_empty() { |
| 156 | + return Ok(( |
| 157 | + CFormatPart::Literal(result_string.to_string()), |
| 158 | + cur_text, |
| 159 | + consumed, |
| 160 | + )); |
| 161 | + } else { |
| 162 | + return Err((err, consumed)); |
| 163 | + } |
| 164 | + } |
| 165 | + } |
| 166 | + } |
| 167 | + Ok(( |
| 168 | + CFormatPart::Literal(result_string.to_string()), |
| 169 | + "", |
| 170 | + text.len(), |
| 171 | + )) |
| 172 | +} |
| 173 | + |
| 174 | +fn parse_spec_mapping_key(text: &str) -> Result<(Option<String>, &str), CFormatErrorType> { |
| 175 | + let mut chars = text.chars(); |
| 176 | + |
| 177 | + let next_char = chars.next(); |
| 178 | + if next_char == Some('(') { |
| 179 | + // Get remaining characters after opening parentheses. |
| 180 | + let cur_text = chars.as_str(); |
| 181 | + match cur_text.find(')') { |
| 182 | + Some(position) => { |
| 183 | + let (left, right) = cur_text.split_at(position); |
| 184 | + |
| 185 | + Ok((Some(left.to_string()), &right[1..])) |
| 186 | + } |
| 187 | + None => Err(CFormatErrorType::UnmatchedKeyParentheses), |
| 188 | + } |
| 189 | + } else { |
| 190 | + Ok((None, text)) |
| 191 | + } |
| 192 | +} |
| 193 | + |
| 194 | +fn parse_flag_single(text: &str) -> (Option<CConversionFlags>, &str) { |
| 195 | + let mut chars = text.chars(); |
| 196 | + match chars.next() { |
| 197 | + Some('#') => (Some(CConversionFlags::ALTERNATE_FORM), chars.as_str()), |
| 198 | + Some('0') => (Some(CConversionFlags::ZERO_PAD), chars.as_str()), |
| 199 | + Some('-') => (Some(CConversionFlags::LEFT_ADJUST), chars.as_str()), |
| 200 | + Some(' ') => (Some(CConversionFlags::BLANK_SIGN), chars.as_str()), |
| 201 | + Some('+') => (Some(CConversionFlags::SIGN_CHAR), chars.as_str()), |
| 202 | + _ => (None, text), |
| 203 | + } |
| 204 | +} |
| 205 | + |
| 206 | +fn parse_flags(text: &str) -> (CConversionFlags, &str) { |
| 207 | + let mut flags = CConversionFlags::empty(); |
| 208 | + let mut cur_text = text; |
| 209 | + while !cur_text.is_empty() { |
| 210 | + match parse_flag_single(cur_text) { |
| 211 | + (Some(flag), text) => { |
| 212 | + flags |= flag; |
| 213 | + cur_text = text; |
| 214 | + } |
| 215 | + |
| 216 | + (None, text) => { |
| 217 | + return (flags, text); |
| 218 | + } |
| 219 | + } |
| 220 | + } |
| 221 | + |
| 222 | + (flags, "") |
| 223 | +} |
| 224 | + |
/// Skips a single C length modifier (`h`, `l` or `L`) at the start of `text`.
///
/// Returns the text after the modifier, or `text` unchanged when it does not
/// start with one. At most one character is skipped.
fn consume_length(text: &str) -> &str {
    let has_modifier = text.starts_with(|c: char| c == 'h' || c == 'l' || c == 'L');
    if has_modifier {
        // All three modifiers are single-byte ASCII characters.
        &text[1..]
    } else {
        text
    }
}
| 232 | + |
| 233 | +fn parse_format_type(text: &str) -> Result<(CFormatType, &str, char), CFormatErrorType> { |
| 234 | + use CFloatType::*; |
| 235 | + use CFormatCase::{Lowercase, Uppercase}; |
| 236 | + use CNumberType::*; |
| 237 | + let mut chars = text.chars(); |
| 238 | + let next_char = chars.next(); |
| 239 | + match next_char { |
| 240 | + Some('d') | Some('i') | Some('u') => Ok(( |
| 241 | + CFormatType::Number(Decimal), |
| 242 | + chars.as_str(), |
| 243 | + next_char.unwrap(), |
| 244 | + )), |
| 245 | + Some('o') => Ok(( |
| 246 | + CFormatType::Number(Octal), |
| 247 | + chars.as_str(), |
| 248 | + next_char.unwrap(), |
| 249 | + )), |
| 250 | + Some('x') => Ok(( |
| 251 | + CFormatType::Number(Hex(Lowercase)), |
| 252 | + chars.as_str(), |
| 253 | + next_char.unwrap(), |
| 254 | + )), |
| 255 | + Some('X') => Ok(( |
| 256 | + CFormatType::Number(Hex(Uppercase)), |
| 257 | + chars.as_str(), |
| 258 | + next_char.unwrap(), |
| 259 | + )), |
| 260 | + Some('e') => Ok(( |
| 261 | + CFormatType::Float(Exponent(Lowercase)), |
| 262 | + chars.as_str(), |
| 263 | + next_char.unwrap(), |
| 264 | + )), |
| 265 | + Some('E') => Ok(( |
| 266 | + CFormatType::Float(Exponent(Uppercase)), |
| 267 | + chars.as_str(), |
| 268 | + next_char.unwrap(), |
| 269 | + )), |
| 270 | + Some('f') => Ok(( |
| 271 | + CFormatType::Float(PointDecimal), |
| 272 | + chars.as_str(), |
| 273 | + next_char.unwrap(), |
| 274 | + )), |
| 275 | + Some('F') => Ok(( |
| 276 | + CFormatType::Float(PointDecimal), |
| 277 | + chars.as_str(), |
| 278 | + next_char.unwrap(), |
| 279 | + )), |
| 280 | + Some('g') => Ok(( |
| 281 | + CFormatType::Float(General(Lowercase)), |
| 282 | + text, |
| 283 | + next_char.unwrap(), |
| 284 | + )), |
| 285 | + Some('G') => Ok(( |
| 286 | + CFormatType::Float(General(Uppercase)), |
| 287 | + text, |
| 288 | + next_char.unwrap(), |
| 289 | + )), |
| 290 | + Some('c') => Ok((CFormatType::Character, chars.as_str(), next_char.unwrap())), |
| 291 | + Some('r') => Ok(( |
| 292 | + CFormatType::String(CFormatPreconversor::Repr), |
| 293 | + chars.as_str(), |
| 294 | + next_char.unwrap(), |
| 295 | + )), |
| 296 | + Some('s') => Ok(( |
| 297 | + CFormatType::String(CFormatPreconversor::Str), |
| 298 | + chars.as_str(), |
| 299 | + next_char.unwrap(), |
| 300 | + )), |
| 301 | + Some('a') => Ok(( |
| 302 | + CFormatType::String(CFormatPreconversor::Ascii), |
| 303 | + chars.as_str(), |
| 304 | + next_char.unwrap(), |
| 305 | + )), |
| 306 | + Some(c) => Err(CFormatErrorType::UnsupportedFormatChar(c)), |
| 307 | + None => Err(CFormatErrorType::IncompleteFormat), // should not happen because it is handled earlier in the parsing |
| 308 | + } |
| 309 | +} |
| 310 | + |
| 311 | +fn parse_specifier(text: &str) -> Result<(CFormatPart, &str, usize), ParsingError> { |
| 312 | + let consumed = 0; |
| 313 | + let mut chars = text.chars(); |
| 314 | + if chars.next() != Some('%') { |
| 315 | + return Err((CFormatErrorType::MissingModuloSign, consumed)); |
| 316 | + } |
| 317 | + let consumed = consumed + 1; |
| 318 | + |
| 319 | + let (mapping_key, after_mapping_key) = |
| 320 | + parse_spec_mapping_key(chars.as_str()).map_err(|err| (err, consumed))?; |
| 321 | + let consumed = text.find(after_mapping_key).unwrap(); |
| 322 | + let (flags, after_flags) = parse_flags(after_mapping_key); |
| 323 | + let (width, after_width) = parse_number(after_flags); |
| 324 | + let (precision, after_precision) = parse_precision(after_width); |
| 325 | + // A length modifier (h, l, or L) may be present, |
| 326 | + // but is ignored as it is not necessary for Python – so e.g. %ld is identical to %d. |
| 327 | + let after_length = consume_length(after_precision); |
| 328 | + let (format_type, remaining_text, format_char) = |
| 329 | + parse_format_type(after_length).map_err(|err| (err, consumed))?; |
| 330 | + let consumed = text.find(remaining_text).unwrap(); |
| 331 | + |
| 332 | + // apply default precision for float types |
| 333 | + let precision = match precision { |
| 334 | + Some(precision) => Some(precision), |
| 335 | + None => match format_type { |
| 336 | + CFormatType::Float(_) => Some(6), |
| 337 | + _ => None, |
| 338 | + }, |
| 339 | + }; |
| 340 | + |
| 341 | + Ok(( |
| 342 | + CFormatPart::Spec(CFormatSpec { |
| 343 | + mapping_key: mapping_key, |
| 344 | + flags: flags, |
| 345 | + min_field_width: width, |
| 346 | + precision: precision, |
| 347 | + format_type: format_type, |
| 348 | + format_char: format_char, |
| 349 | + }), |
| 350 | + remaining_text, |
| 351 | + consumed, |
| 352 | + )) |
| 353 | +} |
0 commit comments