Merge pull request RustPython#2316 from ChJR/feature/format_float

coolreader18 · web-flow · commit f48e547981e4 · 2020-11-21T17:40:46.000-06:00
Unify float formatting
diff --git a/common/src/float_ops.rs b/common/src/float_ops.rs
@@ -83,6 +83,96 @@ pub fn is_integer(v: f64) -> bool {
     (v - v.round()).abs() < std::f64::EPSILON
 }
 
+/// Letter case for the alphabetic parts of a formatted float: the
+/// `nan`/`inf` literals and the exponent marker (`e`/`E`).
+#[derive(Debug)]
+pub enum Case {
+    /// Lowercase output (`nan`, `inf`, `e`).
+    Lower,
+    /// Uppercase output (`NAN`, `INF`, `E`).
+    Upper,
+}
+
+/// Returns the textual form of NaN in the requested case: `"nan"` for
+/// `Case::Lower`, `"NAN"` for `Case::Upper`.
+fn format_nan(case: Case) -> String {
+    let nan = match case {
+        Case::Lower => "nan",
+        Case::Upper => "NAN",
+    };
+
+    nan.to_string()
+}
+
+/// Returns the textual form of infinity in the requested case: `"inf"` for
+/// `Case::Lower`, `"INF"` for `Case::Upper`. No sign is included.
+fn format_inf(case: Case) -> String {
+    let inf = match case {
+        Case::Lower => "inf",
+        Case::Upper => "INF",
+    };
+
+    inf.to_string()
+}
+
+/// Formats `magnitude` in fixed-point notation with `precision` digits after
+/// the decimal point.
+///
+/// NaN and infinity are rendered as the `nan`/`inf` literals in the given
+/// `case`; no sign is emitted for infinity. `case` has no effect on finite
+/// values.
+pub fn format_fixed(precision: usize, magnitude: f64, case: Case) -> String {
+    match magnitude {
+        magnitude if magnitude.is_finite() => format!("{:.*}", precision, magnitude),
+        magnitude if magnitude.is_nan() => format_nan(case),
+        magnitude if magnitude.is_infinite() => format_inf(case),
+        // Every f64 is finite, NaN or infinite, so this arm is never taken; it
+        // is needed only because guarded arms do not make the match exhaustive.
+        _ => "".to_string(),
+    }
+}
+
+/// Formats `magnitude` in Python-style exponent notation with `precision`
+/// digits after the decimal point of the mantissa.
+///
+/// The value is first formatted in Rust-style exponent notation (`1.0000e0`)
+/// and then converted to Python style (`1.0000e+00`): the exponent always
+/// carries a sign and is zero-padded to at least two digits. NaN and infinity
+/// are rendered as the `nan`/`inf` literals in the given `case`.
+pub fn format_exponent(precision: usize, magnitude: f64, case: Case) -> String {
+    match magnitude {
+        magnitude if magnitude.is_finite() => {
+            let r_exp = format!("{:.*e}", precision, magnitude);
+            // Split the Rust rendering into mantissa and exponent. The unwraps
+            // rely on `{:e}` output of a finite float containing an `e`
+            // followed by an integer exponent.
+            let mut parts = r_exp.splitn(2, 'e');
+            let base = parts.next().unwrap();
+            let exponent = parts.next().unwrap().parse::<i64>().unwrap();
+            let e = match case {
+                Case::Lower => 'e',
+                Case::Upper => 'E',
+            };
+            // `+` forces the sign and `03` zero-pads to a total width of three
+            // characters, sign included (`+00`, `-05`, `+123`).
+            format!("{}{}{:+#03}", base, e, exponent)
+        }
+        magnitude if magnitude.is_nan() => format_nan(case),
+        magnitude if magnitude.is_infinite() => format_inf(case),
+        // Not taken in practice: every f64 is finite, NaN or infinite.
+        _ => "".to_string(),
+    }
+}
+
+/// Strips insignificant trailing zeros from the fractional part of a decimal
+/// string, along with the decimal point if nothing remains after it.
+///
+/// Strings without a `.` are returned unchanged, so integer digits are never
+/// touched: `"100.000"` becomes `"100"` (not `"1"`), `"0.000"` becomes `"0"`
+/// (not `""`), and `"1234560"` is left as is.
+fn remove_trailing_zeros(s: String) -> String {
+    let mut s = s;
+    if s.contains('.') {
+        // Zeros first, then the point: once the point is gone any remaining
+        // zeros belong to the integer part and must be kept.
+        let kept = s.trim_end_matches('0').trim_end_matches('.').len();
+        s.truncate(kept);
+    }
+
+    s
+}
+
+/// Formats `magnitude` in Python's "general" (`g`/`G`) style with `precision`
+/// significant digits.
+///
+/// The decimal exponent is read from a Rust exponent-notation rendering with
+/// `precision - 1` mantissa decimals. If that exponent is below -4 or not
+/// less than `precision`, exponent notation is produced; otherwise
+/// fixed-point notation with `precision - 1 - exponent` decimals. In both
+/// cases the digits are passed through `remove_trailing_zeros`. NaN and
+/// infinity are rendered as the `nan`/`inf` literals in the given `case`.
+pub fn format_general(precision: usize, magnitude: f64, case: Case) -> String {
+    match magnitude {
+        magnitude if magnitude.is_finite() => {
+            // saturating_sub keeps a `precision` of 0 from underflowing.
+            let r_exp = format!("{:.*e}", precision.saturating_sub(1), magnitude);
+            let mut parts = r_exp.splitn(2, 'e');
+            let base = parts.next().unwrap();
+            let exponent = parts.next().unwrap().parse::<i64>().unwrap();
+            if exponent < -4 || exponent >= (precision as i64) {
+                let e = match case {
+                    Case::Lower => 'e',
+                    Case::Upper => 'E',
+                };
+
+                // `base` is a &str here, so `{:.*}` truncates it to at most
+                // `precision + 1` characters rather than rounding a number.
+                let base = remove_trailing_zeros(format!("{:.*}", precision + 1, base));
+                format!("{}{}{:+#03}", base, e, exponent)
+            } else {
+                // In this branch -4 <= exponent < precision, so the number of
+                // decimals is non-negative and the cast below cannot wrap.
+                let precision = (precision as i64) - 1 - exponent;
+                let precision = precision as usize;
+                remove_trailing_zeros(format!("{:.*}", precision, magnitude))
+            }
+        }
+        magnitude if magnitude.is_nan() => format_nan(case),
+        magnitude if magnitude.is_infinite() => format_inf(case),
+        // Not taken in practice: every f64 is finite, NaN or infinite.
+        _ => "".to_string(),
+    }
+}
+
 pub fn to_string(value: f64) -> String {
     let lit = format!("{:e}", value);
     if let Some(position) = lit.find('e') {
diff --git a/extra_tests/snippets/strings.py b/extra_tests/snippets/strings.py
@@ -232,6 +232,73 @@ def __repr__(self):
 assert "%e" % 0.1 == '1.000000e-01'
 assert "%e" % 10 == '1.000000e+01'
 assert "%.10e" % 1.2345678901234567890 == '1.2345678901e+00'
+assert '%e' % float('nan') == 'nan'
+assert '%e' % float('-nan') == 'nan'
+assert '%E' % float('nan') == 'NAN'
+assert '%e' % float('inf') == 'inf'
+assert '%e' % float('-inf') == '-inf'
+assert '%E' % float('inf') == 'INF'
+assert "%g" % 123456.78901234567890 == '123457'
+assert "%.0g" % 123456.78901234567890 == '1e+05'
+assert "%.1g" % 123456.78901234567890 == '1e+05'
+assert "%.2g" % 123456.78901234567890 == '1.2e+05'
+assert "%g" % 1234567.8901234567890 == '1.23457e+06'
+assert "%.0g" % 1234567.8901234567890 == '1e+06'
+assert "%.1g" % 1234567.8901234567890 == '1e+06'
+assert "%.2g" % 1234567.8901234567890 == '1.2e+06'
+assert "%.3g" % 1234567.8901234567890 == '1.23e+06'
+assert "%.5g" % 1234567.8901234567890 == '1.2346e+06'
+assert "%.6g" % 1234567.8901234567890 == '1.23457e+06'
+assert "%.7g" % 1234567.8901234567890 == '1234568'
+assert "%.8g" % 1234567.8901234567890 == '1234567.9'
+assert "%G" % 123456.78901234567890 == '123457'
+assert "%.0G" % 123456.78901234567890 == '1E+05'
+assert "%.1G" % 123456.78901234567890 == '1E+05'
+assert "%.2G" % 123456.78901234567890 == '1.2E+05'
+assert "%G" % 1234567.8901234567890 == '1.23457E+06'
+assert "%.0G" % 1234567.8901234567890 == '1E+06'
+assert "%.1G" % 1234567.8901234567890 == '1E+06'
+assert "%.2G" % 1234567.8901234567890 == '1.2E+06'
+assert "%.3G" % 1234567.8901234567890 == '1.23E+06'
+assert "%.5G" % 1234567.8901234567890 == '1.2346E+06'
+assert "%.6G" % 1234567.8901234567890 == '1.23457E+06'
+assert "%.7G" % 1234567.8901234567890 == '1234568'
+assert "%.8G" % 1234567.8901234567890 == '1234567.9'
+assert '%g' % 0.12345678901234567890 == '0.123457'
+assert '%g' % 0.12345678901234567890e-1 == '0.0123457'
+assert '%g' % 0.12345678901234567890e-2 == '0.00123457'
+assert '%g' % 0.12345678901234567890e-3 == '0.000123457'
+assert '%g' % 0.12345678901234567890e-4 == '1.23457e-05'
+assert '%g' % 0.12345678901234567890e-5 == '1.23457e-06'
+assert '%.6g' % 0.12345678901234567890e-5 == '1.23457e-06'
+assert '%.10g' % 0.12345678901234567890e-5 == '1.23456789e-06'
+assert '%.20g' % 0.12345678901234567890e-5 == '1.2345678901234567384e-06'
+assert '%G' % 0.12345678901234567890 == '0.123457'
+assert '%G' % 0.12345678901234567890E-1 == '0.0123457'
+assert '%G' % 0.12345678901234567890E-2 == '0.00123457'
+assert '%G' % 0.12345678901234567890E-3 == '0.000123457'
+assert '%G' % 0.12345678901234567890E-4 == '1.23457E-05'
+assert '%G' % 0.12345678901234567890E-5 == '1.23457E-06'
+assert '%.6G' % 0.12345678901234567890E-5 == '1.23457E-06'
+assert '%.10G' % 0.12345678901234567890E-5 == '1.23456789E-06'
+assert '%.20G' % 0.12345678901234567890E-5 == '1.2345678901234567384E-06'
+assert '%g' % float('nan') == 'nan'
+assert '%g' % float('-nan') == 'nan'
+assert '%G' % float('nan') == 'NAN'
+assert '%g' % float('inf') == 'inf'
+assert '%g' % float('-inf') == '-inf'
+assert '%G' % float('inf') == 'INF'
+assert "%.0g" % 1.020e-13 == '1e-13'
+assert "%.0g" % 1.020e-13 == '1e-13'
+assert "%.1g" % 1.020e-13 == '1e-13'
+assert "%.2g" % 1.020e-13 == '1e-13'
+assert "%.3g" % 1.020e-13 == '1.02e-13'
+assert "%.4g" % 1.020e-13 == '1.02e-13'
+assert "%.5g" % 1.020e-13 == '1.02e-13'
+assert "%.6g" % 1.020e-13 == '1.02e-13'
+assert "%.7g" % 1.020e-13 == '1.02e-13'
+assert "%g" % 1.020e-13 == '1.02e-13'
+assert "%g" % 1.020e-4 == '0.000102'
 
 assert_raises(TypeError, lambda: "My name is %s and I'm %(age)d years old" % ("Foo", 25), _msg='format requires a mapping')
 assert_raises(TypeError, lambda: "My name is %(name)s" % "Foo", _msg='format requires a mapping')
@@ -477,6 +544,68 @@ def try_mutate_str():
 assert '{:e}'.format(float('-inf')) == '-inf'
 assert '{:E}'.format(float('inf')) == 'INF'
 
+# Test g & G formatting
+assert '{:g}'.format(123456.78901234567890) == '123457'
+assert '{:.0g}'.format(123456.78901234567890) == '1e+05'
+assert '{:.1g}'.format(123456.78901234567890) == '1e+05'
+assert '{:.2g}'.format(123456.78901234567890) == '1.2e+05'
+assert '{:g}'.format(1234567.8901234567890) == '1.23457e+06'
+assert '{:.0g}'.format(1234567.8901234567890) == '1e+06'
+assert '{:.1g}'.format(1234567.8901234567890) == '1e+06'
+assert '{:.2g}'.format(1234567.8901234567890) == '1.2e+06'
+assert '{:.3g}'.format(1234567.8901234567890) == '1.23e+06'
+assert '{:.5g}'.format(1234567.8901234567890) == '1.2346e+06'
+assert '{:.6g}'.format(1234567.8901234567890) == '1.23457e+06'
+assert '{:.7g}'.format(1234567.8901234567890) == '1234568'
+assert '{:.8g}'.format(1234567.8901234567890) == '1234567.9'
+assert '{:G}'.format(123456.78901234567890) == '123457'
+assert '{:.0G}'.format(123456.78901234567890) == '1E+05'
+assert '{:.1G}'.format(123456.78901234567890) == '1E+05'
+assert '{:.2G}'.format(123456.78901234567890) == '1.2E+05'
+assert '{:G}'.format(1234567.8901234567890) == '1.23457E+06'
+assert '{:.0G}'.format(1234567.8901234567890) == '1E+06'
+assert '{:.1G}'.format(1234567.8901234567890) == '1E+06'
+assert '{:.2G}'.format(1234567.8901234567890) == '1.2E+06'
+assert '{:.3G}'.format(1234567.8901234567890) == '1.23E+06'
+assert '{:.5G}'.format(1234567.8901234567890) == '1.2346E+06'
+assert '{:.6G}'.format(1234567.8901234567890) == '1.23457E+06'
+assert '{:.7G}'.format(1234567.8901234567890) == '1234568'
+assert '{:.8G}'.format(1234567.8901234567890) == '1234567.9'
+assert '{:g}'.format(0.12345678901234567890) == '0.123457'
+assert '{:g}'.format(0.12345678901234567890e-1) == '0.0123457'
+assert '{:g}'.format(0.12345678901234567890e-2) == '0.00123457'
+assert '{:g}'.format(0.12345678901234567890e-3) == '0.000123457'
+assert '{:g}'.format(0.12345678901234567890e-4) == '1.23457e-05'
+assert '{:g}'.format(0.12345678901234567890e-5) == '1.23457e-06'
+assert '{:.6g}'.format(0.12345678901234567890e-5) == '1.23457e-06'
+assert '{:.10g}'.format(0.12345678901234567890e-5) == '1.23456789e-06'
+assert '{:.20g}'.format(0.12345678901234567890e-5) == '1.2345678901234567384e-06'
+assert '{:G}'.format(0.12345678901234567890) == '0.123457'
+assert '{:G}'.format(0.12345678901234567890E-1) == '0.0123457'
+assert '{:G}'.format(0.12345678901234567890E-2) == '0.00123457'
+assert '{:G}'.format(0.12345678901234567890E-3) == '0.000123457'
+assert '{:G}'.format(0.12345678901234567890E-4) == '1.23457E-05'
+assert '{:G}'.format(0.12345678901234567890E-5) == '1.23457E-06'
+assert '{:.6G}'.format(0.12345678901234567890E-5) == '1.23457E-06'
+assert '{:.10G}'.format(0.12345678901234567890E-5) == '1.23456789E-06'
+assert '{:.20G}'.format(0.12345678901234567890E-5) == '1.2345678901234567384E-06'
+assert '{:g}'.format(float('nan')) == 'nan'
+assert '{:g}'.format(float('-nan')) == 'nan'
+assert '{:G}'.format(float('nan')) == 'NAN'
+assert '{:g}'.format(float('inf')) == 'inf'
+assert '{:g}'.format(float('-inf')) == '-inf'
+assert '{:G}'.format(float('inf')) == 'INF'
+assert '{:.0g}'.format(1.020e-13) == '1e-13'
+assert '{:.0g}'.format(1.020e-13) == '1e-13'
+assert '{:.1g}'.format(1.020e-13) == '1e-13'
+assert '{:.2g}'.format(1.020e-13) == '1e-13'
+assert '{:.3g}'.format(1.020e-13) == '1.02e-13'
+assert '{:.4g}'.format(1.020e-13) == '1.02e-13'
+assert '{:.5g}'.format(1.020e-13) == '1.02e-13'
+assert '{:.6g}'.format(1.020e-13) == '1.02e-13'
+assert '{:.7g}'.format(1.020e-13) == '1.02e-13'
+assert '{:g}'.format(1.020e-13) == '1.02e-13'
+assert "{:g}".format(1.020e-4) == '0.000102'
 
 # remove*fix test
 def test_removeprefix():
diff --git a/vm/src/builtins/pystr.rs b/vm/src/builtins/pystr.rs
@@ -180,7 +180,7 @@ impl PyIter for PyStrReverseIterator {
                     break;
                 }
             }
-            start.unwrap_or(end - 4)
+            start.unwrap_or_else(|| end.saturating_sub(4))
         };
 
         let stored = zelf.position.swap(start);
diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs
@@ -4,6 +4,7 @@ use crate::builtins::float::{try_bigint, IntoPyFloat, PyFloat};
 use crate::builtins::int::{self, PyInt};
 use crate::builtins::pystr::PyStr;
 use crate::builtins::{memory::try_buffer_from_object, tuple, PyBytes};
+use crate::common::float_ops;
 use crate::pyobject::{
     BorrowValue, ItemProtocol, PyObjectRef, PyResult, TryFromObject, TypeProtocol,
 };
@@ -76,7 +77,7 @@ enum CNumberType {
 #[derive(Debug, PartialEq)]
 enum CFloatType {
     Exponent(CFormatCase),
-    PointDecimal,
+    PointDecimal(CFormatCase),
     General(CFormatCase),
 }
 
@@ -292,61 +293,43 @@ impl CFormatSpec {
         }
     }
 
-    fn normalize_float(&self, num: f64) -> (f64, i32) {
-        let mut fraction = num;
-        let mut exponent = 0;
-        loop {
-            if fraction >= 10.0 {
-                fraction /= 10.0;
-                exponent += 1;
-            } else if fraction < 1.0 && fraction > 0.0 {
-                fraction *= 10.0;
-                exponent -= 1;
-            } else {
-                break;
-            }
-        }
-
-        (fraction, exponent)
-    }
-
     pub(crate) fn format_float(&self, num: f64) -> String {
-        let sign_string = if num.is_sign_positive() {
-            self.flags.sign_string()
-        } else {
+        let sign_string = if num.is_sign_negative() && !num.is_nan() {
             "-"
+        } else {
+            self.flags.sign_string()
         };
+
         let precision = match self.precision {
             Some(CFormatQuantity::Amount(p)) => p,
             _ => 6,
         };
 
         let magnitude_string = match &self.format_type {
-            CFormatType::Float(CFloatType::PointDecimal) => {
+            CFormatType::Float(CFloatType::PointDecimal(case)) => {
+                let case = match case {
+                    CFormatCase::Lowercase => float_ops::Case::Lower,
+                    CFormatCase::Uppercase => float_ops::Case::Upper,
+                };
                 let magnitude = num.abs();
-                format!("{:.*}", precision, magnitude)
+                float_ops::format_fixed(precision, magnitude, case)
             }
             CFormatType::Float(CFloatType::Exponent(case)) => {
-                let (fraction, exponent) = self.normalize_float(num.abs());
                 let case = match case {
-                    CFormatCase::Lowercase => 'e',
-                    CFormatCase::Uppercase => 'E',
+                    CFormatCase::Lowercase => float_ops::Case::Lower,
+                    CFormatCase::Uppercase => float_ops::Case::Upper,
                 };
-                format!("{:.*}{}{:+03}", precision, fraction, case, exponent)
+                let magnitude = num.abs();
+                float_ops::format_exponent(precision, magnitude, case)
             }
             CFormatType::Float(CFloatType::General(case)) => {
                 let precision = if precision == 0 { 1 } else { precision };
-                let (fraction, exponent) = self.normalize_float(num.abs());
-                if exponent < -4 || exponent >= (precision as i32) {
-                    let case = match case {
-                        CFormatCase::Lowercase => 'e',
-                        CFormatCase::Uppercase => 'E',
-                    };
-                    format!("{}{}{:+03}", fraction, case, exponent)
-                } else {
-                    let magnitude = num.abs();
-                    format!("{}", magnitude)
-                }
+                let case = match case {
+                    CFormatCase::Lowercase => float_ops::Case::Lower,
+                    CFormatCase::Uppercase => float_ops::Case::Upper,
+                };
+                let magnitude = num.abs();
+                float_ops::format_general(precision, magnitude, case)
             }
             _ => unreachable!(),
         };
@@ -1053,8 +1036,8 @@ where
         'X' => CFormatType::Number(Hex(Uppercase)),
         'e' => CFormatType::Float(Exponent(Lowercase)),
         'E' => CFormatType::Float(Exponent(Uppercase)),
-        'f' => CFormatType::Float(PointDecimal),
-        'F' => CFormatType::Float(PointDecimal),
+        'f' => CFormatType::Float(PointDecimal(Lowercase)),
+        'F' => CFormatType::Float(PointDecimal(Uppercase)),
         'g' => CFormatType::Float(General(Lowercase)),
         'G' => CFormatType::Float(General(Uppercase)),
         'c' => CFormatType::Character,
diff --git a/vm/src/format.rs b/vm/src/format.rs

Original file line number	Diff line number	Diff line change
`@@ -180,7 +180,7 @@ impl PyIter for PyStrReverseIterator {`
`180`	`180`	`break;`
`181`	`181`	`}`
`182`	`182`	`}`
`183`		`- start.unwrap_or(end - 4)`
	`183`	`+ start.unwrap_or_else(\|\| end.saturating_sub(4))`
`184`	`184`	`};`
`185`	`185`
`186`	`186`	`let stored = zelf.position.swap(start);`