diff --git a/genfft/complex.ml b/genfft/complex.ml
index 89dbc6889..555aea734 100644
--- a/genfft/complex.ml
+++ b/genfft/complex.ml
@@ -48,9 +48,20 @@ let nan x = CE (NaN x, makeNum Number.zero)
 
 let half = inverse_int 2
 
+let times3x3 (CE (a, b)) (CE (c, d)) = (* product of CE (a, b) and CE (c, d) built from the products c(a-b), b(c-d) and a(c+d); c(a-b) is written out twice below *)
+  CE (makePlus [makeTimes (c, makePlus [a; makeUminus (b)]);
+                makeTimes (b, makePlus [c; makeUminus (d)])], (* first component: c(a-b) + b(c-d) = ac - bd *)
+      makePlus [makeTimes (a, makePlus [c; d]);
+                makeUminus(makeTimes (c, makePlus [a; makeUminus (b)]))]) (* second component: a(c+d) - c(a-b) = ad + bc *)
+
 let times (CE (a, b)) (CE (c, d)) =
-  CE (makePlus [makeTimes (a, c); makeUminus (makeTimes (b, d))],
-      makePlus [makeTimes (a, d); makeTimes (b, c)])
+  if not !Magic.threemult then (* flag unset: keep the previous four-product formula unchanged *)
+    CE (makePlus [makeTimes (a, c); makeUminus (makeTimes (b, d))],
+        makePlus [makeTimes (a, d); makeTimes (b, c)])
+  else if is_constant c && is_constant d then (* times3x3 only adds and subtracts c and d with each other, so pass the constant pair as its second argument *)
+    times3x3 (CE (a, b)) (CE (c, d))
+  else (* hope a and b are constant expressions *)
+    times3x3 (CE (c, d)) (CE (a, b)) (* operands swapped; the product is the same either way. NOTE(review): a and b are not checked with is_constant here *)
 
 let ctimes (CE (a, _)) (CE (c, _)) =
   CE (CTimes (a, c), makeNum Number.zero)
diff --git a/genfft/magic.ml b/genfft/magic.ml
index 95f703757..eccea94ad 100644
--- a/genfft/magic.ml
+++ b/genfft/magic.ml
@@ -28,6 +28,7 @@ let circular_min = ref 64
 let rader_min = ref 13
 let rader_list = ref [5]
 let alternate_convolution = ref 17
+let threemult = ref false (* initially false; read by Complex.times as !Magic.threemult to choose the three-product form *)
 let inline_single = ref true
 let inline_loads = ref false
 let inline_loads_constants = ref false
@@ -78,6 +79,9 @@ let speclist = [
   "-rader-min", set_int rader_min,
   " : Use Rader's algorithm for prime sizes >= ";
 
+  "-threemult", set_bool threemult, (* option entry bound to the threemult ref; NOTE(review): set_bool is defined outside this patch, presumably it sets the ref from the option argument - confirm *)
+  " Use 3-multiply complex multiplications";
+
   "-karatsuba-min", set_int karatsuba_min, undocumented;
   "-karatsuba-variant", set_int karatsuba_variant, undocumented;
   "-circular-min", set_int circular_min, undocumented;