Skip to content

Commit

Permalink
Merge multiplications by twiddle with multiplications by i for faster r2c transforms.
Browse files Browse the repository at this point in the history

[empty commit message]
  • Loading branch information
matteo-frigo committed Jan 22, 2007
1 parent 37defea commit 44d6228
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 0 deletions.
4 changes: 4 additions & 0 deletions genfft/algsimp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ end = struct
and stimesM = function
| (Uminus a, b) -> stimesM (a, b) >>= suminusM
| (a, Uminus b) -> stimesM (a, b) >>= suminusM
| (NaN I, CTimes (a, b)) -> stimesM (NaN I, b) >>=
fun ib -> sctimesM (a, ib)
| (NaN I, CTimesJ (a, b)) -> stimesM (NaN I, b) >>=
fun ib -> sctimesjM (a, ib)
| (Num a, Num b) -> snumM (Number.mul a b)
| (Num a, Times (Num b, c)) ->
snumM (Number.mul a b) >>= fun x -> stimesM (x, c)
Expand Down
4 changes: 4 additions & 0 deletions genfft/simd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,12 @@ and unparse_expr =
| Times(NaN CONJ,b) -> op1 "VCONJ" b
| Times(a,b) ->
sprintf "VMUL(%s, %s)" (unparse_expr a) (unparse_expr b)
| CTimes(a,Times(NaN I, b)) ->
sprintf "VZMULI(%s, %s)" (unparse_expr a) (unparse_expr b)
| CTimes(a,b) ->
sprintf "VZMUL(%s, %s)" (unparse_expr a) (unparse_expr b)
| CTimesJ(a,Times(NaN I, b)) ->
sprintf "VZMULIJ(%s, %s)" (unparse_expr a) (unparse_expr b)
| CTimesJ(a,b) ->
sprintf "VZMULJ(%s, %s)" (unparse_expr a) (unparse_expr b)
| Uminus a when !Magic.vneg -> op1 "VNEG" a
Expand Down
24 changes: 24 additions & 0 deletions simd/simd-altivec.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,30 @@ static inline V VZMULJ(V tx, V sr)
return VFNMS(ti, si, VMUL(tr, sr));
}

/*
 * VZMULI(tx, si) -- AltiVec flavour.
 *
 * Evaluates VFNMS(ti, si, VMUL(tr, VBYI(si))), where tr and ti are
 * permutations of tx built with vec_perm (see the selectors below).
 *
 * NOTE(review): presumably this is the fused "twiddle times (i * si)"
 * product, with tx holding interleaved (re, im) twiddle pairs and
 * VFNMS(a, b, c) meaning c - a*b -- TODO confirm against the macro
 * definitions, which are not visible here.
 */
static inline V VZMULI(V tx, V si)
{
     /* vec_perm byte selectors; assuming VLIT lists 32-bit words in
        order, these pick words {0,0,2,2} and {1,1,3,3} of tx. */
     const vector unsigned int even_words =
          VLIT(0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b);
     const vector unsigned int odd_words =
          VLIT(0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f);

     V tr = vec_perm(tx, tx, (vector unsigned char)even_words);
     V ti = vec_perm(tx, tx, (vector unsigned char)odd_words);
     V si_byi = VBYI(si);

     return VFNMS(ti, si, VMUL(tr, si_byi));
}

/*
 * VZMULIJ(tx, si) -- AltiVec flavour.
 *
 * Evaluates VFMA(ti, si, VMUL(tr, VBYI(si))), where tr and ti are
 * permutations of tx built with vec_perm (see the selectors below).
 * Differs from VZMULI only in using VFMA instead of VFNMS for the
 * final combination.
 *
 * NOTE(review): presumably this is the fused "conjugated twiddle times
 * (i * si)" product, with VFMA(a, b, c) meaning a*b + c -- TODO confirm
 * against the macro definitions, which are not visible here.
 */
static inline V VZMULIJ(V tx, V si)
{
     /* vec_perm byte selectors; assuming VLIT lists 32-bit words in
        order, these pick words {0,0,2,2} and {1,1,3,3} of tx. */
     const vector unsigned int even_words =
          VLIT(0x00010203, 0x00010203, 0x08090a0b, 0x08090a0b);
     const vector unsigned int odd_words =
          VLIT(0x04050607, 0x04050607, 0x0c0d0e0f, 0x0c0d0e0f);

     V tr = vec_perm(tx, tx, (vector unsigned char)even_words);
     V ti = vec_perm(tx, tx, (vector unsigned char)odd_words);
     V si_byi = VBYI(si);

     return VFMA(ti, si, VMUL(tr, si_byi));
}

/* twiddle storage #1: compact, slower */
/* VTW1(v,x) expands to four initializer entries, in this order:
   TW_COS for v, TW_COS for v+1, TW_SIN for v, TW_SIN for v+1,
   each carrying x as its third field. */
#define VTW1(v,x) \
{TW_COS, v, x}, {TW_COS, v+1, x}, {TW_SIN, v, x}, {TW_SIN, v+1, x}
Expand Down
18 changes: 18 additions & 0 deletions simd/simd-sse.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,24 @@ static inline V VZMULJ(V tx, V sr)
return VSUB(tr, VMUL(ti, sr));
}

/*
 * VZMULI(tx, sr) -- SSE flavour.
 *
 * Evaluates VSUB(VMUL(tr, VBYI(sr)), VMUL(ti, sr)) with
 *   tr = SHUFPS(tx, tx, SHUFVAL(0, 0, 2, 2))
 *   ti = SHUFPS(tx, tx, SHUFVAL(1, 1, 3, 3))
 *
 * NOTE(review): presumably tx holds interleaved (re, im) twiddles, the
 * shuffles broadcast the real / imaginary parts, and the result is
 * tx * (i * sr) -- TODO confirm against SHUFPS/SHUFVAL/VBYI definitions.
 */
static inline V VZMULI(V tx, V sr)
{
     V tw_even = SHUFPS(tx, tx, SHUFVAL(0, 0, 2, 2));
     V tw_odd = SHUFPS(tx, tx, SHUFVAL(1, 1, 3, 3));
     V odd_term = VMUL(tw_odd, sr);
     V even_term = VMUL(tw_even, VBYI(sr));

     return VSUB(even_term, odd_term);
}

/*
 * VZMULIJ(tx, sr) -- SSE flavour.
 *
 * Evaluates VADD(VMUL(tr, VBYI(sr)), VMUL(ti, sr)) with
 *   tr = SHUFPS(tx, tx, SHUFVAL(0, 0, 2, 2))
 *   ti = SHUFPS(tx, tx, SHUFVAL(1, 1, 3, 3))
 * i.e. the same as VZMULI except that the two products are added
 * rather than subtracted.
 *
 * NOTE(review): presumably this yields conj(tx) * (i * sr) -- TODO
 * confirm against SHUFPS/SHUFVAL/VBYI definitions.
 */
static inline V VZMULIJ(V tx, V sr)
{
     V tw_even = SHUFPS(tx, tx, SHUFVAL(0, 0, 2, 2));
     V tw_odd = SHUFPS(tx, tx, SHUFVAL(1, 1, 3, 3));
     V odd_term = VMUL(tw_odd, sr);
     V even_term = VMUL(tw_even, VBYI(sr));

     return VADD(even_term, odd_term);
}

/* VFMAI(b, c) expands to VADD(c, VBYI(b)); VFNMSI(b, c) expands to
   VSUB(c, VBYI(b)).  Each argument appears exactly once in the
   expansion.
   NOTE(review): presumably c + i*b and c - i*b respectively, if VBYI
   is multiplication by i -- TODO confirm. */
#define VFMAI(b, c) VADD(c, VBYI(b))
#define VFNMSI(b, c) VSUB(c, VBYI(b))

Expand Down
18 changes: 18 additions & 0 deletions simd/simd-sse2.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,24 @@ static inline V VZMULJ(V tx, V sr)
return VSUB(tr, VMUL(ti, sr));
}

/*
 * VZMULI(tx, sr) -- SSE2 flavour.
 *
 * Evaluates VSUB(VMUL(tr, VBYI(sr)), VMUL(ti, sr)) with
 *   tr = UNPCKL(tx, tx)
 *   ti = UNPCKH(tx, tx)
 *
 * NOTE(review): presumably tx is a single (re, im) twiddle, UNPCKL /
 * UNPCKH broadcast its real / imaginary part, and the result is
 * tx * (i * sr) -- TODO confirm against UNPCKL/UNPCKH/VBYI definitions.
 */
static inline V VZMULI(V tx, V sr)
{
     V tw_lo = UNPCKL(tx, tx);
     V tw_hi = UNPCKH(tx, tx);
     V hi_term = VMUL(tw_hi, sr);
     V lo_term = VMUL(tw_lo, VBYI(sr));

     return VSUB(lo_term, hi_term);
}

/*
 * VZMULIJ(tx, sr) -- SSE2 flavour.
 *
 * Evaluates VADD(VMUL(tr, VBYI(sr)), VMUL(ti, sr)) with
 *   tr = UNPCKL(tx, tx)
 *   ti = UNPCKH(tx, tx)
 * i.e. the same as VZMULI except that the two products are added
 * rather than subtracted.
 *
 * NOTE(review): presumably this yields conj(tx) * (i * sr) -- TODO
 * confirm against UNPCKL/UNPCKH/VBYI definitions.
 */
static inline V VZMULIJ(V tx, V sr)
{
     V tw_lo = UNPCKL(tx, tx);
     V tw_hi = UNPCKH(tx, tx);
     V hi_term = VMUL(tw_hi, sr);
     V lo_term = VMUL(tw_lo, VBYI(sr));

     return VADD(lo_term, hi_term);
}

/* twiddle storage #1: compact, slower */
/* VTW1(v,x) expands to a single {TW_CEXP, v, x} initializer entry.
   TWVL1 is the constant 1.
   NOTE(review): TWVL1 presumably gives the vector-length step that
   goes with the VTW1 layout -- TODO confirm at the use sites. */
#define VTW1(v,x) {TW_CEXP, v, x}
#define TWVL1 1
Expand Down

0 comments on commit 44d6228

Please sign in to comment.