Skip to content

Commit

Permalink
Put cpy2d_pair into its own file, so that I can experiment with
Browse files Browse the repository at this point in the history
buffering of nontwiddle codelets.
  • Loading branch information
matteo-frigo committed Feb 23, 2005
1 parent e7d485c commit 5844ac6
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 34 deletions.
35 changes: 7 additions & 28 deletions dft/dftw-directbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*
*/

/* $Id: dftw-directbuf.c,v 1.4 2005-02-19 14:31:14 athena Exp $ */
/* $Id: dftw-directbuf.c,v 1.5 2005-02-23 03:32:06 athena Exp $ */

#include "ct.h"

Expand All @@ -41,37 +41,16 @@ typedef struct {
const S *slv;
} P;

/*
 * Copy the n0 x n1 complex matrix A into the n0 x n1 complex matrix B.
 *
 * Element (i0, i1) of a matrix lives at offset (i0 * s0 + i1 * s1) from
 * its base pointer, where (s0, s1) is (sa0, sa1) for A and (sb0, sb1)
 * for B.  Real parts are addressed through rA/rB; the imaginary part of
 * an element sits at the fixed distance (iA - rA), respectively
 * (iB - rB), from its real part.
 */
static void cpy(int n0, int n1,
                const R *rA, const R *iA, int sa0, int sa1,
                R *rB, R *iB, int sb0, int sb1)
{
     /* distance from a real part to the matching imaginary part */
     const ptrdiff_t im_in = iA - rA;
     const ptrdiff_t im_out = iB - rB;
     int row;

     /* rA/rB always point at the start of the current row */
     for (row = 0; row < n0; ++row, rA += sa0, rB += sb0) {
          const R *src = rA;
          R *dst = rB;
          int col;

          for (col = 0; col < n1; ++col, src += sa1, dst += sb1) {
               /* load both halves before storing either of them */
               const R re = src[0];
               const R im = src[im_in];

               dst[0] = re;
               dst[im_out] = im;
          }
     }
}

/*
 * Run the codelet k on one batch of data by way of the buffer buf:
 * the data addressed by (rA, iA) is copied into buf, k is invoked on
 * buf, and the contents of buf are copied back to (rA, iA).
 *
 * The buffer is passed to the copy routines as the pair (buf, buf + 1)
 * with strides 2 and 2 * r, so the two arrays of the pair are
 * interleaved inside buf.
 *
 * Returns the W pointer handed back by k.
 *
 * NOTE(review): this listing contains both the cpy() calls and the
 * X(cpy2d_pair_ci)/X(cpy2d_pair_co) calls for each copy step; presumably
 * it is a diff in which the cpy() calls are being replaced and only one
 * call per step is meant to be live -- confirm against the real file.
 */
static const R *doit(kdftw k, R *rA, R *iA, const R *W, int ios, int dist,
int r, int batchsz, R *buf, stride bufstride)
{
/* copy in: (rA, iA) -> (buf, buf + 1) */
cpy(r, batchsz, rA, iA, ios, dist, buf, buf + 1, 2, 2 * r);
X(cpy2d_pair_ci)(rA, iA, buf, buf + 1,
r, ios, 2,
batchsz, dist, 2 * r);
/* run the codelet on the buffered copy; it yields the W to return */
W = k(buf, buf + 1, W, bufstride, batchsz, 2 * r);
/* copy out: (buf, buf + 1) -> (rA, iA), with the strides swapped back */
cpy(r, batchsz, buf, buf + 1, 2, 2 * r, rA, iA, ios, dist);
X(cpy2d_pair_co)(buf, buf + 1, rA, iA,
r, 2, ios,
batchsz, 2 * r, dist);
return W;
}

Expand Down
11 changes: 6 additions & 5 deletions kernel/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ AM_CPPFLAGS = -I$(top_srcdir)/simd
noinst_LTLIBRARIES = libkernel.la

libkernel_la_SOURCES = align.c alloc.c assert.c awake.c buffered.c \
cpy1d.c cpy2d.c ct.c debug.c hash.c iabs.c kalloc.c md5-1.c md5.c \
minmax.c ops.c pickdim.c plan.c planner.c primes.c print.c problem.c \
rader.c scan.c solver.c solvtab.c stride.c tensor.c tensor1.c \
tensor2.c tensor4.c tensor5.c tensor7.c tensor8.c tensor9.c timer.c \
transpose-rec.c transpose.c trig.c trig1.c twiddle.c cycle.h ifftw.h
cpy1d.c cpy2d-pair.c cpy2d.c ct.c debug.c hash.c iabs.c kalloc.c \
md5-1.c md5.c minmax.c ops.c pickdim.c plan.c planner.c primes.c \
print.c problem.c rader.c scan.c solver.c solvtab.c stride.c tensor.c \
tensor1.c tensor2.c tensor4.c tensor5.c tensor7.c tensor8.c tensor9.c \
timer.c transpose-rec.c transpose.c trig.c trig1.c twiddle.c cycle.h \
ifftw.h
61 changes: 61 additions & 0 deletions kernel/cpy2d-pair.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2003 Matteo Frigo
* Copyright (c) 2003 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/

/* out of place copy routines for pairs of isomorphic 2D arrays */
#include "ifftw.h"

/*
 * Copy two 2D arrays that share one layout: I0 -> O0 and I1 -> O1.
 *
 * Element (i0, i1) is read at offset (i0 * is0 + i1 * is1) from each
 * input pointer and written at offset (i0 * os0 + i1 * os1) from each
 * output pointer, for 0 <= i0 < n0 and 0 <= i1 < n1.  The i0 dimension
 * is traversed by the innermost loop.
 */
void X(cpy2d_pair)(R *I0, R *I1, R *O0, R *O1,
                   int n0, int is0, int os0,
                   int n1, int is1, int os1)
{
     int outer, inner;

     for (outer = 0; outer < n1; ++outer) {
          for (inner = 0; inner < n0; ++inner) {
               /* both arrays of a pair use the same element offset */
               const int src = inner * is0 + outer * is1;
               const int dst = inner * os0 + outer * os1;

               /* read the whole pair before writing any of it */
               const R a = I0[src];
               const R b = I1[src];

               O0[dst] = a;
               O1[dst] = b;
          }
     }
}

/*
 * Absolute value of x.  The argument appears more than once in the
 * expansion, so it must be free of side effects.
 */
#define IABS(x) (((x) < 0) ? (-(x)) : (x))

/*
 * Same copy as cpy2d_pair, but the two dimensions are ordered so that
 * the innermost loop runs over the dimension whose input stride is
 * smaller in magnitude, i.e. the input is read as contiguously as
 * possible.
 */
void X(cpy2d_pair_ci)(R *I0, R *I1, R *O0, R *O1,
                      int n0, int is0, int os0,
                      int n1, int is1, int os1)
{
     if (IABS(is0) >= IABS(is1)) {
          /* exchange the dimensions: the inner loop is for n1 */
          X(cpy2d_pair)(I0, I1, O0, O1,
                        n1, is1, os1,
                        n0, is0, os0);
          return;
     }

     /* dimensions are already in the right order: inner loop is for n0 */
     X(cpy2d_pair)(I0, I1, O0, O1,
                   n0, is0, os0,
                   n1, is1, os1);
}

/*
 * Same copy as cpy2d_pair, but the two dimensions are ordered so that
 * the innermost loop runs over the dimension whose output stride is
 * smaller in magnitude, i.e. the output is written as contiguously as
 * possible.
 */
void X(cpy2d_pair_co)(R *I0, R *I1, R *O0, R *O1,
                      int n0, int is0, int os0,
                      int n1, int is1, int os1)
{
     if (IABS(os0) >= IABS(os1)) {
          /* exchange the dimensions: the inner loop is for n1 */
          X(cpy2d_pair)(I0, I1, O0, O1,
                        n1, is1, os1,
                        n0, is0, os0);
          return;
     }

     /* dimensions are already in the right order: inner loop is for n0 */
     X(cpy2d_pair)(I0, I1, O0, O1,
                   n0, is0, os0,
                   n1, is1, os1);
}
12 changes: 11 additions & 1 deletion kernel/ifftw.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*
*/

/* $Id: ifftw.h,v 1.247 2005-02-22 15:06:13 athena Exp $ */
/* $Id: ifftw.h,v 1.248 2005-02-23 03:32:06 athena Exp $ */

/* FFTW internal header file */
#ifndef __IFFTW_H__
Expand Down Expand Up @@ -741,6 +741,16 @@ void X(cpy2d_recbuf)(R *I, R *O,
int n0, int is0, int os0,
int n1, int is1, int os1,
int vl);
/*
 * Out-of-place copy of a pair of 2D arrays with identical layout
 * (I0 -> O0, I1 -> O1).  Element (i0, i1) is read at offset
 * i0 * is0 + i1 * is1 and written at offset i0 * os0 + i1 * os1;
 * the n0 dimension is the inner loop.
 */
void X(cpy2d_pair)(R *I0, R *I1, R *O0, R *O1,
int n0, int is0, int os0,
int n1, int is1, int os1);
/* like cpy2d_pair, but the inner loop runs over the dimension with the
   smaller input stride (in absolute value) */
void X(cpy2d_pair_ci)(R *I0, R *I1, R *O0, R *O1,
int n0, int is0, int os0,
int n1, int is1, int os1);
/* like cpy2d_pair, but the inner loop runs over the dimension with the
   smaller output stride (in absolute value) */
void X(cpy2d_pair_co)(R *I0, R *I1, R *O0, R *O1,
int n0, int is0, int os0,
int n1, int is1, int os1);

void X(transpose)(R *I, int n, int s0, int s1, int vl);
void X(transpose_rec)(R *I, int n, int s0, int s1, int vl) ;
void X(transpose_recbuf)(R *I, int n, int s0, int s1, int vl) ;
Expand Down

0 comments on commit 5844ac6

Please sign in to comment.