Vectorize (vec4) matmul and convolutions. (tensorflow#129)

* vec4 matmul and conv
* for conv benchmarks, make input depth 10
* update depth in html to reflect new 10 input depth
* checkpointing, pooling ops faster now
* add avg pooling benchmark
* change conv depth to 16 in benchmark
* Add unit tests
* fix lint
* respond to comments
* lint
cvalenzuela · Sep 17, 2017 · 7f87d33 · 7f87d33
1 parent a467449
commit 7f87d33
Show file tree

Hide file tree

Showing 25 changed files with 814 additions and 729 deletions.
diff --git a/demos/benchmarks/benchmark.ts b/demos/benchmarks/benchmark.ts
@@ -25,6 +25,7 @@ export interface BenchmarkRunGroup {
   // A transformation of step to the size passed to the benchmark test.
   stepToSizeTransformation?: (step: number) => number;
   benchmarkRuns: BenchmarkRun[];
+  params: {};
 }
 
 export class BenchmarkRun {
@@ -39,4 +40,7 @@ export class BenchmarkRun {
   }
 }
 
-export interface BenchmarkTest { (size: number): number; }
+/**
+ * Base class for a single benchmark. It optionally stores a parameter object
+ * and exposes one entry point, `run`, that subclasses must implement.
+ */
+export abstract class BenchmarkTest {
+  // `params` becomes a protected member so subclasses can read it.
+  constructor(protected params?: {}) {}
+  /** Executes the benchmark for the given size and returns its result. */
+  abstract run(size: number): number;
+}
diff --git a/demos/benchmarks/conv_benchmarks.ts b/demos/benchmarks/conv_benchmarks.ts
@@ -0,0 +1,83 @@
+/**
+ * @license
+ * Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {initializeGPU} from '../../src/math/ndarray';
+import {Conv2DProgram} from '../../src/math/webgl/conv_gpu';
+import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
+import {TextureManager} from '../../src/math/webgl/texture_manager';
+import {Array1D, Array3D, Array4D, conv_util, GPGPUContext} from '../deeplearn';
+
+import {BenchmarkTest} from './benchmark';
+
+// Number of times the compiled program is executed inside the timed section;
+// the elapsed time is divided by this value to get a per-run average.
+const OP_RUNS = 40;
+
+/** Parameters that configure a convolution benchmark. */
+export interface ConvBenchmarkParams {
+  /** Size of the last dimension of the [size, size, inDepth] input. */
+  inDepth: number;
+  /** Output depth handed to the convolution. */
+  outDepth: number;
+  /** Filter extent; used for both filter dimensions. */
+  filterSize: number;
+  /** Stride; used for both stride arguments. */
+  stride: number;
+}
+
+/**
+ * Base class for convolution benchmarks. It narrows the inherited `params`
+ * member to ConvBenchmarkParams and makes it required.
+ */
+export abstract class ConvBenchmark extends BenchmarkTest {
+  constructor(protected params: ConvBenchmarkParams) {
+    super(params);
+  }
+}
+
+/**
+ * Benchmarks Conv2DProgram (with bias) on a random [size, size, inDepth]
+ * input, using the depths, filter size and stride from `params`.
+ */
+export class ConvGPUBenchmark extends ConvBenchmark {
+  /**
+   * Compiles the convolution program once, executes it OP_RUNS times and
+   * returns the average wall-clock time per execution.
+   *
+   * @param size Height and width of the square input.
+   * @return Average time per program execution, in milliseconds.
+   */
+  run(size: number): number {
+    const gpgpu = new GPGPUContext();
+    const texManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, texManager);
+
+    const inDepth = this.params.inDepth;
+    const inShape: [number, number, number] = [size, size, inDepth];
+    const outDepth = this.params.outDepth;
+    const filterSize = this.params.filterSize;
+    const stride = this.params.stride;
+    const hasBias = true;
+    const convInfo = conv_util.computeConvInfo(
+        inShape, filterSize, filterSize, outDepth, stride, stride, 'same');
+    const program = new Conv2DProgram(convInfo, hasBias);
+    const outputShape = program.outputShape as [number, number, number];
+    const out = Array3D.zeros(outputShape);
+    const x = Array3D.randUniform(inShape, -1, 1);
+    // The weights' input depth has to match the depth of `x`; a hard-coded
+    // depth of 1 would disagree with `convInfo` whenever inDepth != 1.
+    const wShape = conv_util.computeWeightsShape4D(
+        inDepth, outDepth, filterSize, filterSize);
+    const W = Array4D.randUniform(wShape, -1, 1);
+    const b = Array1D.randUniform([outDepth], -1, 1);
+    const inputs = [x, W, b];
+    // Compilation happens before the timer starts, so it is not measured.
+    const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, out);
+
+    const start = performance.now();
+    for (let i = 0; i < OP_RUNS; i++) {
+      gpgpu_math.runProgram(binary, inputs, out);
+    }
+    // Read the output back before stopping the timer (presumably this waits
+    // for the queued GPU work to finish).
+    out.getValues();
+    const avgTime = (performance.now() - start) / OP_RUNS;
+
+    // Release every array and GPU resource created for this run.
+    x.dispose();
+    W.dispose();
+    b.dispose();
+    out.dispose();
+    texManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
+    gpgpu.dispose();
+
+    return avgTime;
+  }
+}
diff --git a/demos/benchmarks/conv_gpu_benchmark.ts b/demos/benchmarks/conv_gpu_benchmark.ts
diff --git a/demos/benchmarks/conv_transpose_gpu_benchmark.ts b/demos/benchmarks/conv_transpose_gpu_benchmark.ts
diff --git a/demos/benchmarks/conv_transposed_benchmarks.ts b/demos/benchmarks/conv_transposed_benchmarks.ts
@@ -0,0 +1,79 @@
+/**
+ * @license
+ * Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {initializeGPU} from '../../src/math/ndarray';
+import {Conv2DDerInputProgram} from '../../src/math/webgl/conv_backprop_gpu';
+import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
+import {TextureManager} from '../../src/math/webgl/texture_manager';
+import {Array3D, Array4D, conv_util, GPGPUContext} from '../deeplearn';
+
+import {BenchmarkTest} from './benchmark';
+
+// Number of times the compiled program is executed inside the timed section;
+// the elapsed time is divided by this value to get a per-run average.
+const OP_RUNS = 40;
+
+/**
+ * Parameters for transposed-convolution benchmarks.
+ *
+ * NOTE(review): ConvTransposedGPUBenchmark.run in this file does not read any
+ * of these fields; it uses fixed shapes. Confirm whether that is intended.
+ */
+export interface ConvTransposedBenchmarkParams {
+  inDepth: number;
+  outDepth: number;
+  filterSize: number;
+  stride: number;
+}
+
+/**
+ * Base class for transposed-convolution benchmarks. It narrows the inherited
+ * `params` member to ConvTransposedBenchmarkParams and makes it required.
+ */
+export abstract class ConvTransposedBenchmark extends BenchmarkTest {
+  constructor(protected params: ConvTransposedBenchmarkParams) {
+    super(params);
+  }
+}
+
+/**
+ * Benchmarks Conv2DDerInputProgram on a random [size, size, 1] input with an
+ * 11x11 filter, stride 1 and padding 1.
+ *
+ * NOTE(review): `this.params` is not consulted; all shapes are fixed below.
+ */
+export class ConvTransposedGPUBenchmark extends ConvTransposedBenchmark {
+  /**
+   * Compiles the program once, executes it OP_RUNS times and returns the
+   * average wall-clock time per execution.
+   *
+   * @param size Height and width of the square input.
+   * @return Average time per program execution, in milliseconds.
+   */
+  run(size: number): number {
+    const origInputDepth = 1;
+    const origOutputDepth = 1;
+    const xShape: [number, number, number] = [size, size, origOutputDepth];
+    const fieldSize = 11;
+    const origStride = 1;
+    const origPad = 1;
+
+    const gpgpu = new GPGPUContext();
+    const texManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, texManager);
+    // NOTE(review): automatic debug validation is deliberately not enabled,
+    // matching the other GPU benchmarks in this change; it presumably adds
+    // per-call checks that would inflate the measured time.
+
+    const convInfo = conv_util.computeConvInfo(
+        xShape, fieldSize, fieldSize, origOutputDepth, origStride, origStride,
+        origPad);
+    const program = new Conv2DDerInputProgram(convInfo);
+    const outputShape = program.outputShape as [number, number, number];
+    const out = Array3D.zeros(outputShape);
+    const x = Array3D.randUniform(xShape, -1, 1);
+    const wShape = conv_util.computeWeightsShape4D(
+        origInputDepth, origOutputDepth, fieldSize, fieldSize);
+    const W = Array4D.randUniform(wShape, -1, 1);
+    const inputs = [x, W];
+    // Compilation happens before the timer starts, so it is not measured.
+    const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, out);
+
+    const start = performance.now();
+    for (let i = 0; i < OP_RUNS; i++) {
+      gpgpu_math.runProgram(binary, inputs, out);
+    }
+    // Read the output back before stopping the timer (presumably this waits
+    // for the queued GPU work to finish).
+    out.getValues();
+    const avgTime = (performance.now() - start) / OP_RUNS;
+
+    // Release the arrays as well as the GPU resources, as the other GPU
+    // benchmarks do, so repeated runs do not accumulate allocations.
+    x.dispose();
+    W.dispose();
+    out.dispose();
+    texManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
+    gpgpu.dispose();
+
+    return avgTime;
+  }
+}
diff --git a/demos/benchmarks/logsumexp_benchmarks.ts b/demos/benchmarks/logsumexp_benchmarks.ts
@@ -0,0 +1,67 @@
+/**
+ * @license
+ * Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {initializeGPU} from '../../src/math/ndarray';
+import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
+import {LogSumExpProgram} from '../../src/math/webgl/logsumexp_gpu';
+import {TextureManager} from '../../src/math/webgl/texture_manager';
+// tslint:disable-next-line:max-line-length
+import {Array2D, GPGPUContext, NDArray, NDArrayMathCPU, Scalar} from '../deeplearn';
+
+import {BenchmarkTest} from './benchmark';
+
+// Number of timed logSumExp calls per CPU benchmark run; the elapsed time is
+// divided by this value.
+const CPU_OPS_PER_RUN = 10;
+// Number of timed program executions per GPU benchmark run; the elapsed time
+// is divided by this value.
+const GPU_OPS_PER_RUN = 10;
+
+/** Times NDArrayMathCPU.logSumExp over a random size x size matrix. */
+export class LogSumExpCPUBenchmark extends BenchmarkTest {
+  /**
+   * @param size Number of rows and columns of the random input matrix.
+   * @return Average time per logSumExp call, in milliseconds.
+   */
+  run(size: number): number {
+    const cpuMath = new NDArrayMathCPU();
+    const input = NDArray.randUniform<Array2D>([size, size], -1, 1);
+
+    const startTime = performance.now();
+    let runsLeft = CPU_OPS_PER_RUN;
+    while (runsLeft-- > 0) {
+      cpuMath.logSumExp(input);
+    }
+    const elapsed = performance.now() - startTime;
+
+    return elapsed / CPU_OPS_PER_RUN;
+  }
+}
+
+/** Times LogSumExpProgram on the GPU over a random size x size matrix. */
+export class LogSumExpGPUBenchmark extends BenchmarkTest {
+  /**
+   * @param size Number of rows and columns of the random input matrix.
+   * @return Average time per program execution, in milliseconds.
+   */
+  run(size: number): number {
+    const context = new GPGPUContext();
+    const textures = new TextureManager(context);
+    initializeGPU(context, textures);
+
+    // The result is a scalar backed by a 1x1 texture.
+    const result = new Scalar({texture: textures.acquireTexture([1, 1])});
+    const input = Array2D.randUniform([size, size], -1, 1);
+    const logSumExp = new LogSumExpProgram(input.size);
+    const compiled =
+        gpgpu_math.compileProgram(context, logSumExp, [input], result);
+
+    const startTime = performance.now();
+    let runsLeft = GPU_OPS_PER_RUN;
+    while (runsLeft-- > 0) {
+      gpgpu_math.runProgram(compiled, [input], result);
+    }
+    // Read the result back before taking the end timestamp.
+    result.getValues();
+    const elapsed = performance.now() - startTime;
+
+    input.dispose();
+    result.dispose();
+    textures.dispose();
+    context.deleteProgram(compiled.webGLProgram);
+    context.dispose();
+
+    return elapsed / GPU_OPS_PER_RUN;
+  }
+}