Skip to content

Commit

Permalink
Vectorize (vec4) matmul and convolutions. (tensorflow#129)
Browse files Browse the repository at this point in the history
* vec4 matmul and conv

* for conv benchmarks, make input depth 10

* update depth in html to reflect new 10 input depth

* checkpointing, pooling ops faster now

* add avg pooling benchmark

* change conv depth to 16 in benchmark

* Add unit tests

* fix lint

* respond to comments

* lint
  • Loading branch information
Nikhil Thorat authored Sep 17, 2017
1 parent a467449 commit 7f87d33
Show file tree
Hide file tree
Showing 25 changed files with 814 additions and 729 deletions.
6 changes: 5 additions & 1 deletion demos/benchmarks/benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export interface BenchmarkRunGroup {
// A transformation of step to the size passed to the benchmark test.
stepToSizeTransformation?: (step: number) => number;
benchmarkRuns: BenchmarkRun[];
params: {};
}

export class BenchmarkRun {
Expand All @@ -39,4 +40,7 @@ export class BenchmarkRun {
}
}

export interface BenchmarkTest { (size: number): number; }
/**
 * Base class for a single benchmark. Subclasses implement `run`, which
 * receives a size and returns a number (the visible subclasses return a
 * time measured with `performance.now()`).
 */
export abstract class BenchmarkTest {
  /** Optional configuration object, made available to subclasses. */
  protected params?: {};

  constructor(params?: {}) {
    this.params = params;
  }

  /**
   * Executes the benchmark.
   *
   * @param size Size passed to the benchmark test.
   * @returns The numeric result of the benchmark.
   */
  abstract run(size: number): number;
}
83 changes: 83 additions & 0 deletions demos/benchmarks/conv_benchmarks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/**
* @license
* Copyright 2017 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/

import {initializeGPU} from '../../src/math/ndarray';
import {Conv2DProgram} from '../../src/math/webgl/conv_gpu';
import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
import {TextureManager} from '../../src/math/webgl/texture_manager';
import {Array1D, Array3D, Array4D, conv_util, GPGPUContext} from '../deeplearn';

import {BenchmarkTest} from './benchmark';

// Number of times the compiled program is executed inside the timed loop;
// the elapsed time is divided by this value to report a per-run average.
const OP_RUNS = 40;

/** Configuration read by the convolution benchmarks in this file. */
export interface ConvBenchmarkParams {
// Depth (last dimension) of the [size, size, inDepth] input volume.
inDepth: number;
// Output depth handed to the conv-info computation; also the bias length.
outDepth: number;
// Filter extent; used for both the filter height and the filter width.
filterSize: number;
// Stride; used for both the vertical and the horizontal direction.
stride: number;
}

/**
 * Common base for convolution benchmarks: narrows the inherited `params`
 * field to `ConvBenchmarkParams` so subclasses get typed access to it.
 */
export abstract class ConvBenchmark extends BenchmarkTest {
  protected params: ConvBenchmarkParams;

  constructor(params: ConvBenchmarkParams) {
    super(params);
    this.params = params;
  }
}

/**
 * Benchmarks `Conv2DProgram` (with bias) on the GPU using the depth, filter
 * size and stride supplied through `ConvBenchmarkParams`.
 */
export class ConvGPUBenchmark extends ConvBenchmark {
  /**
   * Compiles the convolution program once, executes it `OP_RUNS` times on
   * random data and reports the average time per execution.
   *
   * @param size Height and width of the square input volume.
   * @returns Average `performance.now()` milliseconds per program run.
   */
  run(size: number): number {
    const gpgpu = new GPGPUContext();
    const texManager = new TextureManager(gpgpu);
    initializeGPU(gpgpu, texManager);

    const inDepth = this.params.inDepth;
    const inShape: [number, number, number] = [size, size, inDepth];
    const outDepth = this.params.outDepth;
    const filterSize = this.params.filterSize;
    const stride = this.params.stride;
    const hasBias = true;
    const convInfo = conv_util.computeConvInfo(
        inShape, filterSize, filterSize, outDepth, stride, stride, 'same');
    const program = new Conv2DProgram(convInfo, hasBias);
    const outputShape = program.outputShape as [number, number, number];
    const out = Array3D.zeros(outputShape);
    const x = Array3D.randUniform(inShape, -1, 1);
    // The filter's input depth must match the depth of `x`; it was previously
    // hard-coded to 1, which disagreed with `inShape` whenever inDepth != 1.
    const wShape = conv_util.computeWeightsShape4D(
        inDepth, outDepth, filterSize, filterSize);
    const W = Array4D.randUniform(wShape, -1, 1);
    const b = Array1D.randUniform([outDepth], -1, 1);
    const inputs = [x, W, b];
    const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, out);

    const start = performance.now();
    for (let i = 0; i < OP_RUNS; i++) {
      gpgpu_math.runProgram(binary, inputs, out);
    }
    // Reading the output back is part of the timed region; presumably this
    // forces the queued GPU work to complete before the clock is stopped.
    out.getValues();
    const avgTime = (performance.now() - start) / OP_RUNS;

    // Release the arrays first, then the texture manager, program and context.
    x.dispose();
    W.dispose();
    b.dispose();
    out.dispose();
    texManager.dispose();
    gpgpu.deleteProgram(binary.webGLProgram);
    gpgpu.dispose();

    return avgTime;
  }
}
69 changes: 0 additions & 69 deletions demos/benchmarks/conv_gpu_benchmark.ts

This file was deleted.

64 changes: 0 additions & 64 deletions demos/benchmarks/conv_transpose_gpu_benchmark.ts

This file was deleted.

79 changes: 79 additions & 0 deletions demos/benchmarks/conv_transposed_benchmarks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/**
* @license
* Copyright 2017 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/

import {initializeGPU} from '../../src/math/ndarray';
import {Conv2DDerInputProgram} from '../../src/math/webgl/conv_backprop_gpu';
import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
import {TextureManager} from '../../src/math/webgl/texture_manager';
import {Array3D, Array4D, conv_util, GPGPUContext} from '../deeplearn';

import {BenchmarkTest} from './benchmark';

// Number of times the compiled program is executed inside the timed loop;
// the elapsed time is divided by this value to report a per-run average.
const OP_RUNS = 40;

/**
 * Configuration accepted by the transposed-convolution benchmarks.
 * NOTE(review): ConvTransposedGPUBenchmark.run in this file does not read
 * any of these fields; it uses hard-coded values instead. Confirm whether
 * that is intentional.
 */
export interface ConvTransposedBenchmarkParams {
// Input depth.
inDepth: number;
// Output depth.
outDepth: number;
// Filter size.
filterSize: number;
// Stride.
stride: number;
}

/**
 * Common base for transposed-convolution benchmarks: narrows the inherited
 * `params` field to `ConvTransposedBenchmarkParams`.
 */
export abstract class ConvTransposedBenchmark extends BenchmarkTest {
  protected params: ConvTransposedBenchmarkParams;

  constructor(params: ConvTransposedBenchmarkParams) {
    super(params);
    this.params = params;
  }
}

/**
 * Benchmarks `Conv2DDerInputProgram` on the GPU.
 *
 * NOTE(review): the `params` given to the constructor are not read here;
 * depths, field size, stride and padding are hard-coded below.
 */
export class ConvTransposedGPUBenchmark extends ConvTransposedBenchmark {
  /**
   * Compiles the program once, executes it `OP_RUNS` times on random data
   * and reports the average time per execution.
   *
   * @param size Height and width of the square input `x`.
   * @returns Average `performance.now()` milliseconds per program run.
   */
  run(size: number): number {
    const origInputDepth = 1;
    const origOutputDepth = 1;
    const xShape: [number, number, number] = [size, size, origOutputDepth];
    const fieldSize = 11;
    const origStride = 1;
    const origPad = 1;

    const gpgpu = new GPGPUContext();
    const texManager = new TextureManager(gpgpu);
    initializeGPU(gpgpu, texManager);
    // NOTE(review): debug validation is switched on inside a timed benchmark;
    // presumably this adds per-call overhead -- confirm it is intentional.
    gpgpu.enableAutomaticDebugValidation(true);

    const convInfo = conv_util.computeConvInfo(
        xShape, fieldSize, fieldSize, origOutputDepth, origStride, origStride,
        origPad);
    const program = new Conv2DDerInputProgram(convInfo);
    const outputShape = program.outputShape as [number, number, number];
    const out = Array3D.zeros(outputShape);
    const x = Array3D.randUniform(xShape, -1, 1);
    const wShape = conv_util.computeWeightsShape4D(
        origInputDepth, origOutputDepth, fieldSize, fieldSize);
    const W = Array4D.randUniform(wShape, -1, 1);
    const inputs = [x, W];
    const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, out);

    const start = performance.now();
    for (let i = 0; i < OP_RUNS; i++) {
      gpgpu_math.runProgram(binary, inputs, out);
    }
    // Reading the output back is part of the timed region.
    out.getValues();
    const avgTime = (performance.now() - start) / OP_RUNS;

    // Dispose the arrays before the texture manager and context, as the
    // other GPU benchmarks do; previously x, W and out were never released.
    x.dispose();
    W.dispose();
    out.dispose();
    texManager.dispose();
    gpgpu.deleteProgram(binary.webGLProgram);
    gpgpu.dispose();

    return avgTime;
  }
}
67 changes: 67 additions & 0 deletions demos/benchmarks/logsumexp_benchmarks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* @license
* Copyright 2017 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/

import {initializeGPU} from '../../src/math/ndarray';
import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
import {LogSumExpProgram} from '../../src/math/webgl/logsumexp_gpu';
import {TextureManager} from '../../src/math/webgl/texture_manager';
// tslint:disable-next-line:max-line-length
import {Array2D, GPGPUContext, NDArray, NDArrayMathCPU, Scalar} from '../deeplearn';

import {BenchmarkTest} from './benchmark';

// Iteration counts for the timed loops of the CPU and GPU benchmarks below;
// each benchmark divides its elapsed time by the matching constant.
const CPU_OPS_PER_RUN = 10;
const GPU_OPS_PER_RUN = 10;

/** Times `NDArrayMathCPU.logSumExp` on a random square matrix. */
export class LogSumExpCPUBenchmark extends BenchmarkTest {
  /**
   * @param size Number of rows and columns of the random input matrix.
   * @returns Average `performance.now()` milliseconds per logSumExp call.
   */
  run(size: number): number {
    const cpuMath = new NDArrayMathCPU();
    const input = NDArray.randUniform<Array2D>([size, size], -1, 1);

    const startedAt = performance.now();
    let remaining = CPU_OPS_PER_RUN;
    while (remaining > 0) {
      cpuMath.logSumExp(input);
      remaining--;
    }
    const elapsed = performance.now() - startedAt;

    return elapsed / CPU_OPS_PER_RUN;
  }
}

/** Times the WebGL `LogSumExpProgram` on a random square matrix. */
export class LogSumExpGPUBenchmark extends BenchmarkTest {
  /**
   * Compiles the program once, runs it `GPU_OPS_PER_RUN` times and reads the
   * scalar result back before stopping the clock.
   *
   * @param size Number of rows and columns of the random input matrix.
   * @returns Average `performance.now()` milliseconds per program run.
   */
  run(size: number): number {
    const context = new GPGPUContext();
    const textures = new TextureManager(context);
    initializeGPU(context, textures);

    // The result is a scalar backed by a 1x1 texture from the manager.
    const result = new Scalar({texture: textures.acquireTexture([1, 1])});
    const matrix = Array2D.randUniform([size, size], -1, 1);
    const compiled = gpgpu_math.compileProgram(
        context, new LogSumExpProgram(matrix.size), [matrix], result);

    const startedAt = performance.now();
    let remaining = GPU_OPS_PER_RUN;
    while (remaining > 0) {
      gpgpu_math.runProgram(compiled, [matrix], result);
      remaining--;
    }
    result.getValues();
    const elapsed = performance.now() - startedAt;
    const avgTime = elapsed / GPU_OPS_PER_RUN;

    // Release the arrays first, then the texture manager, program and context.
    matrix.dispose();
    result.dispose();
    textures.dispose();
    context.deleteProgram(compiled.webGLProgram);
    context.dispose();

    return avgTime;
  }
}
Loading

0 comments on commit 7f87d33

Please sign in to comment.