From 25f8967b4bb813ca496a8ad8893265ea53b8185f Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Tue, 5 Sep 2017 22:30:53 -0400 Subject: [PATCH] Switch shader indexing from float to int (#93) * switch shader indexing from float to int * revert graph_runner_test * self review --- src/math/math.ts | 176 +++++++++++------- src/math/math_gpu.ts | 6 +- src/math/webgl/argminmax_gpu.ts | 9 +- src/math/webgl/concat3d_gpu.ts | 12 +- src/math/webgl/conv_backprop_gpu.ts | 67 ++++--- src/math/webgl/conv_gpu.ts | 33 ++-- src/math/webgl/copy_gpu.ts | 18 +- src/math/webgl/gpgpu_context.ts | 10 +- src/math/webgl/gpgpu_context_test.ts | 22 ++- src/math/webgl/gpgpu_math.ts | 2 +- src/math/webgl/gpgpu_util.ts | 23 ++- src/math/webgl/logsumexp_gpu.ts | 6 +- src/math/webgl/max_pool_backprop_gpu.ts | 36 ++-- src/math/webgl/minmax_gpu.ts | 4 +- src/math/webgl/mulmat_gpu.ts | 7 +- src/math/webgl/mulmat_gpu_test.ts | 14 ++ src/math/webgl/pool_gpu.ts | 47 ++--- src/math/webgl/reducesum_gpu.ts | 2 +- src/math/webgl/resize_bilinear_gpu.ts | 35 ++-- src/math/webgl/shader_compiler.ts | 227 ++++++++++++++---------- src/math/webgl/webgl_util.ts | 15 +- src/test_util.ts | 6 +- src/util.ts | 10 +- 23 files changed, 458 insertions(+), 329 deletions(-) diff --git a/src/math/math.ts b/src/math/math.ts index b599d180d0..2979b0f276 100644 --- a/src/math/math.ts +++ b/src/math/math.ts @@ -19,7 +19,7 @@ import * as copy2d_util from './copy2d_util'; import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray'; -export type ScopeResult = NDArray[]|NDArray|void; +export type ScopeResult = NDArray[] | NDArray | void; export interface LSTMCell { (data: Array2D, c: Array2D, h: Array2D): [Array2D, Array2D]; @@ -162,11 +162,10 @@ export abstract class NDArrayMath { return result; } - private checkForNaN(arr: NDArray): void { - const vals = arr.getValues(); + private checkForNaN(vals: Float32Array, name: string): void { for (let i = 0; i < vals.length; i++) { if (isNaN(vals[i])) { - throw Error('The result NDArray of the last math call has NaNs.'); + throw Error(`The result of the last math.${name} has NaNs.`); } } } @@ -177,9 +176,6 @@ export abstract class NDArrayMath { * @param result The NDArray to track in the current scope. */ track(result: T): T { - if (this.debugMode) { - this.checkForNaN(result); - } if (this.activeScope == null) { if (this.safeMode) { throw new Error( @@ -225,8 +221,31 @@ export abstract class NDArrayMath { `${b.shape} and orientations ${MatrixOrientation[aOrientation]}` + ` and ${MatrixOrientation[bOrientation]} must match.`); - return this.track(this.matMulInternal(a, b, aOrientation, bOrientation)); + return this.executeOp( + 'matMul', () => this.matMulInternal(a, b, aOrientation, bOrientation)); + } + + private executeOp(name: string, f: () => T): T { + let start: number; + if (this.debugMode) { + start = performance.now(); + } + const result = f(); + if (this.debugMode) { + const vals = result.getValues(); + const time = util.rightPad((performance.now() - start) + 'ms', 9); + const paddedName = util.rightPad(name, 25); + const rank = result.rank; + const size = result.size; + const shape = util.rightPad(result.shape + '', 14); + console.log( + `%c${paddedName}\t%c${time}\t%c${rank}D ${shape}\t%c${size}`, + 'font-weight:bold', 'color:red', 'color:blue', 'color: orange'); + this.checkForNaN(vals, name); + } + return this.track(result); } + protected abstract matMulInternal( a: Array2D, b: Array2D, aOrientation: MatrixOrientation, bOrientation: MatrixOrientation): Array2D; @@ -317,7 +336,7 @@ export abstract class NDArrayMath { * @param ndarray The NDArray to clone. */ clone(ndarray: T): T { - return this.track(this.cloneInternal(ndarray)); + return this.executeOp('clone', () => this.cloneInternal(ndarray)); } protected abstract cloneInternal(ndarray: T): T; @@ -347,7 +366,8 @@ export abstract class NDArrayMath { begin[1] + size[1] <= input.shape[1], `Error in slice2D: requested start position ${begin} and size ` + `${size} would overflow input of shape ${input.shape}.`); - return this.track(this.slice2DInternal(input, begin, size)); + return this.executeOp( + 'slice2D', () => this.slice2DInternal(input, begin, size)); } protected abstract slice2DInternal( input: Array2D, begin: [number, number], size: [number, number]): Array2D; @@ -366,7 +386,7 @@ export abstract class NDArrayMath { copy2D( source: Array2D, sourceBegin: [number, number], sourceSize: [number, number], dest: Array2D, destBegin: [number, number], - destSize: [number, number]) { + destSize: [number, number]): void { util.assert( sourceBegin[0] + sourceSize[0] <= source.shape[0] && sourceBegin[1] + sourceSize[1] <= source.shape[1], @@ -381,8 +401,11 @@ export abstract class NDArrayMath { `shape ${dest.shape}.`); copy2d_util.validateShapes(sourceSize, destSize); - return this.copy2DInternal( - source, sourceBegin, sourceSize, dest, destBegin, destSize); + this.executeOp('copy2D', () => { + this.copy2DInternal( + source, sourceBegin, sourceSize, dest, destBegin, destSize); + return dest; + }); } protected abstract copy2DInternal( source: Array2D, sourceBegin: [number, number], @@ -422,7 +445,8 @@ export abstract class NDArrayMath { concat3D(ndarray1: Array3D, ndarray2: Array3D, axis: number): Array3D { concat3d_util.assertConcat3DShapesMatch( ndarray1.shape, ndarray2.shape, axis, 'Error in concat3d: '); - return this.track(this.concat3DInternal(ndarray1, ndarray2, axis)); + return this.executeOp( + 'concat3D', () => this.concat3DInternal(ndarray1, ndarray2, axis)); } protected abstract concat3DInternal( ndarray1: Array3D, ndarray2: Array3D, axis: number): Array3D; @@ -436,7 +460,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray to compute the logSumExp over. */ logSumExp(ndarray: NDArray): Scalar { - return this.track(this.logSumExpInternal(ndarray)); + return this.executeOp('logSumExp', () => this.logSumExpInternal(ndarray)); } protected abstract logSumExpInternal(ndarray: NDArray): Scalar; @@ -445,7 +469,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray to compute the sum over. */ sum(ndarray: NDArray): Scalar { - return this.track(this.sumInternal(ndarray)); + return this.executeOp('sum', () => this.sumInternal(ndarray)); } protected abstract sumInternal(ndarray: NDArray): Scalar; @@ -454,7 +478,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ argMin(ndarray: NDArray): Scalar { - return this.track(this.argMinInternal(ndarray)); + return this.executeOp('argMin', () => this.argMinInternal(ndarray)); } protected abstract argMinInternal(ndarray: NDArray): Scalar; @@ -463,7 +487,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ argMax(ndarray: NDArray): Scalar { - return this.track(this.argMaxInternal(ndarray)); + return this.executeOp('argMax', () => this.argMaxInternal(ndarray)); } protected abstract argMaxInternal(ndarray: NDArray): Scalar; @@ -474,7 +498,8 @@ export abstract class NDArrayMath { */ argMaxEquals(x1: NDArray, x2: NDArray): Scalar { util.assertShapesMatch(x1.shape, x2.shape, 'Error in argMaxEquals: '); - return this.track(this.argMaxEqualsInternal(x1, x2)); + return this.executeOp( + 'argMaxEquals', () => this.argMaxEqualsInternal(x1, x2)); } protected abstract argMaxEqualsInternal(x1: NDArray, x2: NDArray): Scalar; @@ -488,8 +513,11 @@ export abstract class NDArrayMath { k <= ndarray.size, `Error in topK: k value (${k}) must be less than size of input ` + `ndarray, got shape ${ndarray.shape}.`); - const result = this.topKInternal(ndarray, k); - this.track(result.values); + let result: {values: Array1D, indices: Array1D}; + this.executeOp('topK', () => { + result = this.topKInternal(ndarray, k); + return result.values; + }); this.track(result.indices); return result; } @@ -501,7 +529,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ min(ndarray: NDArray): Scalar { - return this.track(this.minInternal(ndarray)); + return this.executeOp('min', () => this.minInternal(ndarray)); } protected abstract minInternal(ndarray: NDArray): Scalar; @@ -510,7 +538,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ max(ndarray: NDArray): Scalar { - return this.track(this.maxInternal(ndarray)); + return this.executeOp('max', () => this.maxInternal(ndarray)); } protected abstract maxInternal(ndarray: NDArray): Scalar; @@ -519,12 +547,14 @@ export abstract class NDArrayMath { * @param x The input vector. */ softmax(x: Array1D): Array1D { - return this.scope(() => { - // Do it in log space for numerical stability. - // exp(X - logSumExp(X)) - const lse = this.logSumExp(x); - const logResult = this.arrayMinusScalar(x, lse); - return this.exp(logResult); + return this.executeOp('softmax', () => { + return this.scope(() => { + // Do it in log space for numerical stability. + // exp(X - logSumExp(X)) + const lse = this.logSumExp(x); + const logResult = this.arrayMinusScalar(x, lse); + return this.exp(logResult); + }); }); } @@ -542,7 +572,7 @@ export abstract class NDArrayMath { a.rank === newDim.length, `Error in switchDim: length of input shape ${a.shape} ` + `must match size of newDim array ${newDim}.`); - return this.track(this.switchDimInternal(a, newDim)); + return this.executeOp('switchDim', () => this.switchDimInternal(a, newDim)); } protected abstract switchDimInternal( a: T, newDim: number[]): T; @@ -591,7 +621,7 @@ export abstract class NDArrayMath { * @param a The input array. */ neg(a: T): T { - return this.track(this.negInternal(a)); + return this.executeOp('neg', () => this.negInternal(a)); } protected abstract negInternal(a: T): T; @@ -604,7 +634,7 @@ export abstract class NDArrayMath { */ add(a: NDArray, b: NDArray): NDArray { util.assertAndGetBroadcastedShape(a.shape, b.shape); - return this.track(this.addInternal(a, b)); + return this.executeOp('add', () => this.addInternal(a, b)); } protected abstract addInternal(a: NDArray, b: NDArray): NDArray; @@ -629,7 +659,7 @@ export abstract class NDArrayMath { */ sub(a: NDArray, b: NDArray): NDArray { util.assertAndGetBroadcastedShape(a.shape, b.shape); - return this.track(this.subInternal(a, b)); + return this.executeOp('sub', () => this.subInternal(a, b)); } protected abstract subInternal(a: NDArray, b: NDArray): NDArray; @@ -654,7 +684,7 @@ export abstract class NDArrayMath { */ multiply(a: NDArray, b: NDArray): NDArray { util.assertAndGetBroadcastedShape(a.shape, b.shape); - return this.track(this.multiplyInternal(a, b)); + return this.executeOp('multiply', () => this.multiplyInternal(a, b)); } protected abstract multiplyInternal(a: T, b: T): T; @@ -686,7 +716,7 @@ export abstract class NDArrayMath { */ divide(a: NDArray, b: NDArray): NDArray { util.assertAndGetBroadcastedShape(a.shape, b.shape); - return this.track(this.divideInternal(a, b)); + return this.executeOp('divide', () => this.divideInternal(a, b)); } protected abstract divideInternal(a: NDArray, b: NDArray): NDArray; @@ -735,7 +765,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ exp(ndarray: T): T { - return this.track(this.expInternal(ndarray)); + return this.executeOp('exp', () => this.expInternal(ndarray)); } protected abstract expInternal(ndarray: T): T; @@ -744,7 +774,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ log(ndarray: T): T { - return this.track(this.logInternal(ndarray)); + return this.executeOp('log', () => this.logInternal(ndarray)); } protected abstract logInternal(ndarray: T): T; @@ -753,7 +783,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ sqrt(ndarray: T): T { - return this.track(this.sqrtInternal(ndarray)); + return this.executeOp('sqrt', () => this.sqrtInternal(ndarray)); } protected abstract sqrtInternal(ndarray: T): T; @@ -762,7 +792,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ relu(ndarray: T): T { - return this.track(this.reluInternal(ndarray)); + return this.executeOp('relu', () => this.reluInternal(ndarray)); } protected abstract reluInternal(ndarray: T): T; @@ -771,7 +801,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ sigmoid(ndarray: T): T { - return this.track(this.sigmoidInternal(ndarray)); + return this.executeOp('sigmoid', () => this.sigmoidInternal(ndarray)); } protected abstract sigmoidInternal(ndarray: T): T; @@ -780,7 +810,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ tanh(ndarray: T): T { - return this.track(this.tanhInternal(ndarray)); + return this.executeOp('tanh', () => this.tanhInternal(ndarray)); } protected abstract tanhInternal(ndarray: T): T; @@ -789,7 +819,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ sin(ndarray: T): T { - return this.track(this.sinInternal(ndarray)); + return this.executeOp('sin', () => this.sinInternal(ndarray)); } protected abstract sinInternal(ndarray: T): T; @@ -799,7 +829,7 @@ export abstract class NDArrayMath { * @param ndarray The input NDArray. */ step(ndarray: T): T { - return this.track(this.stepInternal(ndarray)); + return this.executeOp('step', () => this.stepInternal(ndarray)); } protected abstract stepInternal(ndarray: T): T; @@ -821,7 +851,8 @@ export abstract class NDArrayMath { `NDArray of rank ${c2.rank}.`); util.assertShapesMatch(a.shape, b.shape, 'Error in scaledArrayAdd: '); - return this.track(this.scaledArrayAddInternal(c1, a, c2, b)); + return this.executeOp( + 'scaledArrayAdd', () => this.scaledArrayAddInternal(c1, a, c2, b)); } protected abstract scaledArrayAddInternal( c1: Scalar, a: T, c2: Scalar, b: T): T; @@ -892,7 +923,9 @@ export abstract class NDArrayMath { `input depth for weights ${weights.shape[2]}.`); - return this.track(this.conv2dInternal(x, weights, biases, stride, zeroPad)); + return this.executeOp( + 'conv2d', + () => this.conv2dInternal(x, weights, biases, stride, zeroPad)); } protected abstract conv2dInternal( x: Array3D, weights: Array4D, biases: Array1D|null, stride: number, @@ -931,14 +964,15 @@ export abstract class NDArrayMath { `Error in conv2dBackProp: depth of dy (${dy.shape[2]}) must ` + `match output depth for weights (${weights.shape[3]}).`); - const backpropResult = - this.conv2dBackPropInternal(x, dy, weights, stride, pad); - this.track(backpropResult.db); - this.track(backpropResult.dw); - this.track(backpropResult.dx); - - return backpropResult; + let result: {dx: Array3D, dw: Array4D, db: Array1D}; + this.executeOp('conv2dBackProp', () => { + result = this.conv2dBackPropInternal(x, dy, weights, stride, pad); + return result.dx; + }); + this.track(result.db); + this.track(result.dw); + return result; } protected abstract conv2dBackPropInternal( x: Array3D, dy: Array3D, weights: Array4D, stride: number, @@ -977,8 +1011,9 @@ export abstract class NDArrayMath { `Error in conv2dTranspose: depth of input (${x.shape[2]}) must ` + `match input depth for weights ${weights.shape[3]}.`); - return this.track( - this.conv2dTransposeInternal(x, weights, biases, stride, pad)); + return this.executeOp( + 'conv2dTranspose', + () => this.conv2dTransposeInternal(x, weights, biases, stride, pad)); } protected abstract conv2dTransposeInternal( x: Array3D, weights: Array4D, biases: Array1D|null, stride: number, @@ -996,7 +1031,8 @@ export abstract class NDArrayMath { util.assert( x.rank === 3, 'Error in maxPool: x must be rank 3 but got rank ' + x.rank + '.'); - return this.track(this.maxPoolInternal(x, fSize, stride, pad)); + return this.executeOp( + 'maxPool', () => this.maxPoolInternal(x, fSize, stride, pad)); } protected abstract maxPoolInternal( x: Array3D, fSize: number, stride: number, pad: number): Array3D; @@ -1022,7 +1058,9 @@ export abstract class NDArrayMath { `Error in maxPoolBackprop: x must be rank 3 but got rank ` + `${x.rank}.`); - return this.track(this.maxPoolBackpropInternal(dy, x, fSize, stride, pad)); + return this.executeOp( + 'maxPoolBackprop', + () => this.maxPoolBackpropInternal(dy, x, fSize, stride, pad)); } protected abstract maxPoolBackpropInternal( dy: Array3D, x: Array3D, fSize: number, stride: number, @@ -1040,7 +1078,8 @@ export abstract class NDArrayMath { util.assert( x.rank === 3, `Error in minPool: x must be rank 3 but got rank ${x.rank}.`); - return this.track(this.minPoolInternal(x, fSize, stride, pad)); + return this.executeOp( + 'minPool', () => this.minPoolInternal(x, fSize, stride, pad)); } protected abstract minPoolInternal( x: Array3D, fSize: number, stride: number, pad: number): Array3D; @@ -1057,7 +1096,8 @@ export abstract class NDArrayMath { util.assert( x.rank === 3, `Error in avgPool: x must be rank 3 but got rank ${x.rank}.`); - return this.track(this.avgPoolInternal(x, fSize, stride, pad)); + return this.executeOp( + 'avgPool', () => this.avgPoolInternal(x, fSize, stride, pad)); } protected abstract avgPoolInternal( x: Array3D, fSize: number, stride: number, pad: number): Array3D; @@ -1081,8 +1121,9 @@ export abstract class NDArrayMath { newShape2D.length === 2, `Error in resizeBilinear3D: new shape must 2D, but got shape ` + `${newShape2D}.`); - return this.track( - this.resizeBilinear3DInternal(x, newShape2D, alignCorners)); + return this.executeOp( + 'resizeBilinear3D', + () => this.resizeBilinear3DInternal(x, newShape2D, alignCorners)); } protected abstract resizeBilinear3DInternal( x: Array3D, newShape2D: [number, number], alignCorners: boolean): Array3D; @@ -1128,8 +1169,10 @@ export abstract class NDArrayMath { `but got rank ${offset.rank}.`); } - return this.track(this.batchNormalization3DInternal( - x, mean, variance, varianceEpsilon, scale, offset)); + return this.executeOp( + 'batchNorm3D', + () => this.batchNormalization3DInternal( + x, mean, variance, varianceEpsilon, scale, offset)); } protected abstract batchNormalization3DInternal( x: Array3D, mean: Array3D|Array1D, variance: Array3D|Array1D, @@ -1219,11 +1262,10 @@ export abstract class NDArrayMath { const o = this.slice2D( res, [0, res.shape[1] / 4 * 3], [res.shape[0], res.shape[1] / 4]); - const newC = - this.add( - this.multiplyStrict( - c, this.sigmoid(this.scalarPlusArray(forgetBias, f))), - this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D; + const newC = this.add( + this.multiplyStrict( + c, this.sigmoid(this.scalarPlusArray(forgetBias, f))), + this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D; const newH = this.multiplyStrict(this.tanh(newC), this.sigmoid(o)) as Array2D; diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index dc1be5a28c..7af86a1358 100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -153,10 +153,14 @@ export class NDArrayMathGPU extends NDArrayMath { protected batchNormalization3DInternal( x: Array3D, mean: Array3D|Array1D, variance: Array3D|Array1D, - varianceEpsilon = 0.000001, scale?: Array3D|Array1D, + varianceEpsilon: number|null, scale?: Array3D|Array1D, offset?: Array3D|Array1D): Array3D { const inputs = [x, mean, variance]; + if (varianceEpsilon == null) { + varianceEpsilon = 0.000001; + } + let offsetShape = null; if (offset != null) { offsetShape = offset.shape; diff --git a/src/math/webgl/argminmax_gpu.ts b/src/math/webgl/argminmax_gpu.ts index 7f50b08ab4..876a3174b5 100644 --- a/src/math/webgl/argminmax_gpu.ts +++ b/src/math/webgl/argminmax_gpu.ts @@ -20,11 +20,10 @@ export function getArgMinMaxSnippet( const compOp = (op === 'min') ? '<' : '>'; return ` float getArgMinMax${texName}() { - float bestIndex = 0.0; - float bestValue = get${texName}Flat(0.0); + int bestIndex = 0; + float bestValue = get${texName}Flat(0); - for (int ii = 0; ii < ${size}; ii++) { - float i = float(ii); + for (int i = 0; i < ${size}; i++) { float candidate = get${texName}Flat(i); if (isNaN(candidate)) { return candidate; @@ -34,7 +33,7 @@ export function getArgMinMaxSnippet( bestIndex = i; } } - return bestIndex; + return float(bestIndex); } `; } diff --git a/src/math/webgl/concat3d_gpu.ts b/src/math/webgl/concat3d_gpu.ts index e6c6840c8a..891b9ee30c 100644 --- a/src/math/webgl/concat3d_gpu.ts +++ b/src/math/webgl/concat3d_gpu.ts @@ -32,16 +32,16 @@ export class Concat3DProgram implements GPGPUProgram { concat3d_util.computeConcat3DOutputShape(x1Shape, x2Shape, axis); this.userCode = ` void main() { - vec3 coords = getOutputCoords(); - float yR = coords.x; - float yC = coords.y; - float yD = coords.z; + ivec3 coords = getOutputCoords(); + int yR = coords.x; + int yC = coords.y; + int yD = coords.z; float value = 0.0; - if (${concatAxis} < ${x1Shape[axis]}.0) { + if (${concatAxis} < ${x1Shape[axis]}) { value = getA(yR, yC, yD); } else { - ${concatAxis} -= ${x1Shape[axis]}.0; + ${concatAxis} -= ${x1Shape[axis]}; value = getB(yR, yC, yD); } diff --git a/src/math/webgl/conv_backprop_gpu.ts b/src/math/webgl/conv_backprop_gpu.ts index d01dabb619..77dc4eb45d 100644 --- a/src/math/webgl/conv_backprop_gpu.ts +++ b/src/math/webgl/conv_backprop_gpu.ts @@ -36,28 +36,26 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram { this.params = [stride, zeroPad]; this.userCode = ` void main() { - vec4 coords = getOutputCoords(); - float wR = coords.x; - float wC = coords.y; - float d1 = coords.z; - float d2 = coords.w; + ivec4 coords = getOutputCoords(); + int wR = coords.x; + int wC = coords.y; + int d1 = coords.z; + int d2 = coords.w; // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2). // ? = to be determined. : = across all values in that axis. float dotProd = 0.0; - for (int iyR = 0; iyR < ${yNumRows}; iyR++) { - float yR = float(iyR); - float xR = wR + yR * ${stride}.0 - ${zeroPad}.0; + for (int yR = 0; yR < ${yNumRows}; yR++) { + int xR = wR + yR * ${stride} - ${zeroPad}; - if (xR < 0.0 || xR >= ${xNumRows}.0) { + if (xR < 0 || xR >= ${xNumRows}) { continue; } - for (int iyC = 0; iyC < ${yNumCols}; iyC++) { - float yC = float(iyC); - float xC = wC + yC * ${stride}.0 - ${zeroPad}.0; + for (int yC = 0; yC < ${yNumCols}; yC++) { + int xC = wC + yC * ${stride} - ${zeroPad}; - if (xC < 0.0 || xC >= ${xNumCols}.0) { + if (xC < 0 || xC >= ${xNumCols}) { continue; } @@ -94,42 +92,41 @@ export class Conv2DTransposeProgram implements GPGPUProgram { this.params = [pad, fSize, origStride, hasBias]; this.userCode = ` + const ivec2 pads = ivec2(${pad}, ${pad}); + void main() { - vec3 coords = getOutputCoords(); - float yR = coords.x; - float yC = coords.y; - float d2 = coords.z; + ivec3 coords = getOutputCoords(); + int d2 = coords.z; - vec2 xRCCorner = vec2(yR, yC) - vec2(${pad}.0, ${pad}.0); - float xRCorner = xRCCorner.x; - float xCCorner = xRCCorner.y; + ivec2 xRCCorner = coords.xy - pads; + int xRCorner = xRCCorner.x; + int xCCorner = xRCCorner.y; // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2). // ? = to be determined. : = across all values in that axis. float dotProd = 0.0; - for (int iwR = 0; iwR < ${fSize}; iwR++) { - float wR = float(iwR); - float xR = (xRCorner + wR) / ${origStride}.0; + for (int wR = 0; wR < ${fSize}; wR++) { + float xR = float(xRCorner + wR) / ${origStride}.0; if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) { continue; } + int ixR = int(xR); - float wRPerm = ${fSize}.0 - 1.0 - wR; + int wRPerm = ${fSize} - 1 - wR; - for (int iwC = 0; iwC < ${fSize}; iwC++) { - float wC = float(iwC); - float xC = (xCCorner + wC) / ${origStride}.0; + for (int wC = 0; wC < ${fSize}; wC++) { + float xC = float(xCCorner + wC) / ${origStride}.0; if (xC < 0.0 || xC >= ${xCols}.0 || fract(xC) > 0.0) { continue; } + int ixC = int(xC); - float wCPerm = ${fSize}.0 - 1.0 - wC; + int wCPerm = ${fSize} - 1 - wC; - for (int id1 = 0; id1 < ${origOutputDepth}; id1++) { - float d1 = float(id1); - float xValue = getX(xR, xC, d1); + for (int d1 = 0; d1 < ${origOutputDepth}; d1++) { + float xValue = getX(ixR, ixC, d1); float wValue = getW(wRPerm, wCPerm, d2, d1); dotProd += xValue * wValue; } @@ -153,13 +150,11 @@ export class Conv2DDerBiasProgram implements GPGPUProgram { this.outputShape = [outputDepth]; this.userCode = ` void main() { - float d2 = getOutputCoords(); + int d2 = getOutputCoords(); float derBias = 0.0; - for (int iyR = 0; iyR < ${yNumRows}; iyR++) { - float yR = float(iyR); - for (int iyC = 0; iyC < ${yNumCols}; iyC++) { - float yC = float(iyC); + for (int yR = 0; yR < ${yNumRows}; yR++) { + for (int yC = 0; yC < ${yNumCols}; yC++) { derBias += getDy(yR, yC, d2); } } diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts index ed55deff33..4565206861 100644 --- a/src/math/webgl/conv_gpu.ts +++ b/src/math/webgl/conv_gpu.ts @@ -33,38 +33,35 @@ export class Conv2DProgram implements GPGPUProgram { const xNumRows = xShape[0]; const xNumCols = xShape[1]; this.userCode = ` + const ivec2 strides = ivec2(${stride}, ${stride}); + const ivec2 pads = ivec2(${pad}, ${pad}); + void main() { - vec3 coords = getOutputCoords(); - float yR = coords.x; - float yC = coords.y; - float d2 = coords.z; + ivec3 coords = getOutputCoords(); + int d2 = coords.z; - vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}.0, ${stride}.0) - - vec2(${pad}.0, ${pad}.0); - float xRCorner = xRCCorner.x; - float xCCorner = xRCCorner.y; + ivec2 xRCCorner = coords.xy * strides - pads; + int xRCorner = xRCCorner.x; + int xCCorner = xRCCorner.y; // Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2). // ? = to be determined. : = across all values in that axis. float dotProd = 0.0; - for (int iwR = 0; iwR < ${fieldSize}; iwR++) { - float wR = float(iwR); - float xR = xRCorner + wR; + for (int wR = 0; wR < ${fieldSize}; wR++) { + int xR = xRCorner + wR; - if (xR < 0.0 || xR >= ${xNumRows}.0) { + if (xR < 0 || xR >= ${xNumRows}) { continue; } - for (int iwC = 0; iwC < ${fieldSize}; iwC++) { - float wC = float(iwC); - float xC = xCCorner + wC; + for (int wC = 0; wC < ${fieldSize}; wC++) { + int xC = xCCorner + wC; - if (xC < 0.0 || xC >= ${xNumCols}.0) { + if (xC < 0 || xC >= ${xNumCols}) { continue; } - for (int id1 = 0; id1 < ${inputDepth}; id1++) { - float d1 = float(id1); + for (int d1 = 0; d1 < ${inputDepth}; d1++) { float xValue = getX(xR, xC, d1); float wValue = getW(wR, wC, d1, d2); dotProd += xValue * wValue; diff --git a/src/math/webgl/copy_gpu.ts b/src/math/webgl/copy_gpu.ts index 1ea1418c6b..51862bc9d6 100644 --- a/src/math/webgl/copy_gpu.ts +++ b/src/math/webgl/copy_gpu.ts @@ -26,16 +26,14 @@ export class Copy2DProgram implements GPGPUProgram { this.outputShape = null; this.params = [srcNumCols, destNumCols]; this.userCode = ` - uniform vec2 sourceStart; - uniform vec2 destStart; + uniform ivec2 sourceStart; + uniform ivec2 destStart; void main() { - vec2 destCoords = getOutputCoords() - destStart; - float index = dot(destCoords, vec2(${destNumCols}.0, 1.0)); - vec2 sourceCoords = sourceStart + vec2( - floor(index / ${srcNumCols}.0), - mod(index, ${srcNumCols}.0) - ); + ivec2 destCoords = getOutputCoords() - destStart; + int index = destCoords.x * ${destNumCols} + destCoords.y; + int r = index / ${srcNumCols}; + ivec2 sourceCoords = sourceStart + ivec2(r, index - r * ${srcNumCols}); setOutput(getSource(sourceCoords.x, sourceCoords.y)); } `; @@ -48,9 +46,9 @@ export class Copy2DProgram implements GPGPUProgram { gpgpu.setOutputMatrixWriteRegion( destStart[0], destSize[0], destStart[1], destSize[1]); const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStart'); - gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[0], sourceStart[1]); + gpgpu.gl.uniform2i(sourceStartCRLoc, sourceStart[0], sourceStart[1]); const destStartCRLoc = gpgpu.getUniformLocation('destStart'); - gpgpu.gl.uniform2f(destStartCRLoc, destStart[0], destStart[1]); + gpgpu.gl.uniform2i(destStartCRLoc, destStart[0], destStart[1]); }; } } diff --git a/src/math/webgl/gpgpu_context.ts b/src/math/webgl/gpgpu_context.ts index d8e0b803cc..db691dc2c4 100644 --- a/src/math/webgl/gpgpu_context.ts +++ b/src/math/webgl/gpgpu_context.ts @@ -43,14 +43,15 @@ export class GPGPUContext { if (!webgl_util.isWebGL2Enabled()) { this.textureFloatExtension = webgl_util.getExtensionOrThrow(this.gl, 'OES_texture_float'); + this.colorBufferFloatExtension = + this.gl.getExtension('WEBGL_color_buffer_float'); } else { this.colorBufferFloatExtension = webgl_util.getExtensionOrThrow(this.gl, 'EXT_color_buffer_float'); } - this.loseContextExtension = - webgl_util.getExtensionOrThrow(this.gl, 'WEBGL_lose_context') as - WebGLLoseContextExtension; + this.loseContextExtension = webgl_util.getExtensionOrThrow( + this.gl, 'WEBGL_lose_context') as WebGLLoseContextExtension; this.vertexBuffer = gpgpu_util.createVertexBuffer(this.gl); this.indexBuffer = gpgpu_util.createIndexBuffer(this.gl); this.framebuffer = webgl_util.createFramebuffer(this.gl); @@ -258,6 +259,9 @@ export class GPGPUContext { this.throwIfDisposed(); webgl_util.bindColorTextureToFramebuffer( this.gl, texture, this.framebuffer); + if (this.autoDebugValidate) { + webgl_util.validateFramebuffer(this.gl); + } const result = downloadAndDecode(); if (this.outputTexture != null) { webgl_util.bindColorTextureToFramebuffer( diff --git a/src/math/webgl/gpgpu_context_test.ts b/src/math/webgl/gpgpu_context_test.ts index efc802fba4..3e9ae712bb 100644 --- a/src/math/webgl/gpgpu_context_test.ts +++ b/src/math/webgl/gpgpu_context_test.ts @@ -41,12 +41,21 @@ describe('GPGPUContext downloadMatrixFromTexture WebGL 2.0', () => { expect(result[0]).toBeCloseTo(0.123); }); - it('returns matrix that was uploaded', () => { + it('returns 1x1 matrix that was uploaded', () => { gpgpu.uploadMatrixToTexture(texture, 1, 1, new Float32Array([1.234])); const result = gpgpu.downloadMatrixFromTexture(texture, 1, 1); expect(result[0]).toBeCloseTo(1.234); }); + it('returns 2x2 matrix that was uploaded', () => { + const texture2 = gpgpu.createMatrixTexture(2, 2); + gpgpu.uploadMatrixToTexture( + texture2, 2, 2, new Float32Array([1.234, 2, 3, 4])); + const result = gpgpu.downloadMatrixFromTexture(texture2, 2, 2); + expect(result).toEqual(new Float32Array([1.234, 2, 3, 4])); + gpgpu.deleteMatrixTexture(texture2); + }); + it('uses texture parameter', () => { const texture2: WebGLTexture = gpgpu.createMatrixTexture(1, 1); gpgpu.uploadMatrixToTexture(texture, 1, 1, new Float32Array([1])); @@ -84,12 +93,21 @@ describe('GPGPUContext downloadMatrixFromTexture WebGL 1.0', () => { expect(result[0]).toBeCloseTo(0.123); }); - it('returns matrix that was uploaded', () => { + it('returns 1x1 matrix that was uploaded', () => { gpgpu.uploadMatrixToTexture(texture, 1, 1, new Float32Array([1.234])); const result = gpgpu.downloadMatrixFromTexture(texture, 1, 1); expect(result[0]).toBeCloseTo(1.234); }); + it('returns 2x2 matrix that was uploaded', () => { + const texture2 = gpgpu.createMatrixTexture(2, 2); + gpgpu.uploadMatrixToTexture( + texture2, 2, 2, new Float32Array([1.234, 2, 3, 4])); + const result = gpgpu.downloadMatrixFromTexture(texture2, 2, 2); + expect(result).toEqual(new Float32Array([1.234, 2, 3, 4])); + gpgpu.deleteMatrixTexture(texture2); + }); + it('uses texture parameter', () => { const texture2: WebGLTexture = gpgpu.createMatrixTexture(1, 1); gpgpu.uploadMatrixToTexture(texture, 1, 1, new Float32Array([1])); diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts index 5784c18a3a..ea643dc381 100644 --- a/src/math/webgl/gpgpu_math.ts +++ b/src/math/webgl/gpgpu_math.ts @@ -115,7 +115,7 @@ export function makeShaderKey( const params = program.params; const keyStart = inputs.concat(output).map(x => x.shape + '_' + x.getTextureShapeRC()); - const keyEnd = params.map(p => p.toString()); + const keyEnd = params.map(String); let key = [program.constructor.name]; key.push((program.supportsBroadcasting === true).toString()); key = key.concat(keyStart, keyEnd); diff --git a/src/math/webgl/gpgpu_util.ts b/src/math/webgl/gpgpu_util.ts index 1b7960136d..13f40d8dbd 100644 --- a/src/math/webgl/gpgpu_util.ts +++ b/src/math/webgl/gpgpu_util.ts @@ -90,7 +90,11 @@ function getTextureInternalFormat( function getTextureFormat( gl: WebGLRenderingContext, numChannels: number): number { - if (webgl_util.isWebGL2Enabled() && numChannels === 1) { + if (webgl_util.isWebGL2Enabled()) { + if (numChannels === 4) { + // tslint:disable-next-line:no-any + return (gl as any).RGBA; + } // tslint:disable-next-line:no-any return (gl as any).RED; } @@ -206,12 +210,17 @@ export function uploadMatrixToTexture( const channelsPerTexture = numChannels === 1 ? webgl_util.getChannelsPerTexture() : numChannels; - const unpackedArray = - new Float32Array(tex_util.getUnpackedArraySizeFromMatrixSize( - matrix.length, channelsPerTexture)); - tex_util.encodeMatrixToUnpackedArray( - matrix, unpackedArray, channelsPerTexture); - + let unpackedArray: Float32Array; + if (channelsPerTexture === 1) { + // No need to allocate a temporary array. + unpackedArray = matrix; + } else { + unpackedArray = + new Float32Array(tex_util.getUnpackedArraySizeFromMatrixSize( + matrix.length, channelsPerTexture)); + tex_util.encodeMatrixToUnpackedArray( + matrix, unpackedArray, channelsPerTexture); + } uploadDataToTexture(gl, texture, w, h, unpackedArray, numChannels); } diff --git a/src/math/webgl/logsumexp_gpu.ts b/src/math/webgl/logsumexp_gpu.ts index d6e6861be2..703268275d 100644 --- a/src/math/webgl/logsumexp_gpu.ts +++ b/src/math/webgl/logsumexp_gpu.ts @@ -24,14 +24,14 @@ export class LogSumExpProgram implements GPGPUProgram { constructor(aSize: number) { this.userCode = ` void main() { - float aMax = getAFlat(0.0); + float aMax = getAFlat(0); for (int i = 0; i < ${aSize}; i++) { - aMax = max(aMax, getAFlat(float(i))); + aMax = max(aMax, getAFlat(i)); } float expSum = 0.0; for (int i = 0; i < ${aSize}; i++) { - expSum += exp(getAFlat(float(i)) - aMax); + expSum += exp(getAFlat(i) - aMax); } setOutput(aMax + log(expSum)); diff --git a/src/math/webgl/max_pool_backprop_gpu.ts b/src/math/webgl/max_pool_backprop_gpu.ts index 66a97db9ac..083e2d7399 100644 --- a/src/math/webgl/max_pool_backprop_gpu.ts +++ b/src/math/webgl/max_pool_backprop_gpu.ts @@ -37,43 +37,43 @@ export class MaxPool2DBackpropProgram implements GPGPUProgram { [dilatedDyRC[0], dilatedDyRC[1], dyShape[2]], fSize, dyShape[2], 1, pad); + const lastIndex = fSize * fSize - 1; this.userCode = ` + const ivec2 pads = ivec2(${pad}, ${pad}); + void main() { - vec3 coords = getOutputCoords(); - float dxR = coords.x; - float dxC = coords.y; - float d = coords.z; + ivec3 coords = getOutputCoords(); + int d = coords.z; - vec2 dyRCCorner = vec2(dxR, dxC) - vec2(${pad}.0, ${pad}.0); - float dyRCorner = dyRCCorner.x; - float dyCCorner = dyRCCorner.y; + ivec2 dyRCCorner = coords.xy - pads; + int dyRCorner = dyRCCorner.x; + int dyCCorner = dyRCCorner.y; - // Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(yR, dxC, d). + // Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d). // ? = to be determined. : = across all values in that axis. float dotProd = 0.0; - for (int iwR = 0; iwR < ${fSize}; iwR++) { - float wR = float(iwR); - float dyR = (dyRCorner + wR) / ${origStride}.0; + for (int wR = 0; wR < ${fSize}; wR++) { + float dyR = float(dyRCorner + wR) / ${origStride}.0; if (dyR < 0.0 || dyR >= ${dyRows}.0 || fract(dyR) > 0.0) { continue; } + int idyR = int(dyR); - for (int iwC = 0; iwC < ${fSize}; iwC++) { - float wC = float(iwC); - float dyC = (dyCCorner + wC) / ${origStride}.0; + for (int wC = 0; wC < ${fSize}; wC++) { + float dyC = float(dyCCorner + wC) / ${origStride}.0; if (dyC < 0.0 || dyC >= ${dyCols}.0 || fract(dyC) > 0.0) { continue; } + int idyC = int(dyC); - float dyValue = getDy(dyR, dyC, d); - float maxPosValue = - ${fSize * fSize - 1}.0 - getMaxPos(dyR, dyC, d); + float dyValue = getDy(idyR, idyC, d); + int maxPosValue = ${lastIndex} - int(getMaxPos(idyR, idyC, d)); // Get the current value, check it against the value from the // position matrix. - float curPosValue = wR * ${fSize}.0 + wC; + int curPosValue = wR * ${fSize} + wC; float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0); dotProd += dyValue * mask; diff --git a/src/math/webgl/minmax_gpu.ts b/src/math/webgl/minmax_gpu.ts index d10a1f1021..88bcd998d5 100644 --- a/src/math/webgl/minmax_gpu.ts +++ b/src/math/webgl/minmax_gpu.ts @@ -25,9 +25,9 @@ export class MinMaxProgram implements GPGPUProgram { this.params = [opType]; this.userCode = ` void main() { - float value = getAFlat(0.0); + float value = getAFlat(0); for (int i = 0; i < ${aSize}; i++) { - float candidate = getAFlat(float(i)); + float candidate = getAFlat(i); if (isNaN(candidate)) { setOutput(candidate); return; diff --git a/src/math/webgl/mulmat_gpu.ts b/src/math/webgl/mulmat_gpu.ts index 366991a24a..345641fca9 100644 --- a/src/math/webgl/mulmat_gpu.ts +++ b/src/math/webgl/mulmat_gpu.ts @@ -44,10 +44,9 @@ export class MatMulProgram implements GPGPUProgram { this.userCode = ` const int sharedDim = ${sharedDim}; - float dotARowBCol(float aRow, float bCol) { + float dotARowBCol(int aRow, int bCol) { float result = 0.0; - for (int ii = 0; ii < sharedDim; ii++) { - float i = float(ii); + for (int i = 0; i < sharedDim; i++) { float a = getMatrixA(${aSnippet}); float b = getMatrixB(${bSnippet}); result += (a * b); @@ -56,7 +55,7 @@ export class MatMulProgram implements GPGPUProgram { } void main() { - vec2 resRC = getOutputCoords(); + ivec2 resRC = getOutputCoords(); setOutput(dotARowBCol(resRC.x, resRC.y)); } `; diff --git a/src/math/webgl/mulmat_gpu_test.ts b/src/math/webgl/mulmat_gpu_test.ts index c39526290b..651183af31 100644 --- a/src/math/webgl/mulmat_gpu_test.ts +++ b/src/math/webgl/mulmat_gpu_test.ts @@ -331,6 +331,20 @@ describe('mulmat_gpu (transposed versions)', () => { }); }); +describe('mulmat_gpu huge matrix', () => { + it('vector times matrix', () => { + const sharedDim = 1000; + const outDim = 50000; + const a = test_util.randomArrayInRange(sharedDim, -1, 1); + const matrix = test_util.randomArrayInRange(sharedDim * outDim, -1, 1); + const result = uploadMultiplyMatrixDownload( + a, 1, sharedDim, matrix, sharedDim, outDim); + const cpuResult = + test_util.cpuMultiplyMatrix(a, 1, sharedDim, matrix, sharedDim, outDim); + test_util.expectArraysClose(result, cpuResult, 1e-4); + }); +}); + export function uploadMultiplyMatrixDownload( a: Float32Array, aNumRows: number, aNumCols: number, b: Float32Array, bNumRows: number, bNumCols: number, diff --git a/src/math/webgl/pool_gpu.ts b/src/math/webgl/pool_gpu.ts index f1f6dfacb0..fd35f1cd39 100644 --- a/src/math/webgl/pool_gpu.ts +++ b/src/math/webgl/pool_gpu.ts @@ -31,48 +31,49 @@ export class Pool2DProgram implements GPGPUProgram { let returnValue = 'minMaxValue'; if (computePositions) { - returnValue = 'minMaxPosition'; + returnValue = 'float(minMaxPosition)'; } else if (poolType === 'avg') { returnValue = `avgValue / ${fSize * fSize}.0`; } - const xRowsLimit = xShape[0] - 0.5; - const xColsLimit = xShape[1] - 0.5; + const xRowsLimit = xShape[0]; + const xColsLimit = xShape[1]; this.params = [stride, pad, fSize, poolType, computePositions]; this.outputShape = conv_util.computeOutputShape3D(xShape, fSize, xShape[2], stride, pad); + const isAvgPool = poolType === 'avg'; + const compareOp = poolType === 'min' ? '<=' : '>='; + this.userCode = ` + const ivec2 strides = ivec2(${stride}, ${stride}); + const ivec2 pads = ivec2(${pad}, ${pad}); + void main() { - vec3 coords = getOutputCoords(); - float yR = coords.x; - float yC = coords.y; - float d = coords.z; + ivec3 coords = getOutputCoords(); + int d = coords.z; - vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}.0, ${stride}.0) - - vec2(${pad}.0, ${pad}.0); - float xRCorner = xRCCorner.x; - float xCCorner = xRCCorner.y; + ivec2 xRCCorner = coords.xy * strides - pads; + int xRCorner = xRCCorner.x; + int xCCorner = xRCCorner.y; // max/min x(?, ?, d) to get y(yR, yC, d). // ? = to be determined float minMaxValue = 0.0; float minMaxValueFound = 0.0; - float minMaxPosition = 0.0; + int minMaxPosition = 0; float avgValue = 0.0; - for (int iwR = 0; iwR < ${fSize}; iwR++) { - float wR = float(iwR); - float xR = xRCorner + wR; + for (int wR = 0; wR < ${fSize}; wR++) { + int xR = xRCorner + wR; - if (xR < 0.0 || xR > ${xRowsLimit}) { + if (xR < 0 || xR >= ${xRowsLimit}) { continue; } - for (int iwC = 0; iwC < ${fSize}; iwC++) { - float wC = float(iwC); - float xC = xCCorner + wC; + for (int wC = 0; wC < ${fSize}; wC++) { + int xC = xCCorner + wC; - if (xC < 0.0 || xC > ${xColsLimit}) { + if (xC < 0 || xC >= ${xColsLimit}) { continue; } @@ -83,18 +84,18 @@ export class Pool2DProgram implements GPGPUProgram { return; } - if (${poolType === 'avg'}) { + if (${isAvgPool}) { avgValue += value; } else { // If a min / max value has already been found, use it. If not, // use the current value. float currMinMaxValue = mix( value, minMaxValue, minMaxValueFound); - if (value ${poolType === 'min' ? '<=' : '>='} currMinMaxValue) { + if (value ${compareOp} currMinMaxValue) { minMaxValue = value; minMaxValueFound = 1.0; if (${computePositions}) { - minMaxPosition = wR * ${fSize}.0 + wC; + minMaxPosition = wR * ${fSize} + wC; } } } diff --git a/src/math/webgl/reducesum_gpu.ts b/src/math/webgl/reducesum_gpu.ts index 54d19b6dbf..8cbb76c44b 100644 --- a/src/math/webgl/reducesum_gpu.ts +++ b/src/math/webgl/reducesum_gpu.ts @@ -26,7 +26,7 @@ export class ReduceSumProgram implements GPGPUProgram { void main() { float sum = 0.0; for (int i = 0; i < ${aSize}; i++) { - sum += getAFlat(float(i)); + sum += getAFlat(i); } setOutput(sum); } diff --git a/src/math/webgl/resize_bilinear_gpu.ts b/src/math/webgl/resize_bilinear_gpu.ts index 9ffb6707f7..09decbc8a2 100644 --- a/src/math/webgl/resize_bilinear_gpu.ts +++ b/src/math/webgl/resize_bilinear_gpu.ts @@ -38,32 +38,35 @@ export class ResizeBilinear3DProgram implements GPGPUProgram { this.outputShape; this.userCode = ` const vec2 effectiveInputOverOutputRatioRC = vec2( - ${effectiveInputShape[0] / effectiveOutputShape[0]}, - ${effectiveInputShape[1] / effectiveOutputShape[1]}); + ${effectiveInputShape[0] / + effectiveOutputShape[0]}, + ${effectiveInputShape[1] / + effectiveOutputShape[1]}); const vec2 inputShapeRC = vec2(${inputShape[0]}.0, ${inputShape[1]}.0); void main() { - vec3 coords = getOutputCoords(); - vec2 yRC = coords.xy; - float d = coords.z; + ivec3 coords = getOutputCoords(); + ivec2 yRC = coords.xy; + int d = coords.z; // Fractional source index. - vec2 sourceFracIndexRC = yRC * effectiveInputOverOutputRatioRC; + vec2 sourceFracIndexRC = vec2(yRC) * effectiveInputOverOutputRatioRC; // Compute the four integer indices. - vec2 sourceFloorRC = floor(sourceFracIndexRC); - vec2 sourceCeilRC = min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)); + ivec2 sourceFloorRC = ivec2(sourceFracIndexRC); + ivec2 sourceCeilRC = ivec2( + min(inputShapeRC - 1.0, ceil(sourceFracIndexRC))); - float topLeft = getA(sourceFloorRC[0], sourceFloorRC[1], d); - float bottomLeft = getA(sourceCeilRC[0], sourceFloorRC[1], d); - float topRight = getA(sourceFloorRC[0], sourceCeilRC[1], d); - float bottomRight = getA(sourceCeilRC[0], sourceCeilRC[1], d); + float topLeft = getA(sourceFloorRC.x, sourceFloorRC.y, d); + float bottomLeft = getA(sourceCeilRC.x, sourceFloorRC.y, d); + float topRight = getA(sourceFloorRC.x, sourceCeilRC.y, d); + float bottomRight = getA(sourceCeilRC.x, sourceCeilRC.y, d); - vec2 fracRC = sourceFracIndexRC - sourceFloorRC; + vec2 fracRC = sourceFracIndexRC - vec2(sourceFloorRC); - float top = topLeft + (topRight - topLeft) * fracRC[1]; - float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC[1]; - float newValue = top + (bottom - top) * fracRC[0]; + float top = topLeft + (topRight - topLeft) * fracRC.y; + float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC.y; + float newValue = top + (bottom - top) * fracRC.x; setOutput(newValue); } diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts index a2d5a9a962..728c47a361 100644 --- a/src/math/webgl/shader_compiler.ts +++ b/src/math/webgl/shader_compiler.ts @@ -109,41 +109,39 @@ function getOutputSamplingSnippet( } const SAMPLE_1D_SNIPPET = ` -vec2 UVfrom1D(float texNumR, float texNumC, float index) { - float texR = floor(index / texNumC); - float texC = mod(index, texNumC); +vec2 UVfrom1D(int texNumR, int texNumC, int index) { + int texR = index / texNumC; + int texC = index - texR * texNumC; return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); } `; const SAMPLE_2D_SNIPPET = ` -vec2 UVfrom2D(float texNumR, float texNumC, float numC, float row, - float col) { - float index = dot(vec2(row, col), vec2(numC, 1.0)); - float texR = floor(index / texNumC); - float texC = mod(index, texNumC); +vec2 UVfrom2D(int texNumR, int texNumC, int numC, int row, int col) { + int index = row * numC + col; + int texR = index / texNumC; + int texC = index - texR * texNumC; return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); } `; const SAMPLE_3D_SNIPPET = ` -vec2 UVfrom3D(float texNumR, float texNumC, float stride0, - float stride1, float row, float col, float depth) { - float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0)); - float texR = floor(index / texNumC); - float texC = mod(index, texNumC); +vec2 UVfrom3D(int texNumR, int texNumC, int stride0, + int stride1, int row, int col, int depth) { + int index = row * stride0 + col * stride1 + depth; + int texR = index / texNumC; + int texC = index - texR * texNumC; return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); } `; const SAMPLE_4D_SNIPPET = ` -vec2 UVfrom4D(float texNumR, float texNumC, float stride0, - float stride1, float stride2, float row, float col, float depth, - float depth2) { - float index = dot(vec4(row, col, depth, depth2), - vec4(stride0, stride1, stride2, 1.0)); - float texR = floor(index / texNumC); - float texC = mod(index, texNumC); +vec2 UVfrom4D(int texNumR, int texNumC, int stride0, + int stride1, int stride2, int row, int col, int depth, + int depth2) { + int index = row * stride0 + col * stride1 + depth * stride2 + depth2; + int texR = index / texNumC; + int texC = index - texR * texNumC; return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); } `; @@ -174,22 +172,22 @@ function getOutput1DCoords( shape: [number], texShape: [number, number]): string { if (texShape[0] === 1) { return ` - float getOutputCoords() { - return floor(gl_FragCoord.x); + int getOutputCoords() { + return int(gl_FragCoord.x); } `; } if (texShape[1] === 1) { return ` - float getOutputCoords() { - return floor(gl_FragCoord.y); + int getOutputCoords() { + return int(gl_FragCoord.y); } `; } return ` - float getOutputCoords() { - vec2 resTexRC = floor(gl_FragCoord.yx); - return dot(resTexRC, vec2(${texShape[1]}.0, 1.0)); + int getOutputCoords() { + ivec2 resTexRC = ivec2(gl_FragCoord.yx); + return resTexRC.x * ${texShape[1]} + resTexRC.y; } `; } @@ -199,14 +197,14 @@ function getOutput3DCoords( const stride0 = shape[1] * shape[2]; const stride1 = shape[2]; return ` - vec3 getOutputCoords() { - vec2 resTexRC = floor(gl_FragCoord.yx); - float index = dot(resTexRC, vec2(${texShape[1]}.0, 1.0)); - float r = floor(index / ${stride0}.0); - index -= r * ${stride0}.0; - float c = floor(index / ${stride1}.0); - float d = mod(index, ${stride1}.0); - return vec3(r, c, d); + ivec3 getOutputCoords() { + ivec2 resTexRC = ivec2(gl_FragCoord.yx); + int index = resTexRC.x * ${texShape[1]} + resTexRC.y; + int r = index / ${stride0}; + index -= r * ${stride0}; + int c = index / ${stride1}; + int d = index - c * ${stride1}; + return ivec3(r, c, d); } `; } @@ -218,20 +216,20 @@ function getOutput4DCoords( const stride1 = shape[2] * stride2; const stride0 = shape[1] * stride1; return ` - vec4 getOutputCoords() { - vec2 resTexRC = floor(gl_FragCoord.yx); - float index = dot(resTexRC, vec2(${texShape[1]}.0, 1.0)); + ivec4 getOutputCoords() { + ivec2 resTexRC = ivec2(gl_FragCoord.yx); + int index = resTexRC.x * ${texShape[1]} + resTexRC.y; - float r = floor(index / ${stride0}.0); - index -= r * ${stride0}.0; + int r = index / ${stride0}; + index -= r * ${stride0}; - float c = floor(index / ${stride1}.0); - index -= c * ${stride1}.0; + int c = index / ${stride1}; + index -= c * ${stride1}; - float d = floor(index / ${stride2}.0); - float d2 = mod(index, ${stride2}.0); + int d = index / ${stride2}; + int d2 = index - d * ${stride2}; - return vec4(r, c, d, d2); + return ivec4(r, c, d, d2); } `; } @@ -240,18 +238,36 @@ function getOutput2DCoords( shape: [number, number], texShape: [number, number]): string { if (util.arraysEqual(shape, texShape)) { return ` - vec2 getOutputCoords() { - return floor(gl_FragCoord.yx); + ivec2 getOutputCoords() { + return ivec2(gl_FragCoord.yx); + } + `; + } + if (shape[1] === 1) { + return ` + ivec2 getOutputCoords() { + ivec2 resTexRC = ivec2(gl_FragCoord.yx); + int index = resTexRC.x * ${texShape[1]} + resTexRC.y; + return ivec2(index, 0); + } + `; + } + if (shape[0] === 1) { + return ` + ivec2 getOutputCoords() { + ivec2 resTexRC = ivec2(gl_FragCoord.yx); + int index = resTexRC.x * ${texShape[1]} + resTexRC.y; + return ivec2(0, index); } `; } return ` - vec2 getOutputCoords() { - vec2 resTexRC = floor(gl_FragCoord.yx); - float index = dot(resTexRC, vec2(${texShape[1]}.0, 1.0)); - float r = floor(index / ${shape[1]}.0); - float c = mod(index, ${shape[1]}.0); - return vec2(r, c); + ivec2 getOutputCoords() { + ivec2 resTexRC = ivec2(gl_FragCoord.yx); + int index = resTexRC.x * ${texShape[1]} + resTexRC.y; + int r = index / ${shape[1]}; + int c = index - r * ${shape[1]}; + return ivec2(r, c); } `; } @@ -271,30 +287,30 @@ function getSampler1D(texName: string, texShape: [number, number]): string { const tC = texShape[1]; if (texShape[0] === 1 && texShape[1] === 1) { return ` - float ${funcName}(float index) { + float ${funcName}(int index) { return sample(${texName}, halfCR); } `; } if (texShape[1] === 1) { return ` - float ${funcName}(float index) { - vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0); + float ${funcName}(int index) { + vec2 uv = vec2(0.5, (float(index) + 0.5) / ${tR}.0); return sample(${texName}, uv); } `; } if (texShape[0] === 1) { return ` - float ${funcName}(float index) { - vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5); + float ${funcName}(int index) { + vec2 uv = vec2((float(index) + 0.5) / ${tC}.0, 0.5); return sample(${texName}, uv); } `; } return ` - float ${funcName}(float index) { - vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index); + float ${funcName}(int index) { + vec2 uv = UVfrom1D(${tR}, ${tC}, index); return sample(${texName}, uv); } `; @@ -310,18 +326,17 @@ function getSampler3D( const stride1 = shape[2]; if (tC === stride0) { return ` - float ${funcName}(float row, float col, float depth) { - float texR = row; - float texC = dot(vec2(col, depth), vec2(${stride1}, 1.0)); + float ${funcName}(int row, int col, int depth) { + int texR = row; + int texC = col * ${stride1} + depth; vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tC}.0, ${tR}.0); return sample(${texName}, uv); } `; } return ` - float ${funcName}(float row, float col, float depth) { - vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row, - col, depth); + float ${funcName}(int row, int col, int depth) { + vec2 uv = UVfrom3D(${tR}, ${tC}, ${stride0}, ${stride1}, row, col, depth); return sample(${texName}, uv); } `; @@ -339,19 +354,18 @@ function getSampler4D( if (tC === stride0) { return ` - float ${funcName}(float row, float col, float depth, float depth2) { - float texR = row; - float texC = dot(vec3(col, depth, depth2), - vec3(${stride1}.0, ${stride2}.0, 1.0)); + float ${funcName}(int row, int col, int depth, int depth2) { + int texR = row; + int texC = col * ${stride1} + depth * ${stride2} + depth2; vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tC}.0, ${tR}.0); return sample(${texName}, uv); } `; } return ` - float ${funcName}(float row, float col, float depth, float depth2) { - vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, - ${stride2}.0, row, col, depth, depth2); + float ${funcName}(int row, int col, int depth, int depth2) { + vec2 uv = UVfrom4D(${tR}, ${tC}, ${stride0}, ${stride1}, ${stride2}, + row, col, depth, depth2); return sample(${texName}, uv); } `; @@ -365,33 +379,49 @@ function getSampler2D( const tC = texShape[1]; if (util.arraysEqual(shape, texShape)) { return ` - float ${funcName}(float row, float col) { + float ${funcName}(int row, int col) { vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0); return sample(${texName}, uv); } `; } if (tC === 1) { + if (shape[0] === 1) { + return ` + float ${funcName}(int row, int col) { + vec2 uv = vec2(0.5, (float(col) + 0.5) / ${tR}.0); + return sample(${texName}, uv); + } + `; + } + if (shape[1] === 1) { + return ` + float ${funcName}(int row, int col) { + vec2 uv = vec2(0.5, (float(row) + 0.5) / ${tR}.0); + return sample(${texName}, uv); + } + `; + } return ` - float ${funcName}(float row, float col) { - float index = dot(vec2(row, col), vec2(${shape[1]}.0, 1.0)); - vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0); + float ${funcName}(int row, int col) { + int index = row * ${shape[1]} + col; + vec2 uv = vec2(0.5, (float(index) + 0.5) / ${tR}.0); return sample(${texName}, uv); } `; } if (tR === 1) { return ` - float ${funcName}(float row, float col) { - float index = dot(vec2(row, col), vec2(${shape[1]}.0, 1.0)); - vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5); + float ${funcName}(int row, int col) { + int index = row * ${shape[1]} + col; + vec2 uv = vec2((float(index) + 0.5) / ${tC}.0, 0.5); return sample(${texName}, uv); } `; } return ` - float ${funcName}(float row, float col) { - vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col); + float ${funcName}(int row, int col) { + vec2 uv = UVfrom2D(${tR}, ${tC}, ${shape[1]}, row, col); return sample(${texName}, uv); } `; @@ -404,31 +434,31 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string { const tNumC = texShape[1]; if (tNumC === 1 && tNumR === 1) { return ` - float ${funcName}(float index) { + float ${funcName}(int index) { return sample(${texName}, halfCR); } `; } if (tNumC === 1) { return ` - float ${funcName}(float index) { - vec2 uv = vec2(0.5, (index + 0.5) / ${tNumR}.0); + float ${funcName}(int index) { + vec2 uv = vec2(0.5, (float(index) + 0.5) / ${tNumR}.0); return sample(${texName}, uv); } `; } if (tNumR === 1) { return ` - float ${funcName}(float index) { - vec2 uv = vec2((index + 0.5) / ${tNumC}.0, 0.5); + float ${funcName}(int index) { + vec2 uv = vec2((float(index) + 0.5) / ${tNumC}.0, 0.5); return sample(${texName}, uv); } `; } return ` - float ${funcName}(float index) { - float texR = floor(index / ${tNumC}.0); - float texC = mod(index, ${tNumC}.0); + float ${funcName}(int index) { + int texR = index / ${tNumC}; + int texC = index - texR * ${tNumC}; vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tNumC}.0, ${tNumR}.0); return sample(${texName}, uv); } @@ -448,15 +478,20 @@ function getSamplerAtOutputCoords( `; } const inSize = util.sizeFromShape(inTexShape); - const broadcastSnippet = broadcast ? `index = mod(index, ${inSize}.0);` : ''; - + let broadcastSnippet = ''; + if (broadcast) { + broadcastSnippet = ` + int mainPart = index / ${inSize}; + index -= mainPart * ${inSize}; + `; + } return ` float ${funcName}() { - vec2 resTexRC = floor(gl_FragCoord.yx); - float index = dot(resTexRC, vec2(${outTexShape[1]}.0, 1.0)); + ivec2 resTexRC = ivec2(gl_FragCoord.yx); + int index = resTexRC.x * ${outTexShape[1]} + resTexRC.y; ${broadcastSnippet} - float texR = floor(index / ${inTexShape[1]}.0); - float texC = mod(index, ${inTexShape[1]}.0); + int texR = index / ${inTexShape[1]}; + int texC = index - texR * ${inTexShape[1]}; vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0); return sample(${texName}, uv); diff --git a/src/math/webgl/webgl_util.ts b/src/math/webgl/webgl_util.ts index 2170f84eb8..11704dd645 100644 --- a/src/math/webgl/webgl_util.ts +++ b/src/math/webgl/webgl_util.ts @@ -67,10 +67,9 @@ export function isWebGL2Enabled() { if (gl != null) { WEBGL2_ENABLED = true; - const loseContextExtension = - getExtensionOrThrow( - gl as WebGLRenderingContext, 'WEBGL_lose_context') as - WebGLLoseContextExtension; + const loseContextExtension = getExtensionOrThrow( + gl as WebGLRenderingContext, + 'WEBGL_lose_context') as WebGLLoseContextExtension; loseContextExtension.loseContext(); } else { WEBGL2_ENABLED = false; @@ -86,9 +85,10 @@ export function createWebGLRenderingContextFromCanvas( if (isWebGL2Enabled()) { gl = canvas.getContext('webgl2', attributes) as WebGLRenderingContext; } else { - gl = (canvas.getContext('webgl', attributes) || - canvas.getContext('experimental-webgl', attributes)) as - WebGLRenderingContext; + gl = + (canvas.getContext('webgl', attributes) || + canvas.getContext( + 'experimental-webgl', attributes)) as WebGLRenderingContext; } if (gl == null) { @@ -169,6 +169,7 @@ export function createFragmentShader( callAndCheck(gl, () => gl.shaderSource(fragmentShader, fragmentShaderSource)); callAndCheck(gl, () => gl.compileShader(fragmentShader)); if (gl.getShaderParameter(fragmentShader, gl.COMPILE_STATUS) === false) { + console.log(fragmentShaderSource); console.log(gl.getShaderInfoLog(fragmentShader)); throw new Error('Failed to compile fragment shader.'); } diff --git a/src/test_util.ts b/src/test_util.ts index d7682df6e8..f7433498b4 100644 --- a/src/test_util.ts +++ b/src/test_util.ts @@ -69,12 +69,14 @@ export function cpuMultiplyMatrix( bCol: number) { const result = new Float32Array(aRow * bCol); for (let r = 0; r < aRow; ++r) { + const aOffset = (r * aCol); + const cOffset = (r * bCol); for (let c = 0; c < bCol; ++c) { let d = 0; for (let k = 0; k < aCol; ++k) { - d += a[(r * aCol) + k] * b[(k * bCol) + c]; + d += a[aOffset + k] * b[(k * bCol) + c]; } - result[(r * bCol) + c] = d; + result[cOffset + c] = d; } } return result; diff --git a/src/util.ts b/src/util.ts index 925e12fd99..92b9a63345 100644 --- a/src/util.ts +++ b/src/util.ts @@ -103,7 +103,8 @@ export function flatten(arr: any[], ret?: number[]): number[] { return ret; } -export type ArrayData = number|number[]|number[][]|number[][][]|number[][][][]; +export type ArrayData = + number | number[] | number[][] | number[][][] | number[][][][]; export function inferShape(arr: ArrayData): number[] { const shape: number[] = []; @@ -212,3 +213,10 @@ export function assertAndGetBroadcastedShape( } return result.reverse(); } + +export function rightPad(a: string, size: number): string { + if (size <= a.length) { + return a; + } + return a + ' '.repeat(size - a.length); +}