From 25f8967b4bb813ca496a8ad8893265ea53b8185f Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Tue, 5 Sep 2017 22:30:53 -0400
Subject: [PATCH] Switch shader indexing from float to int (#93)

* switch shader indexing from float to int

* revert graph_runner_test

* self review
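---
Review notes (text between the three-dash line and the diffstat is not
part of the commit message and is ignored when the patch is applied):

* Besides switching shader indexing from float to int, this patch
  routes the NDArrayMath ops in src/math/math.ts through a new private
  executeOp(name, f) helper. In debug mode it times the op, logs
  name/time/rank/shape/size via console.log and checks the result
  values for NaN. The NaN check is removed from track(). The logged
  time is taken after result.getValues() is called, so it includes
  that call.
* copy2D used to return the copy2DInternal result without calling
  track(). It now returns `dest` from the executeOp callback, so
  `dest` is passed to track(). With no active scope and safeMode set,
  track() throws, so copy2D called outside a scope can now fail.
  Please confirm that tracking `dest` is intended.
* topK and conv2dBackProp send only `values` / `dx` through
  executeOp. `indices`, `db` and `dw` are tracked directly and are
  therefore not NaN-checked in debug mode.
* `start` in executeOp and `result` in topK / conv2dBackProp are
  assigned only inside a conditional or a closure and read afterwards.
  This presumably relies on strictNullChecks being off; confirm
  against the project's tsconfig.
* NDArrayMathGPU.batchNormalization3DInternal no longer has a default
  parameter value for varianceEpsilon. The 0.000001 fallback is now
  applied in the body when the argument is null or undefined.
* On non-WebGL2 contexts GPGPUContext now also requests
  WEBGL_color_buffer_float through plain getExtension, so the result
  may be null, unlike the other extensions, which are fetched with
  getExtensionOrThrow.
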
 src/math/math.ts                        | 176 +++++++++++-------
 src/math/math_gpu.ts                    |   6 +-
 src/math/webgl/argminmax_gpu.ts         |   9 +-
 src/math/webgl/concat3d_gpu.ts          |  12 +-
 src/math/webgl/conv_backprop_gpu.ts     |  67 ++++---
 src/math/webgl/conv_gpu.ts              |  33 ++--
 src/math/webgl/copy_gpu.ts              |  18 +-
 src/math/webgl/gpgpu_context.ts         |  10 +-
 src/math/webgl/gpgpu_context_test.ts    |  22 ++-
 src/math/webgl/gpgpu_math.ts            |   2 +-
 src/math/webgl/gpgpu_util.ts            |  23 ++-
 src/math/webgl/logsumexp_gpu.ts         |   6 +-
 src/math/webgl/max_pool_backprop_gpu.ts |  36 ++--
 src/math/webgl/minmax_gpu.ts            |   4 +-
 src/math/webgl/mulmat_gpu.ts            |   7 +-
 src/math/webgl/mulmat_gpu_test.ts       |  14 ++
 src/math/webgl/pool_gpu.ts              |  47 ++---
 src/math/webgl/reducesum_gpu.ts         |   2 +-
 src/math/webgl/resize_bilinear_gpu.ts   |  35 ++--
 src/math/webgl/shader_compiler.ts       | 227 ++++++++++++++----------
 src/math/webgl/webgl_util.ts            |  15 +-
 src/test_util.ts                        |   6 +-
 src/util.ts                             |  10 +-
 23 files changed, 458 insertions(+), 329 deletions(-)
diff --git a/src/math/math.ts b/src/math/math.ts
index b599d180d0..2979b0f276 100644
--- a/src/math/math.ts
+++ b/src/math/math.ts
@@ -19,7 +19,7 @@ import * as copy2d_util from './copy2d_util';
 
 import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray';
 
-export type ScopeResult = NDArray[]|NDArray|void;
+export type ScopeResult = NDArray[] | NDArray | void;
 
 export interface LSTMCell {
   (data: Array2D, c: Array2D, h: Array2D): [Array2D, Array2D];
@@ -162,11 +162,10 @@ export abstract class NDArrayMath {
     return result;
   }
 
-  private checkForNaN(arr: NDArray): void {
-    const vals = arr.getValues();
+  private checkForNaN(vals: Float32Array, name: string): void {
     for (let i = 0; i < vals.length; i++) {
       if (isNaN(vals[i])) {
-        throw Error('The result NDArray of the last math call has NaNs.');
+        throw Error(`The result of the last math.${name} has NaNs.`);
       }
     }
   }
@@ -177,9 +176,6 @@ export abstract class NDArrayMath {
    * @param result The NDArray to track in the current scope.
    */
   track<T extends NDArray>(result: T): T {
-    if (this.debugMode) {
-      this.checkForNaN(result);
-    }
     if (this.activeScope == null) {
       if (this.safeMode) {
         throw new Error(
@@ -225,8 +221,31 @@ export abstract class NDArrayMath {
             `${b.shape} and orientations ${MatrixOrientation[aOrientation]}` +
             ` and ${MatrixOrientation[bOrientation]} must match.`);
 
-    return this.track(this.matMulInternal(a, b, aOrientation, bOrientation));
+    return this.executeOp(
+        'matMul', () => this.matMulInternal(a, b, aOrientation, bOrientation));
+  }
+
+  private executeOp<T extends NDArray>(name: string, f: () => T): T {
+    let start: number;
+    if (this.debugMode) {
+      start = performance.now();
+    }
+    const result = f();
+    if (this.debugMode) {
+      const vals = result.getValues();
+      const time = util.rightPad((performance.now() - start) + 'ms', 9);
+      const paddedName = util.rightPad(name, 25);
+      const rank = result.rank;
+      const size = result.size;
+      const shape = util.rightPad(result.shape + '', 14);
+      console.log(
+          `%c${paddedName}\t%c${time}\t%c${rank}D ${shape}\t%c${size}`,
+          'font-weight:bold', 'color:red', 'color:blue', 'color: orange');
+      this.checkForNaN(vals, name);
+    }
+    return this.track(result);
   }
+
   protected abstract matMulInternal(
       a: Array2D, b: Array2D, aOrientation: MatrixOrientation,
       bOrientation: MatrixOrientation): Array2D;
@@ -317,7 +336,7 @@ export abstract class NDArrayMath {
    * @param ndarray The NDArray to clone.
    */
   clone<T extends NDArray>(ndarray: T): T {
-    return this.track(this.cloneInternal(ndarray));
+    return this.executeOp('clone', () => this.cloneInternal(ndarray));
   }
   protected abstract cloneInternal<T extends NDArray>(ndarray: T): T;
 
@@ -347,7 +366,8 @@ export abstract class NDArrayMath {
             begin[1] + size[1] <= input.shape[1],
         `Error in slice2D: requested start position ${begin} and size ` +
             `${size} would overflow input of shape ${input.shape}.`);
-    return this.track(this.slice2DInternal(input, begin, size));
+    return this.executeOp(
+        'slice2D', () => this.slice2DInternal(input, begin, size));
   }
   protected abstract slice2DInternal(
       input: Array2D, begin: [number, number], size: [number, number]): Array2D;
@@ -366,7 +386,7 @@ export abstract class NDArrayMath {
   copy2D(
       source: Array2D, sourceBegin: [number, number],
       sourceSize: [number, number], dest: Array2D, destBegin: [number, number],
-      destSize: [number, number]) {
+      destSize: [number, number]): void {
     util.assert(
         sourceBegin[0] + sourceSize[0] <= source.shape[0] &&
             sourceBegin[1] + sourceSize[1] <= source.shape[1],
@@ -381,8 +401,11 @@ export abstract class NDArrayMath {
             `shape ${dest.shape}.`);
     copy2d_util.validateShapes(sourceSize, destSize);
 
-    return this.copy2DInternal(
-        source, sourceBegin, sourceSize, dest, destBegin, destSize);
+    this.executeOp('copy2D', () => {
+      this.copy2DInternal(
+          source, sourceBegin, sourceSize, dest, destBegin, destSize);
+      return dest;
+    });
   }
   protected abstract copy2DInternal(
       source: Array2D, sourceBegin: [number, number],
@@ -422,7 +445,8 @@ export abstract class NDArrayMath {
   concat3D(ndarray1: Array3D, ndarray2: Array3D, axis: number): Array3D {
     concat3d_util.assertConcat3DShapesMatch(
         ndarray1.shape, ndarray2.shape, axis, 'Error in concat3d: ');
-    return this.track(this.concat3DInternal(ndarray1, ndarray2, axis));
+    return this.executeOp(
+        'concat3D', () => this.concat3DInternal(ndarray1, ndarray2, axis));
   }
   protected abstract concat3DInternal(
       ndarray1: Array3D, ndarray2: Array3D, axis: number): Array3D;
@@ -436,7 +460,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray to compute the logSumExp over.
    */
   logSumExp(ndarray: NDArray): Scalar {
-    return this.track(this.logSumExpInternal(ndarray));
+    return this.executeOp('logSumExp', () => this.logSumExpInternal(ndarray));
   }
   protected abstract logSumExpInternal(ndarray: NDArray): Scalar;
 
@@ -445,7 +469,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray to compute the sum over.
    */
   sum(ndarray: NDArray): Scalar {
-    return this.track(this.sumInternal(ndarray));
+    return this.executeOp('sum', () => this.sumInternal(ndarray));
   }
   protected abstract sumInternal(ndarray: NDArray): Scalar;
 
@@ -454,7 +478,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   argMin(ndarray: NDArray): Scalar {
-    return this.track(this.argMinInternal(ndarray));
+    return this.executeOp('argMin', () => this.argMinInternal(ndarray));
   }
   protected abstract argMinInternal(ndarray: NDArray): Scalar;
 
@@ -463,7 +487,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   argMax(ndarray: NDArray): Scalar {
-    return this.track(this.argMaxInternal(ndarray));
+    return this.executeOp('argMax', () => this.argMaxInternal(ndarray));
   }
   protected abstract argMaxInternal(ndarray: NDArray): Scalar;
 
@@ -474,7 +498,8 @@ export abstract class NDArrayMath {
    */
   argMaxEquals(x1: NDArray, x2: NDArray): Scalar {
     util.assertShapesMatch(x1.shape, x2.shape, 'Error in argMaxEquals: ');
-    return this.track(this.argMaxEqualsInternal(x1, x2));
+    return this.executeOp(
+        'argMaxEquals', () => this.argMaxEqualsInternal(x1, x2));
   }
   protected abstract argMaxEqualsInternal(x1: NDArray, x2: NDArray): Scalar;
 
@@ -488,8 +513,11 @@ export abstract class NDArrayMath {
         k <= ndarray.size,
         `Error in topK: k value (${k}) must be less than size of input ` +
             `ndarray, got shape ${ndarray.shape}.`);
-    const result = this.topKInternal(ndarray, k);
-    this.track(result.values);
+    let result: {values: Array1D, indices: Array1D};
+    this.executeOp('topK', () => {
+      result = this.topKInternal(ndarray, k);
+      return result.values;
+    });
     this.track(result.indices);
     return result;
   }
@@ -501,7 +529,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   min(ndarray: NDArray): Scalar {
-    return this.track(this.minInternal(ndarray));
+    return this.executeOp('min', () => this.minInternal(ndarray));
   }
   protected abstract minInternal(ndarray: NDArray): Scalar;
 
@@ -510,7 +538,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   max(ndarray: NDArray): Scalar {
-    return this.track(this.maxInternal(ndarray));
+    return this.executeOp('max', () => this.maxInternal(ndarray));
   }
   protected abstract maxInternal(ndarray: NDArray): Scalar;
 
@@ -519,12 +547,14 @@ export abstract class NDArrayMath {
    * @param x The input vector.
    */
   softmax(x: Array1D): Array1D {
-    return this.scope(() => {
-      // Do it in log space for numerical stability.
-      // exp(X - logSumExp(X))
-      const lse = this.logSumExp(x);
-      const logResult = this.arrayMinusScalar(x, lse);
-      return this.exp(logResult);
+    return this.executeOp('softmax', () => {
+      return this.scope(() => {
+        // Do it in log space for numerical stability.
+        // exp(X - logSumExp(X))
+        const lse = this.logSumExp(x);
+        const logResult = this.arrayMinusScalar(x, lse);
+        return this.exp(logResult);
+      });
     });
   }
 
@@ -542,7 +572,7 @@ export abstract class NDArrayMath {
         a.rank === newDim.length,
         `Error in switchDim: length of input shape ${a.shape} ` +
             `must match size of newDim array ${newDim}.`);
-    return this.track(this.switchDimInternal(a, newDim));
+    return this.executeOp('switchDim', () => this.switchDimInternal(a, newDim));
   }
   protected abstract switchDimInternal<T extends NDArray>(
       a: T, newDim: number[]): T;
@@ -591,7 +621,7 @@ export abstract class NDArrayMath {
    * @param a The input array.
    */
   neg<T extends NDArray>(a: T): T {
-    return this.track(this.negInternal(a));
+    return this.executeOp('neg', () => this.negInternal(a));
   }
   protected abstract negInternal<T extends NDArray>(a: T): T;
 
@@ -604,7 +634,7 @@ export abstract class NDArrayMath {
    */
   add(a: NDArray, b: NDArray): NDArray {
     util.assertAndGetBroadcastedShape(a.shape, b.shape);
-    return this.track(this.addInternal(a, b));
+    return this.executeOp('add', () => this.addInternal(a, b));
   }
   protected abstract addInternal(a: NDArray, b: NDArray): NDArray;
 
@@ -629,7 +659,7 @@ export abstract class NDArrayMath {
    */
   sub(a: NDArray, b: NDArray): NDArray {
     util.assertAndGetBroadcastedShape(a.shape, b.shape);
-    return this.track(this.subInternal(a, b));
+    return this.executeOp('sub', () => this.subInternal(a, b));
   }
   protected abstract subInternal(a: NDArray, b: NDArray): NDArray;
 
@@ -654,7 +684,7 @@ export abstract class NDArrayMath {
    */
   multiply(a: NDArray, b: NDArray): NDArray {
     util.assertAndGetBroadcastedShape(a.shape, b.shape);
-    return this.track(this.multiplyInternal(a, b));
+    return this.executeOp('multiply', () => this.multiplyInternal(a, b));
   }
   protected abstract multiplyInternal<T extends NDArray>(a: T, b: T): T;
 
@@ -686,7 +716,7 @@ export abstract class NDArrayMath {
    */
   divide(a: NDArray, b: NDArray): NDArray {
     util.assertAndGetBroadcastedShape(a.shape, b.shape);
-    return this.track(this.divideInternal(a, b));
+    return this.executeOp('divide', () => this.divideInternal(a, b));
   }
   protected abstract divideInternal(a: NDArray, b: NDArray): NDArray;
 
@@ -735,7 +765,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   exp<T extends NDArray>(ndarray: T): T {
-    return this.track(this.expInternal(ndarray));
+    return this.executeOp('exp', () => this.expInternal(ndarray));
   }
   protected abstract expInternal<T extends NDArray>(ndarray: T): T;
 
@@ -744,7 +774,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   log<T extends NDArray>(ndarray: T): T {
-    return this.track(this.logInternal(ndarray));
+    return this.executeOp('log', () => this.logInternal(ndarray));
   }
   protected abstract logInternal<T extends NDArray>(ndarray: T): T;
 
@@ -753,7 +783,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   sqrt<T extends NDArray>(ndarray: T): T {
-    return this.track(this.sqrtInternal(ndarray));
+    return this.executeOp('sqrt', () => this.sqrtInternal(ndarray));
   }
   protected abstract sqrtInternal<T extends NDArray>(ndarray: T): T;
 
@@ -762,7 +792,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   relu<T extends NDArray>(ndarray: T): T {
-    return this.track(this.reluInternal(ndarray));
+    return this.executeOp('relu', () => this.reluInternal(ndarray));
   }
   protected abstract reluInternal<T extends NDArray>(ndarray: T): T;
 
@@ -771,7 +801,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   sigmoid<T extends NDArray>(ndarray: T): T {
-    return this.track(this.sigmoidInternal(ndarray));
+    return this.executeOp('sigmoid', () => this.sigmoidInternal(ndarray));
   }
   protected abstract sigmoidInternal<T extends NDArray>(ndarray: T): T;
 
@@ -780,7 +810,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   tanh<T extends NDArray>(ndarray: T): T {
-    return this.track(this.tanhInternal(ndarray));
+    return this.executeOp('tanh', () => this.tanhInternal(ndarray));
   }
   protected abstract tanhInternal<T extends NDArray>(ndarray: T): T;
 
@@ -789,7 +819,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   sin<T extends NDArray>(ndarray: T): T {
-    return this.track(this.sinInternal(ndarray));
+    return this.executeOp('sin', () => this.sinInternal(ndarray));
   }
   protected abstract sinInternal<T extends NDArray>(ndarray: T): T;
 
@@ -799,7 +829,7 @@ export abstract class NDArrayMath {
    * @param ndarray The input NDArray.
    */
   step<T extends NDArray>(ndarray: T): T {
-    return this.track(this.stepInternal(ndarray));
+    return this.executeOp('step', () => this.stepInternal(ndarray));
   }
   protected abstract stepInternal<T extends NDArray>(ndarray: T): T;
 
@@ -821,7 +851,8 @@ export abstract class NDArrayMath {
             `NDArray of rank ${c2.rank}.`);
     util.assertShapesMatch(a.shape, b.shape, 'Error in scaledArrayAdd: ');
 
-    return this.track(this.scaledArrayAddInternal(c1, a, c2, b));
+    return this.executeOp(
+        'scaledArrayAdd', () => this.scaledArrayAddInternal(c1, a, c2, b));
   }
   protected abstract scaledArrayAddInternal<T extends NDArray>(
       c1: Scalar, a: T, c2: Scalar, b: T): T;
@@ -892,7 +923,9 @@ export abstract class NDArrayMath {
             `input depth for weights ${weights.shape[2]}.`);
 
 
-    return this.track(this.conv2dInternal(x, weights, biases, stride, zeroPad));
+    return this.executeOp(
+        'conv2d',
+        () => this.conv2dInternal(x, weights, biases, stride, zeroPad));
   }
   protected abstract conv2dInternal(
       x: Array3D, weights: Array4D, biases: Array1D|null, stride: number,
@@ -931,14 +964,15 @@ export abstract class NDArrayMath {
         `Error in conv2dBackProp: depth of dy (${dy.shape[2]}) must ` +
             `match output depth for weights (${weights.shape[3]}).`);
 
-    const backpropResult =
-        this.conv2dBackPropInternal(x, dy, weights, stride, pad);
 
-    this.track(backpropResult.db);
-    this.track(backpropResult.dw);
-    this.track(backpropResult.dx);
-
-    return backpropResult;
+    let result: {dx: Array3D, dw: Array4D, db: Array1D};
+    this.executeOp('conv2dBackProp', () => {
+      result = this.conv2dBackPropInternal(x, dy, weights, stride, pad);
+      return result.dx;
+    });
+    this.track(result.db);
+    this.track(result.dw);
+    return result;
   }
   protected abstract conv2dBackPropInternal(
       x: Array3D, dy: Array3D, weights: Array4D, stride: number,
@@ -977,8 +1011,9 @@ export abstract class NDArrayMath {
         `Error in conv2dTranspose: depth of input (${x.shape[2]}) must ` +
             `match input depth for weights ${weights.shape[3]}.`);
 
-    return this.track(
-        this.conv2dTransposeInternal(x, weights, biases, stride, pad));
+    return this.executeOp(
+        'conv2dTranspose',
+        () => this.conv2dTransposeInternal(x, weights, biases, stride, pad));
   }
   protected abstract conv2dTransposeInternal(
       x: Array3D, weights: Array4D, biases: Array1D|null, stride: number,
@@ -996,7 +1031,8 @@ export abstract class NDArrayMath {
     util.assert(
         x.rank === 3,
         'Error in maxPool: x must be rank 3 but got rank ' + x.rank + '.');
-    return this.track(this.maxPoolInternal(x, fSize, stride, pad));
+    return this.executeOp(
+        'maxPool', () => this.maxPoolInternal(x, fSize, stride, pad));
   }
   protected abstract maxPoolInternal(
       x: Array3D, fSize: number, stride: number, pad: number): Array3D;
@@ -1022,7 +1058,9 @@ export abstract class NDArrayMath {
         `Error in maxPoolBackprop: x must be rank 3 but got rank ` +
             `${x.rank}.`);
 
-    return this.track(this.maxPoolBackpropInternal(dy, x, fSize, stride, pad));
+    return this.executeOp(
+        'maxPoolBackprop',
+        () => this.maxPoolBackpropInternal(dy, x, fSize, stride, pad));
   }
   protected abstract maxPoolBackpropInternal(
       dy: Array3D, x: Array3D, fSize: number, stride: number,
@@ -1040,7 +1078,8 @@ export abstract class NDArrayMath {
     util.assert(
         x.rank === 3,
         `Error in minPool: x must be rank 3 but got rank ${x.rank}.`);
-    return this.track(this.minPoolInternal(x, fSize, stride, pad));
+    return this.executeOp(
+        'minPool', () => this.minPoolInternal(x, fSize, stride, pad));
   }
   protected abstract minPoolInternal(
       x: Array3D, fSize: number, stride: number, pad: number): Array3D;
@@ -1057,7 +1096,8 @@ export abstract class NDArrayMath {
     util.assert(
         x.rank === 3,
         `Error in avgPool: x must be rank 3 but got rank ${x.rank}.`);
-    return this.track(this.avgPoolInternal(x, fSize, stride, pad));
+    return this.executeOp(
+        'avgPool', () => this.avgPoolInternal(x, fSize, stride, pad));
   }
   protected abstract avgPoolInternal(
       x: Array3D, fSize: number, stride: number, pad: number): Array3D;
@@ -1081,8 +1121,9 @@ export abstract class NDArrayMath {
         newShape2D.length === 2,
         `Error in resizeBilinear3D: new shape must 2D, but got shape ` +
             `${newShape2D}.`);
-    return this.track(
-        this.resizeBilinear3DInternal(x, newShape2D, alignCorners));
+    return this.executeOp(
+        'resizeBilinear3D',
+        () => this.resizeBilinear3DInternal(x, newShape2D, alignCorners));
   }
   protected abstract resizeBilinear3DInternal(
       x: Array3D, newShape2D: [number, number], alignCorners: boolean): Array3D;
@@ -1128,8 +1169,10 @@ export abstract class NDArrayMath {
               `but got rank ${offset.rank}.`);
     }
 
-    return this.track(this.batchNormalization3DInternal(
-        x, mean, variance, varianceEpsilon, scale, offset));
+    return this.executeOp(
+        'batchNorm3D',
+        () => this.batchNormalization3DInternal(
+            x, mean, variance, varianceEpsilon, scale, offset));
   }
   protected abstract batchNormalization3DInternal(
       x: Array3D, mean: Array3D|Array1D, variance: Array3D|Array1D,
@@ -1219,11 +1262,10 @@ export abstract class NDArrayMath {
       const o = this.slice2D(
           res, [0, res.shape[1] / 4 * 3], [res.shape[0], res.shape[1] / 4]);
 
-      const newC =
-          this.add(
-              this.multiplyStrict(
-                  c, this.sigmoid(this.scalarPlusArray(forgetBias, f))),
-              this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D;
+      const newC = this.add(
+          this.multiplyStrict(
+              c, this.sigmoid(this.scalarPlusArray(forgetBias, f))),
+          this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D;
       const newH =
           this.multiplyStrict(this.tanh(newC), this.sigmoid(o)) as Array2D;
 
diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index dc1be5a28c..7af86a1358 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -153,10 +153,14 @@ export class NDArrayMathGPU extends NDArrayMath {
 
   protected batchNormalization3DInternal(
       x: Array3D, mean: Array3D|Array1D, variance: Array3D|Array1D,
-      varianceEpsilon = 0.000001, scale?: Array3D|Array1D,
+      varianceEpsilon: number|null, scale?: Array3D|Array1D,
       offset?: Array3D|Array1D): Array3D {
     const inputs = [x, mean, variance];
 
+    if (varianceEpsilon == null) {
+      varianceEpsilon = 0.000001;
+    }
+
     let offsetShape = null;
     if (offset != null) {
       offsetShape = offset.shape;
diff --git a/src/math/webgl/argminmax_gpu.ts b/src/math/webgl/argminmax_gpu.ts
index 7f50b08ab4..876a3174b5 100644
--- a/src/math/webgl/argminmax_gpu.ts
+++ b/src/math/webgl/argminmax_gpu.ts
@@ -20,11 +20,10 @@ export function getArgMinMaxSnippet(
   const compOp = (op === 'min') ? '<' : '>';
   return `
     float getArgMinMax${texName}() {
-      float bestIndex = 0.0;
-      float bestValue = get${texName}Flat(0.0);
+      int bestIndex = 0;
+      float bestValue = get${texName}Flat(0);
 
-      for (int ii = 0; ii < ${size}; ii++) {
-        float i = float(ii);
+      for (int i = 0; i < ${size}; i++) {
         float candidate = get${texName}Flat(i);
         if (isNaN(candidate)) {
           return candidate;
@@ -34,7 +33,7 @@ export function getArgMinMaxSnippet(
           bestIndex = i;
         }
       }
-      return bestIndex;
+      return float(bestIndex);
     }
   `;
 }
diff --git a/src/math/webgl/concat3d_gpu.ts b/src/math/webgl/concat3d_gpu.ts
index e6c6840c8a..891b9ee30c 100644
--- a/src/math/webgl/concat3d_gpu.ts
+++ b/src/math/webgl/concat3d_gpu.ts
@@ -32,16 +32,16 @@ export class Concat3DProgram implements GPGPUProgram {
         concat3d_util.computeConcat3DOutputShape(x1Shape, x2Shape, axis);
     this.userCode = `
       void main() {
-        vec3 coords = getOutputCoords();
-        float yR = coords.x;
-        float yC = coords.y;
-        float yD = coords.z;
+        ivec3 coords = getOutputCoords();
+        int yR = coords.x;
+        int yC = coords.y;
+        int yD = coords.z;
 
         float value = 0.0;
-        if (${concatAxis} < ${x1Shape[axis]}.0) {
+        if (${concatAxis} < ${x1Shape[axis]}) {
           value = getA(yR, yC, yD);
         } else {
-          ${concatAxis} -= ${x1Shape[axis]}.0;
+          ${concatAxis} -= ${x1Shape[axis]};
           value = getB(yR, yC, yD);
         }
 
diff --git a/src/math/webgl/conv_backprop_gpu.ts b/src/math/webgl/conv_backprop_gpu.ts
index d01dabb619..77dc4eb45d 100644
--- a/src/math/webgl/conv_backprop_gpu.ts
+++ b/src/math/webgl/conv_backprop_gpu.ts
@@ -36,28 +36,26 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram {
     this.params = [stride, zeroPad];
     this.userCode = `
       void main() {
-        vec4 coords = getOutputCoords();
-        float wR = coords.x;
-        float wC = coords.y;
-        float d1 = coords.z;
-        float d2 = coords.w;
+        ivec4 coords = getOutputCoords();
+        int wR = coords.x;
+        int wC = coords.y;
+        int d1 = coords.z;
+        int d2 = coords.w;
 
         // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
         // ? = to be determined. : = across all values in that axis.
         float dotProd = 0.0;
-        for (int iyR = 0; iyR < ${yNumRows}; iyR++) {
-          float yR = float(iyR);
-          float xR = wR + yR * ${stride}.0 - ${zeroPad}.0;
+        for (int yR = 0; yR < ${yNumRows}; yR++) {
+          int xR = wR + yR * ${stride} - ${zeroPad};
 
-          if (xR < 0.0 || xR >= ${xNumRows}.0) {
+          if (xR < 0 || xR >= ${xNumRows}) {
             continue;
           }
 
-          for (int iyC = 0; iyC < ${yNumCols}; iyC++) {
-            float yC = float(iyC);
-            float xC = wC + yC * ${stride}.0 - ${zeroPad}.0;
+          for (int yC = 0; yC < ${yNumCols}; yC++) {
+            int xC = wC + yC * ${stride} - ${zeroPad};
 
-            if (xC < 0.0 || xC >= ${xNumCols}.0) {
+            if (xC < 0 || xC >= ${xNumCols}) {
               continue;
             }
 
@@ -94,42 +92,41 @@ export class Conv2DTransposeProgram implements GPGPUProgram {
     this.params = [pad, fSize, origStride, hasBias];
 
     this.userCode = `
+      const ivec2 pads = ivec2(${pad}, ${pad});
+
       void main() {
-        vec3 coords = getOutputCoords();
-        float yR = coords.x;
-        float yC = coords.y;
-        float d2 = coords.z;
+        ivec3 coords = getOutputCoords();
+        int d2 = coords.z;
 
-        vec2 xRCCorner = vec2(yR, yC) - vec2(${pad}.0, ${pad}.0);
-        float xRCorner = xRCCorner.x;
-        float xCCorner = xRCCorner.y;
+        ivec2 xRCCorner = coords.xy - pads;
+        int xRCorner = xRCCorner.x;
+        int xCCorner = xRCCorner.y;
 
         // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2).
         // ? = to be determined. : = across all values in that axis.
         float dotProd = 0.0;
-        for (int iwR = 0; iwR < ${fSize}; iwR++) {
-          float wR = float(iwR);
-          float xR = (xRCorner + wR) / ${origStride}.0;
+        for (int wR = 0; wR < ${fSize}; wR++) {
+          float xR = float(xRCorner + wR) / ${origStride}.0;
 
           if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) {
             continue;
           }
+          int ixR = int(xR);
 
-          float wRPerm = ${fSize}.0 - 1.0 - wR;
+          int wRPerm = ${fSize} - 1 - wR;
 
-          for (int iwC = 0; iwC < ${fSize}; iwC++) {
-            float wC = float(iwC);
-            float xC = (xCCorner + wC) / ${origStride}.0;
+          for (int wC = 0; wC < ${fSize}; wC++) {
+            float xC = float(xCCorner + wC) / ${origStride}.0;
 
             if (xC < 0.0 || xC >= ${xCols}.0 || fract(xC) > 0.0) {
               continue;
             }
+            int ixC = int(xC);
 
-            float wCPerm = ${fSize}.0 - 1.0 - wC;
+            int wCPerm = ${fSize} - 1 - wC;
 
-            for (int id1 = 0; id1 < ${origOutputDepth}; id1++) {
-              float d1 = float(id1);
-              float xValue = getX(xR, xC, d1);
+            for (int d1 = 0; d1 < ${origOutputDepth}; d1++) {
+              float xValue = getX(ixR, ixC, d1);
               float wValue = getW(wRPerm, wCPerm, d2, d1);
               dotProd += xValue * wValue;
             }
@@ -153,13 +150,11 @@ export class Conv2DDerBiasProgram implements GPGPUProgram {
     this.outputShape = [outputDepth];
     this.userCode = `
       void main() {
-        float d2 = getOutputCoords();
+        int d2 = getOutputCoords();
 
         float derBias = 0.0;
-        for (int iyR = 0; iyR < ${yNumRows}; iyR++) {
-          float yR = float(iyR);
-          for (int iyC = 0; iyC < ${yNumCols}; iyC++) {
-            float yC = float(iyC);
+        for (int yR = 0; yR < ${yNumRows}; yR++) {
+          for (int yC = 0; yC < ${yNumCols}; yC++) {
             derBias += getDy(yR, yC, d2);
           }
         }
diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts
index ed55deff33..4565206861 100644
--- a/src/math/webgl/conv_gpu.ts
+++ b/src/math/webgl/conv_gpu.ts
@@ -33,38 +33,35 @@ export class Conv2DProgram implements GPGPUProgram {
     const xNumRows = xShape[0];
     const xNumCols = xShape[1];
     this.userCode = `
+      const ivec2 strides = ivec2(${stride}, ${stride});
+      const ivec2 pads = ivec2(${pad}, ${pad});
+
       void main() {
-        vec3 coords = getOutputCoords();
-        float yR = coords.x;
-        float yC = coords.y;
-        float d2 = coords.z;
+        ivec3 coords = getOutputCoords();
+        int d2 = coords.z;
 
-        vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}.0, ${stride}.0) -
-            vec2(${pad}.0, ${pad}.0);
-        float xRCorner = xRCCorner.x;
-        float xCCorner = xRCCorner.y;
+        ivec2 xRCCorner = coords.xy * strides - pads;
+        int xRCorner = xRCCorner.x;
+        int xCCorner = xRCCorner.y;
 
         // Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2).
         // ? = to be determined. : = across all values in that axis.
         float dotProd = 0.0;
-        for (int iwR = 0; iwR < ${fieldSize}; iwR++) {
-          float wR = float(iwR);
-          float xR = xRCorner + wR;
+        for (int wR = 0; wR < ${fieldSize}; wR++) {
+          int xR = xRCorner + wR;
 
-          if (xR < 0.0 || xR >= ${xNumRows}.0) {
+          if (xR < 0 || xR >= ${xNumRows}) {
             continue;
           }
 
-          for (int iwC = 0; iwC < ${fieldSize}; iwC++) {
-            float wC = float(iwC);
-            float xC = xCCorner + wC;
+          for (int wC = 0; wC < ${fieldSize}; wC++) {
+            int xC = xCCorner + wC;
 
-            if (xC < 0.0 || xC >= ${xNumCols}.0) {
+            if (xC < 0 || xC >= ${xNumCols}) {
               continue;
             }
 
-            for (int id1 = 0; id1 < ${inputDepth}; id1++) {
-              float d1 = float(id1);
+            for (int d1 = 0; d1 < ${inputDepth}; d1++) {
               float xValue = getX(xR, xC, d1);
               float wValue = getW(wR, wC, d1, d2);
               dotProd += xValue * wValue;
diff --git a/src/math/webgl/copy_gpu.ts b/src/math/webgl/copy_gpu.ts
index 1ea1418c6b..51862bc9d6 100644
--- a/src/math/webgl/copy_gpu.ts
+++ b/src/math/webgl/copy_gpu.ts
@@ -26,16 +26,14 @@ export class Copy2DProgram implements GPGPUProgram {
     this.outputShape = null;
     this.params = [srcNumCols, destNumCols];
     this.userCode = `
-      uniform vec2 sourceStart;
-      uniform vec2 destStart;
+      uniform ivec2 sourceStart;
+      uniform ivec2 destStart;
 
       void main() {
-        vec2 destCoords = getOutputCoords() - destStart;
-        float index = dot(destCoords, vec2(${destNumCols}.0, 1.0));
-        vec2 sourceCoords = sourceStart + vec2(
-          floor(index / ${srcNumCols}.0),
-          mod(index, ${srcNumCols}.0)
-        );
+        ivec2 destCoords = getOutputCoords() - destStart;
+        int index = destCoords.x * ${destNumCols} + destCoords.y;
+        int r = index / ${srcNumCols};
+        ivec2 sourceCoords = sourceStart + ivec2(r, index - r * ${srcNumCols});
         setOutput(getSource(sourceCoords.x, sourceCoords.y));
       }
     `;
@@ -48,9 +46,9 @@ export class Copy2DProgram implements GPGPUProgram {
       gpgpu.setOutputMatrixWriteRegion(
           destStart[0], destSize[0], destStart[1], destSize[1]);
       const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStart');
-      gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[0], sourceStart[1]);
+      gpgpu.gl.uniform2i(sourceStartCRLoc, sourceStart[0], sourceStart[1]);
       const destStartCRLoc = gpgpu.getUniformLocation('destStart');
-      gpgpu.gl.uniform2f(destStartCRLoc, destStart[0], destStart[1]);
+      gpgpu.gl.uniform2i(destStartCRLoc, destStart[0], destStart[1]);
     };
   }
 }
diff --git a/src/math/webgl/gpgpu_context.ts b/src/math/webgl/gpgpu_context.ts
index d8e0b803cc..db691dc2c4 100644
--- a/src/math/webgl/gpgpu_context.ts
+++ b/src/math/webgl/gpgpu_context.ts
@@ -43,14 +43,15 @@ export class GPGPUContext {
     if (!webgl_util.isWebGL2Enabled()) {
       this.textureFloatExtension =
           webgl_util.getExtensionOrThrow(this.gl, 'OES_texture_float');
+      this.colorBufferFloatExtension =
+          this.gl.getExtension('WEBGL_color_buffer_float');
     } else {
       this.colorBufferFloatExtension =
           webgl_util.getExtensionOrThrow(this.gl, 'EXT_color_buffer_float');
     }
 
-    this.loseContextExtension =
-        webgl_util.getExtensionOrThrow(this.gl, 'WEBGL_lose_context') as
-        WebGLLoseContextExtension;
+    this.loseContextExtension = webgl_util.getExtensionOrThrow(
+        this.gl, 'WEBGL_lose_context') as WebGLLoseContextExtension;
     this.vertexBuffer = gpgpu_util.createVertexBuffer(this.gl);
     this.indexBuffer = gpgpu_util.createIndexBuffer(this.gl);
     this.framebuffer = webgl_util.createFramebuffer(this.gl);
@@ -258,6 +259,9 @@ export class GPGPUContext {
     this.throwIfDisposed();
     webgl_util.bindColorTextureToFramebuffer(
         this.gl, texture, this.framebuffer);
+    if (this.autoDebugValidate) {
+      webgl_util.validateFramebuffer(this.gl);
+    }
     const result = downloadAndDecode();
     if (this.outputTexture != null) {
       webgl_util.bindColorTextureToFramebuffer(
diff --git a/src/math/webgl/gpgpu_context_test.ts b/src/math/webgl/gpgpu_context_test.ts
index efc802fba4..3e9ae712bb 100644
--- a/src/math/webgl/gpgpu_context_test.ts
+++ b/src/math/webgl/gpgpu_context_test.ts
@@ -41,12 +41,21 @@ describe('GPGPUContext downloadMatrixFromTexture WebGL 2.0', () => {
     expect(result[0]).toBeCloseTo(0.123);
   });
 
-  it('returns matrix that was uploaded', () => {
+  it('returns 1x1 matrix that was uploaded', () => {
     gpgpu.uploadMatrixToTexture(texture, 1, 1, new Float32Array([1.234]));
     const result = gpgpu.downloadMatrixFromTexture(texture, 1, 1);
     expect(result[0]).toBeCloseTo(1.234);
   });
 
+  it('returns 2x2 matrix that was uploaded', () => {
+    const texture2 = gpgpu.createMatrixTexture(2, 2);
+    gpgpu.uploadMatrixToTexture(
+        texture2, 2, 2, new Float32Array([1.234, 2, 3, 4]));
+    const result = gpgpu.downloadMatrixFromTexture(texture2, 2, 2);
+    expect(result).toEqual(new Float32Array([1.234, 2, 3, 4]));
+    gpgpu.deleteMatrixTexture(texture2);
+  });
+
   it('uses texture parameter', () => {
     const texture2: WebGLTexture = gpgpu.createMatrixTexture(1, 1);
     gpgpu.uploadMatrixToTexture(texture, 1, 1, new Float32Array([1]));
@@ -84,12 +93,21 @@ describe('GPGPUContext downloadMatrixFromTexture WebGL 1.0', () => {
     expect(result[0]).toBeCloseTo(0.123);
   });
 
-  it('returns matrix that was uploaded', () => {
+  it('returns 1x1 matrix that was uploaded', () => {
     gpgpu.uploadMatrixToTexture(texture, 1, 1, new Float32Array([1.234]));
     const result = gpgpu.downloadMatrixFromTexture(texture, 1, 1);
     expect(result[0]).toBeCloseTo(1.234);
   });
 
+  it('returns 2x2 matrix that was uploaded', () => {
+    const texture2 = gpgpu.createMatrixTexture(2, 2);
+    gpgpu.uploadMatrixToTexture(
+        texture2, 2, 2, new Float32Array([1.234, 2, 3, 4]));
+    const result = gpgpu.downloadMatrixFromTexture(texture2, 2, 2);
+    expect(result).toEqual(new Float32Array([1.234, 2, 3, 4]));
+    gpgpu.deleteMatrixTexture(texture2);
+  });
+
   it('uses texture parameter', () => {
     const texture2: WebGLTexture = gpgpu.createMatrixTexture(1, 1);
     gpgpu.uploadMatrixToTexture(texture, 1, 1, new Float32Array([1]));
diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts
index 5784c18a3a..ea643dc381 100644
--- a/src/math/webgl/gpgpu_math.ts
+++ b/src/math/webgl/gpgpu_math.ts
@@ -115,7 +115,7 @@ export function makeShaderKey(
   const params = program.params;
   const keyStart =
       inputs.concat(output).map(x => x.shape + '_' + x.getTextureShapeRC());
-  const keyEnd = params.map(p => p.toString());
+  const keyEnd = params.map(String);
   let key = [program.constructor.name];
   key.push((program.supportsBroadcasting === true).toString());
   key = key.concat(keyStart, keyEnd);
diff --git a/src/math/webgl/gpgpu_util.ts b/src/math/webgl/gpgpu_util.ts
index 1b7960136d..13f40d8dbd 100644
--- a/src/math/webgl/gpgpu_util.ts
+++ b/src/math/webgl/gpgpu_util.ts
@@ -90,7 +90,11 @@ function getTextureInternalFormat(
 
 function getTextureFormat(
     gl: WebGLRenderingContext, numChannels: number): number {
-  if (webgl_util.isWebGL2Enabled() && numChannels === 1) {
+  if (webgl_util.isWebGL2Enabled()) {
+    if (numChannels === 4) {
+      // tslint:disable-next-line:no-any
+      return (gl as any).RGBA;
+    }
     // tslint:disable-next-line:no-any
     return (gl as any).RED;
   }
@@ -206,12 +210,17 @@ export function uploadMatrixToTexture(
 
   const channelsPerTexture =
       numChannels === 1 ? webgl_util.getChannelsPerTexture() : numChannels;
-  const unpackedArray =
-      new Float32Array(tex_util.getUnpackedArraySizeFromMatrixSize(
-          matrix.length, channelsPerTexture));
-  tex_util.encodeMatrixToUnpackedArray(
-      matrix, unpackedArray, channelsPerTexture);
-
+  let unpackedArray: Float32Array;
+  if (channelsPerTexture === 1) {
+    // No need to allocate a temporary array.
+    unpackedArray = matrix;
+  } else {
+    unpackedArray =
+        new Float32Array(tex_util.getUnpackedArraySizeFromMatrixSize(
+            matrix.length, channelsPerTexture));
+    tex_util.encodeMatrixToUnpackedArray(
+        matrix, unpackedArray, channelsPerTexture);
+  }
   uploadDataToTexture(gl, texture, w, h, unpackedArray, numChannels);
 }
 
diff --git a/src/math/webgl/logsumexp_gpu.ts b/src/math/webgl/logsumexp_gpu.ts
index d6e6861be2..703268275d 100644
--- a/src/math/webgl/logsumexp_gpu.ts
+++ b/src/math/webgl/logsumexp_gpu.ts
@@ -24,14 +24,14 @@ export class LogSumExpProgram implements GPGPUProgram {
   constructor(aSize: number) {
     this.userCode = `
       void main() {
-        float aMax = getAFlat(0.0);
+        float aMax = getAFlat(0);
         for (int i = 0; i < ${aSize}; i++) {
-          aMax = max(aMax, getAFlat(float(i)));
+          aMax = max(aMax, getAFlat(i));
         }
 
         float expSum = 0.0;
         for (int i = 0; i < ${aSize}; i++) {
-          expSum += exp(getAFlat(float(i)) - aMax);
+          expSum += exp(getAFlat(i) - aMax);
         }
 
         setOutput(aMax + log(expSum));
diff --git a/src/math/webgl/max_pool_backprop_gpu.ts b/src/math/webgl/max_pool_backprop_gpu.ts
index 66a97db9ac..083e2d7399 100644
--- a/src/math/webgl/max_pool_backprop_gpu.ts
+++ b/src/math/webgl/max_pool_backprop_gpu.ts
@@ -37,43 +37,43 @@ export class MaxPool2DBackpropProgram implements GPGPUProgram {
         [dilatedDyRC[0], dilatedDyRC[1], dyShape[2]], fSize, dyShape[2], 1,
         pad);
 
+    const lastIndex = fSize * fSize - 1;
     this.userCode = `
+      const ivec2 pads = ivec2(${pad}, ${pad});
+
       void main() {
-        vec3 coords = getOutputCoords();
-        float dxR = coords.x;
-        float dxC = coords.y;
-        float d = coords.z;
+        ivec3 coords = getOutputCoords();
+        int d = coords.z;
 
-        vec2 dyRCCorner = vec2(dxR, dxC) - vec2(${pad}.0, ${pad}.0);
-        float dyRCorner = dyRCCorner.x;
-        float dyCCorner = dyRCCorner.y;
+        ivec2 dyRCCorner = coords.xy - pads;
+        int dyRCorner = dyRCCorner.x;
+        int dyCCorner = dyRCCorner.y;
 
-        // Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(yR, dxC, d).
+        // Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(xR, xC, d).
         // ? = to be determined. : = across all values in that axis.
         float dotProd = 0.0;
-        for (int iwR = 0; iwR < ${fSize}; iwR++) {
-          float wR = float(iwR);
-          float dyR = (dyRCorner + wR) / ${origStride}.0;
+        for (int wR = 0; wR < ${fSize}; wR++) {
+          float dyR = float(dyRCorner + wR) / ${origStride}.0;
 
           if (dyR < 0.0 || dyR >= ${dyRows}.0 || fract(dyR) > 0.0) {
             continue;
           }
+          int idyR = int(dyR);
 
-          for (int iwC = 0; iwC < ${fSize}; iwC++) {
-            float wC = float(iwC);
-            float dyC = (dyCCorner + wC) / ${origStride}.0;
+          for (int wC = 0; wC < ${fSize}; wC++) {
+            float dyC = float(dyCCorner + wC) / ${origStride}.0;
 
             if (dyC < 0.0 || dyC >= ${dyCols}.0 || fract(dyC) > 0.0) {
               continue;
             }
+            int idyC = int(dyC);
 
-            float dyValue = getDy(dyR, dyC, d);
-            float maxPosValue =
-                ${fSize * fSize - 1}.0 - getMaxPos(dyR, dyC, d);
+            float dyValue = getDy(idyR, idyC, d);
+            int maxPosValue = ${lastIndex} - int(getMaxPos(idyR, idyC, d));
 
             // Get the current value, check it against the value from the
             // position matrix.
-            float curPosValue = wR * ${fSize}.0 + wC;
+            int curPosValue = wR * ${fSize} + wC;
             float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
 
             dotProd += dyValue * mask;
diff --git a/src/math/webgl/minmax_gpu.ts b/src/math/webgl/minmax_gpu.ts
index d10a1f1021..88bcd998d5 100644
--- a/src/math/webgl/minmax_gpu.ts
+++ b/src/math/webgl/minmax_gpu.ts
@@ -25,9 +25,9 @@ export class MinMaxProgram implements GPGPUProgram {
     this.params = [opType];
     this.userCode = `
       void main() {
-        float value = getAFlat(0.0);
+        float value = getAFlat(0);
         for (int i = 0; i < ${aSize}; i++) {
-          float candidate = getAFlat(float(i));
+          float candidate = getAFlat(i);
           if (isNaN(candidate)) {
             setOutput(candidate);
             return;
diff --git a/src/math/webgl/mulmat_gpu.ts b/src/math/webgl/mulmat_gpu.ts
index 366991a24a..345641fca9 100644
--- a/src/math/webgl/mulmat_gpu.ts
+++ b/src/math/webgl/mulmat_gpu.ts
@@ -44,10 +44,9 @@ export class MatMulProgram implements GPGPUProgram {
     this.userCode = `
       const int sharedDim = ${sharedDim};
 
-      float dotARowBCol(float aRow, float bCol) {
+      float dotARowBCol(int aRow, int bCol) {
         float result = 0.0;
-        for (int ii = 0; ii < sharedDim; ii++) {
-          float i = float(ii);
+        for (int i = 0; i < sharedDim; i++) {
           float a = getMatrixA(${aSnippet});
           float b = getMatrixB(${bSnippet});
           result += (a * b);
@@ -56,7 +55,7 @@ export class MatMulProgram implements GPGPUProgram {
       }
 
       void main() {
-        vec2 resRC = getOutputCoords();
+        ivec2 resRC = getOutputCoords();
         setOutput(dotARowBCol(resRC.x, resRC.y));
       }
     `;
diff --git a/src/math/webgl/mulmat_gpu_test.ts b/src/math/webgl/mulmat_gpu_test.ts
index c39526290b..651183af31 100644
--- a/src/math/webgl/mulmat_gpu_test.ts
+++ b/src/math/webgl/mulmat_gpu_test.ts
@@ -331,6 +331,20 @@ describe('mulmat_gpu (transposed versions)', () => {
   });
 });
 
+describe('mulmat_gpu huge matrix', () => {
+  it('vector times matrix', () => {
+    const sharedDim = 1000;
+    const outDim = 50000;
+    const a = test_util.randomArrayInRange(sharedDim, -1, 1);
+    const matrix = test_util.randomArrayInRange(sharedDim * outDim, -1, 1);
+    const result = uploadMultiplyMatrixDownload(
+        a, 1, sharedDim, matrix, sharedDim, outDim);
+    const cpuResult =
+        test_util.cpuMultiplyMatrix(a, 1, sharedDim, matrix, sharedDim, outDim);
+    test_util.expectArraysClose(result, cpuResult, 1e-4);
+  });
+});
+
 export function uploadMultiplyMatrixDownload(
     a: Float32Array, aNumRows: number, aNumCols: number, b: Float32Array,
     bNumRows: number, bNumCols: number,
diff --git a/src/math/webgl/pool_gpu.ts b/src/math/webgl/pool_gpu.ts
index f1f6dfacb0..fd35f1cd39 100644
--- a/src/math/webgl/pool_gpu.ts
+++ b/src/math/webgl/pool_gpu.ts
@@ -31,48 +31,49 @@ export class Pool2DProgram implements GPGPUProgram {
 
     let returnValue = 'minMaxValue';
     if (computePositions) {
-      returnValue = 'minMaxPosition';
+      returnValue = 'float(minMaxPosition)';
     } else if (poolType === 'avg') {
       returnValue = `avgValue / ${fSize * fSize}.0`;
     }
-    const xRowsLimit = xShape[0] - 0.5;
-    const xColsLimit = xShape[1] - 0.5;
+    const xRowsLimit = xShape[0];
+    const xColsLimit = xShape[1];
     this.params = [stride, pad, fSize, poolType, computePositions];
     this.outputShape =
         conv_util.computeOutputShape3D(xShape, fSize, xShape[2], stride, pad);
 
+    const isAvgPool = poolType === 'avg';
+    const compareOp = poolType === 'min' ? '<=' : '>=';
+
     this.userCode = `
+      const ivec2 strides = ivec2(${stride}, ${stride});
+      const ivec2 pads = ivec2(${pad}, ${pad});
+
       void main() {
-        vec3 coords = getOutputCoords();
-        float yR = coords.x;
-        float yC = coords.y;
-        float d = coords.z;
+        ivec3 coords = getOutputCoords();
+        int d = coords.z;
 
-        vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}.0, ${stride}.0) -
-            vec2(${pad}.0, ${pad}.0);
-        float xRCorner = xRCCorner.x;
-        float xCCorner = xRCCorner.y;
+        ivec2 xRCCorner = coords.xy * strides - pads;
+        int xRCorner = xRCCorner.x;
+        int xCCorner = xRCCorner.y;
 
         // max/min x(?, ?, d) to get y(yR, yC, d).
         // ? = to be determined
         float minMaxValue = 0.0;
         float minMaxValueFound = 0.0;
-        float minMaxPosition = 0.0;
+        int minMaxPosition = 0;
         float avgValue = 0.0;
 
-        for (int iwR = 0; iwR < ${fSize}; iwR++) {
-          float wR = float(iwR);
-          float xR = xRCorner + wR;
+        for (int wR = 0; wR < ${fSize}; wR++) {
+          int xR = xRCorner + wR;
 
-          if (xR < 0.0 || xR > ${xRowsLimit}) {
+          if (xR < 0 || xR >= ${xRowsLimit}) {
             continue;
           }
 
-          for (int iwC = 0; iwC < ${fSize}; iwC++) {
-            float wC = float(iwC);
-            float xC = xCCorner + wC;
+          for (int wC = 0; wC < ${fSize}; wC++) {
+            int xC = xCCorner + wC;
 
-            if (xC < 0.0 || xC > ${xColsLimit}) {
+            if (xC < 0 || xC >= ${xColsLimit}) {
               continue;
             }
 
@@ -83,18 +84,18 @@ export class Pool2DProgram implements GPGPUProgram {
               return;
             }
 
-            if (${poolType === 'avg'}) {
+            if (${isAvgPool}) {
               avgValue += value;
             } else {
               // If a min / max value has already been found, use it. If not,
               // use the current value.
               float currMinMaxValue = mix(
                   value, minMaxValue, minMaxValueFound);
-              if (value ${poolType === 'min' ? '<=' : '>='} currMinMaxValue) {
+              if (value ${compareOp} currMinMaxValue) {
                 minMaxValue = value;
                 minMaxValueFound = 1.0;
                 if (${computePositions}) {
-                  minMaxPosition = wR * ${fSize}.0 + wC;
+                  minMaxPosition = wR * ${fSize} + wC;
                 }
               }
             }
diff --git a/src/math/webgl/reducesum_gpu.ts b/src/math/webgl/reducesum_gpu.ts
index 54d19b6dbf..8cbb76c44b 100644
--- a/src/math/webgl/reducesum_gpu.ts
+++ b/src/math/webgl/reducesum_gpu.ts
@@ -26,7 +26,7 @@ export class ReduceSumProgram implements GPGPUProgram {
       void main() {
         float sum = 0.0;
         for (int i = 0; i < ${aSize}; i++) {
-          sum += getAFlat(float(i));
+          sum += getAFlat(i);
         }
         setOutput(sum);
       }
diff --git a/src/math/webgl/resize_bilinear_gpu.ts b/src/math/webgl/resize_bilinear_gpu.ts
index 9ffb6707f7..09decbc8a2 100644
--- a/src/math/webgl/resize_bilinear_gpu.ts
+++ b/src/math/webgl/resize_bilinear_gpu.ts
@@ -38,32 +38,35 @@ export class ResizeBilinear3DProgram implements GPGPUProgram {
         this.outputShape;
     this.userCode = `
       const vec2 effectiveInputOverOutputRatioRC = vec2(
-          ${effectiveInputShape[0] / effectiveOutputShape[0]},
-          ${effectiveInputShape[1] / effectiveOutputShape[1]});
+          ${effectiveInputShape[0] /
+        effectiveOutputShape[0]},
+          ${effectiveInputShape[1] /
+        effectiveOutputShape[1]});
       const vec2 inputShapeRC = vec2(${inputShape[0]}.0, ${inputShape[1]}.0);
 
       void main() {
-        vec3 coords = getOutputCoords();
-        vec2 yRC = coords.xy;
-        float d = coords.z;
+        ivec3 coords = getOutputCoords();
+        ivec2 yRC = coords.xy;
+        int d = coords.z;
 
         // Fractional source index.
-        vec2 sourceFracIndexRC = yRC * effectiveInputOverOutputRatioRC;
+        vec2 sourceFracIndexRC = vec2(yRC) * effectiveInputOverOutputRatioRC;
 
         // Compute the four integer indices.
-        vec2 sourceFloorRC = floor(sourceFracIndexRC);
-        vec2 sourceCeilRC = min(inputShapeRC - 1.0, ceil(sourceFracIndexRC));
+        ivec2 sourceFloorRC = ivec2(sourceFracIndexRC);
+        ivec2 sourceCeilRC = ivec2(
+          min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)));
 
-        float topLeft = getA(sourceFloorRC[0], sourceFloorRC[1], d);
-        float bottomLeft = getA(sourceCeilRC[0], sourceFloorRC[1], d);
-        float topRight = getA(sourceFloorRC[0], sourceCeilRC[1], d);
-        float bottomRight = getA(sourceCeilRC[0], sourceCeilRC[1], d);
+        float topLeft = getA(sourceFloorRC.x, sourceFloorRC.y, d);
+        float bottomLeft = getA(sourceCeilRC.x, sourceFloorRC.y, d);
+        float topRight = getA(sourceFloorRC.x, sourceCeilRC.y, d);
+        float bottomRight = getA(sourceCeilRC.x, sourceCeilRC.y, d);
 
-        vec2 fracRC = sourceFracIndexRC - sourceFloorRC;
+        vec2 fracRC = sourceFracIndexRC - vec2(sourceFloorRC);
 
-        float top = topLeft + (topRight - topLeft) * fracRC[1];
-        float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC[1];
-        float newValue = top + (bottom - top) * fracRC[0];
+        float top = topLeft + (topRight - topLeft) * fracRC.y;
+        float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC.y;
+        float newValue = top + (bottom - top) * fracRC.x;
 
         setOutput(newValue);
       }
diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts
index a2d5a9a962..728c47a361 100644
--- a/src/math/webgl/shader_compiler.ts
+++ b/src/math/webgl/shader_compiler.ts
@@ -109,41 +109,39 @@ function getOutputSamplingSnippet(
 }
 
 const SAMPLE_1D_SNIPPET = `
-vec2 UVfrom1D(float texNumR, float texNumC, float index) {
-  float texR = floor(index / texNumC);
-  float texC = mod(index, texNumC);
+vec2 UVfrom1D(int texNumR, int texNumC, int index) {
+  int texR = index / texNumC;
+  int texC = index - texR * texNumC;
   return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
 }
 `;
 
 const SAMPLE_2D_SNIPPET = `
-vec2 UVfrom2D(float texNumR, float texNumC, float numC, float row,
-    float col) {
-  float index = dot(vec2(row, col), vec2(numC, 1.0));
-  float texR = floor(index / texNumC);
-  float texC = mod(index, texNumC);
+vec2 UVfrom2D(int texNumR, int texNumC, int numC, int row, int col) {
+  int index = row * numC + col;
+  int texR = index / texNumC;
+  int texC = index - texR * texNumC;
   return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
 }
 `;
 
 const SAMPLE_3D_SNIPPET = `
-vec2 UVfrom3D(float texNumR, float texNumC, float stride0,
-    float stride1, float row, float col, float depth) {
-  float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0));
-  float texR = floor(index / texNumC);
-  float texC = mod(index, texNumC);
+vec2 UVfrom3D(int texNumR, int texNumC, int stride0,
+    int stride1, int row, int col, int depth) {
+  int index = row * stride0 + col * stride1 + depth;
+  int texR = index / texNumC;
+  int texC = index - texR * texNumC;
   return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
 }
 `;
 
 const SAMPLE_4D_SNIPPET = `
-vec2 UVfrom4D(float texNumR, float texNumC, float stride0,
-    float stride1, float stride2, float row, float col, float depth,
-    float depth2) {
-  float index = dot(vec4(row, col, depth, depth2),
-                    vec4(stride0, stride1, stride2, 1.0));
-  float texR = floor(index / texNumC);
-  float texC = mod(index, texNumC);
+vec2 UVfrom4D(int texNumR, int texNumC, int stride0,
+    int stride1, int stride2, int row, int col, int depth,
+    int depth2) {
+  int index = row * stride0 + col * stride1 + depth * stride2 + depth2;
+  int texR = index / texNumC;
+  int texC = index - texR * texNumC;
   return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
 }
 `;
@@ -174,22 +172,22 @@ function getOutput1DCoords(
     shape: [number], texShape: [number, number]): string {
   if (texShape[0] === 1) {
     return `
-      float getOutputCoords() {
-        return floor(gl_FragCoord.x);
+      int getOutputCoords() {
+        return int(gl_FragCoord.x);
       }
     `;
   }
   if (texShape[1] === 1) {
     return `
-      float getOutputCoords() {
-        return floor(gl_FragCoord.y);
+      int getOutputCoords() {
+        return int(gl_FragCoord.y);
       }
     `;
   }
   return `
-    float getOutputCoords() {
-      vec2 resTexRC = floor(gl_FragCoord.yx);
-      return dot(resTexRC, vec2(${texShape[1]}.0, 1.0));
+    int getOutputCoords() {
+      ivec2 resTexRC = ivec2(gl_FragCoord.yx);
+      return resTexRC.x * ${texShape[1]} + resTexRC.y;
     }
   `;
 }
@@ -199,14 +197,14 @@ function getOutput3DCoords(
   const stride0 = shape[1] * shape[2];
   const stride1 = shape[2];
   return `
-    vec3 getOutputCoords() {
-      vec2 resTexRC = floor(gl_FragCoord.yx);
-      float index = dot(resTexRC, vec2(${texShape[1]}.0, 1.0));
-      float r = floor(index / ${stride0}.0);
-      index -= r * ${stride0}.0;
-      float c = floor(index / ${stride1}.0);
-      float d = mod(index, ${stride1}.0);
-      return vec3(r, c, d);
+    ivec3 getOutputCoords() {
+      ivec2 resTexRC = ivec2(gl_FragCoord.yx);
+      int index = resTexRC.x * ${texShape[1]} + resTexRC.y;
+      int r = index / ${stride0};
+      index -= r * ${stride0};
+      int c = index / ${stride1};
+      int d = index - c * ${stride1};
+      return ivec3(r, c, d);
     }
   `;
 }
@@ -218,20 +216,20 @@ function getOutput4DCoords(
   const stride1 = shape[2] * stride2;
   const stride0 = shape[1] * stride1;
   return `
-    vec4 getOutputCoords() {
-      vec2 resTexRC = floor(gl_FragCoord.yx);
-      float index = dot(resTexRC, vec2(${texShape[1]}.0, 1.0));
+    ivec4 getOutputCoords() {
+      ivec2 resTexRC = ivec2(gl_FragCoord.yx);
+      int index = resTexRC.x * ${texShape[1]} + resTexRC.y;
 
-      float r = floor(index / ${stride0}.0);
-      index -= r * ${stride0}.0;
+      int r = index / ${stride0};
+      index -= r * ${stride0};
 
-      float c = floor(index / ${stride1}.0);
-      index -= c * ${stride1}.0;
+      int c = index / ${stride1};
+      index -= c * ${stride1};
 
-      float d = floor(index / ${stride2}.0);
-      float d2 = mod(index, ${stride2}.0);
+      int d = index / ${stride2};
+      int d2 = index - d * ${stride2};
 
-      return vec4(r, c, d, d2);
+      return ivec4(r, c, d, d2);
     }
   `;
 }
@@ -240,18 +238,36 @@ function getOutput2DCoords(
     shape: [number, number], texShape: [number, number]): string {
   if (util.arraysEqual(shape, texShape)) {
     return `
-      vec2 getOutputCoords() {
-        return floor(gl_FragCoord.yx);
+      ivec2 getOutputCoords() {
+        return ivec2(gl_FragCoord.yx);
+      }
+    `;
+  }
+  if (shape[1] === 1) {
+    return `
+      ivec2 getOutputCoords() {
+        ivec2 resTexRC = ivec2(gl_FragCoord.yx);
+        int index = resTexRC.x * ${texShape[1]} + resTexRC.y;
+        return ivec2(index, 0);
+      }
+    `;
+  }
+  if (shape[0] === 1) {
+    return `
+      ivec2 getOutputCoords() {
+        ivec2 resTexRC = ivec2(gl_FragCoord.yx);
+        int index = resTexRC.x * ${texShape[1]} + resTexRC.y;
+        return ivec2(0, index);
       }
     `;
   }
   return `
-    vec2 getOutputCoords() {
-      vec2 resTexRC = floor(gl_FragCoord.yx);
-      float index = dot(resTexRC, vec2(${texShape[1]}.0, 1.0));
-      float r = floor(index / ${shape[1]}.0);
-      float c = mod(index, ${shape[1]}.0);
-      return vec2(r, c);
+    ivec2 getOutputCoords() {
+      ivec2 resTexRC = ivec2(gl_FragCoord.yx);
+      int index = resTexRC.x * ${texShape[1]} + resTexRC.y;
+      int r = index / ${shape[1]};
+      int c = index - r * ${shape[1]};
+      return ivec2(r, c);
     }
   `;
 }
@@ -271,30 +287,30 @@ function getSampler1D(texName: string, texShape: [number, number]): string {
   const tC = texShape[1];
   if (texShape[0] === 1 && texShape[1] === 1) {
     return `
-      float ${funcName}(float index) {
+      float ${funcName}(int index) {
         return sample(${texName}, halfCR);
       }
     `;
   }
   if (texShape[1] === 1) {
     return `
-      float ${funcName}(float index) {
-        vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0);
+      float ${funcName}(int index) {
+        vec2 uv = vec2(0.5, (float(index) + 0.5) / ${tR}.0);
         return sample(${texName}, uv);
       }
     `;
   }
   if (texShape[0] === 1) {
     return `
-      float ${funcName}(float index) {
-        vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5);
+      float ${funcName}(int index) {
+        vec2 uv = vec2((float(index) + 0.5) / ${tC}.0, 0.5);
         return sample(${texName}, uv);
       }
     `;
   }
   return `
-    float ${funcName}(float index) {
-      vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index);
+    float ${funcName}(int index) {
+      vec2 uv = UVfrom1D(${tR}, ${tC}, index);
       return sample(${texName}, uv);
     }
   `;
@@ -310,18 +326,17 @@ function getSampler3D(
   const stride1 = shape[2];
   if (tC === stride0) {
     return `
-      float ${funcName}(float row, float col, float depth) {
-        float texR = row;
-        float texC = dot(vec2(col, depth), vec2(${stride1}, 1.0));
+      float ${funcName}(int row, int col, int depth) {
+        int texR = row;
+        int texC = col * ${stride1} + depth;
         vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tC}.0, ${tR}.0);
         return sample(${texName}, uv);
       }
     `;
   }
   return `
-    float ${funcName}(float row, float col, float depth) {
-      vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row,
-        col, depth);
+    float ${funcName}(int row, int col, int depth) {
+      vec2 uv = UVfrom3D(${tR}, ${tC}, ${stride0}, ${stride1}, row, col, depth);
       return sample(${texName}, uv);
     }
   `;
@@ -339,19 +354,18 @@ function getSampler4D(
 
   if (tC === stride0) {
     return `
-      float ${funcName}(float row, float col, float depth, float depth2) {
-        float texR = row;
-        float texC = dot(vec3(col, depth, depth2),
-                         vec3(${stride1}.0, ${stride2}.0, 1.0));
+      float ${funcName}(int row, int col, int depth, int depth2) {
+        int texR = row;
+        int texC = col * ${stride1} + depth * ${stride2} + depth2;
         vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tC}.0, ${tR}.0);
         return sample(${texName}, uv);
       }
     `;
   }
   return `
-    float ${funcName}(float row, float col, float depth, float depth2) {
-      vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0,
-          ${stride2}.0, row, col, depth, depth2);
+    float ${funcName}(int row, int col, int depth, int depth2) {
+      vec2 uv = UVfrom4D(${tR}, ${tC}, ${stride0}, ${stride1}, ${stride2},
+          row, col, depth, depth2);
       return sample(${texName}, uv);
     }
   `;
@@ -365,33 +379,49 @@ function getSampler2D(
   const tC = texShape[1];
   if (util.arraysEqual(shape, texShape)) {
     return `
-      float ${funcName}(float row, float col) {
+      float ${funcName}(int row, int col) {
         vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0);
         return sample(${texName}, uv);
       }
     `;
   }
   if (tC === 1) {
+    if (shape[0] === 1) {
+      return `
+        float ${funcName}(int row, int col) {
+          vec2 uv = vec2(0.5, (float(col) + 0.5) / ${tR}.0);
+          return sample(${texName}, uv);
+        }
+      `;
+    }
+    if (shape[1] === 1) {
+      return `
+        float ${funcName}(int row, int col) {
+          vec2 uv = vec2(0.5, (float(row) + 0.5) / ${tR}.0);
+          return sample(${texName}, uv);
+        }
+      `;
+    }
     return `
-      float ${funcName}(float row, float col) {
-        float index = dot(vec2(row, col), vec2(${shape[1]}.0, 1.0));
-        vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0);
+      float ${funcName}(int row, int col) {
+        int index = row * ${shape[1]} + col;
+        vec2 uv = vec2(0.5, (float(index) + 0.5) / ${tR}.0);
         return sample(${texName}, uv);
       }
     `;
   }
   if (tR === 1) {
     return `
-      float ${funcName}(float row, float col) {
-        float index = dot(vec2(row, col), vec2(${shape[1]}.0, 1.0));
-        vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5);
+      float ${funcName}(int row, int col) {
+        int index = row * ${shape[1]} + col;
+        vec2 uv = vec2((float(index) + 0.5) / ${tC}.0, 0.5);
         return sample(${texName}, uv);
       }
     `;
   }
   return `
-    float ${funcName}(float row, float col) {
-      vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col);
+    float ${funcName}(int row, int col) {
+      vec2 uv = UVfrom2D(${tR}, ${tC}, ${shape[1]}, row, col);
       return sample(${texName}, uv);
     }
   `;
@@ -404,31 +434,31 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string {
   const tNumC = texShape[1];
   if (tNumC === 1 && tNumR === 1) {
     return `
-      float ${funcName}(float index) {
+      float ${funcName}(int index) {
         return sample(${texName}, halfCR);
       }
     `;
   }
   if (tNumC === 1) {
     return `
-      float ${funcName}(float index) {
-        vec2 uv = vec2(0.5, (index + 0.5) / ${tNumR}.0);
+      float ${funcName}(int index) {
+        vec2 uv = vec2(0.5, (float(index) + 0.5) / ${tNumR}.0);
         return sample(${texName}, uv);
       }
     `;
   }
   if (tNumR === 1) {
     return `
-      float ${funcName}(float index) {
-        vec2 uv = vec2((index + 0.5) / ${tNumC}.0, 0.5);
+      float ${funcName}(int index) {
+        vec2 uv = vec2((float(index) + 0.5) / ${tNumC}.0, 0.5);
         return sample(${texName}, uv);
       }
     `;
   }
   return `
-    float ${funcName}(float index) {
-      float texR = floor(index / ${tNumC}.0);
-      float texC = mod(index, ${tNumC}.0);
+    float ${funcName}(int index) {
+      int texR = index / ${tNumC};
+      int texC = index - texR * ${tNumC};
       vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tNumC}.0, ${tNumR}.0);
       return sample(${texName}, uv);
     }
@@ -448,15 +478,20 @@ function getSamplerAtOutputCoords(
     `;
   }
   const inSize = util.sizeFromShape(inTexShape);
-  const broadcastSnippet = broadcast ? `index = mod(index, ${inSize}.0);` : '';
-
+  let broadcastSnippet = '';
+  if (broadcast) {
+    broadcastSnippet = `
+      int mainPart = index / ${inSize};
+      index -= mainPart * ${inSize};
+    `;
+  }
   return `
     float ${funcName}() {
-      vec2 resTexRC = floor(gl_FragCoord.yx);
-      float index = dot(resTexRC, vec2(${outTexShape[1]}.0, 1.0));
+      ivec2 resTexRC = ivec2(gl_FragCoord.yx);
+      int index = resTexRC.x * ${outTexShape[1]} + resTexRC.y;
       ${broadcastSnippet}
-      float texR = floor(index / ${inTexShape[1]}.0);
-      float texC = mod(index, ${inTexShape[1]}.0);
+      int texR = index / ${inTexShape[1]};
+      int texC = index - texR * ${inTexShape[1]};
       vec2 uv = (vec2(texC, texR) + halfCR) /
                  vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0);
       return sample(${texName}, uv);
diff --git a/src/math/webgl/webgl_util.ts b/src/math/webgl/webgl_util.ts
index 2170f84eb8..11704dd645 100644
--- a/src/math/webgl/webgl_util.ts
+++ b/src/math/webgl/webgl_util.ts
@@ -67,10 +67,9 @@ export function isWebGL2Enabled() {
     if (gl != null) {
       WEBGL2_ENABLED = true;
 
-      const loseContextExtension =
-          getExtensionOrThrow(
-              gl as WebGLRenderingContext, 'WEBGL_lose_context') as
-          WebGLLoseContextExtension;
+      const loseContextExtension = getExtensionOrThrow(
+          gl as WebGLRenderingContext,
+          'WEBGL_lose_context') as WebGLLoseContextExtension;
       loseContextExtension.loseContext();
     } else {
       WEBGL2_ENABLED = false;
@@ -86,9 +85,10 @@ export function createWebGLRenderingContextFromCanvas(
   if (isWebGL2Enabled()) {
     gl = canvas.getContext('webgl2', attributes) as WebGLRenderingContext;
   } else {
-    gl = (canvas.getContext('webgl', attributes) ||
-          canvas.getContext('experimental-webgl', attributes)) as
-        WebGLRenderingContext;
+    gl =
+        (canvas.getContext('webgl', attributes) ||
+         canvas.getContext(
+             'experimental-webgl', attributes)) as WebGLRenderingContext;
   }
 
   if (gl == null) {
@@ -169,6 +169,7 @@ export function createFragmentShader(
   callAndCheck(gl, () => gl.shaderSource(fragmentShader, fragmentShaderSource));
   callAndCheck(gl, () => gl.compileShader(fragmentShader));
   if (gl.getShaderParameter(fragmentShader, gl.COMPILE_STATUS) === false) {
+    console.log(fragmentShaderSource);
     console.log(gl.getShaderInfoLog(fragmentShader));
     throw new Error('Failed to compile fragment shader.');
   }
diff --git a/src/test_util.ts b/src/test_util.ts
index d7682df6e8..f7433498b4 100644
--- a/src/test_util.ts
+++ b/src/test_util.ts
@@ -69,12 +69,14 @@ export function cpuMultiplyMatrix(
     bCol: number) {
   const result = new Float32Array(aRow * bCol);
   for (let r = 0; r < aRow; ++r) {
+    const aOffset = (r * aCol);
+    const cOffset = (r * bCol);
     for (let c = 0; c < bCol; ++c) {
       let d = 0;
       for (let k = 0; k < aCol; ++k) {
-        d += a[(r * aCol) + k] * b[(k * bCol) + c];
+        d += a[aOffset + k] * b[(k * bCol) + c];
       }
-      result[(r * bCol) + c] = d;
+      result[cOffset + c] = d;
     }
   }
   return result;
diff --git a/src/util.ts b/src/util.ts
index 925e12fd99..92b9a63345 100644
--- a/src/util.ts
+++ b/src/util.ts
@@ -103,7 +103,8 @@ export function flatten(arr: any[], ret?: number[]): number[] {
   return ret;
 }
 
-export type ArrayData = number|number[]|number[][]|number[][][]|number[][][][];
+export type ArrayData =
+    number | number[] | number[][] | number[][][] | number[][][][];
 
 export function inferShape(arr: ArrayData): number[] {
   const shape: number[] = [];
@@ -212,3 +213,10 @@ export function assertAndGetBroadcastedShape(
   }
   return result.reverse();
 }
+
+export function rightPad(a: string, size: number): string {
+  if (size <= a.length) {  // `a` already has at least `size` characters.
+    return a;  // Returned unchanged; the input is never truncated.
+  }
+  return a + ' '.repeat(size - a.length);  // Append the missing spaces.
+}