Add resizeBilinear gradient implementation. (#996)

FEATURE
tensorflow · May 8, 2018 · 7abc301 · 7abc301
1 parent 4091cac
commit 7abc301
Show file tree

Hide file tree

Showing 7 changed files with 1,002 additions and 19 deletions.
diff --git a/src/kernels/backend.ts b/src/kernels/backend.ts
@@ -188,6 +188,9 @@ export interface KernelBackend extends TensorStorage, BackendTimer {
       x: Tensor4D, newHeight: number, newWidth: number,
       alignCorners: boolean): Tensor4D;
 
+  /**
+   * Backward pass for `resizeBilinear`: given `dy`, the gradient with respect
+   * to the resized output, returns the gradient with respect to the input `x`.
+   *
+   * @param dy Gradient flowing back from the resized output.
+   * @param x The tensor that was passed to the forward resize.
+   * @param alignCorners The `alignCorners` flag used in the forward pass.
+   * @returns Gradient tensor; the backends in this change shape it like `x`.
+   */
+  resizeBilinearBackprop(dy: Tensor4D, x: Tensor4D, alignCorners: boolean):
+      Tensor4D;
+
   /**
    * Nearest-neighbor resize kernel. Declared with the same parameter list
    * and return type as `resizeBilinear` above.
    */
   resizeNearestNeighbor(
       x: Tensor4D, newHeight: number, newWidth: number,
       alignCorners: boolean): Tensor4D;

diff --git a/src/kernels/backend_cpu.ts b/src/kernels/backend_cpu.ts
@@ -1429,10 +1429,16 @@ export class MathBackendCPU implements KernelBackend {
     const output =
         ops.buffer<Rank.R4>([batch, newHeight, newWidth, numChannels], x.dtype);
 
-    const effectiveInputSize: [number, number] =
-        alignCorners ? [oldHeight - 1, oldWidth - 1] : [oldHeight, oldWidth];
-    const effectiveOutputSize: [number, number] =
-        alignCorners ? [newHeight - 1, newWidth - 1] : [newHeight, newWidth];
+    const effectiveInputSize: [number, number] = [
+      (alignCorners && newHeight > 1) ? oldHeight - 1 : oldHeight,
+      (alignCorners && newWidth > 1) ? oldWidth - 1 : oldWidth
+    ];
+
+    const effectiveOutputSize: [number, number] = [
+      (alignCorners && newHeight > 1) ? newHeight - 1 : newHeight,
+      (alignCorners && newWidth > 1) ? newWidth - 1 : newWidth
+    ];
+
     for (let b = 0; b < batch; b++) {
       for (let r = 0; r < newHeight; r++) {
         for (let c = 0; c < newWidth; c++) {
@@ -1469,6 +1475,74 @@ export class MathBackendCPU implements KernelBackend {
         }
       }
     }
+    return output.toTensor();
+  }
+
+  /**
+   * CPU backward pass for `resizeBilinear`. Walks every element of `dy`, maps
+   * it to a fractional position in `x`, and adds it to the four surrounding
+   * cells of the result, weighted by the bilinear coefficients.
+   *
+   * @param dy Gradient with respect to the resized output; its height and
+   *     width are taken from `dy.shape[1]` and `dy.shape[2]`.
+   * @param x Input of the forward pass; only its shape and dtype are read.
+   * @param alignCorners When true (and the dy dimension is larger than 1),
+   *     both sizes are reduced by one before the scale is computed.
+   * @returns A tensor with the shape and dtype of `x` holding the gradient.
+   */
+  resizeBilinearBackprop(dy: Tensor4D, x: Tensor4D, alignCorners: boolean) {
+    const [batch, xHeight, xWidth, depth] = x.shape;
+    const [, yHeight, yWidth] = dy.shape;
+
+    // The accumulation below reads cells before ever writing them.
+    // NOTE(review): assumes ops.buffer starts zero-filled -- confirm.
+    const output =
+        ops.buffer<Rank.R4>([batch, xHeight, xWidth, depth], x.dtype);
+
+    // In the backward pass, for each pixel of the input image we want to find
+    // the output pixels it contributed to during the forward pass, and add the
+    // corresponding values of dy (scaled by the interpolation weights) to it.
+
+    // Same size convention as the forward pass: with alignCorners, scale by
+    // (size - 1) unless the dy dimension is 1.
+    const effectiveXSize: [number, number] = [
+      (alignCorners && yHeight > 1) ? xHeight - 1 : xHeight,
+      (alignCorners && yWidth > 1) ? xWidth - 1 : xWidth
+    ];
+
+    const effectiveYSize: [number, number] = [
+      (alignCorners && yHeight > 1) ? yHeight - 1 : yHeight,
+      (alignCorners && yWidth > 1) ? yWidth - 1 : yWidth
+    ];
+
+    // Multiplying a dy row/column index by these gives a position in x.
+    const heightScale = effectiveXSize[0] / effectiveYSize[0];
+    const widthScale = effectiveXSize[1] / effectiveYSize[1];
+
+    // Reference implementation
+    // tslint:disable-next-line:max-line-length
+    // https://github.com/tensorflow/tensorflow/blob/3039375c86a5bbc9610c7725dcaa95d635f87ba2/tensorflow/core/kernels/resize_bilinear_op.cc#L275
+
+    for (let b = 0; b < batch; b++) {
+      for (let r = 0; r < yHeight; r++) {
+        // Fractional row in x for dy row r, its two neighbouring rows (the
+        // lower one clamped to the last row), and the weight of the lower one.
+        const dxR = r * heightScale;
+        const topDxRIndex = Math.floor(dxR);
+        const bottomDxRIndex = Math.min(Math.ceil(dxR), xHeight - 1);
+        const dxRLerp = dxR - topDxRIndex;
+        const inverseDxRLerp = 1.0 - dxRLerp;
+
+        for (let c = 0; c < yWidth; c++) {
+          // Same computation along the width axis.
+          const dxC = c * widthScale;
+          const leftDxCIndex = Math.floor(dxC);
+          const rightDxCIndex = Math.min(Math.ceil(dxC), xWidth - 1);
+          const dxCLerp = dxC - leftDxCIndex;
+          const inverseDxCLerp = 1.0 - dxCLerp;
+
+          for (let d = 0; d < depth; d++) {
+            const dyVal = dy.get(b, r, c, d);
+
+            // Each corner is a separate read-modify-write. The indices can
+            // coincide (top == bottom and/or left == right), so every get
+            // must see the result of the previous set.
+            let topLeft = output.get(b, topDxRIndex, leftDxCIndex, d);
+            topLeft += dyVal * inverseDxRLerp * inverseDxCLerp;
+            output.set(topLeft, b, topDxRIndex, leftDxCIndex, d);
+
+            let topRight = output.get(b, topDxRIndex, rightDxCIndex, d);
+            topRight += dyVal * inverseDxRLerp * dxCLerp;
+            output.set(topRight, b, topDxRIndex, rightDxCIndex, d);
+
+            let bottomLeft = output.get(b, bottomDxRIndex, leftDxCIndex, d);
+            bottomLeft += dyVal * dxRLerp * inverseDxCLerp;
+            output.set(bottomLeft, b, bottomDxRIndex, leftDxCIndex, d);
+
+            let bottomRight = output.get(b, bottomDxRIndex, rightDxCIndex, d);
+            bottomRight += dyVal * dxRLerp * dxCLerp;
+            output.set(bottomRight, b, bottomDxRIndex, rightDxCIndex, d);
+          }
+        }
+      }
+    }
 
     return output.toTensor();
   }

diff --git a/src/kernels/backend_webgl.ts b/src/kernels/backend_webgl.ts
@@ -55,6 +55,8 @@ import {OneHotProgram} from './webgl/onehot_gpu';
 import {PadProgram} from './webgl/pad_gpu';
 import {Pool2DProgram} from './webgl/pool_gpu';
 import {ReduceProgram} from './webgl/reduce_gpu';
+// tslint:disable-next-line:max-line-length
+import {ResizeBilinearBackpropProgram} from './webgl/resize_bilinear_backprop_gpu';
 import {ResizeBilinearProgram} from './webgl/resize_bilinear_gpu';
 // tslint:disable-next-line:max-line-length
 import {ResizeNearestNeighborProgram} from './webgl/resize_nearest_neighbor_gpu';
@@ -889,6 +891,13 @@ export class MathBackendWebGL implements KernelBackend {
     return this.compileAndRun(program, [x]);
   }
 
+  /**
+   * Runs the WebGL program that computes the gradient of `resizeBilinear`
+   * with respect to `x`. Only `dy` is passed to `compileAndRun` as an input;
+   * `x` and `alignCorners` are consumed by the program's constructor.
+   */
+  resizeBilinearBackprop(dy: Tensor4D, x: Tensor4D, alignCorners: boolean):
+      Tensor4D {
+    return this.compileAndRun(
+        new ResizeBilinearBackpropProgram(dy, x, alignCorners), [dy]);
+  }
+
   resizeNearestNeighbor(
       x: Tensor4D, newHeight: number, newWidth: number,
       alignCorners: boolean): Tensor4D {

diff --git a/src/kernels/webgl/resize_bilinear_backprop_gpu.ts b/src/kernels/webgl/resize_bilinear_backprop_gpu.ts
@@ -0,0 +1,139 @@
+/**
+ * @license
+ * Copyright 2018 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {Tensor4D} from '../../tensor';
+import {GPGPUProgram} from './gpgpu_math';
+
+/**
+ * WebGL program for the backward pass of a bilinear resize.
+ *
+ * Each output texel corresponds to one element of the gradient with respect
+ * to `x`. The shader scans a window of `dy` around the matching position and
+ * sums the contributions of every `dy` element whose bilinear neighbours
+ * include this texel.
+ */
+export class ResizeBilinearBackpropProgram implements GPGPUProgram {
+  variableNames = ['dy'];
+  outputShape: number[] = [];
+  userCode: string;
+
+  /**
+   * @param dy Gradient with respect to the resized output; its height and
+   *     width are baked into the shader source.
+   * @param x Input of the forward pass; only its shape is read, and it is
+   *     used as the output shape.
+   * @param alignCorners When true (and the dy dimension is larger than 1),
+   *     both sizes are reduced by one before the scale is computed.
+   */
+  constructor(dy: Tensor4D, x: Tensor4D, alignCorners: boolean) {
+    this.outputShape = x.shape;
+    const [, xHeight, xWidth] = x.shape;
+    const [, yHeight, yWidth] = dy.shape;
+
+    // In the backward pass, for each pixel of the input image we want to find
+    // the output pixels it contributed to during the forward pass, and add the
+    // corresponding values of dy (scaled by the interpolation weights) to it.
+
+    const effectiveXSize: [number, number] = [
+      (alignCorners && yHeight > 1) ? xHeight - 1 : xHeight,
+      (alignCorners && yWidth > 1) ? xWidth - 1 : xWidth
+    ];
+
+    const effectiveYSize: [number, number] = [
+      (alignCorners && yHeight > 1) ? yHeight - 1 : yHeight,
+      (alignCorners && yWidth > 1) ? yWidth - 1 : yWidth
+    ];
+
+    // Multiplying a dy row/column index by these gives a position in x.
+    const heightScale = effectiveXSize[0] / effectiveYSize[0];
+    const widthScale = effectiveXSize[1] / effectiveYSize[1];
+
+    // A scale of 0 occurs with alignCorners when x has a single row/column
+    // but dy has several. 1 / 0 is Infinity, which would be interpolated
+    // into the shader as `float(Infinity)` / `int(Infinity)` and fail to
+    // compile, so use 0 as the reciprocal in that case.
+    const invHeightScale = heightScale > 0 ? 1 / heightScale : 0;
+    const invWidthScale = widthScale > 0 ? 1 / widthScale : 0;
+
+    // This defines the size of the window of values around a particular
+    // index in dy that we want to search for contributions to dx. With a
+    // zero scale every dy row/column maps onto index 0 of x, so the window
+    // (which is centred on its start index) must span all of dy.
+    const winHeight =
+        heightScale > 0 ? (Math.ceil(invHeightScale) * 2) + 2 : yHeight * 2;
+    const winWidth =
+        widthScale > 0 ? (Math.ceil(invWidthScale) * 2) + 2 : yWidth * 2;
+
+    this.userCode = `
+      void main() {
+        ivec4 coords = getOutputCoords();
+        int b = coords[0];
+        int d = coords[3];
+        int r = coords[1];
+        int c = coords[2];
+
+        float accumulator = 0.0;
+
+        const float heightScale = float(${heightScale});
+        const float widthScale = float(${widthScale});
+
+        const float invHeightScale = float(${invHeightScale});
+        const float invWidthScale = float(${invWidthScale});
+
+        const int winHeight = int(${winHeight});
+        const int winWidth = int(${winWidth});
+
+        // Compute bounds for where in dy we will look
+        float startRLerp = floor(float(r) * invHeightScale);
+        int startDyR = int(startRLerp - float(winHeight / 2));
+
+        float startCLerp = floor(float(c) * invWidthScale);
+        int startDyC = int(startCLerp - float(winWidth / 2));
+
+        // Loop over dy
+        for (int dyROffset = 0; dyROffset < winHeight; dyROffset++) {
+          int dyR = dyROffset + startDyR;
+
+          // Guard against the window exceeding the bounds of dy
+          if (dyR < 0 || dyR >= ${yHeight}) {
+            continue;
+          }
+
+          for (int dyCOffset = 0; dyCOffset < winWidth; dyCOffset++) {
+            int dyC = dyCOffset + startDyC;
+
+            // Guard against the window exceeding the bounds of dy
+            if (dyC < 0 || dyC >= ${yWidth}) {
+              continue;
+            }
+
+            float dxR = float(dyR) * heightScale;
+            int topDxRIndex = int(floor(dxR));
+            int bottomDxRIndex = int(min(ceil(dxR), ${xHeight - 1}.0));
+            float dxRLerp = dxR - float(topDxRIndex);
+            float inverseDxRLerp = 1.0 - dxRLerp;
+
+            float dxC = float(dyC) * widthScale;
+            int leftDxCIndex = int(floor(dxC));
+            int rightDxCIndex = int(min(ceil(dxC), ${xWidth - 1}.0));
+            float dxCLerp = dxC - float(leftDxCIndex);
+            float inverseDxCLerp = 1.0 - dxCLerp;
+
+            if (r == topDxRIndex && c == leftDxCIndex) {
+              // topLeft
+              accumulator +=
+                getDy(b, dyR, dyC, d) * inverseDxRLerp * inverseDxCLerp;
+            }
+
+            if (r == topDxRIndex && c == rightDxCIndex) {
+              // topRight
+              accumulator += getDy(b, dyR, dyC, d) * inverseDxRLerp * dxCLerp;
+            }
+
+            if (r == bottomDxRIndex && c == leftDxCIndex) {
+              // bottomLeft
+              accumulator += getDy(b, dyR, dyC, d) * dxRLerp * inverseDxCLerp;
+            }
+
+            if (r == bottomDxRIndex && c == rightDxCIndex) {
+              // bottomRight
+              accumulator += getDy(b, dyR, dyC, d) * dxRLerp * dxCLerp;
+            }
+          }
+        }
+        // End loop over dy
+
+        setOutput(accumulator);
+      }
+    `;
+  }
+}
diff --git a/src/kernels/webgl/resize_bilinear_gpu.ts b/src/kernels/webgl/resize_bilinear_gpu.ts
@@ -28,11 +28,16 @@ export class ResizeBilinearProgram implements GPGPUProgram {
     const [batch, oldHeight, oldWidth, depth] = inputShape;
     this.outputShape = [batch, newHeight, newWidth, depth];
 
-    const effectiveInSize: [number, number] =
-        alignCorners ? [oldHeight - 1, oldWidth - 1] : [oldHeight, oldWidth];
+    const effectiveInSize: [number, number] = [
+      (alignCorners && newHeight > 1) ? oldHeight - 1 : oldHeight,
+      (alignCorners && newWidth > 1) ? oldWidth - 1 : oldWidth
+    ];
+
+    const effectiveOutSize: [number, number] = [
+      (alignCorners && newHeight > 1) ? newHeight - 1 : newHeight,
+      (alignCorners && newWidth > 1) ? newWidth - 1 : newWidth
+    ];
 
-    const effectiveOutSize: [number, number] =
-        alignCorners ? [newHeight - 1, newWidth - 1] : [newHeight, newWidth];
     this.userCode = `
       const vec2 effectiveInputOverOutputRatioRC = vec2(
           ${effectiveInSize[0] / effectiveOutSize[0]},

diff --git a/src/ops/image_ops.ts b/src/ops/image_ops.ts
@@ -16,8 +16,9 @@
  */
 
 import {doc} from '../doc';
+import {ForwardFunc} from '../engine';
 import {ENV} from '../environment';
-import {Tensor3D, Tensor4D} from '../tensor';
+import {Tensor, Tensor3D, Tensor4D} from '../tensor';
 import * as util from '../util';
 import {operation} from './operation';
 
@@ -55,11 +56,21 @@ export class ImageOps {
       batchImages =
           images.as4D(1, images.shape[0], images.shape[1], images.shape[2]);
     }
+
     const [newHeight, newWidth] = size;
-    const res = ENV.engine.runKernel(
-        backend => backend.resizeBilinear(
-            batchImages, newHeight, newWidth, alignCorners),
-        {batchImages});
+    const forward: ForwardFunc<Tensor4D> = (backend, save) =>
+        backend.resizeBilinear(batchImages, newHeight, newWidth, alignCorners);
+
+    const backward = (dy: Tensor4D, saved: Tensor[]) => {
+      return {
+        batchImages: () => ENV.engine.runKernel(
+            backend =>
+                backend.resizeBilinearBackprop(dy, batchImages, alignCorners),
+            {})
+      };
+    };
+
+    const res = ENV.engine.runKernel(forward, {batchImages}, backward);
     if (reshapedTo4D) {
       return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T;
     }