From 18948ea86478951f2922427d9da9d6f6eb140cbe Mon Sep 17 00:00:00 2001
From: Maharshi Pandya
Date: Mon, 3 Jun 2024 20:11:05 +0530
Subject: [PATCH] add standard deviation

---
 playground.ipynb        | 124 ++++++++++++++++++++++++++++------------
 smolgrad/core/engine.py |  16 +++++-
 test_backward.py        |   4 +-
 3 files changed, 105 insertions(+), 39 deletions(-)

diff --git a/playground.ipynb b/playground.ipynb
index 2844a3a..e5baa83 100644
--- a/playground.ipynb
+++ b/playground.ipynb
@@ -61,34 +61,86 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "output of mean: tensor([2., 3.], grad_fn=<MeanBackward1>)\n",
-      "gradient of a: tensor([[0.5000, 0.5000],\n",
-      "        [0.5000, 0.5000]])\n"
+      "1.118033988749895\n",
+      "2.5\n",
+      "[[2.25 0.25]\n",
+      " [0.25 2.25]]\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([[1.11803399]])"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "a = torch.Tensor([[1, 2], [3, 4]]); a.requires_grad = True\n",
+    "a = np.array([[1, 2], [3, 4]])\n",
+    "\n",
+    "std = np.std(a, keepdims=False)\n",
+    "print(std)\n",
+    "\n",
+    "temp = (a - a.mean(keepdims=True)) ** 2\n",
+    "\n",
+    "print(a.mean())\n",
+    "print(temp)\n",
     "\n",
-    "b = a.mean(axis = 0)\n",
+    "std = np.sqrt(temp.mean(keepdims=True))\n",
+    "std"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([0.7071, 0.7071], grad_fn=<StdBackward0>)\n",
+      "tensor([[-0.7071,  0.7071],\n",
+      "        [-0.7071,  0.7071]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "a = torch.Tensor([[1, 2], [3, 4]])\n",
+    "a = a.to(torch.float32)\n",
+    "a.requires_grad = True\n",
+    "\n",
+    "b = torch.std(a, axis=1)\n",
     "c = b.sum()\n",
     "\n",
     "c.backward()\n",
     "\n",
-    "print(\"output of mean: \", b)\n",
-    "print(\"gradient of a: \", a.grad)"
+    "print(b)\n",
+    "print(a.grad)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -97,7 +149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -124,24 +176,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(tensor([[1., 2., 3.],\n",
-       "         [4., 5., 6.]], requires_grad=True),\n",
-       " tensor([[1., 2., 3.],\n",
-       "         [4., 5., 6.]], dtype=torch.float16, grad_fn=<ToCopyBackward0>),\n",
-       " tensor([[1., 2., 3.],\n",
-       "         [4., 5., 6.]], dtype=torch.float16, grad_fn=<ToCopyBackward0>),\n",
-       " tensor([[ 2.,  4.,  6.],\n",
-       "         [ 8., 10., 12.]], dtype=torch.float16, grad_fn=<AddBackward0>),\n",
-       " tensor(42., dtype=torch.float16, grad_fn=<SumBackward0>))"
+       "(tensor([[1., 2.],\n",
+       "         [3., 4.]], requires_grad=True),\n",
+       " tensor([[1., 2.],\n",
+       "         [3., 4.]], dtype=torch.float16, grad_fn=<ToCopyBackward0>),\n",
+       " tensor([[1., 2.],\n",
+       "         [3., 4.]], dtype=torch.float16, grad_fn=<ToCopyBackward0>),\n",
+       " tensor([[2., 4.],\n",
+       "         [6., 8.]], dtype=torch.float16, grad_fn=<AddBackward0>),\n",
+       " tensor(20., dtype=torch.float16, grad_fn=<SumBackward0>))"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
@@ -158,7 +210,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -167,17 +219,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
"execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([[2., 2., 2.],\n", - " [2., 2., 2.]])" + "tensor([[2.5000, 2.5000],\n", + " [2.5000, 2.5000]])" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -188,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -212,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -222,7 +274,7 @@ " [1., 1.]])" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -233,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -242,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -251,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -262,7 +314,7 @@ " [10, 11, 12, 13]])" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -274,15 +326,15 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Output from Linear layer: tensor([-1.7602, -1.6158, 3.9906, -4.1399, 3.3804, -2.6154, 1.3862, 1.1908,\n", - " -2.5090, -1.7173], grad_fn=)\n", + "Output from Linear layer: tensor([ 2.1948, 0.2230, 1.8296, 1.1871, 1.2304, 1.1347, -1.8223, 2.1204,\n", + " -0.1979, -3.8751], grad_fn=)\n", "Input shape: torch.Size([5])\n", "Output shape: torch.Size([10])\n", "torch.Size([10, 5]) torch.Size([10])\n" diff --git a/smolgrad/core/engine.py b/smolgrad/core/engine.py index 2145e5f..7f5baa2 100644 --- a/smolgrad/core/engine.py +++ b/smolgrad/core/engine.py @@ -156,7 +156,7 @@ def _sum_backward(): return out - def mean(self, axis: int = None, keepdims: bool = False): + def mean(self, axis: int = None, keepdims: bool = False) -> "Tensor": """ calculate the arithmetic average of the Tensor elements along given axis """ @@ -166,6 +166,20 @@ def mean(self, axis: int = None, keepdims: bool = False): out: Tensor = self.sum(axis=axis, keepdims=keepdims) / N return out + def std(self, axis: int = None, keepdims: bool = False, correction: int = 0): + """ + calculate the standard deviation of the Tensor elements along given axis + """ + N: int = self.data.shape[axis] if axis is not None else self.data.size + assert N - correction > 0, "Correction should not be greater than or equal to number of samples." + + # composed operations i.e. no need to write backward function + t = (self - self.mean(axis=axis, keepdims=True)) ** 2 + t1 = t.sum(axis=axis, keepdims=keepdims) / (N - correction) + out = t1 ** (1/2) + + return out + def half(self): """ convert the data and gradients to half precision i.e. float32 -> float16 diff --git a/test_backward.py b/test_backward.py index 39aca86..59cbadb 100644 --- a/test_backward.py +++ b/test_backward.py @@ -2,10 +2,10 @@ a = Tensor([[1, 2], [3, 4]], requires_grad=True) -b = a.mean(axis = 1) +b = a.std(axis=1, correction=1) c = b.sum() c.backward() -print("output of mean: ", b) +print("output of std: ", b) print("gradient of a: ", a.grad)