Commit: add standard deviation

Maharshi Pandya committed Jun 3, 2024
1 parent 0c327e5 commit 18948ea
Showing 3 changed files with 105 additions and 39 deletions.
124 changes: 88 additions & 36 deletions playground.ipynb
@@ -61,34 +61,86 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"output of mean: tensor([2., 3.], grad_fn=<MeanBackward1>)\n",
"gradient of a: tensor([[0.5000, 0.5000],\n",
" [0.5000, 0.5000]])\n"
"1.118033988749895\n",
"2.5\n",
"[[2.25 0.25]\n",
" [0.25 2.25]]\n"
]
},
{
"data": {
"text/plain": [
"array([[1.11803399]])"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = torch.Tensor([[1, 2], [3, 4]]); a.requires_grad = True\n",
"a = np.array([[1, 2], [3, 4]])\n",
"\n",
"std = np.std(a, keepdims=False)\n",
"print(std)\n",
"\n",
"temp = (a - a.mean(keepdims=True)) ** 2\n",
"\n",
"print(a.mean())\n",
"print(temp)\n",
"\n",
"b = a.mean(axis = 0)\n",
"std = np.sqrt(temp.mean(keepdims=True))\n",
"std"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([0.7071, 0.7071], grad_fn=<StdBackward0>)\n",
"tensor([[-0.7071, 0.7071],\n",
" [-0.7071, 0.7071]])\n"
]
}
],
"source": [
"a = torch.Tensor([[1, 2], [3, 4]])\n",
"a = a.to(torch.float32)\n",
"a.requires_grad = True\n",
"\n",
"b = torch.std(a, axis=1)\n",
"c = b.sum()\n",
"\n",
"c.backward()\n",
"\n",
"print(\"output of mean: \", b)\n",
"print(\"gradient of a: \", a.grad)"
"print(b)\n",
"print(a.grad)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -97,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -124,24 +176,24 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[1., 2., 3.],\n",
" [4., 5., 6.]], requires_grad=True),\n",
" tensor([[1., 2., 3.],\n",
" [4., 5., 6.]], dtype=torch.float16, grad_fn=<ToCopyBackward0>),\n",
" tensor([[1., 2., 3.],\n",
" [4., 5., 6.]], dtype=torch.float16, grad_fn=<ToCopyBackward0>),\n",
" tensor([[ 2., 4., 6.],\n",
" [ 8., 10., 12.]], dtype=torch.float16, grad_fn=<AddBackward0>),\n",
" tensor(42., dtype=torch.float16, grad_fn=<SumBackward0>))"
"(tensor([[1., 2.],\n",
" [3., 4.]], requires_grad=True),\n",
" tensor([[1., 2.],\n",
" [3., 4.]], dtype=torch.float16, grad_fn=<ToCopyBackward0>),\n",
" tensor([[1., 2.],\n",
" [3., 4.]], dtype=torch.float16, grad_fn=<ToCopyBackward0>),\n",
" tensor([[2., 4.],\n",
" [6., 8.]], dtype=torch.float16, grad_fn=<AddBackward0>),\n",
" tensor(20., dtype=torch.float16, grad_fn=<SumBackward0>))"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -158,7 +210,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -167,17 +219,17 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[2., 2., 2.],\n",
" [2., 2., 2.]])"
"tensor([[2.5000, 2.5000],\n",
" [2.5000, 2.5000]])"
]
},
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -188,7 +240,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -212,7 +264,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -222,7 +274,7 @@
" [1., 1.]])"
]
},
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -233,7 +285,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -242,7 +294,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -251,7 +303,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -262,7 +314,7 @@
" [10, 11, 12, 13]])"
]
},
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -274,15 +326,15 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Output from Linear layer: tensor([-1.7602, -1.6158, 3.9906, -4.1399, 3.3804, -2.6154, 1.3862, 1.1908,\n",
" -2.5090, -1.7173], grad_fn=<ViewBackward0>)\n",
"Output from Linear layer: tensor([ 2.1948, 0.2230, 1.8296, 1.1871, 1.2304, 1.1347, -1.8223, 2.1204,\n",
" -0.1979, -3.8751], grad_fn=<ViewBackward0>)\n",
"Input shape: torch.Size([5])\n",
"Output shape: torch.Size([10])\n",
"torch.Size([10, 5]) torch.Size([10])\n"
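A note on the torch.std cell above: the ±0.7071 entries in a.grad are exactly the analytic gradient of the sample standard deviation. For std = sqrt(sum_i (x_i - mean)^2 / (N - 1)), the derivative is d(std)/dx_i = (x_i - mean) / ((N - 1) * std). A minimal NumPy check of that formula (a sketch; NumPy's ddof=1 corresponds to torch.std's default Bessel correction):

```python
import numpy as np

a = np.array([[1., 2.], [3., 4.]])

mu = a.mean(axis=1, keepdims=True)            # per-row mean
sigma = a.std(axis=1, ddof=1, keepdims=True)  # per-row sample std (N - 1 denominator)

# analytic gradient of the row-wise std with respect to each element
N = a.shape[1]
grad = (a - mu) / ((N - 1) * sigma)
print(grad)
# [[-0.70710678  0.70710678]
#  [-0.70710678  0.70710678]]  -> matches a.grad printed in the torch.std cell
```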
16 changes: 15 additions & 1 deletion smolgrad/core/engine.py
@@ -156,7 +156,7 @@ def _sum_backward():

return out

def mean(self, axis: int = None, keepdims: bool = False):
def mean(self, axis: int = None, keepdims: bool = False) -> "Tensor":
    """
    calculate the arithmetic average of the Tensor elements along the given axis
    """
@@ -166,6 +166,20 @@ def mean(self, axis: int = None, keepdims: bool = False):
    out: Tensor = self.sum(axis=axis, keepdims=keepdims) / N
    return out

def std(self, axis: int = None, keepdims: bool = False, correction: int = 0) -> "Tensor":
    """
    calculate the standard deviation of the Tensor elements along the given axis
    """
    N: int = self.data.shape[axis] if axis is not None else self.data.size
    assert N - correction > 0, "Correction must be less than the number of samples."

    # composed of existing differentiable ops, so no custom backward function is needed
    t = (self - self.mean(axis=axis, keepdims=True)) ** 2
    t1 = t.sum(axis=axis, keepdims=keepdims) / (N - correction)
    out = t1 ** (1/2)

    return out

def half(self):
    """
    convert the data and gradients to half precision i.e. float32 -> float16
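Because std above is built entirely from operations that already have backward rules (subtraction, mean, power, sum, division), the chain rule is composed automatically and no hand-written _std_backward is needed. A quick forward-value sanity check against NumPy; the import path is an assumption (the actual import is not shown in the diff), and s.data is assumed to expose the underlying ndarray, as self.data does inside the class:

```python
import numpy as np
from smolgrad import Tensor  # assumed import path

a = Tensor([[1, 2], [3, 4]], requires_grad=True)

# correction=0 gives the population std, matching NumPy's default ddof=0
s = a.std(axis=1, correction=0)

expected = np.std(np.array([[1., 2.], [3., 4.]]), axis=1)
print(s.data)    # ~[0.5, 0.5]
print(expected)  # [0.5 0.5]
```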
4 changes: 2 additions & 2 deletions test_backward.py
@@ -2,10 +2,10 @@

a = Tensor([[1, 2], [3, 4]], requires_grad=True)

b = a.mean(axis = 1)
b = a.std(axis=1, correction=1)
c = b.sum()

c.backward()

print("output of mean: ", b)
print("output of std: ", b)
print("gradient of a: ", a.grad)
