|
9 | 9 | },
|
10 | 10 | {
|
11 | 11 | "cell_type": "code",
|
12 |
| - "execution_count": null, |
| 12 | + "execution_count": 23, |
13 | 13 | "metadata": {},
|
14 | 14 | "outputs": [],
|
15 | 15 | "source": [
|
|
21 | 21 | },
|
22 | 22 | {
|
23 | 23 | "cell_type": "code",
|
24 |
| - "execution_count": null, |
| 24 | + "execution_count": 27, |
25 | 25 | "metadata": {},
|
26 | 26 | "outputs": [],
|
27 | 27 | "source": [
|
|
32 | 32 | },
|
33 | 33 | {
|
34 | 34 | "cell_type": "code",
|
35 |
| - "execution_count": 25, |
| 35 | + "execution_count": 28, |
36 | 36 | "metadata": {},
|
37 | 37 | "outputs": [
|
38 | 38 | {
|
39 |
| - "name": "stdout", |
40 |
| - "output_type": "stream", |
41 |
| - "text": [ |
42 |
| - "[[0. 0.]]\n", |
43 |
| - "[1.84207953 4.6075716 ]\n" |
| 39 | + "ename": "FileNotFoundError", |
| 40 | + "evalue": "[Errno 2] No such file or directory: '../datasets/kmeans_image_compression.npy'", |
| 41 | + "output_type": "error", |
| 42 | + "traceback": [ |
| 43 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 44 | + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", |
| 45 | + "Cell \u001b[0;32mIn[28], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m temp1 \u001b[38;5;241m=\u001b[39m \u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m temp1\n", |
| 46 | + "Cell \u001b[0;32mIn[27], line 2\u001b[0m, in \u001b[0;36mload_data\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_data\u001b[39m():\n\u001b[0;32m----> 2\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m../datasets/kmeans_image_compression.npy\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m data\n", |
| 47 | + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/numpy/lib/npyio.py:427\u001b[0m, in \u001b[0;36mload\u001b[0;34m(file, mmap_mode, allow_pickle, fix_imports, encoding, max_header_size)\u001b[0m\n\u001b[1;32m 425\u001b[0m own_fid \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 426\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 427\u001b[0m fid \u001b[38;5;241m=\u001b[39m stack\u001b[38;5;241m.\u001b[39menter_context(\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mos_fspath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 428\u001b[0m own_fid \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 430\u001b[0m \u001b[38;5;66;03m# Code to distinguish from NumPy binary files and pickles.\u001b[39;00m\n", |
| 48 | + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../datasets/kmeans_image_compression.npy'" |
44 | 49 | ]
|
45 | 50 | }
|
46 | 51 | ],
|
47 | 52 | "source": [
|
48 |
| - "temp = load_data()\n", |
49 |
| - "\n", |
50 |
| - "temmm = np.zeros((1, 2))\n", |
51 |
| - "\n", |
52 |
| - "print(temmm)\n", |
53 |
| - "\n", |
54 |
| - "temmm = temp[0]\n", |
55 |
| - "\n", |
56 |
| - "print(temmm)" |
| 53 | + "temp1 = load_data()\n", |
| 54 | + "temp1" |
57 | 55 | ]
|
58 | 56 | },
|
59 | 57 | {
|
60 | 58 | "cell_type": "code",
|
61 |
| - "execution_count": 17, |
| 59 | + "execution_count": 6, |
62 | 60 | "metadata": {},
|
63 | 61 | "outputs": [],
|
64 | 62 | "source": [
|
|
79 | 77 | " centroids.append([x, y])\n",
|
80 | 78 | " return centroids\n",
|
81 | 79 | "\n",
|
| 80 | + "\n", |
| 81 | + "\n", |
| 82 | + "def calculate_centroids(X, centroids):\n", |
| 83 | + " X_centroids = np.zeros(X.shape[0])\n", |
| 84 | + " for i in range(X.shape[0]):\n", |
| 85 | + " distance = []\n", |
| 86 | + " for j in range(centroids.shape[0]):\n", |
| 87 | + " norm = np.linalg.norm(X[i] - centroids[j])\n", |
| 88 | + " distance.append(norm)\n", |
| 89 | + "\n", |
| 90 | + " X_centroids[i] = np.argmin(distance)\n", |
| 91 | + " return X_centroids\n", |
| 92 | + "\n", |
| 93 | + "\n", |
| 94 | + "\n", |
82 | 95 | "def calulate_loss(data, centroids):\n",
|
83 | 96 | " \n",
|
84 | 97 | " return\n",
|
|
94 | 107 | },
|
95 | 108 | {
|
96 | 109 | "cell_type": "code",
|
97 |
| - "execution_count": 28, |
| 110 | + "execution_count": 7, |
98 | 111 | "metadata": {},
|
99 | 112 | "outputs": [
|
100 | 113 | {
|
101 |
| - "name": "stdout", |
102 |
| - "output_type": "stream", |
103 |
| - "text": [ |
104 |
| - "-0.24512712766170175 8.203398153359817\n" |
105 |
| - ] |
106 |
| - }, |
107 |
| - { |
108 |
| - "ename": "AttributeError", |
109 |
| - "evalue": "'list' object has no attribute 'shape'", |
| 114 | + "ename": "FileNotFoundError", |
| 115 | + "evalue": "[Errno 2] No such file or directory: '../datasets/kmeans_image_compression.npy'", |
110 | 116 | "output_type": "error",
|
111 | 117 | "traceback": [
|
112 | 118 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
113 |
| - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", |
114 |
| - "Cell \u001b[0;32mIn[28], line 35\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(X\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]):\n\u001b[1;32m 34\u001b[0m distance \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m j \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[43mcentroids\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m[\u001b[38;5;241m0\u001b[39m])):\n\u001b[1;32m 36\u001b[0m norm \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mlinalg\u001b[38;5;241m.\u001b[39mnorm(X[i] \u001b[38;5;241m-\u001b[39m centroids[j])\n\u001b[1;32m 37\u001b[0m distance\u001b[38;5;241m.\u001b[39mappend(norm)\n", |
115 |
| - "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'" |
| 119 | + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", |
| 120 | + "Cell \u001b[0;32mIn[7], line 25\u001b[0m\n\u001b[1;32m 23\u001b[0m iterations \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m100\u001b[39m\n\u001b[1;32m 24\u001b[0m cost_array \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m---> 25\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m1\u001b[39m, K\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m1\u001b[39m):\n\u001b[1;32m 28\u001b[0m summ \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n", |
| 121 | + "Cell \u001b[0;32mIn[5], line 2\u001b[0m, in \u001b[0;36mload_data\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_data\u001b[39m():\n\u001b[0;32m----> 2\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m../datasets/kmeans_image_compression.npy\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m data\n", |
| 122 | + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/numpy/lib/npyio.py:427\u001b[0m, in \u001b[0;36mload\u001b[0;34m(file, mmap_mode, allow_pickle, fix_imports, encoding, max_header_size)\u001b[0m\n\u001b[1;32m 425\u001b[0m own_fid \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 426\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 427\u001b[0m fid \u001b[38;5;241m=\u001b[39m stack\u001b[38;5;241m.\u001b[39menter_context(\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mos_fspath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 428\u001b[0m own_fid \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 430\u001b[0m \u001b[38;5;66;03m# Code to distinguish from NumPy binary files and pickles.\u001b[39;00m\n", |
| 123 | + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../datasets/kmeans_image_compression.npy'" |
116 | 124 | ]
|
117 | 125 | }
|
118 | 126 | ],
|
|
144 | 152 | "X = load_data()\n",
|
145 | 153 | "\n",
|
146 | 154 | "for k in range(1, K+1):\n",
|
| 155 | + " summ = 0\n", |
147 | 156 | " lowest_cost = 100000\n",
|
148 | 157 | " for iteration in range(iterations):\n",
|
149 | 158 | " centroids = initialize_centroids(X, k)\n",
|
150 | 159 | " X_centroids = np.zeros(X.shape[0])\n",
|
151 | 160 | " while(True):\n",
|
152 |
| - " for i in range(X.shape[0]):\n", |
153 |
| - " distance = []\n", |
154 |
| - " for j in range(len(centroids.shape[0])):\n", |
155 |
| - " norm = np.linalg.norm(X[i] - centroids[j])\n", |
156 |
| - " distance.append(norm)\n", |
157 |
| - "\n", |
158 |
| - " X_centroids[i] = np.argmin(distance)\n", |
159 |
| - "\n", |
| 161 | + " X_centroids = calculate_centroids(X, centroids)\n", |
160 | 162 | " ## calculate mean of each cluster\n",
|
161 | 163 | " means = np.zeros(range(K))\n",
|
162 | 164 | " for kk in range(K):\n",
|
|
173 | 175 | " else:\n",
|
174 | 176 | " centroids = means\n",
|
175 | 177 | "\n",
|
176 |
| - " # ## calculate cost of the final centroids\n", |
177 |
| - " # for i in range(X.shape[0]):\n", |
178 |
| - " # distance = []\n", |
179 |
| - " # for j in range(len(centroids.shape[0])):\n", |
180 |
| - " # norm = np.linalg.norm(X[i] - centroids[j])\n", |
181 |
| - " # distance.append(norm)\n", |
182 |
| - " \n", |
183 |
| - " \n", |
184 |
| - "\n" |
| 178 | + " ## calculate cost of the final centroids\n", |
| 179 | + " X_centroids = calculate_centroids(X, centroids)\n", |
| 180 | + " for i in range(len(X_centroids)):\n", |
| 181 | + " summ += np.linalg.norm(X[i] - centroids[X_centroids[i]])\n", |
| 182 | + "\n", |
| 183 | + " print(summ)" |
185 | 184 | ]
|
186 | 185 | },
|
187 | 186 | {
|
|
194 | 193 | ],
|
195 | 194 | "metadata": {
|
196 | 195 | "kernelspec": {
|
197 |
| - "display_name": "Python 3", |
| 196 | + "display_name": "Python 3 (ipykernel)", |
198 | 197 | "language": "python",
|
199 | 198 | "name": "python3"
|
200 | 199 | },
|
|
0 commit comments