Merge pull request konrad-gajdus#6 from konrad-gajdus/enchantments
new features: SGD with momentum, per-epoch timing, cache-friendlier forward/backward loops, and a tuned learning rate
konrad-gajdus authored Oct 4, 2024
2 parents f0ee609 + df65bac commit 271499b
Showing 2 changed files with 88 additions and 59 deletions.
42 changes: 21 additions & 21 deletions README.md
@@ -13,26 +13,26 @@ This project implements a **minimal** neural network in C for classifying handwr
## Performance

```bash
-Epoch 1, Accuracy: 96.12%, Avg Loss: 0.2188
-Epoch 2, Accuracy: 96.98%, Avg Loss: 0.0875
-Epoch 3, Accuracy: 97.41%, Avg Loss: 0.0561
-Epoch 4, Accuracy: 97.63%, Avg Loss: 0.0383
-Epoch 5, Accuracy: 97.63%, Avg Loss: 0.0270
-Epoch 6, Accuracy: 97.69%, Avg Loss: 0.0193
-Epoch 7, Accuracy: 97.98%, Avg Loss: 0.0143
-Epoch 8, Accuracy: 98.03%, Avg Loss: 0.0117
-Epoch 9, Accuracy: 98.03%, Avg Loss: 0.0103
-Epoch 10, Accuracy: 98.06%, Avg Loss: 0.0094
-Epoch 11, Accuracy: 98.06%, Avg Loss: 0.0087
-Epoch 12, Accuracy: 98.16%, Avg Loss: 0.0081
-Epoch 13, Accuracy: 98.16%, Avg Loss: 0.0078
-Epoch 14, Accuracy: 98.18%, Avg Loss: 0.0075
-Epoch 15, Accuracy: 98.19%, Avg Loss: 0.0074
-Epoch 16, Accuracy: 98.20%, Avg Loss: 0.0072
-Epoch 17, Accuracy: 98.24%, Avg Loss: 0.0070
-Epoch 18, Accuracy: 98.23%, Avg Loss: 0.0069
-Epoch 19, Accuracy: 98.23%, Avg Loss: 0.0069
-Epoch 20, Accuracy: 98.22%, Avg Loss: 0.0068
+Epoch 1, Accuracy: 95.61%, Avg Loss: 0.2717, Time: 2.61 seconds
+Epoch 2, Accuracy: 96.80%, Avg Loss: 0.1167, Time: 2.62 seconds
+Epoch 3, Accuracy: 97.21%, Avg Loss: 0.0766, Time: 2.66 seconds
+Epoch 4, Accuracy: 97.38%, Avg Loss: 0.0550, Time: 2.64 seconds
+Epoch 5, Accuracy: 97.49%, Avg Loss: 0.0397, Time: 2.64 seconds
+Epoch 6, Accuracy: 97.47%, Avg Loss: 0.0285, Time: 2.65 seconds
+Epoch 7, Accuracy: 97.47%, Avg Loss: 0.0205, Time: 2.66 seconds
+Epoch 8, Accuracy: 97.72%, Avg Loss: 0.0151, Time: 2.66 seconds
+Epoch 9, Accuracy: 97.88%, Avg Loss: 0.0112, Time: 2.67 seconds
+Epoch 10, Accuracy: 97.82%, Avg Loss: 0.0084, Time: 2.67 seconds
+Epoch 11, Accuracy: 97.88%, Avg Loss: 0.0063, Time: 2.68 seconds
+Epoch 12, Accuracy: 97.92%, Avg Loss: 0.0049, Time: 2.68 seconds
+Epoch 13, Accuracy: 97.92%, Avg Loss: 0.0039, Time: 2.69 seconds
+Epoch 14, Accuracy: 98.02%, Avg Loss: 0.0032, Time: 2.69 seconds
+Epoch 15, Accuracy: 98.06%, Avg Loss: 0.0027, Time: 2.70 seconds
+Epoch 16, Accuracy: 98.09%, Avg Loss: 0.0024, Time: 2.70 seconds
+Epoch 17, Accuracy: 98.11%, Avg Loss: 0.0021, Time: 2.69 seconds
+Epoch 18, Accuracy: 98.12%, Avg Loss: 0.0019, Time: 2.70 seconds
+Epoch 19, Accuracy: 98.16%, Avg Loss: 0.0017, Time: 2.70 seconds
+Epoch 20, Accuracy: 98.17%, Avg Loss: 0.0015, Time: 2.71 seconds
```

## Prerequisites
@@ -45,7 +45,7 @@ Epoch 20, Accuracy: 98.22%, Avg Loss: 0.0068
## Compilation

```bash
-gcc -o nn nn.c -lm
+gcc -O3 -march=native -ffast-math -o nn nn.c -lm
```
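A note on the new flags: `-O3` turns on aggressive optimization, `-march=native` generates code tuned to the build machine's CPU (so the resulting binary may not run on older processors), and `-ffast-math` relaxes IEEE 754 floating-point semantics for speed, which can slightly change numerical results.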

## Usage
105 changes: 67 additions & 38 deletions nn.c
@@ -6,17 +6,19 @@
#define INPUT_SIZE 784
#define HIDDEN_SIZE 256
#define OUTPUT_SIZE 10
-#define LEARNING_RATE 0.001f
+#define LEARNING_RATE 0.0005f
+#define MOMENTUM 0.9f
#define EPOCHS 20
#define BATCH_SIZE 64
#define IMAGE_SIZE 28
#define TRAIN_SPLIT 0.8
+#define PRINT_INTERVAL 1000

#define TRAIN_IMG_PATH "data/train-images.idx3-ubyte"
#define TRAIN_LBL_PATH "data/train-labels.idx1-ubyte"

typedef struct {
-    float *weights, *biases;
+    float *weights, *biases, *weight_momentum, *bias_momentum;
    int input_size, output_size;
} Layer;

@@ -49,40 +49,67 @@ void init_layer(Layer *layer, int in_size, int out_size) {
    layer->output_size = out_size;
    layer->weights = malloc(n * sizeof(float));
    layer->biases = calloc(out_size, sizeof(float));
+    layer->weight_momentum = calloc(n, sizeof(float));
+    layer->bias_momentum = calloc(out_size, sizeof(float));

    for (int i = 0; i < n; i++)
        layer->weights[i] = ((float)rand() / RAND_MAX - 0.5f) * 2 * scale;
}

void forward(Layer *layer, float *input, float *output) {
-    for (int i = 0; i < layer->output_size; i++) {
+    for (int i = 0; i < layer->output_size; i++)
        output[i] = layer->biases[i];
-        for (int j = 0; j < layer->input_size; j++)
-            output[i] += input[j] * layer->weights[j * layer->output_size + i];
-    }
+
+    for (int j = 0; j < layer->input_size; j++) {
+        float in_j = input[j];
+        float *weight_row = &layer->weights[j * layer->output_size];
+        for (int i = 0; i < layer->output_size; i++) {
+            output[i] += in_j * weight_row[i];
+        }
+    }
+
+    for (int i = 0; i < layer->output_size; i++)
+        output[i] = output[i] > 0 ? output[i] : 0;
}
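The rewritten `forward` does two things. First, ReLU moves into `forward` itself, so `train` and `predict` no longer apply it separately (note this also clips the output layer's values at zero before softmax). Second, the matrix-vector loops are interchanged: with the row-major layout `weights[j * output_size + i]`, fixing `j` and sweeping `i` reads each weight row at unit stride, whereas the old order strided by `output_size` floats per access. A standalone sketch of the two access patterns, with hypothetical helper names:

```c
// Hypothetical helpers, assuming nn.c's row-major layout:
// w[j * out_n + i] is the weight from input j to output i.
// Both assume y[] has been pre-filled with the biases.

void matvec_i_outer(const float *w, const float *x, float *y,
                    int in_n, int out_n) {
    for (int i = 0; i < out_n; i++)              // old order
        for (int j = 0; j < in_n; j++)
            y[i] += x[j] * w[j * out_n + i];     // stride: out_n floats
}

void matvec_j_outer(const float *w, const float *x, float *y,
                    int in_n, int out_n) {
    for (int j = 0; j < in_n; j++) {             // new order
        float xj = x[j];
        const float *row = &w[j * out_n];
        for (int i = 0; i < out_n; i++)
            y[i] += xj * row[i];                 // stride: 1 float
    }
}
```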


void backward(Layer *layer, float *input, float *output_grad, float *input_grad, float lr) {
-    for (int i = 0; i < layer->output_size; i++) {
-        for (int j = 0; j < layer->input_size; j++) {
-            int idx = j * layer->output_size + i;
-            float grad = output_grad[i] * input[j];
-            layer->weights[idx] -= lr * grad;
-            if (input_grad)
-                input_grad[j] += output_grad[i] * layer->weights[idx];
-        }
-        layer->biases[i] -= lr * output_grad[i];
-    }
+    if (input_grad) {
+        for (int j = 0; j < layer->input_size; j++) {
+            input_grad[j] = 0.0f;
+            float *weight_row = &layer->weights[j * layer->output_size];
+            for (int i = 0; i < layer->output_size; i++) {
+                input_grad[j] += output_grad[i] * weight_row[i];
+            }
+        }
+    }
+
+    for (int j = 0; j < layer->input_size; j++) {
+        float in_j = input[j];
+        float *weight_row = &layer->weights[j * layer->output_size];
+        float *momentum_row = &layer->weight_momentum[j * layer->output_size];
+        for (int i = 0; i < layer->output_size; i++) {
+            float grad = output_grad[i] * in_j;
+            momentum_row[i] = MOMENTUM * momentum_row[i] + lr * grad;
+            weight_row[i] -= momentum_row[i];
+        }
+    }
+
+    for (int i = 0; i < layer->output_size; i++) {
+        layer->bias_momentum[i] = MOMENTUM * layer->bias_momentum[i] + lr * output_grad[i];
+        layer->biases[i] -= layer->bias_momentum[i];
+    }
}
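The new `backward` runs in three passes: input gradients are computed first, against the not-yet-updated weights (the old version updated `weights[idx]` and then read it back for `input_grad`, propagating post-update values), and only then do weights and biases get their updates. The update itself is classical momentum: v ← MOMENTUM·v + lr·g, then w ← w − v. In isolation (a sketch; `momentum_step` is a hypothetical helper reusing nn.c's `MOMENTUM` macro):

```c
// Classical momentum: the velocity v smooths gradients across steps.
// v must start zeroed -- init_layer gets this for free from calloc.
void momentum_step(float *w, float *v, const float *g, int n, float lr) {
    for (int k = 0; k < n; k++) {
        v[k] = MOMENTUM * v[k] + lr * g[k];  // update velocity
        w[k] -= v[k];                        // take the smoothed step
    }
}
```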

-void train(Network *net, float *input, int label, float lr) {
-    float hidden_output[HIDDEN_SIZE], final_output[OUTPUT_SIZE];
+float* train(Network *net, float *input, int label, float lr) {
+    static float final_output[OUTPUT_SIZE];
+    float hidden_output[HIDDEN_SIZE];
    float output_grad[OUTPUT_SIZE] = {0}, hidden_grad[HIDDEN_SIZE] = {0};

    forward(&net->hidden, input, hidden_output);
-    for (int i = 0; i < HIDDEN_SIZE; i++)
-        hidden_output[i] = hidden_output[i] > 0 ? hidden_output[i] : 0; // ReLU
-
    forward(&net->output, hidden_output, final_output);
    softmax(final_output, OUTPUT_SIZE);

@@ -95,15 +124,14 @@ void train(Network *net, float *input, int label, float lr) {
        hidden_grad[i] *= hidden_output[i] > 0 ? 1 : 0; // ReLU derivative

    backward(&net->hidden, input, hidden_grad, NULL, lr);
+
+    return final_output;
}
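`train` now returns the softmax probabilities so `main` can reuse them for the loss instead of running a second forward pass. The pointer refers to a `static` buffer, though: each call overwrites the previous result, so the function is neither reentrant nor thread-safe. If the probabilities need to outlive the next call, copy them out immediately (a sketch; `train_into` is a hypothetical wrapper, assuming nn.c's definitions):

```c
#include <string.h>

// Hypothetical wrapper (not in the commit): snapshot the static
// result into caller-owned storage before the next train() call.
void train_into(Network *net, float *input, int label, float lr,
                float out_probs[OUTPUT_SIZE]) {
    float *p = train(net, input, label, lr);
    memcpy(out_probs, p, OUTPUT_SIZE * sizeof(float));
}
```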

int predict(Network *net, float *input) {
    float hidden_output[HIDDEN_SIZE], final_output[OUTPUT_SIZE];

    forward(&net->hidden, input, hidden_output);
-    for (int i = 0; i < HIDDEN_SIZE; i++)
-        hidden_output[i] = hidden_output[i] > 0 ? hidden_output[i] : 0; // ReLU
-
    forward(&net->output, hidden_output, final_output);
    softmax(final_output, OUTPUT_SIZE);

@@ -163,10 +191,13 @@ void shuffle_data(unsigned char *images, unsigned char *labels, int n) {
    }
}


int main() {
    Network net;
    InputData data = {0};
    float learning_rate = LEARNING_RATE, img[INPUT_SIZE];
+    clock_t start, end;
+    double cpu_time_used;

    srand(time(NULL));

@@ -182,24 +213,14 @@ int main() {
    int test_size = data.nImages - train_size;

    for (int epoch = 0; epoch < EPOCHS; epoch++) {
+        start = clock();
        float total_loss = 0;
-        for (int i = 0; i < train_size; i += BATCH_SIZE) {
-            for (int j = 0; j < BATCH_SIZE && i + j < train_size; j++) {
-                int idx = i + j;
-                for (int k = 0; k < INPUT_SIZE; k++)
-                    img[k] = data.images[idx * INPUT_SIZE + k] / 255.0f;
-
-                train(&net, img, data.labels[idx], learning_rate);
-
-                float hidden_output[HIDDEN_SIZE], final_output[OUTPUT_SIZE];
-                forward(&net.hidden, img, hidden_output);
-                for (int k = 0; k < HIDDEN_SIZE; k++)
-                    hidden_output[k] = hidden_output[k] > 0 ? hidden_output[k] : 0; // ReLU
-                forward(&net.output, hidden_output, final_output);
-                softmax(final_output, OUTPUT_SIZE);
-
-                total_loss += -logf(final_output[data.labels[idx]] + 1e-10f);
-            }
-        }
+        for (int i = 0; i < train_size; i++) {
+            for (int k = 0; k < INPUT_SIZE; k++)
+                img[k] = data.images[i * INPUT_SIZE + k] / 255.0f;
+
+            float* final_output = train(&net, img, data.labels[i], learning_rate);
+            total_loss += -logf(final_output[data.labels[i]] + 1e-10f);
+        }
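The simplified loop is plain per-example SGD (the `BATCH_SIZE` macro no longer affects training), and the loss comes for free: the old code ran an extra forward pass per example just to recompute the probabilities, while the new code reuses the ones `train` returns. The accumulated quantity is the per-example cross-entropy of the softmax output (a sketch; `cross_entropy` is a hypothetical helper):

```c
#include <math.h>

// -log of the probability assigned to the true class; the epsilon
// keeps logf away from log(0) if that probability underflows.
float cross_entropy(const float *probs, int label) {
    return -logf(probs[label] + 1e-10f);
}
```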
        int correct = 0;
        for (int i = train_size; i < data.nImages; i++) {
@@ -208,15 +229,23 @@ int main() {
            if (predict(&net, img) == data.labels[i])
                correct++;
        }
printf("Epoch %d, Accuracy: %.2f%%, Avg Loss: %.4f\n", epoch + 1, (float)correct / test_size * 100, total_loss / train_size);
end = clock();
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;

printf("Epoch %d, Accuracy: %.2f%%, Avg Loss: %.4f, Time: %.2f seconds\n",
epoch + 1, (float)correct / test_size * 100, total_loss / train_size, cpu_time_used);
}
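One caveat on the timing: `clock()` measures CPU time, not wall-clock time. For this single-threaded program the two coincide, but if the code were ever parallelized the reported seconds would exceed elapsed time. A wall-clock alternative (a sketch; `wall_seconds` is a hypothetical helper, and `clock_gettime` is POSIX, not ISO C):

```c
#include <time.h>

// Elapsed (wall-clock) seconds from a monotonic clock, immune to
// system clock adjustments.
double wall_seconds(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (double)ts.tv_sec + (double)ts.tv_nsec / 1e9;
}
```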

    free(net.hidden.weights);
    free(net.hidden.biases);
+    free(net.hidden.weight_momentum);
+    free(net.hidden.bias_momentum);
    free(net.output.weights);
    free(net.output.biases);
+    free(net.output.weight_momentum);
+    free(net.output.bias_momentum);
    free(data.images);
    free(data.labels);

    return 0;
}
