#include "neural.h"

/* Print a matrix's dimensions followed by its values, row by row. */
void printMatrix(matrix *m){
    printf("%dx%d\n", m->rows, m->cols);
    for(int i = 0; i < m->rows; ++i){
        for(int j = 0; j < m->cols; ++j){
            printf("%.3Lf ", m->data[i][j]);
        }
        printf("\n");
    }
}

/* Dump one layer: metadata, then its weight, bias and neuron matrices. */
void printLayer(layer *l){
    printf("function: %d, inputs: %d, nneurons: %d\n", l->function, l->inputs, l->nneurons);
    printf("Weights\n");
    printMatrix(l->weights);
    printf("Bias\n");
    printMatrix(l->bias);
    printf("Neurons\n");
    printMatrix(l->neurons);
}

/* Dump the whole network: hyperparameters, input matrix, then each layer. */
void printNet(net *n){
    /* Fixed format typo: "outputs, %d" -> "outputs: %d". */
    printf("learningrate: %.3Lf, inputs: %d, outputs: %d, nlayers: %d\n",
           n->learningrate, n->inputs, n->outputs, n->nlayers);
    printf("input:\n");
    printMatrix(n->input);
    for(int i = 0; i < n->nlayers; ++i){
        printf("Layer %d:\n", i);
        printLayer(n->layers[i]);
    }
}

/* Identity activation. */
static long double linear(long double n){
    return n;
}

/* Derivative of the identity is 1.
 * BUG FIX: the original returned 0.0, which zeroed every gradient and
 * froze any layer using the LINEAR activation. */
static long double derivedLinear(long double n){
    (void)n; /* derivative is constant */
    return 1.0L;
}

/* Rectified linear unit. */
static long double ReLu(long double n){
    return fmaxl(0.0L, n);
}

/* ReLU derivative. Like the other derived* functions it is applied to the
 * already-activated neuron value; ReLU output is > 0 iff its input was > 0,
 * so the test still works on the activated value. */
static long double derivedReLu(long double n){
    return n > 0;
}

/* Logistic sigmoid. */
static long double sigmoid(long double n){
    return 1.0L / (1.0L + expl(-n));
}

/* Sigmoid derivative, expressed in terms of the activated value s:
 * d/dx sigmoid(x) = s * (1 - s). */
static long double derivedSigmoid(long double n){
    return n * (1.0L - n);
}

/* tanh derivative, expressed in terms of the activated value t:
 * d/dx tanh(x) = 1 - t*t.
 * BUG FIX: the original computed 1 - tanhl(n)^2, i.e. it applied tanh a
 * second time, even though backPropagate passes the already-activated
 * neuron values (consistent with derivedSigmoid above). */
static long double derivedTanhl(long double n){
    return 1.0L - n * n;
}

/* He initialization: uniform sample in [0, sqrt(2/inputs)).
 * BUG FIX: the original divided int by long long (r/scale), truncating
 * every sample to 0, and its (int) cast of sqrtl(2/inputs)*1e10 could
 * overflow int for small `inputs`. (Currently unused; see initializeLayer.) */
static long double he(long double inputs){
    return ((long double)rand() / RAND_MAX) * sqrtl(2.0L / inputs);
}

/* Xavier/Glorot initialization: uniform sample in [-1/sqrt(inputs), 1/sqrt(inputs)].
 * BUG FIX: the original body was empty — falling off the end of a non-void
 * function whose value is used is undefined behavior. (Currently unused.) */
static long double xavier(long double inputs){
    long double limit = 1.0L / sqrtl(inputs);
    return ((long double)rand() / RAND_MAX) * 2.0L * limit - limit;
}

/* Activation lookup tables, indexed by the FUNCTIONS enum stored in
 * layer->function. The two arrays must stay in the same order. */
static long double (*functions[])(long double) = {
    linear, ReLu, sigmoid, tanhl,
};

static long double (*derivedFunctions[])(long double) = {
    derivedLinear, derivedReLu, derivedSigmoid, derivedTanhl,
};

/* Interim weight initializer: uniform sample in [-1/sqrt(n), 1/sqrt(n)). */
static long double placeholder(long double n){
    long double high = 1.0L / sqrtl(n), low = -high;
    long double difference = high - low;
    int scale = 10000;
    int scaled_difference = (int)(difference * scale);
    /* BUG FIX: rand() % 0 is undefined behavior; for large n the scaled
     * range truncates to 0, so fall back to the midpoint. */
    if(scaled_difference <= 0){
        return 0.0L;
    }
    return low + (1.0L * (rand() % scaled_difference) / scale);
}

/* Fill a layer's weight matrix with random initial values.
 * TODO implement different initialization functions (he, xavier). */
void initializeLayer(layer *l){
    /* BUG FIX: seed the PRNG once. The original reseeded with time(NULL)
     * per layer, so layers created within the same second drew identical
     * random streams. */
    static int seeded = 0;
    if(!seeded){
        srand(time(NULL));
        seeded = 1;
    }
    for(int i = 0; i < l->weights->rows; ++i){
        for(int j = 0; j < l->weights->cols; ++j){
            l->weights->data[i][j] = placeholder(l->nneurons);
        }
    }
}

/* Allocate a layer: weights (inputs x nneurons) randomly initialized,
 * bias and neurons (1 x nneurons) zeroed. Caller owns the result. */
static layer *newLayer(FUNCTIONS function, int inputs, int nneurons){
    layer *l = malloc(sizeof *l);
    if(!l){
        fprintf(stderr, "newLayer: out of memory\n");
        exit(EXIT_FAILURE);
    }
    l->function = function;
    l->inputs = inputs;
    l->nneurons = nneurons;
    l->weights = newMatrix(inputs, nneurons);
    initializeLayer(l);
    l->bias = newMatrix(1, nneurons);
    fillMatrix(l->bias, 0);
    l->neurons = newMatrix(1, nneurons);
    fillMatrix(l->neurons, 0);
    return l;
}

/* Free a layer and null the caller's pointer. */
static void freeLayer(layer **l){
    freeMatrix(&(*l)->weights);
    freeMatrix(&(*l)->bias);
    freeMatrix(&(*l)->neurons);
    free(*l);
    *l = NULL; /* BUG FIX: original set the local parameter (l = NULL) */
}

/* Serialize a layer as 'L', metadata, three matrices, 'E'. */
static void saveLayer(layer *l, FILE *fp){
    char header = 'L';
    fwrite(&header, sizeof(char), 1, fp);
    fwrite(&l->function, sizeof(int), 1, fp);
    fwrite(&l->inputs, sizeof(int), 1, fp);
    fwrite(&l->nneurons, sizeof(int), 1, fp);
    saveMatrix(l->weights, fp);
    saveMatrix(l->bias, fp);
    saveMatrix(l->neurons, fp);
    char end = 'E';
    fwrite(&end, sizeof(char), 1, fp);
}

/* Deserialize a layer written by saveLayer; exits on a malformed stream. */
static layer *loadLayer(FILE *fp){
    char header = 0;
    if(fread(&header, sizeof(char), 1, fp) != 1 || header != 'L'){
        fprintf(stderr, "Header is '%c' not 'L'\n", header);
        exit(EXIT_FAILURE);
    }
    /* NOTE(review): reading the enum with sizeof(int) assumes
     * sizeof(FUNCTIONS) == sizeof(int); matches saveLayer. */
    FUNCTIONS function;
    int inputs, nneurons;
    if(fread(&function, sizeof(int), 1, fp) != 1 ||
       fread(&inputs, sizeof(int), 1, fp) != 1 ||
       fread(&nneurons, sizeof(int), 1, fp) != 1){
        fprintf(stderr, "loadLayer: truncated metadata\n");
        exit(EXIT_FAILURE);
    }
    layer *l = malloc(sizeof *l);
    if(!l){
        fprintf(stderr, "loadLayer: out of memory\n");
        exit(EXIT_FAILURE);
    }
    l->function = function;
    l->inputs = inputs;
    l->nneurons = nneurons;
    l->weights = loadMatrix(fp);
    l->bias = loadMatrix(fp);
    l->neurons = loadMatrix(fp);
    char end = 0;
    if(fread(&end, sizeof(char), 1, fp) != 1 || end != 'E'){
        fprintf(stderr, "End is '%c' not 'E'\n", end);
        exit(EXIT_FAILURE);
    }
    return l;
}

/* Build a network. The variadic tail supplies nlayers int layer sizes;
 * each layer's input count is the previous layer's size. Caller owns the
 * result and must release it with freeNet. */
net *newNet(FUNCTIONS function, long double learningrate, int inputs,
            int outputs, int nlayers, ...){
    // TODO check if outputs == last layer
    net *n = malloc(sizeof *n);
    if(!n){
        fprintf(stderr, "newNet: out of memory\n");
        exit(EXIT_FAILURE);
    }
    n->learningrate = learningrate;
    n->inputs = inputs;
    n->outputs = outputs;
    n->nlayers = nlayers;
    n->input = newMatrix(1, inputs);
    fillMatrix(n->input, 1);
    n->layers = malloc(nlayers * sizeof(layer *));
    if(!n->layers){
        fprintf(stderr, "newNet: out of memory\n");
        exit(EXIT_FAILURE);
    }
    va_list layers;
    va_start(layers, nlayers);
    for(int i = 0; i < nlayers; ++i){
        int size = va_arg(layers, int);
        n->layers[i] = newLayer(function, inputs, size);
        inputs = size; /* next layer consumes this layer's output */
    }
    va_end(layers);
    return n;
}

/* Free a network, its layers and input matrix, and null the caller's pointer. */
void freeNet(net **n){
    freeMatrix(&(*n)->input);
    for(int i = 0; i < (*n)->nlayers; ++i){
        freeLayer(&(*n)->layers[i]);
    }
    free((*n)->layers);
    (*n)->layers = NULL;
    free(*n);
    *n = NULL; /* BUG FIX: original set the local parameter (n = NULL) */
}

/* Serialize a network as 'N', metadata, input matrix, each layer, 'E'. */
void saveNet(net *n, FILE *fp){
    char header = 'N';
    fwrite(&header, sizeof(char), 1, fp);
    fwrite(&n->learningrate, sizeof(long double), 1, fp);
    fwrite(&n->inputs, sizeof(int), 1, fp);
    fwrite(&n->outputs, sizeof(int), 1, fp);
    fwrite(&n->nlayers, sizeof(int), 1, fp);
    saveMatrix(n->input, fp);
    for(int i = 0; i < n->nlayers; ++i){
        saveLayer(n->layers[i], fp);
    }
    char end = 'E';
    fwrite(&end, sizeof(char), 1, fp);
}

/* Deserialize a network written by saveNet; exits on a malformed stream. */
net *loadNet(FILE *fp){
    char header = 0;
    if(fread(&header, sizeof(char), 1, fp) != 1 || header != 'N'){
        fprintf(stderr, "Header is '%c' not 'N'\n", header);
        exit(EXIT_FAILURE);
    }
    long double learningrate;
    int inputs, outputs, nlayers;
    if(fread(&learningrate, sizeof(long double), 1, fp) != 1 ||
       fread(&inputs, sizeof(int), 1, fp) != 1 ||
       fread(&outputs, sizeof(int), 1, fp) != 1 ||
       fread(&nlayers, sizeof(int), 1, fp) != 1){
        fprintf(stderr, "loadNet: truncated metadata\n");
        exit(EXIT_FAILURE);
    }
    net *n = malloc(sizeof *n);
    if(!n){
        fprintf(stderr, "loadNet: out of memory\n");
        exit(EXIT_FAILURE);
    }
    n->learningrate = learningrate;
    n->inputs = inputs;
    n->outputs = outputs;
    n->nlayers = nlayers;
    n->input = loadMatrix(fp);
    n->layers = malloc(nlayers * sizeof(layer *));
    if(!n->layers){
        fprintf(stderr, "loadNet: out of memory\n");
        exit(EXIT_FAILURE);
    }
    for(int i = 0; i < nlayers; ++i){
        n->layers[i] = loadLayer(fp);
    }
    char end = 0;
    if(fread(&end, sizeof(char), 1, fp) != 1 || end != 'E'){
        fprintf(stderr, "End is '%c' not 'E'\n", end);
        exit(EXIT_FAILURE);
    }
    return n;
}

/* Apply `function` element-wise to every entry of m, in place. */
static void applyFunction(func function, matrix *m){
    for(int i = 0; i < m->rows; ++i){
        for(int j = 0; j < m->cols; ++j){
            m->data[i][j] = function(m->data[i][j]);
        }
    }
}

/* Forward pass for one layer: neurons = f(inputs * weights + bias). */
static void propagateLayer(layer *l, matrix *inputs){
    matrix *m = multiplyMatrices(inputs, l->weights);
    matrix *a = addMatrices(m, l->bias);
    freeMatrix(&m);
    copyMatrix(l->neurons, a);
    freeMatrix(&a);
    applyFunction(functions[l->function], l->neurons);
}

/* Forward pass through the whole network. `input` must be 1 x n->inputs.
 * Returns the last layer's neuron matrix (owned by the net — do not free).
 * The caller retains ownership of `input`. */
matrix *propagate(net *n, matrix *input){
    /* BUG FIX: the original aliased n->input = input, leaking the matrix
     * allocated in newNet and risking a double free when the caller also
     * freed its input (as main's testing loop does). Copy the values
     * instead; backPropagate reads n->input by value only. */
    copyMatrix(n->input, input);
    matrix *current = n->input;
    for(int i = 0; i < n->nlayers; ++i){
        propagateLayer(n->layers[i], current);
        current = n->layers[i]->neurons;
    }
    return n->layers[n->nlayers - 1]->neurons;
}

/* One step of gradient descent against `expected` (1 x n->outputs).
 * Must be called after propagate() so neuron activations are current.
 * Error terms are computed back-to-front, then weights and biases are
 * updated front-to-back using each layer's input activations. */
void backPropagate(net *n, matrix *expected){
    matrix **errors = malloc(n->nlayers * sizeof(matrix *));
    if(!errors){
        fprintf(stderr, "backPropagate: out of memory\n");
        exit(EXIT_FAILURE);
    }
    /* Output-layer error: (expected - actual). */
    matrix *corrected = subtractMatrices(expected, n->layers[n->nlayers - 1]->neurons);
    for(int i = n->nlayers - 1; i >= 0; --i){
        /* Element-wise: error = corrected ∘ f'(activation). */
        matrix *derived = cloneMatrix(n->layers[i]->neurons);
        applyFunction(derivedFunctions[n->layers[i]->function], derived);
        errors[i] = HadamardProduct(corrected, derived);
        freeMatrix(&corrected);
        freeMatrix(&derived);
        /* Propagate the error to the previous layer through the weights. */
        matrix *transposedWeights = transpose(n->layers[i]->weights);
        corrected = multiplyMatrices(errors[i], transposedWeights);
        freeMatrix(&transposedWeights);
    }
    matrix *lastOutput = n->input;
    for(int i = 0; i < n->nlayers; ++i){
        /* weights += learningrate * input^T * error */
        matrix *transposedOutput = transpose(lastOutput);
        multiplyMatrix(transposedOutput, n->learningrate);
        matrix *weightChangeMatrix = multiplyMatrices(transposedOutput, errors[i]);
        freeMatrix(&transposedOutput);
        matrix *t = addMatrices(n->layers[i]->weights, weightChangeMatrix);
        freeMatrix(&weightChangeMatrix);
        copyMatrix(n->layers[i]->weights, t);
        freeMatrix(&t);
        /* bias += learningrate * error */
        multiplyMatrix(errors[i], n->learningrate);
        t = addMatrices(n->layers[i]->bias, errors[i]);
        copyMatrix(n->layers[i]->bias, t);
        freeMatrix(&t);
        lastOutput = n->layers[i]->neurons;
    }
    lastOutput = NULL;
    for(int i = 0; i < n->nlayers; ++i){
        freeMatrix(&errors[i]);
    }
    free(errors);
    errors = NULL;
    freeMatrix(&corrected); /* leftover error for the (virtual) input layer */
}

/* Copy a C array into an existing matrix of matching dimensions. */
void feedData(matrix *m, long double array[m->rows][m->cols]){
    for(int i = 0; i < m->rows; ++i){
        for(int j = 0; j < m->cols; ++j){
            m->data[i][j] = array[i][j];
        }
    }
}

/* Flatten an image's pixel matrix into a freshly allocated 1 x (rows*cols)
 * row vector. Caller owns the result. */
matrix *imageToInput(image *im){
    matrix *r = newMatrix(1, im->img->rows * im->img->cols);
    int l = 0;
    for(int i = 0; i < im->img->rows; ++i){
        for(int j = 0; j < im->img->cols; ++j){
            r->data[0][l++] = im->img->data[i][j];
        }
    }
    return r;
}

/* Index of the largest entry in a 1-row output matrix (argmax).
 * BUG FIX: the original hard-coded 10 columns and could return -1 when
 * every output was <= 0; now it scans out->cols and always returns a
 * valid index. */
static int maxOutputs(matrix *out){
    int mi = 0;
    long double max = out->data[0][0];
    for(int i = 1; i < out->cols; ++i){
        if(out->data[0][i] > max){
            max = out->data[0][i];
            mi = i;
        }
    }
    return mi;
}

/* Load a saved MNIST classifier and measure its accuracy on the test set.
 * (The training pass is currently commented out.) */
int main(){
    /* One-hot target vectors for digits 0-9. */
    matrix **expectedOutputs = malloc(10 * sizeof(matrix *));
    if(!expectedOutputs){
        fprintf(stderr, "main: out of memory\n");
        return EXIT_FAILURE;
    }
    for(int i = 0; i < 10; ++i){
        expectedOutputs[i] = newMatrix(1, 10);
        fillMatrix(expectedOutputs[i], 0);
        expectedOutputs[i]->data[0][i] = 1;
    }
    // Training
    net *n = newNet(SIGMOID, 1, 784, 10, 2, 300, 10);
    int training = 0;
    (void)training; /* used only by the commented-out training loop */
    FILE *trainFP = fopen("mnist_train.csv", "r");
    if(!trainFP){
        fprintf(stderr, "cannot open mnist_train.csv\n");
        return EXIT_FAILURE;
    }
    char line[MAXCHARS];
    fgets(line, MAXCHARS, trainFP); /* skip CSV header row */
    /*while(!feof(trainFP)){
        image *im = loadCSV(trainFP);
        propagate(n, imageToInput(im));
        backPropagate(n, expectedOutputs[im->label]);
        if(training%100 == 0){
            printf("\nTrained %d\n", training+1);
            printMatrix(n->layers[n->nlayers-1]->neurons);
            printf("\n");
        }
        freeImage(&im);
        ++training;
    }
    printMatrix(n->input);
    FILE *fp = fopen("nn", "wb");
    saveNet(n, fp);
    fclose(fp);*/
    fclose(trainFP); /* BUG FIX: was only closed inside the commented block */
    freeNet(&n);
    FILE *fp = fopen("nn", "rb");
    if(!fp){
        fprintf(stderr, "cannot open saved net 'nn'\n");
        return EXIT_FAILURE;
    }
    n = loadNet(fp);
    fclose(fp);
    // Testing
    int testing = 0;
    FILE *testFP = fopen("mnist_test.csv", "r");
    if(!testFP){
        fprintf(stderr, "cannot open mnist_test.csv\n");
        return EXIT_FAILURE;
    }
    fgets(line, MAXCHARS, testFP); /* skip CSV header row */
    int count = 0;
    /* NOTE(review): while(!feof(...)) runs one extra iteration after the
     * last record; assumes loadCSV tolerates EOF — confirm its contract. */
    while(!feof(testFP)){
        image *im = loadCSV(testFP);
        matrix *in = imageToInput(im);
        matrix *expectedOutput = expectedOutputs[im->label];
        (void)expectedOutput; /* used only by the commented-out diagnostics */
        matrix *output = propagate(n, in);
        if(testing % 300 == 0){
            printf("Tested %d so far\n", testing + 1);
        }
        /*printImage(im);
        printf("Expected:\n");
        printMatrix(expectedOutput);
        printf("Got:\n");
        printMatrix(output);
        printf("Max output: %d\n", maxOutputs(output));*/
        if(maxOutputs(output) == im->label){
            count++;
        }
        freeMatrix(&in);
        output = NULL; /* owned by the net; never freed here */
        freeImage(&im);
        ++testing;
    }
    fclose(testFP);
    if(testing == 0){
        fprintf(stderr, "no test samples read\n");
        return EXIT_FAILURE;
    }
    long double success = ((long double)count / testing) * 100;
    printf("success rate: %Lf (%d/%d)\n", success, count, testing);
    for(int i = 0; i < 10; ++i){
        freeMatrix(&expectedOutputs[i]);
    }
    free(expectedOutputs);
    return 0;
}