//
// BackPropagation.cpp
// version 2.0
//
// This code uses backpropagation to train a feedforward artificial neural
// network.
//
// The code is organized around the node as the fundamental object, and relies on:
// - a generic Node class, declared as a class template in Node.h;
// - Node member functions defined in Node.cpp;
// - a derived FeedforwardNode abstract base class declared in FeedforwardNode.h,
//   with member functions defined in FeedforwardNode.cpp; and,
// - a derived (from the FeedforwardNode class) BackpropagationNode class
//   declared in BackpropagationNode.h, with member functions defined in
//   BackpropagationNode.cpp.
//
// Note that I have chosen object orientation at the expense of speed wherever
// a choice was to be made.
//
// Use:
// The user specifies a data file containing training vectors of the form
// (input1, input2, ..., inputn, output1, output2, ..., outputn). The file
// must be in ASCII text format, with spaces separating the components of each
// vector, and hard returns separating the vectors.
// The user also specifies the network architecture: the number of hidden
// layers and the number of nodes in each.
//
// The training vectors are split (80/20) into a training set and a test set,
// with training "goodness" measured as the mean square error over the test
// set. (Thus, the test set is not a true test set, but an extension of the
// training set.)
//
// At the completion of training, based either on attaining the target mean
// square error or on completing a specified number of training epochs, the
// final weights are written into a file entitled "trainw.dat".
//
// NB: This is a work in progress. Subsequent versions will allow addition and
// deletion of connections in an established network, and automated network
// pruning.
//
// Copyright 1998 Michael J. Wax
// You may use this code as is, or incorporate it in another program,
// as long as proper attribution is given.
However, I make no warranties, //express or implied, regarding the performance of this code, its freedom from //error, or its suitability for use in a given application. //Questions or suggestions? Contact me at neural@michaelwax.com // // #include #include #include #include #include "BackpropagationNode.cpp" #include "DataVector.cpp" #include "Weight.cpp" using std::cin; using std::cout; using std::endl; using std::ifstream; using std::ofstream; using std::ios; using std::srand; using std::time; int main () { srand((unsigned)time(NULL)); //seed random number generator //establish name of data file and number of training vectors char file_name[18]; int numberVectors; cout << "Name of file containing data? "; cin >> file_name; cout << "Number of training vectors? "; cin >> numberVectors; //what is the desired network structure? int totalNodes = 0; int numberHiddenLayers; int numberIn, *numberHidden, numberOut; //node counts in each layer cout << "Network Structure:" << endl; cout << " Number of input nodes? "; cin >> numberIn; cout << " Number of hidden layers? "; cin >> numberHiddenLayers; numberHidden = new int[numberHiddenLayers]; for (int counter = 0; counter < numberHiddenLayers; ++counter) { cout << " Number of hidden nodes in layer " << (counter+1) << "? "; cin >> numberHidden[counter]; totalNodes += numberHidden[counter]; } cout << " Number of output nodes? "; cin >> numberOut; totalNodes += numberOut; //---------------------set up the network---------------------------- //The nodes are stored as a one-dimensional array. 
// BackpropagationNode *node; node = new BackpropagationNode[totalNodes]; //----------------set up connections between nodes-------------------- //start by setting up a fully connected network //remember that the pointers to input nodes on the first hidden layer nodes are // left as null pointers BackpropagationNode *nullNode; nullNode = 0; //-----set up connections on hidden nodes first------ int nodeCounter = -1; //keep track of how many nodes we've done for (int counter = 0; counter < numberHiddenLayers; ++counter) { for (int counter2 = 0; counter2 < numberHidden[counter]; ++counter2) { ++nodeCounter; if (numberHiddenLayers == 1) { //there's only one hidden layer node[nodeCounter].initialize(numberIn, numberOut); for (int counter3 = 0; counter3 < numberIn; ++counter3) { //input connections node[nodeCounter].initializeInputConnection(*nullNode); } for (int counter3 = 0; counter3 < numberOut; ++counter3) { //output connections node[nodeCounter].initializeOutputConnection(node[(totalNodes-numberOut+counter3)]); } } else { if (counter == 0) { //there's more than one hidden layer, but we're on the first one node[nodeCounter].initialize(numberIn, numberHidden[1]); for (int counter3 = 0; counter3 < numberIn; ++counter3) { //input connections node[nodeCounter].initializeInputConnection(*nullNode); } for (int counter3 = 0; counter3 < numberHidden[1]; ++counter3) { //output connections node[nodeCounter].initializeOutputConnection(node[(numberHidden[0]+counter3)]); } } else { if (counter == (numberHiddenLayers-1)) { //there's more than one hidden layer, but we're on the last one node[nodeCounter].initialize(numberHidden[counter-1], numberOut); for (int counter3 = 0; counter3 < numberHidden[counter-1]; ++counter3) { node[nodeCounter].initializeInputConnection(node[(nodeCounter-counter2-numberHidden[counter-1]+counter3)]); } for (int counter3 = 0; counter3 < numberOut; ++counter3) { node[nodeCounter].initializeOutputConnection(node[(totalNodes-numberOut+counter3)]); } } else 
{ //we're somewhere in the middle of a bunch of hidden layers node[nodeCounter].initialize(numberHidden[counter-1], numberHidden[counter+1]); for (int counter3 = 0; counter3 < numberHidden[counter-1]; ++counter3) { node[nodeCounter].initializeInputConnection(node[(nodeCounter-counter2-numberHidden[counter-1]+counter3)]); } for (int counter3 = 0; counter3 < numberHidden[1]; ++counter3) { //output connections node[nodeCounter].initializeOutputConnection(node[(numberHidden[0]+counter3)]); } } } } } } //this seems too difficult - the Node class or one of its derived classes probably should be //rewritten to make setting up connections easier //-------set up connections on output nodes------- for (int counter = (totalNodes-numberOut); counter < totalNodes; ++counter) { //remember - no output connections on these nodes - they're at the top of the network node[counter].initialize(numberHidden[numberHiddenLayers-1], 0); for (int counter2 = 0; counter2 < numberHidden[numberHiddenLayers-1]; ++counter2) { node[counter].initializeInputConnection(node[(totalNodes-numberOut-numberHidden[numberHiddenLayers-1]+counter2)]); } } //---------------open data file and read in training vectors----------------- cout << endl << "Reading in training vectors . . ." 
<< endl; DataVector *trainingVector; trainingVector = new DataVector[numberVectors]; ifstream training_input (file_name, ios::in); for (int counter = 0; counter < numberVectors; ++counter) { trainingVector[counter].initialize(numberIn, numberOut); for (int counter2 = 0; counter2 < numberIn; ++counter2) { training_input >> trainingVector[counter].vinput[counter2]; } for (int counter2 = 0; counter2 < numberOut; ++counter2) { training_input >> trainingVector[counter].voutput[counter2]; } } training_input.close(); //find out what acceptable level of error, and how long to run training double meanSquareError = 10000, maximumError, minimumError = 10000; int maximumEpochs, numberEpochs = 0, minimumErrorEpoch = 0; cout << endl << "What is the maximum acceptable mean square error? "; cin >> maximumError; cout << "Try how many training epochs if the maximum acceptable error is not reached? "; cin >> maximumEpochs; //use 80% of data for training with backpropagation; //reserve 20% for testing of the mean square error //(i.e., keep from memorizing the training data) int numberTrain = int(0.8 * numberVectors); cout << endl << numberTrain << " training vectors; " << (numberVectors-numberTrain) << " test vectors." << endl; //-------------------------------------------------------------------------- //BEGIN TRAINING cout << "Beginning training . . ." 
<< endl; while (numberEpochs < maximumEpochs) { ++numberEpochs; //adjust weights over each set of input values for (int master = 0; master < numberTrain; ++master) { //calculate network output for (int counter = 0; counter < numberHidden[0]; ++counter) { node[counter].firstHiddenLayerActivation(trainingVector[master].vinput); } for (int counter = numberHidden[0]; counter < totalNodes; ++counter) { node[counter].activation(); } //determine error in output of each node for (int counter = 0; counter < numberOut; ++counter) { node[(totalNodes-numberOut+counter)].calculateOutputNodeError(trainingVector[master].voutput[counter]); } for (int counter = 0; counter < (totalNodes - numberOut); ++counter) { node[counter].calculateError(); } //adjust node weights for (int counter = 0; counter < numberHidden[0]; ++counter) { node[counter].adjustFirstHiddenLayerNodeWeight(trainingVector[master].vinput, 0.7, 0.5); } for (int counter = numberHidden[0]; counter < totalNodes; ++counter) { node[counter].adjustWeight(); } } //we've gone through all of the training vectors //calculate mean squared error for training and test sets, // and end training if acceptable meanSquareError = 0; for (int master = numberTrain; master < numberVectors; ++master) { for (int counter = 0; counter < numberHidden[0]; ++counter) { node[counter].firstHiddenLayerActivation(trainingVector[master].vinput); } for (int counter = numberHidden[0]; counter < totalNodes; ++counter) { node[counter].activation(); } for (int counter = 0; counter < numberOut; ++counter) { meanSquareError += (trainingVector[master].readOutputVector(counter) - node[(totalNodes-numberOut+counter)].readActivation()) * (trainingVector[master].readOutputVector(counter) - node[(totalNodes-numberOut+counter)].readActivation()); } } meanSquareError /= (numberVectors - numberTrain); //how big is the mean square error? 
//if we reach the user's target, we're done if (meanSquareError < maximumError) break; //otherwise, let the user know the current error cout << "Epoch " << numberEpochs << " mean square error = " << meanSquareError << endl; if (meanSquareError < minimumError) { minimumError = meanSquareError; minimumErrorEpoch = numberEpochs; } } //when we reach here, training is over //first, determine why we're here: have we succeeded in lowering error enough? if (meanSquareError < maximumError) { cout << "\nTarget maximum acceptable error achieved.\nFinal mean square error " << meanSquareError << endl; } else { cout << "Target maximum acceptable error not achieved." << endl << "Minimum error of " << minimumError << " reached after " << minimumErrorEpoch << " training epochs." << endl; } //print final weights, and write weights to file ofstream weights_output ("trainw.dat", ios::out); nodeCounter = -1; for (int counter = 0; counter < numberHiddenLayers; ++counter) { cout << "Hidden layer " << (counter+1) << ":" << endl; for (int counter2 = 0; counter2 < numberHidden[counter]; ++counter2) { ++nodeCounter; //we're on the next node for (int counter3 = 0; counter3 < node[nodeCounter].readNumberInputConnections(); ++counter3) { weights_output << node[nodeCounter].readInputConnectionWeight(counter3) << endl; cout << " Weight (" << counter2 << "," << counter3 << ") = "<< node[nodeCounter].readInputConnectionWeight(counter3) << endl; } } } for (int counter = (totalNodes-numberOut); counter < totalNodes; ++counter) { for (int counter2 = 0; counter2 < node[counter].readNumberInputConnections(); ++counter2) { weights_output << node[counter].readInputConnectionWeight(counter2) << endl; cout << "Output weight (" << counter << "," << counter2 << ") = "<< node[counter].readInputConnectionWeight(counter2) << endl; } } weights_output.close(); //print out calculated output values vs. 
actual output values for (int master = 0; master < numberVectors; ++master) { cout << "Vector " << master << ": "; //calculate network output for (int counter = 0; counter < numberHidden[0]; ++counter) { node[counter].firstHiddenLayerActivation(trainingVector[master].vinput); } for (int counter = numberHidden[0]; counter < totalNodes; ++counter) { node[counter].activation(); } //display output values for (int counter = 0; counter < numberOut; ++counter) { cout << node[(totalNodes-numberOut+counter)].readActivation() << ", " << trainingVector[master].voutput[counter] << "; "; } cout << endl; } return 0; } //end main