/****************************************************************************/
/*                                                                          */
/* C implementation of the Recurrent Cascade 2 learning algorithm. The      */
/* candidate training phase is all new. No more correlation. Each           */
/* candidate has a set of input weights and a set of output weights.        */
/* We try to adjust both sets of weights to minimize the sum-squared        */
/* difference between the candidate's weighted outputs and the residual     */
/* error in the active net at the corresponding outputs.                    */
/*                                                                          */
/* This version was put together by Peter McCluskey (pcm@rahul.net;         */
/* pcm@world.std.com) from the Recurrent Cascade Correlation code written by*/
/* Conor Doherty (version dated Sept-25-91) and the Cascade 2 code written  */
/* by P. Michael Kingsley (version dated Jan-27-93), which were based on    */
/* code written by R. Scott Crowder, III and Scott E. Fahlman.              */
/*                                                                          */
/* This code has been placed in the public domain by the author. As a       */
/* matter of simple courtesy, anyone using or adapting this code is         */
/* expected to acknowledge the source.                                      */
/*                                                                          */
/* For an explanation of the original algorithm and some results, see "The  */
/* Cascade-Correlation Learning Architecture" by Scott E. Fahlman and       */
/* Christian Lebiere in D. S. Touretzky (ed.), "Advances in Neural          */
/* Information Processing Systems 2", Morgan Kaufmann, 1990. A somewhat     */
/* longer version is available as CMU Computer Science Tech Report          */
/* CMU-CS-90-100.                                                           */
/* For an explanation of the recurrent algorithm and some results, see "The */
/* Recurrent Cascade-Correlation Learning Architecture" by Scott E.         */
/* Fahlman in D. S. Touretzky (ed.), "Advances in Neural Information        */
/* Processing Systems 3", Morgan Kaufmann, 1991. A somewhat longer          */
/* version is available as CMU Computer Science Tech Report                 */
/* CMU-CS-91-100.                                                           */
/*                                                                          */
/****************************************************************************/

/* NOTE(review): this copy of the file had seven #include directives whose
 * header names were missing.  The list below is reconstructed from the
 * library facilities the file visibly uses: printf/scanf/FILE (stdio),
 * atoi/srand (stdlib), exp (math), strcpy/strtok (string), time (time).
 * <ctype.h> and <signal.h> are presumed from strdncase() and
 * TRAP_CONTROL_C(int sig) -- confirm against the original distribution.
 */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include <string.h>
#include <time.h>
#include <signal.h>

#define VERSION 1.00
#define REL_DATE "RC2: May-10-94"

/* some stuff to make C code a little more readable */
typedef int BOOLEAN;

#ifdef mips
/* Pmax compiler is almost ANSI, it just doesn't understand 'void *' */
typedef char *VOIDP;
#else
typedef void *VOIDP;
#endif

/*****************************************************************************/
/* Parameter table contains all user settable parameters.  It is used by the */
/* input routines to change the values of the named parameters.  The list of */
/* parameters must be sorted alphabetically so that the search routine will  */
/* work.  Parameter names are used as keywords in the search.  Keywords are  */
/* lower case to speed the comparison process.                               */
/*****************************************************************************/
struct parmentry {char *keyword;   /* variable name in lower case */
                  int vartype;     /* can be INT, FLOAT, or ENUM */
                  VOIDP varptr;    /* cast to correct type before use */
};

typedef struct parmentry PARMS;

/* general symbols */
#define TRUE 1
#define FALSE 0
#define BOMB -99
#define LINELEN 80
#define EOL '\0'

/* switches used in the interface routines */
#define INT 0
#define FLOAT 1
#define ENUM 2          /* integer values that use #defines */
#define BOOLE 3
#define GETTRAINING 4
#define GETTEST 5
#define GO 6
#define INT_NO 7        /* parameters only good in netfile */
#define FLOAT_NO 8      /* most are used in memory allocation */
#define ENUM_NO 9       /* and cannot be changed mid-simulation */
#define BOOLE_NO 10
#define VALUE 11
#define GETTRAININGFILE 12
#define GETTESTFILE 13
#define SAVE 14
#define INITFILE 15

#define NEXTLINE 0
#define FAILURE -1

/* switch constants */
#define SIGMOID 0
#define GAUSSIAN 1
#define LINEAR 2
#define ASYMSIGMOID 3
#define VARSIGMOID 4

#define WIN 20
#define STAGNANT 21
#define TIMEOUT 22
#define LOSE 23

#define BITS 30
#define INDEX 31        /* ErrorMeasure setting; its counterpart BITS is   */
                        /* defined immediately before this line.           */

/* First argument of ERROR(); BUILD_NET passes FATAL.  WARN is presumably  */
/* the non-fatal counterpart -- confirm in ERROR().                        */
#define FATAL 0
#define WARN 1

/* Allocate global storage */

/***********************************************************************/
/* Assorted Parameters.                                                */
/* These parameters and switches control the quickprop learning        */
/* algorithm used to train the output weights.                         */
/***********************************************************************/
BOOLEAN OvershootOK;    /* If TRUE, candidates incur no penalty for     */
                        /* overshooting the error.  Used when results   */
                        /* will be fed into a sigmoid output layer.     */
int UnitType;           /* Hidden unit type, e.g. SIGMOID or GAUSSIAN   */
                        /* (ACTIVATION_PRIME also handles ASYMSIGMOID   */
                        /* and VARSIGMOID).                             */
int OutputType;         /* Output unit type, e.g. SIGMOID or LINEAR     */
                        /* (OUTPUT_FUNCTION also handles ASYMSIGMOID    */
                        /* and VARSIGMOID).                             */
float SigmoidMax;       /* Maximum output value for sigmoid units.  Used */
                        /* to alter sigmoid range without having to edit */
                        /* training values.  Use the symbols "min" and   */
                        /* "max" in the input file.  The input routines  */
                        /* will translate to the appropriate float values.*/
float SigmoidMin;       /* Minimum output value for sigmoid units.      */
float WeightRange;      /* Random-init weights in range [-WR,+WR]       */
float OutPrimeOffset;   /* Add to output unit's activation fns to kill flat spots */
float ActPrimeOffset;   /* Add to candidate unit's activation fns to kill flat spots */
float OutputMu;         /* Mu used to quickprop train output weights.   */
float OutputShrinkFactor; /* Used in computing whether the proposed step is */
                        /* too large.  Related to OutputMu.             */
float OutputEpsilon;    /* Controls the amount of linear gradient descent */
                        /* to use in updating output weights.           */
float OutputDecay;      /* This factor times the current weight is added */
                        /* to the slope at the start of each output epoch. */
                        /* Keeps weights from growing too big.          */
int OutputPatience;     /* If we go for this many epochs with no real   */
                        /* change, it's time to stop tuning.            */
float OutputChangeThreshold; /* The error must change by at least this  */
                        /* fraction of its old value to count as a      */
                        /* significant change.                          */
float CandInputMu;      /* Mu used to quickprop train input weights to candidates. */
float CandInputShrinkFactor; /* Used in computing whether the proposed step is */
                        /* too large.  Related to CandInputMu.          */
float CandInputEpsilon; /* Controls the amount of linear gradient descent */
                        /* to use in updating candidate input weights.  */
float CandInputDecay;   /* This factor times the current candidate input */
                        /* weight is added to the slope at the start of */
                        /* each candidate training epoch.  Keeps weights */
                        /* from growing too big.                        */
float CandOutputMu;     /* Mu used to quickprop train output weights to candidates. */
float CandOutputShrinkFactor; /* Used in computing whether the proposed step is */
                        /* too large.  Related to CandOutputMu.         */
float CandOutputEpsilon; /* Controls the amount of linear gradient descent */
                        /* to use in updating candidate output weights. */
float CandOutputDecay;  /* This factor times the current candidate output */
                        /* weight is added to the slope at the start of */
                        /* each candidate training epoch.  Keeps weights */
                        /* from growing too big.                        */
int CandPatience;       /* If we go for this many epochs with no real   */
                        /* change, it's time to stop tuning the candidates. */
float CandChangeThreshold; /* The error must change by at least this    */
                        /* fraction of its old value to count as a      */
                        /* significant change.                          */

/***********************************************************************/
/* Variables related to error.                                         */
/***********************************************************************/
float ErrorThreshold;   /* This close to desired value => bit is correct */
int ErrorBits;          /* Total # bits in epoch that were wrong        */
float SumSqError;       /* The sum-squared error accumulated over all   */
                        /* outputs and all training cases.              */
float BestCandidateScore; /* Best score of all candidate units.         */
int BestCandidate;      /* Index of the candidate unit with best score. */

/***********************************************************************/
/* These variables and switches control the simulation and display.    */
/***********************************************************************/
BOOLEAN UseCache;       /* If TRUE, cache the forward-pass values instead */
                        /* of repeatedly computing them.                */
int Epoch;              /* Current epoch number                         */
BOOLEAN Graphics;       /* If TRUE, print progress after each epoch.    */
BOOLEAN NonRandomSeed;  /* TRUE => use 1 as the seed for the random     */
                        /* number generator.  Useful when comparing     */
                        /* different parameter settings.  FALSE => use  */
                        /* system clock to start random sequence.       */
BOOLEAN Test;           /* If TRUE, run a test epoch and print the result */
                        /* after each round of output tuning.           */
BOOLEAN SinglePass;     /* TRUE => Pause after forward/backward cycle   */
BOOLEAN SingleEpoch;    /* TRUE => Pause after each training epoch      */
BOOLEAN Step;           /* Turned to TRUE after each pause, briefly     */
int Trial;              /* Current trial number, used in log outputs    */

/***********************************************************************/
/* The sets of training inputs and outputs.                            */
/***********************************************************************/
int NTrainingPatterns;  /* !! Not in Lisp version.  Needed here.        */
int NTestPatterns;      /* !! Not in Lisp version.  Needed here.        */
float **TrainingInputs;
float **TrainingOutputs;
float *Goal;            /* Goal vector for the current training or      */
                        /* testing case.                                */
char *T_T_files;        /* Pointer to Training or Test filenames        */
                        /* in input line updated by PROCESS_LINE,       */
                        /* each time the user needs a file for input    */
                        /* of training or test data.                    */

/***************************************************************************/
/* For some benchmarks there is a separate set of values used for testing  */
/* the network's ability to generalize.  These values are not used during  */
/* training.                                                               */
/***************************************************************************/
float **TestInputs;
float **TestOutputs;

/***************************************************************************/
/*                                                                         */
/* Fundamental data structures.                                            */
/*                                                                         */
/* Unit outputs and weights are floats.                                    */
/*                                                                         */
/* Instead of representing each unit by a structure, we represent the      */
/* unit by an int.  This is used to index into various arrays that hold    */
/* per-unit information, such as the activation value of each unit.        */
/*                                                                         */
/* Per-connection information for each connection COMING INTO unit is      */
/* stored in an array of arrays.  The outer array is indexed by the unit   */
/* number, and the inner array is then indexed by connection number.       */
/*                                                                         */
/* Unit 0 is always at a maximum-on value.  Connections from this unit     */
/* supply a bias.  Next come some input units, then some hidden units.     */
/*                                                                         */
/* Output units have their own separate set of data structures, as do      */
/* candidate units whose inputs are currently being trained.               */
/***************************************************************************/
int MaxUnits;           /* Maximum number of input values and hidden    */
                        /* units in the network.                        */
int Nunits;             /* Total number of active units in net          */
int Ninputs;            /* Number of input units                        */
int Noutputs;           /* Number of output units                       */
int Ncandidates;        /* Number of candidate units trained at once.   */
int MaxCases;           /* Maximum number of training cases that can be */
                        /* accommodated by the current data structures. */
int Ncases;             /* Number of training cases currently in use.   */
                        /* Assumed to be a contiguous block beginning   */
                        /* with FirstCase.                              */
int FirstCase;          /* Address of the first training case in the    */
                        /* currently active set.  Usually zero, but may */
                        /* differ if we are training on different chunks */
                        /* of the training set at different times.      */

/***************************************************************************/
/* The following vectors hold values related to hidden units in the active */
/* net and their input weights.                                            */
/***************************************************************************/
float *Values;          /* Current activation value for each unit       */
float *PrevValues;      /* Previous hidden output values for use in RCC */
float **ValuesCache;    /* Holds a distinct Values array for each of the */
                        /* MaxCases training cases.                     */
float *ExtraValues;     /* Extra Values vector to use when no cache.    */
int *Nconnections;      /* # of INCOMING connections per unit           */
int **Connections;      /* C[i][j] lists jth unit projecting to unit i  */
float **Weights;        /* W[i][j] holds weight of C[i][j]              */

/***************************************************************************/
/* The following arrays of arrays hold values for the outputs of the active*/
/* network and the output-side weights.                                    */
/***************************************************************************/
float *Outputs;         /* Network output values                        */
float *Errors;          /* Final error value for each unit              */
float **ErrorsCache;    /* Holds a distinct Errors array for each of the */
                        /* MaxCases training cases.                     */
float *ExtraErrors;     /* Extra Errors vector to use when no cache.    */
float **OutputWeights;  /* OW[i][j] holds the weight from hidden unit i */
                        /* to output unit j.                            */
                        /* NOTE(review): BUILD_NET allocates Noutputs   */
                        /* outer entries for this array, which suggests */
                        /* the outer index is the output unit -- confirm */
                        /* the index order against the code that uses it. */
float **OutputDeltas;   /* Change between previous OW and current one   */
float **OutputSlopes;   /* Partial derivative of TotalError wrt OW[i][j] */
float **OutputPrevSlopes; /* Previous value of OutputSlopes[i][j]       */

/***************************************************************************/
/* The following arrays have one entry for each candidate unit in the      */
/* pool of trainees.                                                       */
/***************************************************************************/
float *CandValues;      /* Current output value of each candidate unit. */
float *CandPrevValues;  /* Previous Candidate output values for RCC     */
float *CandScores;      /* A vector holding score for each candidate.   */
float **CandInWeights;  /* A vector with one entry for each candidate   */
                        /* unit.  This entry holds the current input    */
                        /* weights for that candidate unit.             */
float **CandInDeltas;   /* Input weights deltas for each candidate unit. */
float **CandInSlopes;   /* Input weights slopes for each candidate unit. */
float **CandInPrevSlopes; /* Holds the previous values of CandInSlopes. */
float **CandDvDw;       /* For storing present values of dv/dw for RCC  */
float **CandOutWeights; /* A vector with one entry for each candidate   */
                        /* unit.  This entry holds the current output   */
                        /* weights for that candidate unit.             */
float **CandOutDeltas;  /* Output weights deltas for each candidate unit. */
float **CandOutSlopes;  /* Output weights slopes for each candidate unit. */
float **CandOutPrevSlopes; /* Holds the previous values of CandOutSlopes. */

/***************************************************************************/
/* This saves memory if each candidate unit receives a connection from     */
/* each existing unit and input.  That's always true at present, but may   */
/* not be in future.                                                       */
/***************************************************************************/
int *AllConnections;    /* A standard connection that connects a unit to */
                        /* all previous units, in order, but not to the */
                        /* bias unit.                                   */

/***************************************************************************/
/* ErrorIndex specific globals.  Not in release Lisp version               */
/***************************************************************************/
int NtrainingOutputValues; /* Number of outputs in the training set.    */
int NtestOutputValues;  /* Number of outputs in the test set.           */
float TrainingStdDev;   /* Std Dev of entire training set.  Used to     */
                        /* normalize the ErrorIndex.                    */
float TestStdDev;
float ErrorIndex;       /* Normalized error function for continuous     */
                        /* output training sets.                        */
float ErrorIndexThreshold; /* Stop training when ErrorIndex is < EIT.   */
int ErrorMeasure;       /* Set to BITS for using ErrorBits to stop      */
                        /* or INDEX to use ErrorIndex to stop.          */

/***************************************************************************/
/* Save and plot file related variables                                    */
/***************************************************************************/
BOOLEAN DumpWeights;    /* Are we dumping weights into a file.          */
char DumpFileRoot[LINELEN+1]; /* Root of the names for the files        */
FILE *WeightFile;       /* Contains weights from the current net.       */

/**************************************************************************/
/* Recurrent CC switches for scheduled network resets                     */
/**************************************************************************/
BOOLEAN UseTrainingBreaks; /* reset during training?                    */
BOOLEAN UseTestBreaks;  /* reset during testing?                        */
BOOLEAN *TrainingBreaks; /* structure containing training reset schedule */
BOOLEAN *TestBreaks;    /* structure containing test reset schedule     */

/*********************************************************************/
/* keyword table used for updating the simulation parameters without */
/* recompilation.
*/
/*********************************************************************/
/* Each entry is {keyword, vartype, varptr}.  Entries must stay in    */
/* alphabetical keyword order (the search routine depends on it, per  */
/* the note on struct parmentry).  Entries whose pointer is NULL are  */
/* special keywords that do not name a variable.                      */
PARMS ParmTable[] = {
  {"actprimeoffset", FLOAT, (VOIDP)&ActPrimeOffset},
  {"candchangethreshold", FLOAT, (VOIDP)&CandChangeThreshold},
  {"candinputdecay", FLOAT, (VOIDP)&CandInputDecay},
  {"candinputepsilon", FLOAT, (VOIDP)&CandInputEpsilon},
  {"candinputmu", FLOAT, (VOIDP)&CandInputMu},
  {"candoutputdecay", FLOAT, (VOIDP)&CandOutputDecay},
  {"candoutputepsilon", FLOAT, (VOIDP)&CandOutputEpsilon},
  {"candoutputmu", FLOAT, (VOIDP)&CandOutputMu},
  {"candpatience", INT, (VOIDP)&CandPatience},
  {"errorindexthreshold", FLOAT, (VOIDP)&ErrorIndexThreshold},
  {"errormeasure", ENUM_NO, (VOIDP)&ErrorMeasure},
  {"errorthreshold", FLOAT, (VOIDP)&ErrorThreshold},
  {"go", GO, (VOIDP)NULL},                /* special keyword */
  {"graphics", BOOLE, (VOIDP)&Graphics},
  {"maxunits", INT_NO, (VOIDP)&MaxUnits},
  {"ncandidates", INT_NO, (VOIDP)&Ncandidates},
  {"ninputs", INT_NO, (VOIDP)&Ninputs},
  {"nonrandomseed", BOOLE, (VOIDP)&NonRandomSeed},
  {"noutputs", INT_NO, (VOIDP)&Noutputs},
  {"ntestpatterns", INT_NO, (VOIDP)&NTestPatterns},
  {"ntrainingpatterns", INT_NO, (VOIDP)&NTrainingPatterns},
  {"outprimeoffset", FLOAT, (VOIDP)&OutPrimeOffset},
  {"outputchangethreshold", FLOAT, (VOIDP)&OutputChangeThreshold},
  {"outputdecay", FLOAT, (VOIDP)&OutputDecay},
  {"outputepsilon", FLOAT, (VOIDP)&OutputEpsilon},
  {"outputmu", FLOAT, (VOIDP)&OutputMu},
  {"outputpatience", INT, (VOIDP)&OutputPatience},
  {"outputtype", ENUM, (VOIDP)&OutputType},
  {"overshootok", BOOLE, (VOIDP)&OvershootOK},
  {"quit", BOMB, (VOIDP)NULL},            /* special keyword */
  {"save", SAVE, (VOIDP)NULL},            /* special keyword */
  {"sigmoidmax", FLOAT_NO, (VOIDP)&SigmoidMax},
  {"sigmoidmin", FLOAT_NO, (VOIDP)&SigmoidMin},
  {"singleepoch", BOOLE, (VOIDP)&SingleEpoch},
  {"singlepass", BOOLE, (VOIDP)&SinglePass},
  {"test", BOOLE, (VOIDP)&Test},
  {"testing", GETTEST, (VOIDP)NULL},      /* special keyword */
  {"training", GETTRAINING, (VOIDP)NULL}, /* special keyword */
  {"unittype", ENUM_NO, (VOIDP)&UnitType},
  {"usecache", BOOLE_NO, (VOIDP)&UseCache},
  {"usetestbreaks", BOOLE, (VOIDP)&UseTestBreaks},
  {"usetrainingbreaks", BOOLE, (VOIDP)&UseTrainingBreaks},
  {"values", VALUE, (VOIDP)NULL},         /* special keyword */
  {"weightfile", INITFILE, (VOIDP)NULL},  /* special keyword */
  {"weightrange", FLOAT, (VOIDP)&WeightRange}
};
int Nparameters =       /* Number of entries in ParmTable */
  sizeof(ParmTable)/sizeof(PARMS);

BOOLEAN InterruptPending; /* TRUE => user has pressed Control-C */
char ErrMsg[1025];      /* general error message buffer */

/******************** end of global storage allocation  **********************/

#ifdef CONNX
long conx;              /* Only compiled when CONNX is defined; its use is */
                        /* not visible in this part of the file.           */
#endif

/***********************************************************************/
/*                                                                     */
/* function prototypes (ANSI format)                                   */
/*                                                                     */
/***********************************************************************/

/************
* main routines mostly C specific
*************/
void GET_NETWORK_CONFIGURATION(char *fname);
VOIDP GET_ARRAY_MEM(unsigned elt_count, unsigned elt_size, char *fun_name);
void ERROR(int type, char *message);

/************
* learning utilities
*************/
float ACTIVATION(float sum);
float ACTIVATION_PRIME(float value, float sum);
float OUTPUT_FUNCTION(float sum);
float OUTPUT_PRIME(float out);

/************
* Network-building utilities.
*************/
void BUILD_NET(void);
float RANDOM_WEIGHT(void);
void INIT_NET(void);

/************
* Interface utilities
*************/
int FIND_KEY(char *searchkey);
void PRINT_VALUE(int k);
void LIST_ALL_VALUES(void);
void PROMPT_FOR_VALUE(int k);
void GET_TRAINING_DATA(FILE *infile);
void GET_TEST_DATA(FILE *infile);
void GET_TEST_DATA_FILE(void);
void GET_TRAINING_DATA_FILE(void);
void add_extension(char *fname, char *ext);
int PROCESS_LINE(char *line);
void strdncase(char *s);
BOOLEAN Y_OR_N_P(char *prompt);
void INTERACTIVE_PARM_UPDATE(void);
char *TYPE_STRING(int var);
char *BOOLE_STRING(int var);
int TYPE_CONVERT(char *input);
void CHECK_INTERRUPT(void);
void TRAP_CONTROL_C(int sig);

/************
* Parameter setting function.
*************/
void INITIALIZE_GLOBALS(void);

/************
* Candidate training and selecting utilities
*************/
void INIT_CANDIDATES(void);
void COMPUTE_SLOPES(BOOLEAN reset);
void UPDATE_INPUT_WEIGHTS(void);

/************
* outer training loop
*************/
void LIST_PARAMETERS(void);
int TRAIN(int outlimit, int inlimit, int rounds, BOOLEAN interact);
void TEST_EPOCH(float test_threshold);
void PRINT_SUMMARY(void);
void PRINT_TST_SUMMARY(FILE *dumpfile);
void OUT_PASS_USER_INTERFACE(void);
void OUT_EPOCH_USER_INTERFACE(void);
void CAND_EPOCH_USER_INTERFACE(void);
void OUT_EPOCH_OUTPUT(void);
void IN_EPOCH_OUTPUT(void);
void OUT_PASS_OUTPUT(void);
int TRAIN_CANDIDATES(int max_epochs);

/************
* quickprop routine
*************/
void QUICKPROP_UPDATE(int i, float weights[], float deltas[], float slopes[],
                      float prevs[], float epsilon, float decay, float mu,
                      float shrink_factor);

/************
* training functions
*************/
void SETUP_INPUTS(float input[]);
void OUTPUT_FORWARD_PASS(void);
void COMPUTE_UNIT_VALUE(int j, BOOLEAN reset);
void FULL_FORWARD_PASS(float input[], BOOLEAN reset);
void COMPUTE_ERRORS(float goal[], BOOLEAN output_slopesp, BOOLEAN statsp);
void UPDATE_OUTPUT_WEIGHTS(void);
void   /* return type of the prototype that continues on the next line */
TRAIN_OUTPUTS_EPOCH(void); int TRAIN_OUTPUTS(int max_epochs); /************ * candidate train functions *************/ void TRAIN_INPUTS_EPOCH(void); int TRAIN_INPUTS(int max_epochs); /************ * ErrorIndex routines *************/ float ERROR_INDEX(float std_dev, int num); float STANDARD_DEV(float **outputs, int npatterns, int nvalues); void INTERACT_SAVE_FILES(void); void SAVE_NET_FILE(void); void GET_WEIGHTS(char *realfname); void INTERACT_GET_WEIGHTS(void); void DUMP_WEIGHTS(FILE *fout); void SAVE_ALL_PARMS(FILE *fout); void SAVE_PARM_VALUE(FILE *fout, int k); void SAVE_TRAINING_SET(FILE *fout); void SAVE_TEST_SET(FILE *fout); void DUMP_PARMS(FILE *fout); void WRITE_NET_OUTPUT(void); void WRITE_UNIT_OUTPUT(void); void INTERACT_DUMP_WEIGHTS(void); void INIT_DUMP_FILES(char *fname); void SETUP_DUMP_FILES(void); /* function prototypes from */ /* this commented out when stdlib included extern VOIDP calloc(unsigned etl_count, unsigned elt_size); */ #ifdef __STDC__ /* compiler does conform to the standard */ #ifndef atof extern double atof(const char *s); #endif extern char *strtok(char *s, const char *set); #else /* compiler doesn't conform to the standard */ extern double atof(); extern char *strtok(); #endif #ifndef INT_MAX #define INT_MAX 32767 #endif /******************end of prototypes ****************************/ #ifndef PREDICT_ONLY main(int argc, char *argv[]) { int inlim, outlim, rounds, trials; int nhidden; /* number of hidden units used in run */ int vics, defs, i; long total_epochs, total_units, total_trials; long min_units, max_units, min_epochs, max_epochs; char fname[LINELEN+1]; BOOLEAN interact = FALSE; /***************/ if((argc != 1) && (argc != 6)){ /* wrong number of args */ printf("Usage: rcascade2 NetFile InEpochs OutEpochs NewUnits Trials\n"); printf(" or rcascade2\n"); return(-1); } else if(argc == 1) interact = TRUE; INITIALIZE_GLOBALS(); /* initialize testing parms */ total_epochs = 0; total_units = 0; min_units = INT_MAX; min_epochs = 
INT_MAX; max_units = 0; max_epochs = 0; total_trials = 0; vics = 0; defs = 0; /* Get network */ if(interact){ printf ("\nEnter network file name: "); scanf ("%s", fname); } else strcpy(fname, argv[1]); GET_NETWORK_CONFIGURATION(fname); /* initialize the random number generator before initializing the network*/ if(NonRandomSeed) /* Does user want a fixed sequence? */ srand(1); /* Use a fixed starting point */ else srand(time(NULL)); /* Use a random starting point */ INIT_NET(); /* Start the main processing loop */ do { if(interact){ printf("Number of epochs to train inputs: "); scanf ("%d", &inlim); printf("Number of epochs to train outputs: "); scanf ("%d", &outlim); printf("Maximum number of new units: "); scanf ("%d", &rounds); printf("Trials for this problem: "); scanf ("%d", &trials); if(Y_OR_N_P("Change some parameters?")) INTERACTIVE_PARM_UPDATE(); } else{ inlim = atoi(argv[2]); outlim = atoi(argv[3]); rounds = atoi(argv[4]); trials = atoi(argv[5]); } printf("Starting run for %s, Ilim %d, Olim %d, MaxUnits %d, Trials %d.\n", fname, inlim, outlim, rounds, trials); if(NonRandomSeed) printf(" Fixed starting point used for random weights.\n\n"); else printf(" Random starting point used for random weights.\n\n"); for(i=0;i max_epochs) ? Epoch : max_epochs; min_units = (nhidden < min_units) ? nhidden : min_units; max_units = (nhidden > max_units) ? 
nhidden : max_units; if(interact && Y_OR_N_P(" Do you want to save the current settings?")) SAVE_NET_FILE(); if(DumpWeights) DUMP_WEIGHTS(WeightFile); else if(interact && Y_OR_N_P(" Do you want to save the current weights?")) INTERACT_DUMP_WEIGHTS(); } /* print out loop stats */ printf("\n\nTRAINING LOOP STATS\n"); LIST_PARAMETERS(); printf("\n Victories: %d, Defeats: %d, \n", vics, defs); printf(" Training Epochs - Min: %ld, Avg: %ld, Max: %ld,\n", min_epochs, (total_epochs / total_trials), max_epochs); printf(" Hidden Units - Min: %ld, Avg: %4.1f, Max: %ld,\n", min_units,((float)total_units /total_trials), max_units); }while((interact) && Y_OR_N_P("Do you want to run more trials?")); /* Test the network. */ if((interact) && Y_OR_N_P("\nDo you want to test the last network?\nA file test.dmp will be created\noverwriting any existing file of the same name!")){ if ( Y_OR_N_P("\nDo you want a different test set?") ) /* pmk032292 */ GET_TEST_DATA_FILE(); TEST_EPOCH(ErrorThreshold); } else return(TRUE); while((interact) && Y_OR_N_P("Do you want to test the network again?")) { GET_TEST_DATA_FILE(); TEST_EPOCH(ErrorThreshold); } return(TRUE); } #else /* PREDICT_ONLY */ main(int argc, char *argv[]) { int i,j; char nfname[LINELEN+1], wfname[LINELEN+1], dfname[LINELEN+1]; BOOLEAN interact = FALSE; void GET_INPUT_DATA(char *dfname); /***************/ if((argc != 1) && (argc != 4)){ /* wrong number of args */ printf("Usage: castest NetFile WeightFile DataFile\n"); printf(" or castest\n"); return; } else if(argc == 1) interact = TRUE; INITIALIZE_GLOBALS(); /* Get network */ if(interact){ printf ("Enter name of network file: "); scanf ("%s", nfname); printf ("Enter name of weight file: "); scanf ("%s", wfname); printf ("Enter name of data file: "); scanf ("%s", dfname); } else{ strcpy(nfname, argv[1]); strcpy(wfname, argv[2]); strcpy(dfname, argv[3]); } GET_NETWORK_CONFIGURATION(nfname); UseCache = FALSE; /* no reason to use cache for prediction only */ INIT_NET(); 
GET_WEIGHTS(wfname); GET_INPUT_DATA(dfname); for(i=0; iNTestPatterns) MaxCases = NTrainingPatterns; else MaxCases = NTestPatterns; Ncases = NTrainingPatterns; FirstCase = 0; Nunits = 1 + Ninputs; /* setup for ErrorIndex */ NtrainingOutputValues = Noutputs * NTrainingPatterns; NtestOutputValues = Noutputs * NTestPatterns; if(Nunits>MaxUnits) ERROR(FATAL, "MaxUnits must be greater than Ninputs."); /* allocate memory for outer arrays */ ValuesCache = (float **)GET_ARRAY_MEM(MaxCases, sizeof(float *), fn); ExtraValues = (float *)GET_ARRAY_MEM(MaxUnits, sizeof(float), fn); Values = ExtraValues; PrevValues = (float *)GET_ARRAY_MEM(MaxUnits, sizeof(float), fn); Nconnections = (int *)GET_ARRAY_MEM(MaxUnits, sizeof(int), fn); Connections = (int **)GET_ARRAY_MEM(MaxUnits, sizeof(int *), fn); Weights = (float **)GET_ARRAY_MEM(MaxUnits, sizeof(float *), fn); ErrorsCache = (float **)GET_ARRAY_MEM(MaxCases, sizeof(float *), fn); ExtraErrors = (float *)GET_ARRAY_MEM(Noutputs, sizeof(float), fn); Errors = ExtraErrors; Outputs = (float *)GET_ARRAY_MEM(Noutputs, sizeof(float), fn); OutputWeights = (float **)GET_ARRAY_MEM(Noutputs, sizeof(float *), fn); OutputDeltas = (float **)GET_ARRAY_MEM(Noutputs, sizeof(float *), fn); OutputSlopes = (float **)GET_ARRAY_MEM(Noutputs, sizeof(float *), fn); OutputPrevSlopes = (float **)GET_ARRAY_MEM(Noutputs, sizeof(float *), fn); CandValues = (float *)GET_ARRAY_MEM(Ncandidates, sizeof(float), fn); CandScores = (float *)GET_ARRAY_MEM(Ncandidates, sizeof(float), fn); CandInWeights = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); CandInDeltas = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); CandInSlopes = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); CandInPrevSlopes = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); CandOutWeights = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); CandOutDeltas = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); CandOutSlopes = (float 
**)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); CandOutPrevSlopes = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); /* Allocate memory for RCC */ CandPrevValues = (float *)GET_ARRAY_MEM(Ncandidates, sizeof(float),fn); CandDvDw = (float **)GET_ARRAY_MEM(Ncandidates, sizeof(float *), fn); TrainingBreaks = (BOOLEAN *)GET_ARRAY_MEM(NTrainingPatterns, sizeof(BOOLEAN),fn); TestBreaks = (BOOLEAN *)GET_ARRAY_MEM(NTestPatterns, sizeof(BOOLEAN),fn); TrainingInputs = (float **)GET_ARRAY_MEM(NTrainingPatterns, sizeof(float *), fn); TrainingOutputs = (float **)GET_ARRAY_MEM(NTrainingPatterns, sizeof(float *), fn); if(NTestPatterns){ TestInputs = (float **)GET_ARRAY_MEM(NTestPatterns, sizeof(float *), fn); TestOutputs = (float **)GET_ARRAY_MEM(NTestPatterns, sizeof(float *), fn); } else{ /* no test patterns so just point at training set */ TestInputs = TrainingInputs; TestOutputs = TrainingOutputs; } /* Only create the caches if UseCache is on -- may not always have room. */ printf("UseCache is %d\n", UseCache); if(UseCache){ for(i=0; i 15.0) return(0.5); else return (1.0 /(1.0 + exp(-sum)) - 0.5); case GAUSSIAN: /* Gaussian activation function in range 0.0 to 1.0. */ temp = -0.5 * sum * sum; if (temp < -75.0) return(0.0); else return (exp(temp)); case ASYMSIGMOID: /* asymmetrical sigmoid function in range 0.0 to 1.0. */ if (sum < -15.0) return(0.0); else if (sum > 15.0) return(1.0); else return (1.0 /(1.0 + exp(-sum))); case VARSIGMOID: /* Sigmoid function in range SigmoidMin to SigmoidMax. */ if (sum < -15.0) return(SigmoidMin); else if (sum > 15.0) return(SigmoidMax); else return ((SigmoidMax - SigmoidMin)/ (1.0 + exp(-sum)) + SigmoidMin); } } /* * Given the unit's activation value and sum of weighted inputs, compute * the derivative of the activation with respect to the sum. Defined unit * types are SIGMOID, VARSIGMOID, and GAUSSIAN. * * Note: ActPrimeOffset not implemented for gaussian units. 
* */ float ACTIVATION_PRIME(float value, float sum) { switch(UnitType){ case SIGMOID: /* Symmetrical sigmoid function. */ return ( ActPrimeOffset + (0.25 - value*value) ); case GAUSSIAN: /* Gaussian activation function. */ return (sum * value); case ASYMSIGMOID: /* asymmetrical sigmoid function in range 0.0 to 1.0. */ return ( ActPrimeOffset + (value * (1.0 - value)) ); case VARSIGMOID: /* Sigmoid function with range SigmoidMin to SigmoidMax. */ return ( ActPrimeOffset + (value - SigmoidMin) * (1.0 - (value - SigmoidMin) / (SigmoidMax - SigmoidMin))); } } /* Compute the value of an output, given the weighted sum of incoming values. * Defined output types are SIGMOID, ASYMSIGMOID, and LINEAR. */ float OUTPUT_FUNCTION(float sum) { switch(OutputType){ case SIGMOID: /* Symmetrical sigmoid function, used for binary functions. */ if (sum < -15.0) return(-0.5); else if (sum > 15.0) return(0.5); else return (1.0 /(1.0 + exp(-sum)) - 0.5); case LINEAR: /* Linear output function, used for continuous functions. */ return (sum); case ASYMSIGMOID: /* asymmetrical sigmoid function in range 0.0 to 1.0. */ if (sum < -15.0) return(0.0); else if (sum > 15.0) return(1.0); else return (1.0 /(1.0 + exp(-sum))); case VARSIGMOID: /* Sigmoid function in range SigmoidMin to SigmoidMax. */ if (sum < -15.0) return(SigmoidMin); else if (sum > 15.0) return(SigmoidMax); else return ((SigmoidMax - SigmoidMin)/ (1.0 + exp(-sum)) + SigmoidMin); } } /* Compute the value of an output, given the weighted sum of incoming values. * Defined output types are SIGMOID, ASYMSIGMOID, and LINEAR. * * Sigmoid_Prime_Offset used to keep the back-prop error value from going to * zero. */ float OUTPUT_PRIME(float output) { switch(OutputType){ case SIGMOID: /* Symmetrical sigmoid function, used for binary functions. */ return (OutPrimeOffset + 0.25 - output*output); case LINEAR: /* Linear output function, used for continuous functions. 
*/ return (1.0); case ASYMSIGMOID: /* asymmetrical sigmoid function in range 0.0 to 1.0. */ return (OutPrimeOffset + output * (1.0 - output)); case VARSIGMOID: /* Sigmoid function with range SigmoidMin to SigmoidMax. */ return (OutPrimeOffset + (output - SigmoidMin) * (1.0 - (output - SigmoidMin) / (SigmoidMax - SigmoidMin))); } } /* The basic routine for doing quickprop-style update of weights, given a * pair of slopes and a delta. * * Given arrays holding weights, deltas, slopes, and previous slopes, * and an index i, update weight[i] and delta[i] appropriately. Move * slope[i] to prev[i] and zero out slope[i]. Add weight decay term to * each slope before doing the update. */ void QUICKPROP_UPDATE(int i, float weights[], float deltas[], float slopes[], float prevs[], float epsilon, float decay, float mu, float shrink_factor) { float w,d,s,p, next_step; /********/ w = weights[i]; d = deltas[i]; s = slopes[i] + decay * w; p = prevs[i]; next_step = 0.0; /* The step must always be in direction opposite to the slope. */ if(d < 0.0){ /* If last step was negative... */ if(s > 0.0) /* Add in linear term if current slope is still positive.*/ next_step -= epsilon * s; /*If current slope is close to or larger than prev slope... */ if(s >= (shrink_factor*p)) next_step += mu * d; /* Take maximum size negative step. */ else next_step += d * s / (p - s); /* Else, use quadratic estimate. */ } else if(d > 0.0){ /* If last step was positive... */ if(s < 0.0) /* Add in linear term if current slope is still negative.*/ next_step -= epsilon * s; /* If current slope is close to or more neg than prev slope... */ if(s <= (shrink_factor*p)) next_step += mu * d; /* Take maximum size negative step. */ else next_step += d * s / (p - s); /* Else, use quadratic estimate. */ } else /* Last step was zero, so use only linear term. 
*/ next_step -= epsilon * s; /* update global data arrays */ deltas[i] = next_step; weights[i] = w + next_step; prevs[i] = s; slopes[i] = 0.0; } /* Set up all the inputs from the INPUT vector as the first few entries in in the values vector. */ void SETUP_INPUTS(float inputs[]) { int i; /*********/ Values[0] = 1.0; /* bias unit */ for(i=0; i