// Copyright 2010 Andrew McSweeny // All rights reserved import static java.lang.System.out; import individual.IndividualCreator; import java.io.IOException; import substitution.Substitution; import dominance.Dominance; import engine.Crossover; import engine.Stats; import fitness.FitnessCalc; import gene.CodonTable; class Gem { public static void main (String args[]) throws IOException { // create instance of Sim class: // main class where majority of execution occurs // Sim class is where starting population size N is created, individuals acquire mutations, // individuals mate and pass mutations to offspring, N most fit offspring become the // new population mating repeats for desired number of generations Sim s= new Sim(); if (args.length > 0){ // read configuration file, store values for parameters within Sim object, s try { s.readConfigFile(args[0]); } catch (IOException ioe) { out.println("Unable to read from configuration file \"" + args[0] + "\""); System.exit(-1); } // static init method for class engine.Stats // entire Gem simulation uses only one random number generator, // contained in Stats. Allows debugging by using a fixed seed // for consistent results each run. Stats.init(); // static init method for class dominance.Dominance // dominance filename has been read into Sim object, s, from config file if (!s.isDominanceWeighted_){ Dominance.init(s.getDominanceFilename()); } else { Dominance.init(s.getDominanceWeight()); } // static init method for class substitution.Substitution // mutation filename has been read into Sim object, s, from config file // Mutation filename contains the probabilities of different substitutions // Transitions are more common than transversions Substitution.init(s.getMutationFilename()); // static init method for class individual.Creator // Purpose of Creator class is to assemble the first individual: "adam" // This individual contains two copies of the contig specified in the config file // The Creator class is responsible for dividing the contig sequence into multiple // blocks (loci), each is called packedBlock. These packedBlocks are stored in // the internal arrays of packedChromosome objects. The first individual thus // has two packedChromosomes, each containing many packedBlocks. // read codon table into program CodonTable.readCodonTable(s.getCodonTableFilename()); if (s.isTruncated()){ // truncation option for simulation is set in config file // if truncation is used, only use the first maxSize nucleotides // to make a chromosome. E.g. create a chromosome from the first 1000 // nucleotides in the FASTA file // truncation is useful for local debugging. We use a 27Mbp sequence // for our simulation which takes a lot of memory. truncating the sequence // is useful for debugging locally, where not enough memory may be available // to use the entire sequence //SAM_0628: Creator now accepts number of environments as argument IndividualCreator.init (s.getChrFilename(), s.getMaxSize(), s.isSeqStored(), s.getNumEnv(), s.isAlleleShared(),s.isDominanceWeighted()); } else { IndividualCreator.init (s.getChrFilename(), s.isSeqStored(), s.getNumEnv(), s.isAlleleShared(),s.isDominanceWeighted()); } // The Creator class divides the contig sequence into blocks using the position // of CDS and non-CDS regions in the contig. These positions are obtained from // the exon file, which gives the position of CDS exons. 
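// A rough, standalone sketch of how such a partition could be built from a list of CDS exon
// intervals (illustrative only: the interval-list form of the exon data and the names below are
// assumptions, not IndividualCreator's actual API):
/*
import java.util.ArrayList;
import java.util.List;

class BlockPartitionSketch {

    // A block is a half-open interval [start, end) flagged as CDS or non-CDS.
    static class Block {
        final int start, end;
        final boolean isCDS;
        Block(int start, int end, boolean isCDS) {
            this.start = start; this.end = end; this.isCDS = isCDS;
        }
    }

    // exons: sorted, non-overlapping CDS intervals [start, end) within a contig of length chrSize.
    // Returns alternating non-CDS / CDS blocks that together cover the whole contig.
    static List<Block> partition(int chrSize, List<int[]> exons) {
        List<Block> blocks = new ArrayList<Block>();
        int pos = 0;
        for (int[] exon : exons) {
            if (exon[0] > pos) {
                blocks.add(new Block(pos, exon[0], false)); // intergenic gap before this exon
            }
            blocks.add(new Block(exon[0], exon[1], true));  // the CDS exon itself
            pos = exon[1];
        }
        if (pos < chrSize) {
            blocks.add(new Block(pos, chrSize, false));     // trailing non-CDS tail
        }
        return blocks;
    }
}
*/
// (In the real program the exon coordinates are read by IndividualCreator.readExons, called just below.)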
// So a contig sequence is broken into blocks like this:
// nonCDS/CDS/nonCDS/CDS/nonCDS
IndividualCreator.readExons(s.getExonFilename(), IndividualCreator.getChrSize());

// The Creator class is also used later in the program to return information about
// a specific bp position, such as whether the position is within an MRI region.
// Here, the positions of GC-rich MRI regions are demarcated according to the
// MRI filename specified in the config file
IndividualCreator.readMRI(s.getMRIFilename(), IndividualCreator.getChrSize());

// Once the information about the position of CDS/non-CDS regions is processed,
// a single (static) Individual object is constructed within the Creator class
IndividualCreator.createIndividual(s.getMaxBlockSize());

// Set the multiplier for genetic distance of the genetic map provided at
// startup. Default is 1. See Sim.java for an explanation of the crossover
// multiplier.
Crossover.setMultiplier(s.getCrossoverMultiplier());

// static init method for the class engine.Crossover
// reads the genetic map data giving the genetic distance in cM of many sites
// across the contig under evaluation.
Crossover.init(s.getCrossoverFilename(), IndividualCreator.getChrSize());

// static init method for the class FitnessCalc
// The FitnessCalc class contains a matrix of scalar selection coefficients (s-values)
// for each possible base substitution in the entire contig sequence.
// Synonymous mutations (i.e. A->A at position 191289) have 0 for s.
// The remaining s-values for the matrix are built from random values
// drawn from two probability distributions: one for CDS regions and one for
// nonCDS regions. Additionally, an MRI constant may be added to these s-values
// depending on whether the bp position is inside or outside a GC-rich MRI region.
// See Sim.java for more information.
if (s.getGenNum() == 0){
    //SAM_0628: fitnessCalc now takes number of environments in constructor
    FitnessCalc.init(s.isSeqStored(), IndividualCreator.getStrChr(),
            s.getFitProbFilename(), s.getFitnessShift(), s.getFitnessMultiplier(),
            s.getCDSfitProbFilename(), s.getCDSfitnessShift(), s.getCDSfitnessMultiplier(),
            s.getMRIConstant(), IndividualCreator.getNumBlocks(), s.getNumEnv(),
            s.getCodonBiasedFitness(),
            s.getNumberNonIdenticalEnvDistribtionFilename(),
            s.getEnvNonIdenticalDistribtionFilename(),
            s.getPercentIdenticalFitness(), s.getOutputDirectory());
}

s.runSimulation();

} else {
    System.out.println("Usage: Gem <config file>");
}
}
}

IndividualComparator.java

// Copyright 2010 Andrew McSweeny
// All rights reserved

import java.util.Comparator;

import individual.Individual;

// Undoubtedly the simplest class in the program
// compares two Individual objects by looking at their fitness
class IndividualComparator implements Comparator<Individual> {

    int numEnvironments_;

    //SAM_0628: Added number of environments to comparator
    public IndividualComparator (int numEnvironments){
        numEnvironments_ = numEnvironments;
    }

    public int compare ( Individual i1, Individual i2 ){
        float fitness1 = i1.getFitness()[numEnvironments_];
        float fitness2 = i2.getFitness()[numEnvironments_];

        // Sorts in descending order!!
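        // (A TreeSet<Individual> built with this comparator therefore iterates from the most fit
        // individual to the least fit; Sim.runSimulation relies on that ordering when it draws the
        // next population from the sorted nextGeneration_ sets.)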
if ( fitness1 < fitness2 ){ return 1; } else if ( fitness1 > fitness2 ){ return -1; } else { return 0; } } } Sim.java // Copyright 2010 Andrew McSweeny // Note: Do not update repository until this sequence storage thing // is worked out // import static java.lang.System.out; import engine.Stats; import fitness.FitnessCalc; import individual.IndividualCreator; import individual.Individual; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Random; import java.util.Set; import java.util.StringTokenizer; import java.util.TreeSet; import java.util.Vector; import bitpack.PackedChromosome; import tools.Clock; import tools.Consts; import tools.EasyClock; public class Sim { // integer int matingType_; // name of file containing probabilities of different types of dominance public String dominanceFilename_; // name of FASTA (.fa) file containing the sequence under evaluation private String contigFilename_; // if the configuration file contains a max_size value // the contig under evaluation will be shortened to the truncated size public int truncatedSize_ = Consts.MAX_INT; public float dominanceWeight_; public float getDominanceWeight() { return dominanceWeight_; } public boolean isDominanceWeighted_ = false; public boolean isDominanceWeighted() { return isDominanceWeighted_; } // set to true if truncateSize_ is set // default is false private boolean truncated_ = false; // name of file containing the probabilities of each possible substitution // i.e. P(G->T)=x, P(G->A)=y, etc public String substitutionFilename_; // name of file containing the bp positions of CDS exons // CDS exon positions are used to divide the contig under evaluation // into coding and non-coding loci public String exonFilename_; // name of file containing the bp positions GC-rich and GC-poor MRI regions // if desired, base substitutions promoting GC-richness can have a small // constant // added to the fitness value for the substitution public String MRIFilename_; // the contig under evaluation is divided into coding and non-coding blocks // since non-coding (intergenic) regions can be very large in size, // noncoding blocks // may be divided into multiple noncoding blocks // TODO: evaluate whether this is necessary, now that sequence data is no // longer // stored in each individual ... i think it is (AM) as large blocks having // multiple // alleles that differ only slightly may be cumbersome since each allele // must be stored private int maxBlockSize_; // the simulation uses a genetic map obtained from the HapMap consortium // to simulate crossover. the mean number of crossovers/bp is somewhat low // and therefore deleterious mutations are removed less efficiently than // what // is possible with a higher rate of crossover. 
Therefore, a // "crossover multiplier" // allows one to multiply the genetic distances of the map, in cM, by a // constant public float crossoverMultiplier_; // filename for genetic map containing two columns: bp and genetic distance // in cM public String geneticMapFilename_; // filename containing the probability distribution of fitness values // for mutations within non-coding regions public String nonCdsfitnessDistributionFilename_; // filename containing the probability distribution of fitness values // for mutations within coding regions public String cdsFitnessDistributionFilename_; // constant which is added to selection coefficient for a base substitution // favoring GC-richness within GC-rich regions // (subtracted from s for substitutions favoring AT-richness within these // regions) public float MRIConstant_; // distributions of selection coefficient (s) that are read from the files // can be shifted and multiplied. // for instance, a file called normal.txt is provided with the standard // normal distribution s~N(0,1) (mean=0, stddev=1) // however, by using a shift of +1 and a multiplier of 2, this distribution // can be transformed to N(1,2), for example public float nonCdsFitnessShift_ = 0; public float nonCdsFitnessMultiplier_ = 0; public float cdsFitnessShift_ = 0; public float cdsFitnessMultiplier_ = 0; // All of the following parameters have default values in case nothing is // specified in the ".in" file. // number of mutations per individual per generation private int mutPerGen_ = 100; // number of generations private int nGen_ = 1000; // population size private int pSize_ = 100; private int meanNumOffspringPerMatingPair_ = 3; // set to true if number of offspring is dependent on the fitness of // the mating pair public boolean fertilityFitnessDependence_ = false; // set to true if user wants extra output to System.out, particularly // the time it will take to complete the simulation public boolean verbose_; // set true if user wants to store sequence in each individual as // opposed to mutations public boolean isSeqStored_; // number of generations between fixating the mutations public int fixationFreq_; // number of generations between backing up all data and outputing consensus // sequence public int backupFrequency_; // starting generation number public int genNum_; // used to create a zero padded generation for file i/o Integer numDigits_; // Population of Individuals private Vector population_; // number of populations int numEnv_; // Holds offspring, population_ will point this at // the end of a generation and then this Vector becomes // the new population_ Vector Vector> nextGeneration_; // Mean Population fitness, calculated after each generation private float[] meanFitness_; // number of threads, should be set equal to the number of cores available int nThreads_; // Percentage of Population that migrates to a new environment each // generation float migrationRate_; // Codon Biased Fitness coefficient value float codonBiasedFitness_; String codonTableFilename_; boolean isAlleleShared_; private int outputFrequency_; private float percentIdenticalFitness_; private String numberNonIdenticalEnvDistribtionFilename_; private String envNonIdenticalDistribtionFilename_; private String outputDirectory_; /* * Getters and setters for different fields */ public boolean isTruncated() { return truncated_; } public float getFitnessShift() { return nonCdsFitnessShift_; } public float getFitnessMultiplier() { return nonCdsFitnessMultiplier_; } public String 
getFitProbFilename() { return nonCdsfitnessDistributionFilename_; } public String getMutationFilename() { return substitutionFilename_; } public String getDominanceFilename() { return dominanceFilename_; } public String getChrFilename() { return contigFilename_; } public int getMaxSize() { return truncatedSize_; } public String getExonFilename() { return exonFilename_; } public String getMRIFilename() { return MRIFilename_; } public int getMaxBlockSize() { return maxBlockSize_; } public float getCrossoverMultiplier() { return crossoverMultiplier_; } public String getCrossoverFilename() { return geneticMapFilename_; } public String getCDSfitProbFilename() { return cdsFitnessDistributionFilename_; } public String getCodonTableFilename() { return codonTableFilename_; } public float getMRIConstant() { return MRIConstant_; } public float getCDSfitnessShift() { return cdsFitnessShift_; } public float getCDSfitnessMultiplier() { return cdsFitnessMultiplier_; } public boolean isSeqStored() { return isSeqStored_; } public int getGenNum() { return genNum_; } public int getNumEnv() { return numEnv_; } public float getCodonBiasedFitness() { return codonBiasedFitness_; } public boolean isAlleleShared() { return isAlleleShared_; } public String getNumberNonIdenticalEnvDistribtionFilename() { return numberNonIdenticalEnvDistribtionFilename_; } public String getEnvNonIdenticalDistribtionFilename() { return envNonIdenticalDistribtionFilename_; } public float getPercentIdenticalFitness() { return percentIdenticalFitness_; } public String getOutputDirectory() { return outputDirectory_; } /** * main function of Sim object Once the readConfigFile function is * completed, this function is run * * @throws IOException */ @SuppressWarnings("unchecked") public void runSimulation() throws IOException { // set population or load back up file if starting genNum_ not equal to // 0 if (genNum_ == 0) { // Create vector to hold population population_ = new Vector(pSize_); if (verbose_) { Clock.startClock(); out.print("Creating first individual: "); } Individual adam = IndividualCreator.getIndividual(); if (verbose_) { out.println(Clock.getStepTime()); out.print("Initializing population vector: "); } // Create a population of identical individuals for (int pi = 0; pi < pSize_; pi++) { population_.add(pi, new Individual(adam)); } // for (int pi = 0; pi < pSize_; pi++) { // population_.elementAt(pi).randomMutation(mutPerGen_); // create // // random // mutation // } if (verbose_) out.println(Clock.getStepTime()); } else { String zeroPaddedGen = String.format("%0" + numDigits_ + "d", genNum_); FileInputStream backup_File; try { backup_File = new FileInputStream(outputDirectory_ + "backup_" + zeroPaddedGen + ".ser"); ObjectInputStream Object_Input = new ObjectInputStream( backup_File); population_ = (Vector) Object_Input.readObject(); // sets // population FitnessCalc.Afits_ = (Vector) Object_Input .readObject(); // sets Fitness table for A FitnessCalc.Cfits_ = (Vector) Object_Input .readObject(); // sets Fitness table for C FitnessCalc.Tfits_ = (Vector) Object_Input .readObject(); // sets Fitness table for T FitnessCalc.Gfits_ = (Vector) Object_Input .readObject(); // sets Fitness table for G FitnessCalc.consensusSeq_ = (StringBuffer) Object_Input .readObject(); FitnessCalc.isSeqStored_ = isSeqStored_; FitnessCalc.fitnessOffset_ = (Vector) Object_Input .readObject(); FitnessCalc.numEnv_ = numEnv_; FitnessCalc.totalNumPolySites_ = (Integer) Object_Input .readObject(); FitnessCalc.totalNumMutFixed_ = (Integer) Object_Input 
.readObject(); Stats.r_ = (Random) Object_Input.readObject(); Object_Input.close(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } // main output file FileWriter mainOutput = new FileWriter( outputDirectory_ + "Results.csv", true); // temporary output file between backups being created FileWriter tmpOutput = new FileWriter(outputDirectory_ + "TmpResults.csv"); // clock time // FileWriter clockOutput = new // FileWriter(outputDirectory_+"ClockOutput.csv"); // Since the number of offspring per mating pair follows // a Poisson distribution, we are interested in calculating // the total number of mating pairs and total number of offspring // Then, we can calculate mean number of offspring per mating pair // to make sure this number is close to the value lambda used // for the Poisson process /** * Main loop of the simulation FOR EACH generation: get number of mating * pairs FOR EACH Each mating pair: select a male and a female from * population_ Vector generate number of offspring using Poisson process * FOR EACH offspring generate a gamete for male generate a gamete for * female create a new Individual: maternal chromosome = male gamete * paternal chromosome = female gamete add new Individual to * nextGeneration_ Vector END END contents of nextGeneration_ comprise * new population_ Vector END */ for (int gen = genNum_ + 1; gen <= nGen_; gen++) { // set up title of columns for .csv output if (gen == 1) { // clockOutput.write("\"gen\",\"before_threads\",\"during threads\",\"after_threads\""); // clockOutput.flush(); tmpOutput.write("\"gen\","); for (int j = 0; j < numEnv_; j++) { tmpOutput.write("\"mean_fitness_env_" + (j + 1) + "\","); } // if sequence stored total mutations and fixations not // calculated if (isSeqStored_) { tmpOutput.write("\n"); tmpOutput.write("0"); for (int j = 0; j < numEnv_; j++) { tmpOutput.write(",0"); } tmpOutput.write("\n"); } else { tmpOutput .write("\"polymorphic_sites\",\"fixed_mutations\"\n"); tmpOutput.write("0"); for (int j = 0; j < numEnv_; j++) { tmpOutput.write(",0"); } tmpOutput.write(",0,0\n"); } tmpOutput.flush(); } //EasyClock beforeThreadsClock = new EasyClock(); //beforeThreadsClock.startClock(); // migration randomly reassign a percentage of individuals to new // environments if (matingType_ == Consts.PAIRING_WITHIN_ENVIRONMENTS) { int randInd; int randEnv; for (int j = 0; j < pSize_ * migrationRate_; j++) { randInd = Stats.nextInt(pSize_); // pick random individual randEnv = Stats.nextInt(numEnv_); // pick random environment population_.elementAt(randInd).setEnvironment(randEnv); // move // individual // to // that // environment } } Clock.startClock2(); if (verbose_) out.println("[Generation: " + gen + "]"); nextGeneration_ = new Vector>(); for (int i = 0; i < numEnv_; i++) { TreeSet tmpTS = new TreeSet( new IndividualComparator(i)); nextGeneration_.add(Collections.synchronizedSet(tmpTS)); } if (verbose_) out.print("Mutating population: "); if (verbose_) out.println(Clock.getStepTime()); if (verbose_) out.print("Creating offspring: "); // clockOutput.write(gen + "," + // beforeThreadsClock.getStepTime()+","); //EasyClock duringThreadsClock = new EasyClock(); //duringThreadsClock.startClock(); // SAM~6/15 create an array of threads to use for Mating Thread[] threads = new Thread[nThreads_]; for (int j = 0; j < nThreads_; j++) { threads[j] = new Sim.MatingThread(gen); threads[j].start(); } // wait for all threads to finish b4 continuing for (int j = 0; j < nThreads_; j++) { try { threads[j].join(); } catch 
(InterruptedException ignore) { } } // clockOutput.write(duringThreadsClock.getStepTime()+","); // EasyClock afterThreadsClock = new EasyClock(); // afterThreadsClock.startClock(); // print statistics on mean #offspring per mating pair // useful to make sure Poisson process is working correctly // and that the mean number of offspring is close to lamdba // TODO: fix incrementing of totalNumOffspring, verbose currently // not implemented if (verbose_) { // out.println("Mean # offspring this generation: " // +numOffspringThisGeneration/(pSize_/2)); // out.println("Mean # offspring (cumulative) : " // +totalNumOffspring/(double)totalMatingPairs); out.println(Clock.getStepTime()); } // clears population population_.removeAllElements(); if (verbose_) { out.print("Sorting offspring by fitness: "); } // sort population using the IndividualComparator class // Individuals are sorted by fitness in descending order // Therefore, the Individuals at the head of the array // are the most fit // sorting differs depending on the type of mating and environment // selection if (matingType_ == Consts.PAIRING_WITHIN_ENVIRONMENTS) { // selects the next population // by picking an environment at random // then taking the most fit individual from that environment // placing them in the new population // repeats until new population is the correct size int env = 0; Individual tmpInd; while (population_.size() < pSize_) { if (nextGeneration_.elementAt(env).size() == 0) { continue; } Iterator it = nextGeneration_.elementAt(env) .iterator(); tmpInd = it.next(); population_.add(tmpInd); nextGeneration_.elementAt(env).remove(tmpInd); env++; if (env % numEnv_ == 0) env = 0; } } else if (matingType_ == Consts.PAIRING_BETWEEN_ENVIRONMENTS) { // in this case the sorting and selection is similar to above // except individuals self select to be in the environment where // they are most fit // so each sorted population for a particular environment // contains all individuals int env = 0; Individual tmpInd; while (population_.size() < pSize_) { Iterator it = nextGeneration_.elementAt(env) .iterator(); tmpInd = it.next(); tmpInd.setEnvironment(env); population_.add(tmpInd); for (int j = 0; j < numEnv_; j++) { nextGeneration_.elementAt(j).remove(tmpInd); } env++; if (env % numEnv_ == 0) env = 0; } } // nextGeneration Vector now points to null and must be // reinstantiated // in the next generation nextGeneration_ = null; if (verbose_) { out.println(Clock.getStepTime()); out.print("Truncating offspring population: "); } // Individual's order statistic for fitness, order k = kth smallest // value // e.g. for N = 100 // highest fitness -> Order = 100 // lowest fitness -> Order = 1 int fitnessOrder = pSize_; // loop while either of the following conditions are true // 1.) the loop counter is less than the population size N // or .. // 2.) 
the number of individuals in the new generation is less than // the // population size (N) // // As of this moment I CANNOT RECALL why condition 2 is necessary // or what good incrementing the loop counter (x) would do if the // population // size of the new generation is less than N // TODO: possible removal of condition 2 // assigns each individual in the population a percentile rank for // fitness // For example, for a population of size 100 // highest order statistic for fitness is 100, individual is at // index x=0 // individual has 99.5% percentile rank for fitness // lowest order statistic is 1, individual is at index x=99 // individual has 0.5% percentile rank for fitness if (fertilityFitnessDependence_) { for (int x = 0; x < pSize_; x++) { population_.elementAt(x).setFitnessPercentileRank( new Float(new Float(fitnessOrder - 0.5) / pSize_)); fitnessOrder--; } } if (verbose_) { out.println(Clock.getStepTime()); out.print("Calculating population fitness: "); } // mean fitness for the entire population meanFitness_ = new float[numEnv_]; int[] envSize = new int[FitnessCalc.getNumEnv()]; Arrays.fill(envSize, 0); // simple loop to calculate sum of fitness values int indEnv; for (int pi = 0; pi < pSize_; ++pi) { indEnv = population_.elementAt(pi).getEnvironment(); meanFitness_[indEnv] += population_.elementAt(pi).getFitness()[indEnv]; envSize[indEnv] += 1; } // divide the sum by the population size to get mean fitness for (int j = 0; j < numEnv_; j++) { // if (envSize[j] == 0 ) out.println("zero env size"); meanFitness_[j] /= envSize[j]; } if (verbose_) out.println(Clock.getStepTime()); long totalStepTime = Clock.getStepTime2(); if (verbose_) out.println("Total time for one generation (s): " + (totalStepTime / 1000)); // estimate the number of (d)ays (h)ours (m)inutes and (s)econds // for the simulation to complete long d = totalStepTime * (nGen_ - gen) / 1000 / 60 / 60 / 24; long h = totalStepTime * (nGen_ - gen) / 1000 / 60 / 60 % 24; long m = totalStepTime * (nGen_ - gen) / 1000 / 60 % 60; long s = totalStepTime * (nGen_ - gen) / 1000 % 60; if (verbose_) out.println("Estimated time for " + (nGen_ - gen) + " generations: " + d + "d " + h + "h " + m + "m " + s + "s"); // fixation of mutations w/ a frequency of 100% every fixatingFreq_ // gen if (!isSeqStored_) { if (gen % fixationFreq_ == 0) { population_ = FitnessCalc.fixMutations(population_); } } // MAIN OUTPUT FROM SIMULATION // Print .CSV rows of data from the simulation // first column is generation number, second column is mean fitness // of population if (gen % outputFrequency_ == 0) { try { tmpOutput.write(gen + ","); for (int j = 0; j < numEnv_; j++) { tmpOutput.write(meanFitness_[j] + ","); } tmpOutput.write(FitnessCalc.getTotalNumPolySites() + "," + FitnessCalc.getTotalNumMutFixed() + "\n"); tmpOutput.flush(); } catch (IOException e) { e.printStackTrace(); } } // backing the current data and outputting consensus sequence if (gen % backupFrequency_ == 0) { try { // write out current consensus sequence String zeroPaddedGen = String.format("%0" + numDigits_ + "d", gen); FileWriter fw = new FileWriter(outputDirectory_ + "consensus_gen_" + zeroPaddedGen + ".fa"); fw.write(">Consensus sequence for generation " + gen + "\n"); StringBuffer stringBufferTmp = new StringBuffer(); int j; for (j = 0; j < FitnessCalc.getConsensusSeq().length() / 70; j++) { stringBufferTmp.append(FitnessCalc.getConsensusSeq() .substring(j * 70, (j + 1) * 70)); stringBufferTmp.append('\n'); } stringBufferTmp.append(FitnessCalc.getConsensusSeq() .substring(j 
* 70)); fw.write(stringBufferTmp.toString()); fw.flush(); fw.close(); // create a backup file FileOutputStream fos = null; ObjectOutputStream out = null; fos = new FileOutputStream(outputDirectory_ + "backup_" + zeroPaddedGen + ".ser"); out = new ObjectOutputStream(fos); out.writeObject(population_); out.writeObject(FitnessCalc.Afits_); out.writeObject(FitnessCalc.Cfits_); out.writeObject(FitnessCalc.Tfits_); out.writeObject(FitnessCalc.Gfits_); out.writeObject(FitnessCalc.consensusSeq_); out.writeObject(FitnessCalc.fitnessOffset_); out.writeObject(FitnessCalc.totalNumPolySites_); out.writeObject(FitnessCalc.totalNumMutFixed_); out.writeObject(Stats.r_); out.close(); // append temporary results to the results file tmpOutput.flush(); BufferedReader readTmpResults = new BufferedReader( new FileReader(outputDirectory_ + "TmpResults.csv")); while (readTmpResults.ready()) { mainOutput.write(readTmpResults.readLine() + "\n"); } mainOutput.flush(); // remove previous backup zeroPaddedGen = String.format("%0" + numDigits_ + "d", gen - backupFrequency_); File backupToBeRemoved = new File(outputDirectory_ + "backup_" + zeroPaddedGen + ".ser"); backupToBeRemoved.delete(); // reset temporary results file tmpOutput = new FileWriter(outputDirectory_ + "TmpResults.csv"); } catch (IOException e) { e.printStackTrace(); } } // last generation write tmpOut to main out and delete temporary // file if (gen == nGen_) { tmpOutput.flush(); BufferedReader readTmpResults = new BufferedReader( new FileReader(outputDirectory_ + "TmpResults.csv")); while (readTmpResults.ready()) { mainOutput.write(readTmpResults.readLine() + "\n"); } mainOutput.flush(); mainOutput.close(); tmpOutput.close(); File tmpOutToBeRemoved = new File(outputDirectory_ + "TmpResults.csv"); tmpOutToBeRemoved.delete(); // clockOutput.close(); } // clockOutput.write(afterThreadsClock.getStepTime()+"\n"); // clockOutput.flush(); } } public class MatingThread extends Thread { // pSizeNextGen is a counter for how many new individuals // have been added to the nextGeneration vector int gen_; public MatingThread(int gen) { this.gen_ = gen; } public void run() { int sizeNextGeneration = 0; // This loop selects mating pairs and performs mating // Loop executes while following is true // 1) size of next generation is less than the population size * // mean number of offspring per mating pair while (sizeNextGeneration < (pSize_ * meanNumOffspringPerMatingPair_)) { // these are indices within the population Vector // the population vector is sampled until a male and female are // found // and the indices are stored in mIdx and pIdx int mIdx = 0; int pIdx = 0; // sample from the population vector until a female is found // select a random environment int environment = Stats.nextInt(FitnessCalc.getNumEnv()); // if pairing within environments the pair of individuals must // be listed in same random environment // generated, also their offspring must be put in the same // environment as the parents if (matingType_ == Consts.PAIRING_WITHIN_ENVIRONMENTS) { // sample from the population vector until a female is found for (;;) { mIdx = Stats.nextInt(pSize_); if (population_.elementAt(mIdx).getGender() == Consts.FEMALE && population_.elementAt(mIdx).getEnvironment() == environment) break; } // sample from the population vector until a male is found for (;;) { pIdx = Stats.nextInt(pSize_); if (population_.elementAt(pIdx).getGender() == Consts.MALE && population_.elementAt(pIdx).getEnvironment() == environment) break; } // if pairing between evironments only must check 
that // pairing occurs // between male and female } else if (matingType_ == Consts.PAIRING_BETWEEN_ENVIRONMENTS) { for (;;) { mIdx = Stats.nextInt(pSize_); if (population_.elementAt(mIdx).getGender() == Consts.FEMALE) break; } // sample from the population vector until a male is found for (;;) { pIdx = Stats.nextInt(pSize_); if (population_.elementAt(pIdx).getGender() == Consts.MALE) break; } } // local variable // number of offspring for this mating pair int numOffspringThisMating; // if number of offspring is dependent on the fitness of the // couple // multiply value obtained from Poisson process by twice the // couple's // average percentile rank for fitness if (fertilityFitnessDependence_ & gen_ > 1) { numOffspringThisMating = Math .round(new Float( (population_.elementAt(mIdx) .getFitnessPercentileRank() / 2 + population_ .elementAt(pIdx) .getFitnessPercentileRank() / 2) * 2 * Stats.poisson(meanNumOffspringPerMatingPair_))); } else { // otherwise number of offspring is determined from a // Poisson process // with lambda equal to the desired mean number of offspring // per mating // pair numOffspringThisMating = Stats .poisson(meanNumOffspringPerMatingPair_); } // local variable, n_umber of o_ffspring // calculated using a Poisson process with // lambda // for each offspring in this mating pair... for (int x = 0; x < numOffspringThisMating; ++x) { // create a new Individual // pass mother's gamete -> maternal chromosome // pass father's gamete -> paternal chromosome Individual father = population_.elementAt(pIdx); Individual mother = population_.elementAt(mIdx); //father.randomMutation(mutPerGen_, gen_); //mother.randomMutation(mutPerGen_, gen_); PackedChromosome mGamete = mother.getGamete(isSeqStored_); PackedChromosome pGamete = father.getGamete(isSeqStored_); Individual offspring = new Individual(mGamete, pGamete, population_.elementAt(pIdx).getEnvironment()); offspring.randomMutation(mutPerGen_, gen_); offspring.computeFitness(); // add this new Individual to nextGeneration Vector if (matingType_ == Consts.PAIRING_WITHIN_ENVIRONMENTS) { nextGeneration_.elementAt(offspring.getEnvironment()).add(offspring); } else if (matingType_ == Consts.PAIRING_BETWEEN_ENVIRONMENTS) { for (int i = 0; i < numEnv_; i++) { nextGeneration_.elementAt(i).add(offspring); } } } if (matingType_ == Consts.PAIRING_WITHIN_ENVIRONMENTS) { sizeNextGeneration = 0; for (int i = 0; i < numEnv_; i++) { sizeNextGeneration += nextGeneration_.elementAt(i) .size(); } } else if (matingType_ == Consts.PAIRING_BETWEEN_ENVIRONMENTS) { sizeNextGeneration = nextGeneration_.elementAt(0).size(); } } } } /** * readConfigFile() * * reads data from filename provided at the command line i.e. java Gem * config.in * * each line in the configuration file is a parameter and a value, separated * by whitespace * * @param filename * Name of file containing configuration data * @throws IOException */ public void readConfigFile(final String filename) throws IOException { // used to separate parts of a string by a character // e.g. a list of comma or tab separated values StringTokenizer st; // HashMap of parameters and their values HashMap config = new HashMap(); try { // open the configuration file BufferedReader in = new BufferedReader(new FileReader(filename)); // while configuration file contains data while (in.ready()) { String line = new String(); // read a line from config file line = in.readLine(); st = new StringTokenizer(line); // key is the parameter (i.e. 
"chromosome_filename") String key = st.nextToken(); // value for parameter (i.e. "NT_011512.fa") String value = st.nextToken(); // add value to config hash config.put(key, value); } } catch (IOException ioe) { throw new IOException(); } try { // if max_size is set, the contig sequence will be shortened // to the specified number of characters truncatedSize_ = (new Integer(config.get("max_size"))).intValue(); // set truncated_ to true (is initialized to false at declaration) truncated_ = true; } catch (Exception e) { // if max_size is not specified, set to maximum value of int // TODO: this probably can be removed. if truncated_ is set to // false then there should be no need to specify a size, because // the value of truncated_ is presumably checked before truncation // (right?) truncatedSize_ = Consts.MAX_INT; } // check if user wants verbose mode; produces extra output to System.out // containing statistics about mating and estimated time to finish the // simulation try { verbose_ = new Boolean(config.get("verbose")); } catch (Exception e) { verbose_ = false; } // Check if user wants each individual to have its sequence stored // internally // as opposed to just storing the mutations from the consensus sequence try { isSeqStored_ = new Boolean(config.get("sequence_storage")); } catch (Exception e) { isSeqStored_ = false; } try { isAlleleShared_ = new Boolean(config.get("allele_sharing")); } catch (Exception e) { isAlleleShared_ = true; } // "fertility-fitness dependence" is a method of running the simulation // whereby the number of offspring for a given mating pair is increased // or decreased // from the value returned by the mating Poisson process according to // whether // the mating pair has high or low fitness (i.e. the most fit mating // pairs have // greater fertility) try { // if user writes "true" for this parameter fertilityFitnessDependence_ = new Boolean( config.get("fertility_fitness_dependence")); } catch (Exception e) { // if parameter is missing, default is false (all mating pairs // follow // the same distribution for number of offspring fertilityFitnessDependence_ = false; } try { crossoverMultiplier_ = (new Float( config.get("crossover_multiplier"))).floatValue(); } catch (Exception e) { crossoverMultiplier_ = 1.0f; } try { MRIConstant_ = (new Float(config.get("MRI_constant"))).floatValue(); } catch (Exception e) { MRIConstant_ = 0; } try { migrationRate_ = (new Float(config.get("migration_rate"))) .floatValue(); } catch (Exception e) { migrationRate_ = 0.0f; } try { String result = config.get("dominance_method"); if (result.equals("weight")){ isDominanceWeighted_ = true; } else { // result defaults to matrix isDominanceWeighted_ = false; } } catch (Exception e) { isDominanceWeighted_ = false; } try { dominanceWeight_ = (new Float(config.get("dominance_weight"))).floatValue(); } catch (Exception e) { dominanceWeight_ = 0.0f; } try { String matingTypeString = new String(config.get("mating_scheme")); if (matingTypeString.equals("within_environments")) matingType_ = Consts.PAIRING_WITHIN_ENVIRONMENTS; if (matingTypeString.equals("between_environments")) matingType_ = Consts.PAIRING_BETWEEN_ENVIRONMENTS; } catch (Exception e) { matingType_ = Consts.PAIRING_BETWEEN_ENVIRONMENTS; } try { nonCdsFitnessShift_ = (new Float(config.get("fitness_shift"))); } catch (Exception e) { nonCdsFitnessShift_ = 0; } try { nonCdsFitnessMultiplier_ = (new Float( config.get("fitness_multiplier"))); } catch (Exception e) { nonCdsFitnessMultiplier_ = 1; } try { nonCdsFitnessMultiplier_ = (new 
Float( config.get("fitness_multiplier"))); } catch (Exception e) { nonCdsFitnessMultiplier_ = 1; } try { cdsFitnessShift_ = (new Float(config.get("CDS_fitness_shift"))); } catch (Exception e) { cdsFitnessShift_ = 0; } try { cdsFitnessMultiplier_ = (new Float(config.get("fitness_multiplier"))); } catch (Exception e) { cdsFitnessMultiplier_ = 1; } try { codonBiasedFitness_ = (new Float(config.get("codon_biased_fitness"))); } catch (Exception e) { codonBiasedFitness_ = 0; } try { nThreads_ = (new Integer(config.get("number_threads"))); } catch (Exception e) { nThreads_ = 1; } try { fixationFreq_ = (new Integer(config.get("fixation_frequency"))); } catch (Exception e) { fixationFreq_ = 10000; } try { backupFrequency_ = (new Integer(config.get("backup_frequency"))); } catch (Exception e) { backupFrequency_ = 100000; } try { genNum_ = (new Integer(config.get("start_from_backup"))); } catch (Exception e) { genNum_ = 0; } try { numEnv_ = (new Integer(config.get("number_environments"))); } catch (Exception e) { numEnv_ = 1; } try { maxBlockSize_ = (new Integer(config.get("max_block_size"))) .intValue(); } catch (Exception e) { maxBlockSize_ = 10000000; } try { outputFrequency_ = (new Integer(config.get("output_frequency"))); } catch (Exception e) { outputFrequency_ = 10000; } try { numberNonIdenticalEnvDistribtionFilename_ = config .get("non_identical_environments_distribtion_filename"); envNonIdenticalDistribtionFilename_ = config .get("probability_environment_non_identical_distribtion_filename"); } catch (Exception e){ numberNonIdenticalEnvDistribtionFilename_ = "nonIdentical1EnvDist.txt"; envNonIdenticalDistribtionFilename_ = "nonIdentical1EnvProbabilities.txt"; } if (numberNonIdenticalEnvDistribtionFilename_ == null) numberNonIdenticalEnvDistribtionFilename_ = "nonIdentical1EnvDist.txt"; if(envNonIdenticalDistribtionFilename_ == null) envNonIdenticalDistribtionFilename_ = "nonIdentical1EnvProbabilities.txt"; try { percentIdenticalFitness_ = (new Float( config.get("percent_identical_fitness"))); } catch (Exception e) { percentIdenticalFitness_ = 0.80f; } // All of the parameters within this try block MUST be specified within // the // configuration file. No default parameters are assumed. 
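// For reference, a minimal configuration file might look like the following
// (the parameter names are the ones parsed in this method; every value shown
// here is purely illustrative, not a recommended setting):
//
//   chromosome_filename          NT_011512.fa
//   exon_filename                exons.txt
//   MRI_filename                 mri.txt
//   mutation_filename            substitutions.txt
//   crossover_filename           genetic_map.txt
//   fitness_filename             normal.txt
//   CDS_fitness_filename         cds_fitness.txt
//   dominance_filename           dominance.txt
//   codon_table                  codon_table.txt
//   output_directory             ~/gem_output/
//   population_size              100
//   num_generations              1000
//   mutations_per_generation     100
//   offspring_per_mating_pair    3
//
// plus any of the optional parameters handled above (verbose, max_size,
// number_threads, number_environments, mating_scheme, and so on), each of
// which falls back to a default when it is absent.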
// if any parameter is missing or has an invalid value the program will // exit // with status -1 // see variable declarations at beginning of file for information on // what // each variable does try { dominanceFilename_ = config.get("dominance_filename"); mutPerGen_ = (new Integer(config.get("mutations_per_generation"))) .intValue(); pSize_ = (new Integer(config.get("population_size"))).intValue(); nGen_ = (new Integer(config.get("num_generations"))).intValue(); numDigits_ = (int) (Math.log10(nGen_) + 1); exonFilename_ = config.get("exon_filename"); contigFilename_ = config.get("chromosome_filename"); MRIFilename_ = config.get("MRI_filename"); meanNumOffspringPerMatingPair_ = (new Integer( config.get("offspring_per_mating_pair"))).intValue(); substitutionFilename_ = config.get("mutation_filename"); geneticMapFilename_ = config.get("crossover_filename"); nonCdsfitnessDistributionFilename_ = config.get("fitness_filename"); cdsFitnessDistributionFilename_ = config .get("CDS_fitness_filename"); codonTableFilename_ = config.get("codon_table"); String tmpOutputDirectory = config.get("output_directory"); // if output directory contained ~ change it to home String userHome = System.getProperty("user.home"); outputDirectory_ = tmpOutputDirectory.replace("~", userHome); } catch (Exception e) { e.printStackTrace(); out.println("Error reading config file--one or more necessary parameters is missing"); System.exit(-1); } // Create the output directory File outputDir = new File(outputDirectory_); outputDir.mkdirs(); // creates input stream of original config file BufferedReader origConfig = new BufferedReader(new FileReader(filename)); // create file of the configurations from the input config file and puts // it in the output directory FileWriter configFile = new FileWriter(outputDirectory_ + "configurations.txt"); while (origConfig.ready()) { configFile.write(origConfig.readLine() + "\n"); } configFile.flush(); configFile.close(); } } BitPack.java // Copyright 2010 Andrew McSweeny // All rights reserved package bitpack; import java.util.TreeMap; import java.util.TreeSet; public class BitPack { /* // This class contains a function called crossBlocks which is used to perform // crossover between two packedBlocks // Once sequence data is no longer stored, much of the manipulation of bytes can be // removed, since sequence data will no longer be stored in bytes, or stored at all // for that matter... 
// However, each block will still store the position of mutations from the reference // sequence as is done here with the PackedBlock member variable mutationIndices_ // Therefore, this function will still be needed to cross two blocks by getting // the mutations up until the crossover point from the first block, getting the // mutations after the crossover point from the second block, and creating a new // block representing a new allele for the locus where crossover has occured */ // converts integer value between 0 and 255 // to unsigned binary representation in byte (0000000 - 1111111) // normal range of byte is -128 to 127 since it uses two's complement // to represent negative numbers public static byte integerToByte( int i){ if ( i > 127){ i-=256; return (byte)i; } else { return (byte)i; } } // same function overloaded to accept a double value public static byte integerToByte( double i){ if ( i > 127){ i-=256; return (byte)i; } else { return (byte)i; } } /** * Performs crossover between two PackedBlocks of nucleotide * data * * Crossover occurs AFTER position, so position will never be * the last character in the sequence * * @param pb1 * @param pb2 * @param position * @return */ public static PackedBlock CrossBlocks (boolean isSeqStored, PackedBlock pb1, PackedBlock pb2, int position){ if (isSeqStored){ int numBytes = pb1.getNumBytes(); byte [] bytes1 = pb1.getBytes(); byte [] bytes2 = pb2.getBytes(); byte [] bytes3 = new byte [numBytes]; // this conditional handles situations where PackedBlocks // can be split by whole bytes // e.g. position == 7, crossover occurs right after end of // 2nd block // so copy first two blocks // then copy the rest of them if (position % 4 == 3 ){ // block where crossover occurs int crossBlockIdx = position / 4 + 1; // copy bytes from 1st PackedBlock into the result block // e.g. crossBlock is at pos 2 (3rd byte) // copy first 2 bytes System.arraycopy(bytes1, 0, bytes3, 0, crossBlockIdx); // copy last bytes 2nd PackedBlock into the result block // e.g. crossBlock is at pos 2 (3rd byte) // start at pos 2 // total number of bytes is 8 (last index is 7) // from pos 2 to pos 7 (2,3,4,5,6,7) is 6 bytes // (numBytes .8.-2) System.arraycopy(bytes2, crossBlockIdx, bytes3, crossBlockIdx, numBytes-crossBlockIdx); PackedBlock pb3 = new PackedBlock(pb1,bytes3); pb3.mutationIndices_ = new TreeSet(pb1.mutationIndices_.headSet(position,true)); pb3.mutationIndices_.addAll(pb2.mutationIndices_.tailSet(position,false)); pb3.computeFitness(); return pb3; } else { // position within crossover block after where crossover // occurs // 3 , 2 , 1 ... will never be 0 int bitPairSignificance = 3-(position % 4); // copy bytes from 1st PackedBlock into the result block // e.g. crossBlock is at pos 2 (3rd byte) // copy first 2 bytes int crossBlockIdx = position / 4; // actually the index of the byte where the crossover is System.arraycopy(bytes1, 0, bytes3, 0, crossBlockIdx); // mask 1 will be &'d with the first half // mask 2 will be &'d with the second half // results will be added byte mask1 = 0; byte mask2 = 0; // array of masks 00000011 - 11000000 byte [] masks = new byte [4]; for (int bps = 0; bps < 4; bps++){ // 3 = 11 binary masks[bps] = BitPack.integerToByte( Math.pow(4, bps)*3); } // position within crossover block after where // crossover occurs // 3 , 2 , 1 ... 
will never be 0 switch (bitPairSignificance){ case 3: // 128 64 32 16 8 4 2 1 // 7 6 5 4 3 2 1 0 // mask1 = 11000000 = 192 // mask2 = 00111111 mask1 = masks[3]; mask2 = (byte) (masks[2] + masks[1] + masks[0]); break; case 2: // mask1 = 11110000 // mask2 = 00001111 mask1 = (byte) (masks[3] + masks[2]); mask2 = (byte) (masks[1] + masks[0]); break; case 1: // mask1 = 11111100 // mask2 = 00000011 mask1 = (byte) (masks[3] + masks[2] + masks[1]); mask2 = (byte) (masks[0]); break; } // result is the block with proper crossover // copy last bytes 2nd PackedBlock into the result block // e.g. crossBlock is at pos 2 (3rd byte) // start at pos 3 // total number of bytes is 8 (last index is 7) // from pos 3 to pos 7 is 5 bytes (numBytes-3) bytes3[crossBlockIdx] = (byte)( (mask1&bytes1[crossBlockIdx]) + (mask2&bytes2[crossBlockIdx]) ); System.arraycopy(bytes2, crossBlockIdx+1, bytes3, crossBlockIdx+1, numBytes-(crossBlockIdx+1)); PackedBlock pb3 = new PackedBlock(pb1,bytes3); pb3.mutationIndices_ = new TreeSet(pb1.mutationIndices_.headSet(position,true)); pb3.mutationIndices_.addAll(pb2.mutationIndices_.tailSet(position,false)); pb3.computeFitness(); return pb3; } }else{ TreeMap mutations1= pb1.getMutations(); TreeMap mutations2= pb2.getMutations(); TreeMap mutations3= new TreeMap(); int startIdx = pb1.getStartIdx(); mutations3.putAll(mutations1.headMap(startIdx + position,true)); mutations3.putAll(mutations2.tailMap(startIdx + position,false)); PackedBlock pb3 = new PackedBlock(pb1,mutations3); pb3.computeFitness(); return pb3; } } } // Copyright 2010 Andrew McSweeny // All rights reserved package bitpack; import individual.IndividualCreator; import static java.lang.System.out; import java.io.Serializable; import java.util.Arrays; import java.util.TreeMap; import java.util.TreeSet; import dominance.DomStruct; import dominance.Dominance; import substitution.Substitution; import tools.Consts; import engine.Stats; import fitness.FitnessCalc; import bitpack.BitPack; public class PackedBlock implements Serializable{ /** * */ private static final long serialVersionUID = 1577745672050249245L; // This class could more appropriately be called PackedLocus, or Locus, or something // like that. A block is a sub-portion of the sequence that the simulation is using // The sequence is divided into CDS and nonCDS blocks (see Sim.java for more details). // contains the indices of where mutations from the reference sequence are located // within this locus. public TreeSet mutationIndices_; // array of bytes used to store sequence data public byte [] bytes_; // HashMap to store mutations public TreeMap mutations_; // indices of the first and last bases stored in byte data // For example, if a contig sequence of 600 bp is read into the simulation and // broken into equal sized blocks of 200 bp each, the values would be as follows: // block 0: startIdx_ = 0, endIdx_ = 199 // block 1: startIdx_ = 200, endIdx_ = 399 // block 2: startIdx_ = 400, endIdx_ = 599 private int startIdx_; private int endIdx_; // true if this block contains gene(s) coding for proteins // false if this block is intergenic sequence private boolean isCDS_; public boolean isCDS (){ return isCDS_; } public boolean isSeqStored_; // Dominance coefficients for this block // When fitness is calculated for this particular locus, two alleles are taken into // consideration: maternal and paternal. 
First, the sum of all selection coefficients for // the maternal allele is computed to give Fm and the same sum is computed for the // paternal allele to give Fp // The resulting fitness for this locus is calculated as follows // Flocus = minCoeff*min(Fm,Fp) + maxCoeff(Fm,Fp) + meanCoeff(Fm,Fp) // These coefficients are used to model the properties of dominance // maxCoeff = 1 and minCoeff = 0, mean coeff = 0 models complete dominance // maxCoeff = 0 and minCoeff = 1, mean coeff = 0 models complete negative dominance* // * or perhaps complete recessivity // maxCoeff = 0 and minCoeff = 0, mean coeff = 1 models co-dominance private float minCoefficient_; private float maxCoefficient_; private float meanCoefficient_; // number of letters in this block private int numLetters_; // number of bytes used to store the letters private int numBytes_; // fitness of this block (sum of all selection coefficients) private float [] fitness_; // block number. 0-based: first block for contig sequence is block 0 private int blockNumber_; /* * Getters and setters for fields */ public void setStartIdx(int startIdx) {this.startIdx_ = startIdx;} public void setEndIdx(int endIdx) {endIdx_ = endIdx;} public float getMinCoefficient() { return minCoefficient_; } public void setMinCoefficient(float minCoefficient){ minCoefficient_ = minCoefficient;} public float getMaxCoefficient() {return maxCoefficient_;} public void setMaxCoefficient(float maxCoefficient){ maxCoefficient_ = maxCoefficient;} public float getMeanCoefficient() { return meanCoefficient_;} public void setMeanCoefficient_(float meanCoefficient) { meanCoefficient_ = meanCoefficient;} public int getNumBytes() { return numBytes_;} public int getEndIdx() { return endIdx_;} public int getBlockNumber() { return blockNumber_; } public int getNumLetters(){ return numLetters_; } public byte [] getBytes(){ return bytes_; } public TreeMap getMutations(){return mutations_;} public void setMutations(TreeMap mutations){this.mutations_ = mutations;} public int getStartIdx() { return startIdx_;} /** * Prints sequence of PackedBlock using getLetter() function * Useful to confirm that getLetter() function works */ public void printSequence2(){ for (int x = 0; x < numLetters_; ++x){ System.out.print(this.getLetter(x)); } } public String getSequenceFromBytes(){ StringBuffer tmp = new StringBuffer(""); // Bit pair Significance // Most significant pair first : 3 // second : 2 // third : 1 // // for 11, 3 * 4^3 = 192 = 11000000 // 3 * 4^2 = 48 = 00110000 // 3 * 4^1 = 12 = 00001100 // 3 * 4^0 = 3 = 00000011 // for 10, 2 * 4^3 = 128 = 10000000 // etc if (isSeqStored_){ int bitPairSignificance = 3; int byteIdx = 0; for (int x = 0; x < numLetters_; ++x){ if ( ((byte) BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.G) & bytes_[byteIdx]) == BitPack.integerToByte( Math.pow( 4,bitPairSignificance) *Consts.G )){ tmp.append("G"); } else if ( ((byte) BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.C) & bytes_[byteIdx]) == BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.C )){ tmp.append("C"); } else if ( ((byte) BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.T) & bytes_[byteIdx]) == BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.T )){ tmp.append("T"); } else { tmp.append("A"); } bitPairSignificance--; if ( bitPairSignificance == -1 ){ bitPairSignificance = 3; byteIdx ++; } } } else { } return tmp.toString(); } /** * Prints sequence of PackedBlock without using getLetter() * method * Useful for comparison */ public 
void printSequence(){ // Bit pair Significance // Most significant pair first : 3 // second : 2 // third : 1 // // for 11, 3 * 4^3 = 192 = 11000000 // 3 * 4^2 = 48 = 00110000 // 3 * 4^1 = 12 = 00001100 // 3 * 4^0 = 3 = 00000011 // for 10, 2 * 4^3 = 128 = 10000000 // etc int bitPairSignificance = 3; int byteIdx = 0; for (int x = 0; x < numLetters_; ++x){ if ( ((byte) BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.G) & bytes_[byteIdx]) == BitPack.integerToByte( Math.pow( 4,bitPairSignificance) *Consts.G )){ System.out.print("G"); } else if ( ((byte) BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.C) & bytes_[byteIdx]) == BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.C )){ System.out.print("C"); } else if ( ((byte) BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.T) & bytes_[byteIdx]) == BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.T )){ System.out.print("T"); } else { System.out.print("A"); } bitPairSignificance--; if ( bitPairSignificance == -1 ){ bitPairSignificance = 3; byteIdx ++; } } } /** * * returns a character from the binary data in the PackedBlock * * @param position 0-based position of the character in the * block * @return character at this position */ public char getLetter(int position){ // convert 0-based index of letter in sequence to index // for a byte int byteIdx = position / 4; // i.e. if position is 5, byte 5/4=0, 5%4=1, // significance = 3-1=2 // 3 2 1 0 3 2 1 0 // 00000000 00110000 // 0 1 2 3 4 5 6 7 // pos int bitPairSignificance = 3 - (position % 4); if ( ((byte) BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.G) & bytes_[byteIdx]) == BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.G )){ return ('G'); } else if (((byte)BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.C) & bytes_[byteIdx])== BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.C )){ return ('C'); } else if ( ((byte) BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.T) & bytes_[byteIdx]) == BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.T )){ return ('T'); } else { return ('A'); } } /** * Computes fitness for the entire PackedBlock using the static method * getFitness from the FitnessGetter class */ public void computeFitness(){ fitness_ = FitnessCalc.getFitness(this); } public float [] getFitness(){ return fitness_; } // hopefully the method of random mutation will be changed in the future // perhaps the best route is the use of matrices like Jukes-Cantor or // something like that .... 
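// (For reference, the Jukes-Cantor idea mentioned above would replace the substitution-probability
// file with a 4x4 row-stochastic matrix P, where P[i][j] is the probability that base i is replaced
// by base j; under Jukes-Cantor every off-diagonal entry of a row is the same. A minimal, purely
// illustrative sketch of such a matrix-driven mutation step, which is not part of the current
// implementation, might look like this:)
/*
    // bases indexed by their position in "ACGT"; each row of P must sum to 1
    static char mutateBase(char base, double[][] P, java.util.Random rng) {
        final String bases = "ACGT";
        int i = bases.indexOf(base);
        double u = rng.nextDouble();
        double cum = 0.0;
        for (int j = 0; j < 4; j++) {
            cum += P[i][j];
            if (u <= cum) {
                return bases.charAt(j);
            }
        }
        return base; // guard against floating-point round-off
    }
*/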
// currently the method of random mutation is implemented within // the Individual class, which makes method calls to PackedChromosome that // result in method calls to the random mutation functions within the PackedBlock // class (below) // within the individual class // for each random mutation // * the maternal or paternal packedChromosome is selected // * the randomMutation function is called on the chosen chromosome // > the randomMutation function for packedChromosome chooses a bp // location at random somewhere between the start and end of the entire // contig sequence // > this random position is then used to choose a packedBlock // > the starting and ending bases are chosen according // to the probabilities specified in the substitution file // > random positions are sampled within the packedBlock are sampled // until the starting base is found, if over 100 positions are sampled // and the starting base is not yet found, a new substitution is chosen, // and the sampling process is repeated... this can happen if the starting // base is "A", yet the entire block is very GC-rich and an "A" is hard to // find. // OTHER POSSIBILITIES // Another possible is to sample the reference sequence until the desired starting // base is found ... then translate the position to a block. .. if there is already // a mutation at the chosen position in the block, abort and sample the reference // sequence again, translate to block, try again ... etc .. of course this runs // the problem that a substitution cannot occur twice in the same place... public synchronized boolean checkSeq (int position, String s){ StringBuffer newSubstitution = new StringBuffer(); if (isSeqStored_){ if (s.length() == 1){ newSubstitution.append(this.getLetter(position)); if (!newSubstitution.equals(s)) return false; } else if (s.length() == 2){ try{ newSubstitution.append(this.getLetter(position-1)); newSubstitution.append(this.getLetter(position)); if (!newSubstitution.equals(s)) return false; } catch (IndexOutOfBoundsException a ){ return false; } } else { try{ newSubstitution.append(this.getLetter(position-1)); newSubstitution.append(this.getLetter(position)); newSubstitution.append(this.getLetter(position+1)); if (!newSubstitution.equals(s)) return false; } catch (IndexOutOfBoundsException a ){ return false; } } } else { // sequence not stored newSubstitution = new StringBuffer(); if (s.length() == 1){ if (mutations_.containsKey(position)){ newSubstitution.append(mutations_.get(position)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position)); } if (s.charAt(0) != newSubstitution.charAt(0)){ return false; } }else if (s.length() == 2){ try { if (mutations_.containsKey(position-1)){ newSubstitution.append(mutations_.get(position-1)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position-1)); } if (mutations_.containsKey(position)){ newSubstitution.append(mutations_.get(position)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position)); } if (!s.substring(0, 2).equals(newSubstitution.toString())){ return false; } } catch (IndexOutOfBoundsException a){ return false; } }else{ try { if (mutations_.containsKey(position-1)){ newSubstitution.append(mutations_.get(position-1)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position-1)); } if (mutations_.containsKey(position)){ newSubstitution.append(mutations_.get(position)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position)); } if (mutations_.containsKey(position+1)){ 
                    newSubstitution.append(mutations_.get(position+1));
                }else{
                    newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position+1));
                }
                if (!s.substring(0, 3).equals(newSubstitution.toString())){
                    return false;
                }
            } catch (IndexOutOfBoundsException a){
                return false;
            }
        }
    }
    return true;
}

public synchronized boolean randomMutation (String s, int position){
    int bitPair;
    StringBuffer newSubstitution;
    // if the sequence is stored, all the information needed is in this packed block;
    // if it is not, the consensus sequence must be consulted
    if (isSeqStored_){
        position = position - startIdx_;
        //TODO: currently does not look for mutations at beginning/end of block
        newSubstitution = new StringBuffer();
        // checks the base pair(s) at the position where the mutation might occur
        // performs the random mutation slightly differently depending on the input file:
        // it works for single-base mutations or context-dependent mutations,
        // with either 5' context only or both 5' and 3' context
        if (s.length() == 2){
            newSubstitution.append(this.getLetter(position));
            if (s.charAt(0) != newSubstitution.charAt(0)){
                return false;
            }
        }else if (s.length() == 4){
            try{
                newSubstitution.append(this.getLetter(position-1));
                newSubstitution.append(this.getLetter(position));
                if (!s.substring(0, 2).equals(newSubstitution.toString())){
                    return false;
                }
            }catch(IndexOutOfBoundsException a){
                return false;
            }
        }else{
            try{
                newSubstitution.append(this.getLetter(position-1));
                newSubstitution.append(this.getLetter(position));
                newSubstitution.append(this.getLetter(position+1));
                if (!s.substring(0, 3).equals(newSubstitution.toString())){
                    return false;
                }
            }catch(IndexOutOfBoundsException a){
                return false;
            }
        }
        this.mutationIndices_.add(position);
        if (s.length() == 2){
            if (s.charAt(1) == 'A'){ bitPair = Consts.A; }
            else if (s.charAt(1) == 'C'){ bitPair = Consts.C; }
            else if (s.charAt(1) == 'T'){ bitPair = Consts.T; }
            else { // s.charAt(1) == 'G'
                bitPair = Consts.G;
            }
        }else if (s.length() == 4){
            if (s.charAt(3) == 'A'){ bitPair = Consts.A; }
            else if (s.charAt(3) == 'C'){ bitPair = Consts.C; }
            else if (s.charAt(3) == 'T'){ bitPair = Consts.T; }
            else { // s.charAt(3) == 'G'
                bitPair = Consts.G;
            }
        }else{
            if (s.charAt(4) == 'A'){ bitPair = Consts.A; }
            else if (s.charAt(4) == 'C'){ bitPair = Consts.C; }
            else if (s.charAt(4) == 'T'){ bitPair = Consts.T; }
            else { // s.charAt(4) == 'G'
                bitPair = Consts.G;
            }
        }
        int byteIdx = position / 4;
        int bitPairSignificance = 3 - (position % 4);
        // i.e.
if position is 5, byte 5/4=0, 5%4=1, // significance = 3-1=2 // 3 2 1 0 3*2*1 0 // 00000000 00110000 // 0 1 2 3 4 5 6 7 // position byte bMask = -1; bMask-=((byte)BitPack.integerToByte(Math.pow(4, bitPairSignificance)*3)); /** * Determines which block a character is found in */ float [] oldFitness = FitnessCalc.getFitnessAt(this, position); bytes_[byteIdx] &= bMask; bytes_[byteIdx] += BitPack.integerToByte( Math.pow(4, bitPairSignificance)*bitPair); float [] newFitness = FitnessCalc.getFitnessAt(this, position); for (int j=0; j < FitnessCalc.getNumEnv(); j++){ fitness_[j] -= oldFitness[j]; fitness_[j] += newFitness[j]; } // following is similar to above except that since sequence is not stored // sometime the program will look up the base pair from the consensus sequence }else{ // sequence not stored newSubstitution = new StringBuffer(); if (s.length() == 2){ if (mutations_.containsKey(position)){ newSubstitution.append(mutations_.get(position)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position)); } if (s.charAt(0) != newSubstitution.charAt(0)){ return false; } }else if (s.length() == 4){ try { if (mutations_.containsKey(position-1)){ newSubstitution.append(mutations_.get(position-1)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position-1)); } if (mutations_.containsKey(position)){ newSubstitution.append(mutations_.get(position)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position)); } if (!s.substring(0, 2).equals(newSubstitution.toString())){ return false; } } catch (IndexOutOfBoundsException a){ return false; } }else{ try { //TODO: Currently does not perform correct mutation at beginning/end of block and chromosome if (mutations_.containsKey(position-1)){ newSubstitution.append(mutations_.get(position-1)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position-1)); } if (mutations_.containsKey(position)){ newSubstitution.append(mutations_.get(position)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position)); } if (mutations_.containsKey(position+1)){ newSubstitution.append(mutations_.get(position+1)); }else{ newSubstitution.append(FitnessCalc.consensusSeq_.charAt(position+1)); } if (!s.substring(0, 3).equals(newSubstitution.toString())){ return false; } } catch (IndexOutOfBoundsException a){ return false; } } if (s.length() == 2){ mutations_.put(position, s.charAt(1)); }else if (s.length() == 4){ mutations_.put(position, s.charAt(3)); }else{ mutations_.put(position, s.charAt(4)); } fitness_ = FitnessCalc.getFitness(this); } return true; } // constructor for PackedBlock // used by the bitpack.BitPack function CrossBlocks // template packedBlock is used to make a new packedBlock // recombined sequence data is passed as second argument and stored public PackedBlock(PackedBlock pb, byte [] pbData){ this.isCDS_ = pb.isCDS_; this.maxCoefficient_ = pb.getMaxCoefficient(); this.minCoefficient_ = pb.getMinCoefficient(); this.meanCoefficient_ = pb.getMeanCoefficient(); this.startIdx_ = pb.startIdx_; this.endIdx_ = pb.endIdx_; numLetters_ = pb.numLetters_; numBytes_ = pb.numBytes_; this.blockNumber_ = pb.blockNumber_; this.mutationIndices_ = new TreeSet(pb.mutationIndices_ ); isSeqStored_ = pb.isSeqStored_; this.bytes_ = pbData; } public PackedBlock(PackedBlock pb, TreeMap pbData){ this.isCDS_ = pb.isCDS_; this.maxCoefficient_ = pb.getMaxCoefficient(); this.minCoefficient_ = pb.getMinCoefficient(); this.meanCoefficient_ = pb.getMeanCoefficient(); this.startIdx_ = pb.startIdx_; this.endIdx_ = 
pb.endIdx_; this.numLetters_ = pb.numLetters_; this.mutations_ = pbData; this.blockNumber_ = pb.blockNumber_; isSeqStored_ = pb.isSeqStored_; } // constructor for PackedBlock // used in bitpack.PackedChromosome randomMutation function // creates an exact copy of another packedBlock public PackedBlock(PackedBlock pb){ this.isCDS_ = pb.isCDS_; this.maxCoefficient_ =pb.getMaxCoefficient(); this.minCoefficient_ = pb.getMinCoefficient(); this.meanCoefficient_ = pb.getMeanCoefficient(); this.startIdx_ = pb.startIdx_; this.endIdx_ = pb.endIdx_; this.numLetters_ = pb.numLetters_; isSeqStored_ = pb.isSeqStored_; if (isSeqStored_){ this.numBytes_ = pb.numBytes_; this.bytes_ = new byte[numBytes_]; System.arraycopy(pb.bytes_,0,this.bytes_,0,numBytes_); this.mutationIndices_ = new TreeSet(pb.mutationIndices_); }else{ this.mutations_ = new TreeMap(pb.getMutations()); } this.fitness_ = new float[IndividualCreator.numEnv_]; System.arraycopy(pb.fitness_,0,this.fitness_,0,IndividualCreator.getNumEnv()); this.blockNumber_ = pb.blockNumber_; } // Constructor for packedBlock used by individual.Creator class // Only used at beginning of program to create blocks // from original sequence, when the sequence is being divided // into blocks depending on whether it is coding or noncoding public PackedBlock (boolean isSeqStored, String sequence, int blockNumber, int startIdx, boolean isCDS){ this.isSeqStored_ = isSeqStored; this.isCDS_ = isCDS; if (isCDS && !Dominance.isDominanceWeighted()){ DomStruct dr = Dominance.getRandomDominance(); this.maxCoefficient_ = dr.getMaxCoeff(); this.minCoefficient_ = dr.getMinCoeff(); this.meanCoefficient_ = dr.getMeanCoeff(); } else { this.maxCoefficient_ = 0; this.minCoefficient_ = 0; this.meanCoefficient_ = 1; } this.fitness_ = new float [IndividualCreator.getNumEnv()]; this.blockNumber_ = blockNumber; this.startIdx_ = startIdx; numLetters_ = sequence.length(); if (isSeqStored){ this.mutationIndices_ = new TreeSet(); numBytes_ = numLetters_ / 4; if ( (numLetters_ % 4) != 0){ numBytes_++; } bytes_ = new byte[numBytes_]; bytes_[0] = 0; // zero out the first byte int bitPairSignificance = 3; int packIdx = 0; for (int i = 0; i < numLetters_; ++i){ if ( Character.toUpperCase(sequence.charAt(i)) == 'C'){ bytes_[packIdx] += BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.C); } else if ( Character.toUpperCase(sequence.charAt(i)) == 'T'){ bytes_[packIdx] += BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.T); } else if ( Character.toUpperCase(sequence.charAt(i)) == 'G'){ bytes_[packIdx] += BitPack.integerToByte( Math.pow(4,bitPairSignificance)*Consts.G); } bitPairSignificance--; if ( bitPairSignificance == -1 ){ bitPairSignificance = 3; if ( packIdx+1 < numBytes_ ){ packIdx ++; bytes_[packIdx] = 0; } } } }else{ mutations_ = new TreeMap(); } } } PackedChromosome.java // Copyright 2010 Andrew McSweeny // All rights reserved package bitpack; import individual.IndividualCreator; import java.io.Serializable; import java.util.Iterator; import java.util.Vector; import substitution.Substitution; import engine.Stats; public class PackedChromosome implements Serializable{ /** * */ private static final long serialVersionUID = -5900584538613085916L; // number of letters in the contig sequence that is loaded private int numLetters_; // number of locus blocks the PackedChromosome has been broken into private int numBlocks_ = 0; // used by Creator class to set numBlocks_ class variable // once all blocks have been divided by Creator class public void setNumBlocks(int numBlocks) 
{this.numBlocks_ = numBlocks;} // Vector containing blocks public Vector blocks_; public synchronized String getSequence (){ StringBuffer tmp = new StringBuffer(""); Iterator it = blocks_.iterator(); while (it.hasNext()){ tmp.append(it.next().getSequenceFromBytes()); } return tmp.toString(); } public synchronized void randomMutation (){ boolean isSuccessful = false; // generate random mutation String s = Substitution.getPair(); String ntFrom = ""; if (s.length() == 2){ ntFrom = s.substring(0, 1); } else if (s.length() == 4){ ntFrom = s.substring(0, 2); } else if (s.length() == 6){ ntFrom = s.substring(0,3); } PackedBlock pb = null; PackedBlock checkPb = null; int bn = 0; // run until a position is found that can perform the generated mutation while (!isSuccessful){ // clear pb pb = null; // get a random position for the mutation between 0 and numLetters_(exclusive) int position = Stats.nextInt(numLetters_); // get block number for the position bn = IndividualCreator.getPosBlock(position); checkPb = this.blocks_.elementAt(bn); if ( checkPb.checkSeq(position,ntFrom) == false){ continue; } // create a new packedBlock, as the mutation will create // a new allele at this locus pb = new PackedBlock(this.blocks_.elementAt(bn)); // perform a random mutation within the packedBlock isSuccessful = pb.randomMutation(s, position); } // assign new allele to correct position in blocks_ Vector blocks_.setElementAt(null, bn); blocks_.setElementAt(pb, bn); } public int getNumLetters(){ return this.numLetters_; } // Constructor to make an exact copy of a PackedChromosome public PackedChromosome (PackedChromosome pc){ this.numLetters_ = pc.numLetters_; this.numBlocks_ = pc.numBlocks_; blocks_ = new Vector(); for (int i = 0; i < pc.numBlocks_; ++i){// if (IndividualCreator.isAlleleShared()){ this.addBlock(pc.getBlock(i)); }else{ PackedBlock pb = new PackedBlock(pc.getBlock(i)); this.addBlock(pb); } } } // returns the internal Vector of packedBlocks public Vector getBlocks(){ return blocks_; } // Constructor to create an empty PC // used by individual.Creator class public PackedChromosome( int numLetters ){ this.numLetters_ = numLetters; blocks_ = new Vector(); } // add a block to the internal Vector // used by individual.Creator class and // PackedChromosome(PackedChromosome) constructor public void addBlock(PackedBlock pb){ blocks_.add(pb); } // return block number specified by idx public PackedBlock getBlock(int idx){ return blocks_.elementAt(idx); } // assign a packedBlock to index idx in the internal Vector public void setBlock(PackedBlock pb, int idx){ blocks_.setElementAt(null, idx); blocks_.setElementAt(pb, idx); } // return the number of blocks in the packedChromosome public int getNumBlocks() { return numBlocks_; } } DomStruct.java // Copyright 2010 Andrew McSweeny // All rights reserved package dominance; public class DomStruct { // probability of this dominance pattern, i.e. 
what proportion of loci will // have this pattern float prob_; // coefficients for computing a single fitness value from two values for two alleles float maxCoeff_; float minCoeff_; float meanCoeff_; /* Getters and setters for fields */ public float getMaxCoeff() { return maxCoeff_; } public float getMinCoeff() { return minCoeff_; } public float getMeanCoeff() { return meanCoeff_; } public float getProb() { return prob_; } // Constructor, self explanatory DomStruct( float frequency, float mean, float max, float min){ prob_ = frequency; meanCoeff_ = mean; maxCoeff_ = max; minCoeff_ = min; } } Dominance.java // Copyright 2010 Andrew McSweeny // All rights reserved package dominance; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.Iterator; import java.util.Vector; import engine.Stats; // When fitness is computed for an individual, there are two alleles for each locus block // For non-CDS blocks, fitness is the mean of the two alleles // For CDS blocks, fitness is computed according to the fitness for each allele and the pattern of dominance // set for that locus block. The patterns of dominance are assigned to loci during the beginning // of the program by the Creator class. Patterns of dominance are assigned at random, according // to the probability distribution specified in a file. See Sim.java public class Dominance { // Each DomStruct is a set of values for minCoefficient, maxCoefficient, and meanCoefficient // and a probability of that set of values being assigned to a packedBlock static Vector dominanceProbabilities_; static float dominanceWeight_; public static float getDominanceWeight() { return dominanceWeight_; } static boolean isDominanceWeighted_; // static init method for this class; this class is used mostly in static context // called by Sim.init() method, which passes the filename public static void init(float dominanceWeight){ dominanceWeight_ = dominanceWeight; isDominanceWeighted_ = true; } public static boolean isDominanceWeighted() { return isDominanceWeighted_; } public static void init(String filename){ // Vector containing DomStructs // each DomStruct contains a set of values for minCoefficient, maxCoefficient, and meanCoefficient // as well as a probability that this set of values is assigned to a CDS block // For example the program could use a distribution like this: // P(maxCoeff=1,minCoeff=0,meanCoeff=0)=0.85 // P(maxCoeff=0,minCoeff=1,meanCoeff=0)=0.15 dominanceProbabilities_ = new Vector(); isDominanceWeighted_ = false; try { BufferedReader in = new BufferedReader(new FileReader(filename)); String line; while(in.ready()){ line = in.readLine(); if (line.charAt(0) == '#') // lines beginning with # are used for comments within continue; String [] tokens = line.split("\t"); // probability that this dominance pattern will be assigned to a CDS block float probability = (new Float(tokens[0])).floatValue(); // dominance pattern for computing fitness float mean = (new Float(tokens[1])).floatValue(); float max = (new Float(tokens[2])).floatValue(); float min = (new Float(tokens[3])).floatValue(); // create a DomStruct with the probability and values for coefficients DomStruct dp = new DomStruct(probability, mean, max, min); // add the DomStruct to the set of all dominance patterns and their probabilities dominanceProbabilities_.add(dp); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } // returns a DomStruct, containing values for maxCoefficient, minCoefficient, meanCoefficient 
public static DomStruct getRandomDominance(){ // a value between 0 and 1 that must be exceeded within the while loop below to break out float prob = Stats.nextFloat(); Iterator i = dominanceProbabilities_.iterator(); // summation of all probabilities that are read when iterating through loop float cumProb = 0.0f; DomStruct d = null; while (i.hasNext()){ d = i.next(); // add probability of the current dominance pattern to the summation so far cumProb += d.getProb(); // if the summation of probabilities exceeds the random value (prob), use the // current dominance pattern if (cumProb >= prob) { break; } } return d; } } Crossover.java // Copyright 2010 Andrew McSweeny // All rights reserved package engine; import individual.IndividualCreator; import java.io.BufferedReader; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Map; public class Crossover { static String outputDirectory_; // maximum genetic distance in cM where crossover can occur // last entry in genetic map table should have position of last base pair and maxCm_ for // genetic distance private static double maxCm_=0.0; // A given index gives both position in bp and distance in cM for the genetic map private static int [] bpPosition_; private static double [] cmDistance_; // index of last entry in the above arrays private static int lastIdx_=0; // the simulation uses a genetic map obtained from the HapMap consortium // to simulate crossover. the mean number of crossovers/bp is somewhat low // and therefore deleterious mutations are removed less efficiently than what // is possible with a higher rate of crossover. Therefore, a "crossover multiplier" // allows one to multiply the genetic distances of the map, in cM, by a constant // default = 1 private static float multiplier_=1; public static void setMultiplier(float multiplier) { Crossover.multiplier_=multiplier; } // get a random number of crossovers for a given gametogenesis using a poisson process // where lambda = genetic distance of chromosome in M public static int getNumCrossovers(){ // divide by 100 to get M from cM int numCrossovers = Math.round(Stats.poisson(maxCm_/100*multiplier_)); return numCrossovers; } // Static init function for Crossover class. 
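// Illustrative sketch (hypothetical helper, not in the original source): the
// Poisson mean used by getNumCrossovers() above is the map length converted
// from cM to Morgans and scaled by the crossover multiplier. For example, a
// 150 cM map with multiplier 1 gives a mean of 1.5 crossovers per gamete.
static double crossoverPoissonMean(double mapLengthCm, float multiplier){
    return mapLengthCm / 100.0 * multiplier; // cM -> Morgans, then scale
}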
// takes filename of genetic map as well as size of genome under evaluation // if user supplies a chromosome sequence that is shorter in bp than bp positions at the end // of the genetic map, the genetic map will be shortened accordingly public static void init(String geneticMapFileName, int chromosomeSize){ try { BufferedReader in= new BufferedReader(new FileReader(geneticMapFileName)); String line; int linesRead=0; HashMap hm=new HashMap(); while(in.ready()){ line=in.readLine(); linesRead++; if(linesRead == 1){ continue; // skip first line of header information } String [] tokens=line.split("\t"); // basepair position int bp=new Integer(tokens[0]).intValue(); // genetic distance in centiMorgans double cM=new Double(tokens[1]).doubleValue(); // put the bp, cM pair into a hashmap hm.put(bp, cM); // if a value in genetic map table exceeds chromosome size // break out and use last value read in as the end of the map if(bp > chromosomeSize){ break; } } // get key set from hashMap, which is the bp positions ArrayList keys=new ArrayList(hm.keySet()); // genetic map should be from decreasing to increasing bp/genetic distance // anyways, but sort the bp positions just to be sure Collections.sort(keys); Iterator it=keys.iterator(); // instantiate arrays that will hold bp position/cM distance pairs bpPosition_ = new int[keys.size()]; cmDistance_ = new double[keys.size()]; int i=0; for(i=0; it.hasNext(); i++){ // while iterator for keys (bp positions) has values // store bp position and cmDistance in separate arrays bpPosition_[i] =(it.next()).intValue(); cmDistance_[i]=hm.get(bpPosition_[i]).doubleValue(); } int lastIdx=i-1; // remember the last index used for the two arrays lastIdx_=lastIdx; int lastBpPosition=bpPosition_[lastIdx]; // the cM value for the last base pair in the chromosome must be imputed // this imputation will add or subtract to/from the last cM value in the // table of bp/genetic distances according to the rate of bp/cM calculated // by dividing the (bp difference/cm difference) between the last two entries in the // table int realEnd=chromosomeSize; double realCmDiff=(lastBpPosition-realEnd)*(cmDistance_[lastIdx]-cmDistance_[lastIdx-1]) /(bpPosition_[lastIdx]-bpPosition_[lastIdx-1]); // create final entry in HapMap table at the end of the // genome loaded into the program (i.e. at the final bp position, // create an estimated cM value cmDistance_[lastIdx] -= realCmDiff; maxCm_=cmDistance_[lastIdx]; bpPosition_[lastIdx]= realEnd; } catch(Exception e) { e.printStackTrace(); } } public static int getRandomCrossover() throws Exception { // get a random position in cM between 0 and end of chromosome double randCm=Stats.nextDouble() * maxCm_; // we use a binary search to find the entry in the genetic map table // that is closest to the random position int guessIdxHigh=lastIdx_; int guessIdxLow=0; int guessIdx; // start with a guess at where the correct table entry is int ctr=0; while(true){ ctr ++; if(ctr > 100000){ System.out.println("getRandomCrossover function stuck in infinite loop"); } guessIdx=guessIdxLow + (guessIdxHigh-guessIdxLow) / 2; if(cmDistance_[guessIdx] <= randCm && cmDistance_[guessIdx+1] >= randCm){ break; } if(cmDistance_[guessIdx] < randCm){ // guess was low guessIdxLow=guessIdx; } if(guessIdx > 0 && cmDistance_[guessIdx] > randCm){ // guess was high guessIdxHigh=guessIdx; } } // at this point the binary search is complete and // guessIdx is equal the index in the table for the greatest position in cM // in the table that is less than the random cM position (i.e. 
the random cM value // is between this index and the next index (guessIdx+1) ) // get bp and cM values for crossover points before and after the random cM position double sCM=cmDistance_[guessIdx]; double eCM=cmDistance_[guessIdx+1]; int sBp=bpPosition_[guessIdx]; int eBp=bpPosition_[guessIdx+1]; // the position in bp of the random cM value is imputed by looking at the // rate of bp/cM between the two crossover points flanking the random cM value Float fltBpAdded=new Float((randCm-sCM)*(eBp-sBp) /(eCM-sCM)); int bpAdded=Math.round(fltBpAdded); int pos=sBp + bpAdded; int finalpos; if(pos < IndividualCreator.getChrSize()) finalpos = pos; else finalpos = IndividualCreator.getChrSize()-1; return finalpos; } } Stats.java // Copyright 2010 Andrew McSweeny // All rights reserved package engine; import java.io.Serializable; import java.util.Random; /* * This class does two important things. First, it contains a single random number generator that is used * throughout the program. I (Andrew) have seen strange things happen when many random number generators * are instantiated and the program is compiled using OpenJDK. It doesn't seem to be a problem when using * the Oracle JDK. * * Second, this class provides a Poisson process that will produce a discrete random integer, 0 or greater, * for a given value of lambda */ public class Stats implements Serializable{ /** * */ private static final long serialVersionUID = -4896492186930205284L; public static Random r_; public static void init(){ // for debugging, used a fixed seed: r = new Random(1000); r_ = new Random(); } public static int nextInt(){ return r_.nextInt(); } public static int nextInt(int n){ return r_.nextInt(n); } public static double nextDouble(){ return r_.nextDouble(); } public static float nextFloat(){ return r_.nextFloat(); } // Poisson process, produces a discrete random integer, 0 or greater, // for a given value of lambda public static int poisson(double c) { // c is the intensity (lambda) int x = 0; double t = 0.0; for (;;) { t -= Math.log(r_.nextDouble())/c; if (t > 1.0) return x; x++; } } } FitStruct.java // Copyright 2010 Andrew McSweeny // All rights reserved package fitness; import engine.Stats; public class FitStruct { float fitness1_ = 0.0f; float fitness2_ = 0.0f; float probability_ = 0.0f; public float getFitness() { float fitness = fitness1_ + Stats.nextFloat()*Math.abs(fitness2_ - fitness1_); return fitness; } public float getProbability() { return probability_; } protected FitStruct(float fitness1, float fitness2, float probability ){ fitness1_ = fitness1; fitness2_ = fitness2; probability_ = probability; } } FitnessCalc.java // Copyright 2010 Andrew McSweeny // All rights reserved package fitness; import static java.lang.System.out; import engine.Stats; import gene.CodonTable; import gene.Exon; import individual.IndividualCreator; import individual.Individual; import java.util.Arrays; import java.util.Iterator; import java.util.TreeMap; import java.util.TreeSet; import java.util.Vector; import java.io.BufferedReader; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.Serializable; import java.lang.Character; import java.lang.Math; import java.lang.reflect.Array; import tools.Consts; import bitpack.PackedBlock; public class FitnessCalc implements Serializable{ /** * */ private static final long serialVersionUID = -8462402711634333079L; /** * Converts integer to byte, unsigned representation * * 00000000 - 01111111 represent 0 - 127 in Java's "byte" * primitive * 10000000 - 
11111111 represent -128 to -1 although we want * them to represent 128 - 255. Therefore * we subtract 256 from the integer, i, * to store it in unsigned representation * i.e. 128-256=-128 -> 10000000 * 255-256=-1 -> 11111111 * * @param i integer to store as byte * @return byte representation of integer (unsigned) */ public static byte i_to_ub( int i){ if ( i > 127){ i-=256; return (byte)i; } else { return (byte)i; } } /** * Converts double to byte, unsigned representation * * 00000000 - 01111111 represent 0 - 127 in Java's "byte" * primitive * 10000000 - 11111111 represent -128 to -1 although we want * them to represent 128 - 255. Therefore * we subtract 256 from the integer, i, * to store it in unsigned representation * i.e. 128-256=-128 -> 10000000 * 255-256=-1 -> 11111111 * * @param i double to store as byte, actually not a double * precision number, but a *positive integer*, * probably returned by Math.pow * @return byte representation of i (unsigned) */ public static byte i_to_ub( double i){ if ( i > 127){ i-=256; return (byte)i; } else { return (byte)i; } } // Four arrays, length of each array will be equal to the length of the sequence // used for the chromosome in each Individual. Each array contains selection coefficients // for when a base at the given index is mutated to A, C, T or G // For example if Afits_[127254] = -1.21, when the base at index 127254 // is mutated to "A" there will be a decrease in fitness of -1.21 for the allele at this position public static Vector Afits_; public static Vector Cfits_; public static Vector Tfits_; public static Vector Gfits_; public static StringBuffer consensusSeq_; public static boolean isSeqStored_; //SAM 6/28 fitness offset is now a Vector, equal in size to number of environments //containing public static Vector fitnessOffset_; public static int numEnv_; public static Integer totalNumMutFixed_ = new Integer(0); public static Integer totalNumPolySites_ = new Integer(0); public static float codonBiasedFitness_; public static int getNumEnv() { return numEnv_;} public static int getTotalNumMutFixed() { return totalNumMutFixed_; } public static int getTotalNumPolySites() { return totalNumPolySites_; } public static Vector getFitnessOffset() { return fitnessOffset_; } // constant for G+C rich MRI regions. 
This constant is added to selection coeffecient for // substitutions promoting GC richness within GC-rich regions, and subtracted for substitutions // promoting AT richness public static float MRIConstant_; public static Vector fitnessProbabilities_; public static Vector CDSfitnessProbabilities_; private static Vector numNonIdenticalProbabilities_; private static Vector NonIdenticalProbabilities_; public static StringBuffer getConsensusSeq() { return consensusSeq_; } public static void readCDSProbabilities(String filename){ CDSfitnessProbabilities_ = new Vector(); try { BufferedReader in = new BufferedReader(new FileReader(filename)); String line; Vector fitnessVector = new Vector(); Vector frequencyVector = new Vector(); Vector areaUnderCurveVector = new Vector(); float totalAreaUnderCurve = 0f; int lineNum = 0; while(in.ready()){ line = in.readLine(); String [] tokens = line.split("\t"); float fitness2 = (new Float(tokens[0])).floatValue(); fitnessVector.add(fitness2); float frequency2 = (new Float(tokens[1])).floatValue(); frequencyVector.add(frequency2); if (lineNum != 0){ Float frequency1 = frequencyVector.elementAt(lineNum-1); Float fitness1 = fitnessVector.elementAt(lineNum-1); Float areaUnderCurve = Math.min(frequency1, frequency2)*(Math.abs(fitness2-fitness1)); areaUnderCurve += Math.abs(.5f*(frequency2-frequency1)*(fitness2-fitness1)); areaUnderCurveVector.add(areaUnderCurve); totalAreaUnderCurve += areaUnderCurve; } lineNum++; } for (int i=0; i < areaUnderCurveVector.size(); i++){ Float fitness1 = fitnessVector.elementAt(i); Float fitness2 = fitnessVector.elementAt(i+1); Float probability = areaUnderCurveVector.elementAt(i)/totalAreaUnderCurve; FitStruct fp = new FitStruct(fitness1, fitness2, probability); CDSfitnessProbabilities_.add(fp); } } catch (IOException e) { e.printStackTrace(); } } public static void readProbabilities(String filename){ fitnessProbabilities_ = new Vector(); try { BufferedReader in = new BufferedReader(new FileReader(filename)); String line; Vector fitnessVector = new Vector(); Vector frequencyVector = new Vector(); Vector areaUnderCurveVector = new Vector(); float totalAreaUnderCurve = 0f; int lineNum = 0; while(in.ready()){ line = in.readLine(); String [] tokens = line.split("\t"); float fitness2 = (new Float(tokens[0])).floatValue(); fitnessVector.add(fitness2); float frequency2 = (new Float(tokens[1])).floatValue(); frequencyVector.add(frequency2); if (lineNum != 0){ Float frequency1 = frequencyVector.elementAt(lineNum-1); Float fitness1 = fitnessVector.elementAt(lineNum-1); Float areaUnderCurve = Math.min(frequency1, frequency2)*(Math.abs(fitness2-fitness1)); areaUnderCurve += Math.abs(.5f*(frequency2-frequency1)*(fitness2-fitness1)); areaUnderCurveVector.add(areaUnderCurve); totalAreaUnderCurve += areaUnderCurve; } lineNum++; } for (int i=0; i < areaUnderCurveVector.size(); i++){ Float fitness1 = fitnessVector.elementAt(i); Float fitness2 = fitnessVector.elementAt(i+1); Float probability = areaUnderCurveVector.elementAt(i)/totalAreaUnderCurve; FitStruct fp = new FitStruct(fitness1, fitness2, probability); fitnessProbabilities_.add(fp); } } catch (IOException e) { e.printStackTrace(); } } public static void readProbabilityNonIdentical(String filename){ NonIdenticalProbabilities_ = new Vector(); try { BufferedReader in = new BufferedReader(new FileReader(filename)); String line; while(in.ready()){ line = in.readLine(); String [] tokens = line.split("\t"); int environment = (new Integer(tokens[0])).intValue()-1; float probability = (new 
Float(tokens[1])).floatValue(); ProbabilityStruct fp = new ProbabilityStruct(environment, probability); NonIdenticalProbabilities_.add(fp); } } catch (IOException e) { e.printStackTrace(); } } public static void readNumNonIdentical(String filename){ numNonIdenticalProbabilities_ = new Vector(); try { BufferedReader in = new BufferedReader(new FileReader(filename)); String line; while(in.ready()){ line = in.readLine(); String [] tokens = line.split("\t"); int environment = (new Integer(tokens[0])).intValue(); float probability = (new Float(tokens[1])).floatValue(); ProbabilityStruct fp = new ProbabilityStruct(environment, probability); numNonIdenticalProbabilities_.add(fp); } } catch (IOException e) { e.printStackTrace(); } } public static float getRandomCDSFitness(){ float prob = Stats.nextFloat(); // cumulative probability Iterator i = CDSfitnessProbabilities_.iterator(); float cumProb = 0.0f; FitStruct f = null; while (i.hasNext()){ f = i.next(); cumProb += f.getProbability(); if (cumProb >= prob) { break; } } return f.getFitness(); } public static float getRandomFitness(){ float prob = Stats.nextFloat(); // cumulative probability Iterator i = fitnessProbabilities_.iterator(); float cumProb = 0.0f; FitStruct f = null; while (i.hasNext()){ f = i.next(); cumProb += f.getProbability(); if (cumProb >= prob) { break; } } return f.getFitness(); } public static int getRandomNumberNonIdenticalEnv(){ float prob = Stats.nextFloat(); // cumulative probability Iterator i = numNonIdenticalProbabilities_.iterator(); float cumProb = 0.0f; ProbabilityStruct p = null; while (i.hasNext()){ p = i.next(); cumProb += p.getProbability(); if (cumProb >= prob) { break; } } return p.getSingleton(); } public static int getRandomNonIdenticalEnv(){ float prob = Stats.nextFloat(); // cumulative probability Iterator i = NonIdenticalProbabilities_.iterator(); float cumProb = 0.0f; ProbabilityStruct p = null; while (i.hasNext()){ p = i.next(); cumProb += p.getProbability(); if (cumProb >= prob) { break; } } return p.getSingleton(); } /** * Initializes FitnessGetter static object * no longer takes blocksize as a parameter as * block starts and finishes will somehow be used to choose * the correct block and offset * * do we need an individual? no, get it from Creator. 
* * Creates a matrix of mutation fitness values according to * the fraction beneficial mutations * @throws IOException * */ public static void init (boolean isSeqStored, String sequence, String filename, float fitnessShift, float fitnessMultiplier, String CDSfilename, float CDSfitnessShift, float CDSfitnessMultiplier, float MRIConstant, int numBlocks, int numEnv, float codonBiasedFitness, String numberNonIdenticalEnvDistribtionFilename, String envNonIdenticalDistribtionFilename, float percentIdenticalFitness, String outputDirectory) throws IOException{ isSeqStored_ = isSeqStored; MRIConstant_ = MRIConstant; numEnv_ = numEnv; codonBiasedFitness_ = codonBiasedFitness; System.out.print ("Initializing fitness table ["); readProbabilities(filename); readCDSProbabilities(CDSfilename); readNumNonIdentical(numberNonIdenticalEnvDistribtionFilename); readProbabilityNonIdentical(envNonIdenticalDistribtionFilename); int numFits = sequence.length(); Afits_ = new Vector(); Cfits_ = new Vector(); Tfits_ = new Vector(); Gfits_ = new Vector(); final float fillerVal = 1234567.0f; fitnessOffset_ = new Vector(); for (int i=0; i < numEnv_; i++){ Afits_.add(new float[numFits]); Arrays.fill(Afits_.elementAt(i),fillerVal); Cfits_.add(new float[numFits]); Arrays.fill(Cfits_.elementAt(i),fillerVal); Tfits_.add(new float[numFits]); Arrays.fill(Tfits_.elementAt(i),fillerVal); Gfits_.add(new float[numFits]); Arrays.fill(Gfits_.elementAt(i),fillerVal); } for ( int x = 0; x < numFits; x++){ char seqChar = Character.toUpperCase(sequence.charAt(x)); if ( x % (numFits/10) == 0 ) System.out.print("."); if (IndividualCreator.isExonic_[x]){ if (IndividualCreator.getFrame(x) == 2){ String codon = IndividualCreator.getCodons()[x]; Iterator familyMut = CodonTable.getTripletFamilies().get(codon).iterator(); String synonymousCodon; char mutation; while(familyMut.hasNext()){ synonymousCodon = familyMut.next(); mutation = synonymousCodon.charAt(2); if (mutation == 'A'){ Afits_.elementAt(0)[x] = 0f; } if (mutation == 'C'){ Cfits_.elementAt(0)[x] = 0f; } if (mutation == 'T'){ Tfits_.elementAt(0)[x] = 0f; } if (mutation == 'G'){ Gfits_.elementAt(0)[x] = 0f; } } String optimalTriplet = CodonTable.getOptimalTriplet(codon); String worstTriplet = CodonTable.getWorstTriplet(codon); char optimalMutation = optimalTriplet.charAt(2); char worstMutation = worstTriplet.charAt(2); if (optimalMutation == 'A'){ Afits_.elementAt(0)[x] = codonBiasedFitness_; } if (worstMutation == 'A'){ Afits_.elementAt(0)[x] = -codonBiasedFitness_; } if (optimalMutation == 'C'){ Cfits_.elementAt(0)[x] = codonBiasedFitness_; } if (worstMutation == 'C'){ Cfits_.elementAt(0)[x] = -codonBiasedFitness_; } if (optimalMutation == 'T'){ Tfits_.elementAt(0)[x] = codonBiasedFitness_; } if (worstMutation == 'T'){ Tfits_.elementAt(0)[x] = -codonBiasedFitness_; } if (optimalMutation == 'G'){ Gfits_.elementAt(0)[x] = codonBiasedFitness_; } if (worstMutation == 'G'){ Gfits_.elementAt(0)[x] = -codonBiasedFitness_; } } if (Afits_.elementAt(0)[x] == fillerVal){ Afits_.elementAt(0)[x] = getRandomCDSFitness()*CDSfitnessMultiplier + CDSfitnessShift; } if (Cfits_.elementAt(0)[x] == fillerVal){ Cfits_.elementAt(0)[x] = getRandomCDSFitness()*CDSfitnessMultiplier + CDSfitnessShift; } if (Tfits_.elementAt(0)[x] == fillerVal){ Tfits_.elementAt(0)[x] = getRandomCDSFitness()*CDSfitnessMultiplier + CDSfitnessShift; } if (Gfits_.elementAt(0)[x] == fillerVal){ Gfits_.elementAt(0)[x] = getRandomCDSFitness()*CDSfitnessMultiplier + CDSfitnessShift; } } else { Afits_.elementAt(0)[x] = 
getRandomFitness()*fitnessMultiplier + fitnessShift; Cfits_.elementAt(0)[x] = getRandomFitness()*fitnessMultiplier + fitnessShift; Tfits_.elementAt(0)[x] = getRandomFitness()*fitnessMultiplier + fitnessShift; Gfits_.elementAt(0)[x] = getRandomFitness()*fitnessMultiplier + fitnessShift; } if (seqChar == 'A') { Afits_.elementAt(0)[x] = 0f; Cfits_.elementAt(0)[x] += IndividualCreator.MRI_[x]*MRIConstant_; Gfits_.elementAt(0)[x] += IndividualCreator.MRI_[x]*MRIConstant_; } else if (seqChar == 'C') { Cfits_.elementAt(0)[x] = 0f; Tfits_.elementAt(0)[x] -= IndividualCreator.MRI_[x]*MRIConstant_; Afits_.elementAt(0)[x] -= IndividualCreator.MRI_[x]*MRIConstant_; } else if (seqChar == 'T') { Tfits_.elementAt(0)[x] = 0f; Cfits_.elementAt(0)[x] += IndividualCreator.MRI_[x]*MRIConstant_; Gfits_.elementAt(0)[x] += IndividualCreator.MRI_[x]*MRIConstant_; } else { // seqChar == 'G' Gfits_.elementAt(0)[x] = 0f; Tfits_.elementAt(0)[x] -= IndividualCreator.MRI_[x]*MRIConstant_; Afits_.elementAt(0)[x] -= IndividualCreator.MRI_[x]*MRIConstant_; } } for (int i=1; i nonIdenticalEnvs = new Vector(); for (int i=0; i envIt = nonIdenticalEnvs.iterator(); while (envIt.hasNext()){ int env = envIt.next(); try{ Afits_.elementAt(env)[x] = fillerVal; Cfits_.elementAt(env)[x] = fillerVal; Tfits_.elementAt(env)[x] = fillerVal; Gfits_.elementAt(env)[x] = fillerVal; }catch (Exception e){ System.out.println("input file has too many environments"); System.exit(-1); } char seqChar = Character.toUpperCase(sequence.charAt(x)); if (IndividualCreator.isExonic_[x]){ if (IndividualCreator.getFrame(x) == 2){ String codon = IndividualCreator.getCodons()[x]; Iterator familyMut = CodonTable.getTripletFamilies().get(codon).iterator(); String synonymousCodon; char mutation; while(familyMut.hasNext()){ synonymousCodon = familyMut.next(); mutation = synonymousCodon.charAt(2); if (mutation == 'A'){ Afits_.elementAt(env)[x] = 0f; } if (mutation == 'C'){ Cfits_.elementAt(env)[x] = 0f; } if (mutation == 'T'){ Tfits_.elementAt(env)[x] = 0f; } if (mutation == 'G'){ Gfits_.elementAt(env)[x] = 0f; } } String optimalTriplet = CodonTable.getOptimalTriplet(codon); String worstTriplet = CodonTable.getWorstTriplet(codon); char optimalMutation = optimalTriplet.charAt(2); char worstMutation = worstTriplet.charAt(2); if (optimalMutation == 'A'){ Afits_.elementAt(env)[x] = codonBiasedFitness_; } if (worstMutation == 'A'){ Afits_.elementAt(env)[x] = -codonBiasedFitness_; } if (optimalMutation == 'C'){ Cfits_.elementAt(env)[x] = codonBiasedFitness_; } if (worstMutation == 'C'){ Cfits_.elementAt(env)[x] = -codonBiasedFitness_; } if (optimalMutation == 'T'){ Tfits_.elementAt(env)[x] = codonBiasedFitness_; } if (worstMutation == 'T'){ Tfits_.elementAt(env)[x] = -codonBiasedFitness_; } if (optimalMutation == 'G'){ Gfits_.elementAt(env)[x] = codonBiasedFitness_; } if (worstMutation == 'G'){ Gfits_.elementAt(env)[x] = -codonBiasedFitness_; } } if (Afits_.elementAt(env)[x] == fillerVal){ Afits_.elementAt(env)[x] = getRandomCDSFitness()*CDSfitnessMultiplier + CDSfitnessShift; } if (Cfits_.elementAt(env)[x] == fillerVal){ Cfits_.elementAt(env)[x] = getRandomCDSFitness()*CDSfitnessMultiplier + CDSfitnessShift; } if (Tfits_.elementAt(env)[x] == fillerVal){ Tfits_.elementAt(env)[x] = getRandomCDSFitness()*CDSfitnessMultiplier + CDSfitnessShift; } if (Gfits_.elementAt(env)[x] == fillerVal){ Gfits_.elementAt(env)[x] = getRandomCDSFitness()*CDSfitnessMultiplier + CDSfitnessShift; } } else { Afits_.elementAt(env)[x] = getRandomFitness()*fitnessMultiplier + fitnessShift; 
Cfits_.elementAt(env)[x] = getRandomFitness()*fitnessMultiplier + fitnessShift; Tfits_.elementAt(env)[x] = getRandomFitness()*fitnessMultiplier + fitnessShift; Gfits_.elementAt(env)[x] = getRandomFitness()*fitnessMultiplier + fitnessShift; } if (seqChar == 'A') { Afits_.elementAt(env)[x] = 0f; Cfits_.elementAt(env)[x] += IndividualCreator.MRI_[x]*MRIConstant_; Gfits_.elementAt(env)[x] += IndividualCreator.MRI_[x]*MRIConstant_; } else if (seqChar == 'C') { Cfits_.elementAt(env)[x] = 0f; Tfits_.elementAt(env)[x] -= IndividualCreator.MRI_[x]*MRIConstant_; Afits_.elementAt(env)[x] -= IndividualCreator.MRI_[x]*MRIConstant_; } else if (seqChar == 'T') { Tfits_.elementAt(env)[x] = 0f; Cfits_.elementAt(env)[x] += IndividualCreator.MRI_[x]*MRIConstant_; Gfits_.elementAt(env)[x] += IndividualCreator.MRI_[x]*MRIConstant_; } else { // seqChar == 'G' Gfits_.elementAt(env)[x] = 0f; Tfits_.elementAt(env)[x] -= IndividualCreator.MRI_[x]*MRIConstant_; Afits_.elementAt(env)[x] -= IndividualCreator.MRI_[x]*MRIConstant_; } } } // 10/10/2011 removed writing of fitness tables for (int env=0; env pbData = pb.getMutations(); for (int i=0; i < numEnv_; i++){ if (pbData.get(position) == 'G'){ fitness[i] = Gfits_.elementAt(i)[position]; }else if(pbData.get(position) == 'C'){ fitness[i] = Cfits_.elementAt(i)[position]; }else if(pbData.get(position) == 'T'){ fitness[i] = Tfits_.elementAt(i)[position]; }else{ fitness[i] = Afits_.elementAt(i)[position]; } } return fitness; } } /** * * Calculate fitness for an entire PackedBlock * * @param pb PackedBlock for which you need fitness * @param blockNumber blockNumber in the prototype Indvidual * @return fitness value for specified PackedBlock */ public static float [] getFitness (PackedBlock pb){ float [] fitness = new float [numEnv_]; if (isSeqStored_){ Iterator i = pb.mutationIndices_.iterator(); // index of mutation int idx; // array of fitnesses for each population float [] tmpFloat = new float [numEnv_]; // while this packedBlock has more mutations while (i.hasNext()){ // get index of mutation idx = i.next().intValue(); // get fitness array (size = number pops )for mutation at this index tmpFloat = getFitnessAt(pb, idx); for (int j=0; j < numEnv_; j++){ fitness[j] += tmpFloat[j]; } } return fitness; }else{ Iterator i = pb.mutations_.keySet().iterator(); int idx; float [] tmpFloat = new float [numEnv_]; int blockNumber = pb.getBlockNumber(); for (int j=0; j < numEnv_; j++){ fitness[j] = fitnessOffset_.elementAt(j)[blockNumber]; } while (i.hasNext()){ // get index of mutation idx = i.next().intValue(); // get fitness array (size = number pops )for mutation at this index tmpFloat = getFitnessAt(pb, idx); for (int j=0; j < numEnv_; j++){ fitness[j] += tmpFloat[j]; } } return fitness; } } /** * @param pb PackedBlock containing base for which * fitness is needed * @param position position within block (not chromosome * sequence) * @return fitness value for specified position */ public static Vector fixMutations(Vector population){ totalNumPolySites_ = 0; int numBlocks = IndividualCreator.getNumBlocks(); int pSize = population.size(); for (int blockIdx=0; blockIdx < numBlocks; blockIdx++){ TreeMap fixedMutations = new TreeMap (population.elementAt(0).maternal_.blocks_.elementAt(blockIdx).getMutations()); int mutIdx; for (int popIdx=0; popIdx < pSize; popIdx++){ Iterator mutIt = fixedMutations.keySet().iterator(); TreeMap popIdxMutMaternal = population.elementAt(popIdx).maternal_. 
blocks_.elementAt(blockIdx).getMutations(); TreeMap popIdxMutPaternal = population.elementAt(popIdx).paternal_. blocks_.elementAt(blockIdx).getMutations(); Vector mutNotFixed = new Vector(); while (mutIt.hasNext()){ mutIdx = mutIt.next().intValue(); if (!popIdxMutPaternal.containsKey(mutIdx) || !popIdxMutMaternal.containsKey(mutIdx)){ mutNotFixed.add(mutIdx); } if (popIdxMutPaternal.containsKey(mutIdx) && popIdxMutMaternal.containsKey(mutIdx) && (!popIdxMutPaternal.get(mutIdx).equals(fixedMutations.get(mutIdx)) || !popIdxMutMaternal.get(mutIdx).equals(fixedMutations.get(mutIdx)))){ mutNotFixed.add(mutIdx); } } for (int j=0; j < mutNotFixed.size(); j++){ fixedMutations.remove(mutNotFixed.elementAt(j)); } } // remove old fitness offset at positions where new fixed mutations occur Iterator mutIt = fixedMutations.keySet().iterator(); mutIt = fixedMutations.keySet().iterator(); while (mutIt.hasNext()){ mutIdx = mutIt.next().intValue(); for (int i =0; i < numEnv_; i++){ if (consensusSeq_.charAt(mutIdx) == 'G'){ fitnessOffset_.elementAt(i)[blockIdx] -= Gfits_.elementAt(i)[mutIdx]; }else if(consensusSeq_.charAt(mutIdx) == 'C'){ fitnessOffset_.elementAt(i)[blockIdx] -= Cfits_.elementAt(i)[mutIdx]; }else if(consensusSeq_.charAt(mutIdx) == 'T'){ fitnessOffset_.elementAt(i)[blockIdx] -= Tfits_.elementAt(i)[mutIdx]; }else{ fitnessOffset_.elementAt(i)[blockIdx] -= Afits_.elementAt(i)[mutIdx]; } } } totalNumMutFixed_ += fixedMutations.size(); mutIt = fixedMutations.keySet().iterator(); // set new consensus sequence w/ fixed mutations while (mutIt.hasNext()){ mutIdx = mutIt.next().intValue(); consensusSeq_.setCharAt(mutIdx, fixedMutations.get(mutIdx)); } // remove fixed mutations from population for (int popIdx=0; popIdx < pSize; popIdx++){ mutIt = fixedMutations.keySet().iterator(); while (mutIt.hasNext()){ mutIdx = mutIt.next().intValue(); population.elementAt(popIdx).paternal_.blocks_ .elementAt(blockIdx).mutations_.remove(mutIdx); population.elementAt(popIdx).maternal_.blocks_ .elementAt(blockIdx).mutations_.remove(mutIdx); } } // set new fitness offset mutIt = fixedMutations.keySet().iterator(); while (mutIt.hasNext()){ mutIdx = mutIt.next().intValue(); for (int i =0; i < numEnv_; i++){ if (fixedMutations.get(mutIdx) == 'G'){ fitnessOffset_.elementAt(i)[blockIdx] += Gfits_.elementAt(i)[mutIdx]; }else if(fixedMutations.get(mutIdx) == 'C'){ fitnessOffset_.elementAt(i)[blockIdx] += Cfits_.elementAt(i)[mutIdx]; }else if(fixedMutations.get(mutIdx) == 'T'){ fitnessOffset_.elementAt(i)[blockIdx] += Tfits_.elementAt(i)[mutIdx]; }else{ fitnessOffset_.elementAt(i)[blockIdx] += Afits_.elementAt(i)[mutIdx]; } } } TreeSet polySites = new TreeSet(); for (int popIdx=0; popIdx < pSize; popIdx++){ TreeMap popIdxMutMaternal = population.elementAt(popIdx).maternal_. blocks_.elementAt(blockIdx).getMutations(); TreeMap popIdxMutPaternal = population.elementAt(popIdx).paternal_. 
blocks_.elementAt(blockIdx).getMutations(); mutIt = popIdxMutMaternal.keySet().iterator(); while (mutIt.hasNext()){ mutIdx = mutIt.next().intValue(); polySites.add(mutIdx); } mutIt = popIdxMutPaternal.keySet().iterator(); while (mutIt.hasNext()){ mutIdx = mutIt.next().intValue(); polySites.add(mutIdx); } } totalNumPolySites_ += polySites.size(); } return population; } } ProbabilityStruct.java package fitness; import engine.Stats; public class ProbabilityStruct { int singleton_ = 0; float probability_ = 0.0f; public int getSingleton() { return singleton_; } public float getProbability() { return probability_; } protected ProbabilityStruct(int singleton, float probability ){ singleton_ = singleton; probability_ = probability; } } CodonTable.java package gene; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.Hashtable; import java.util.Iterator; import java.util.Scanner; import java.util.Vector; public class CodonTable { static Hashtable> tripletFamilies_ = new Hashtable>(); static Hashtable tripletFrequency_ = new Hashtable(); static Hashtable optimalTriplet_ = new Hashtable(); static Hashtable worstTriplet_ = new Hashtable(); public static Hashtable> getTripletFamilies(){ return tripletFamilies_; } public static Hashtable getTripletFreqencyTable(){ return tripletFrequency_; } public static String getOptimalTriplet(String codon){ String optimalTriplet = optimalTriplet_.get(codon); return optimalTriplet; } public static String getWorstTriplet(String codon){ String worstTriplet = worstTriplet_.get(codon); return worstTriplet; } public static void readCodonTable(String codonFileName){ Vector familyCharacters = new Vector(); Hashtable> tmpCodonUsage = new Hashtable>(); try { Scanner in = new Scanner(new BufferedReader(new FileReader(codonFileName))); int ctr = 0; String triplet = null; Character letter = null; Float frequency; while(in.hasNext()) { String str = new String(in.next()); if (ctr % 5 == 0){ triplet = new String(str); } if (ctr % 5 == 1){ letter = new Character(str.charAt(0)); if (!tmpCodonUsage.containsKey(letter)){ tmpCodonUsage.put(letter, new Vector()); familyCharacters.add(letter); } } if (ctr % 5 == 2){ frequency = new Float(str); tmpCodonUsage.get(letter).add(triplet); tripletFrequency_.put(triplet, frequency); } ctr++; } in.close(); } catch (IOException ioe){ ioe.printStackTrace(); } // done reading codon table from file Iterator it = familyCharacters.iterator(); String triplet; String synonomousTriplet; Character familyCharacter; while(it.hasNext()){ familyCharacter = it.next(); Iterator tripletIterator = tmpCodonUsage.get(familyCharacter).iterator(); while(tripletIterator.hasNext()){ triplet = tripletIterator.next(); Vector synonomousTriplets = new Vector(); Iterator synonomousTripletIterator = tmpCodonUsage.get(familyCharacter).iterator(); while (synonomousTripletIterator.hasNext()){ synonomousTriplet = synonomousTripletIterator.next(); if (synonomousTriplet.charAt(0) == triplet.charAt(0)){ synonomousTriplets.add(synonomousTriplet); } } tripletFamilies_.put(triplet, synonomousTriplets); } } // find optimal triplet for each aa Iterator tripletIterator = tripletFamilies_.keySet().iterator(); // iterate through all codons String optimalTriplet; while(tripletIterator.hasNext()){ triplet = tripletIterator.next(); optimalTriplet_.put(triplet, triplet); // set optimal codon to cur codon Iterator optimalTripletIterator = tripletFamilies_.get(triplet).iterator(); // iterate through all codon for family 
while(optimalTripletIterator.hasNext()){ optimalTriplet = optimalTripletIterator.next(); // set current codon to optimal if freq > that which is already stored if (tripletFrequency_.get(optimalTriplet) > tripletFrequency_.get(optimalTriplet_.get(triplet))){ optimalTriplet_.put(triplet, optimalTriplet); } } } // find worst triplet for each aa tripletIterator = tripletFamilies_.keySet().iterator(); String worstTriplet; while(tripletIterator.hasNext()){ triplet = tripletIterator.next(); worstTriplet_.put(triplet, triplet); Iterator worstTripletIterator = tripletFamilies_.get(triplet).iterator(); while(worstTripletIterator.hasNext()){ worstTriplet = worstTripletIterator.next(); if (tripletFrequency_.get(worstTriplet) < tripletFrequency_.get(worstTriplet_.get(triplet))){ worstTriplet_.put(triplet, worstTriplet); } } } } } Exon.java // Exon.java // Copyright 2010 Andrew McSweeny // All rights reserved package gene; import tools.Consts; import tools.Seq; public class Exon { int number_; int start_; int end_; int strand_; int frame_; int idxStart_; int idxEnd_; String sequence_; public int getStart() { return start_; } public int getEnd() { return end_; } public int getIdxStart() { return idxStart_; } public int getIdxEnd() { return idxEnd_; } public int getStrand() { return strand_; } public int getFrame(){ return frame_; } public Exon (int start, int end, int number, int frame, String sequence){ sequence_ = sequence; start_ = start; end_ = end; frame_ = frame; if (start>end){ // minus strand idxEnd_ = Seq.bpToIdx(start); idxStart_ = Seq.bpToIdx(end); strand_ = Consts.MINUS; } else { idxStart_ = Seq.bpToIdx(start); idxEnd_ = Seq.bpToIdx(end); strand_ = Consts.PLUS; } } } Gene.java // Gene.java // Copyright 2010 Andrew McSweeny // All rights reserved package gene; import java.util.Iterator; import java.util.TreeMap; import java.util.Vector; import tools.Consts; public class Gene { private int strand_; private int idxStart_ = Consts.MAX_INT; private int idxEnd_ = 0; public String name_; TreeMap exons_; public int getStrand() { return strand_; } public Exon getExon(int exonNum) { return exons_.get(exonNum); } public int getIdxStart() { return idxStart_; } public int getIdxEnd() { return idxEnd_; } public Gene(String name, int strand){ name_ = name; strand_ = strand; exons_ = new TreeMap(); } public void addExon(int exonNum, Exon e){ exons_.put(exonNum,e); } public void computeIndexes(){ Iterator exonIterator = exons_.keySet().iterator(); while (exonIterator.hasNext()){ Integer exonNum = exonIterator.next(); if (exons_.get(exonNum).getIdxStart() < idxStart_){ idxStart_ = exons_.get(exonNum).getIdxStart(); } if (exons_.get(exonNum).getIdxEnd() > idxEnd_){ idxEnd_ = exons_.get(exonNum).getIdxEnd(); } } } } Individual.java // Copyright 2010 Andrew McSweeny // All rights reserved // 06/28: computeFitness modified to use float[] package individual; import static java.lang.System.out; import java.util.Arrays; import java.util.Iterator; import java.util.TreeSet; import java.util.Vector; import java.io.FileWriter; import java.io.Serializable; import java.lang.System; import dominance.Dominance; import tools.Consts; import engine.Crossover; import engine.Stats; import fitness.FitnessCalc; import bitpack.BitPack; import bitpack.PackedBlock; import bitpack.PackedChromosome; public class Individual implements Serializable{ // fields private static final long serialVersionUID = 7969007602224845329L; // will be set to either Consts.MALE or Consts.FEMALE int gender_; int mutationGen_=-1; // Percentile rank in 
fitness when compared with the entire population float fitnessPercentileRank_; private int environment_; // maternal and paternal chromosomes public PackedChromosome maternal_; public PackedChromosome paternal_; // Fitness of individual public float [] fitness_; public int getEnvironment() { return environment_; } public void setEnvironment(int environment) { this.environment_ = new Integer(environment); } public float getFitnessPercentileRank() { return fitnessPercentileRank_; } public void setFitnessPercentileRank( float fitnessPercentileRank){ this.fitnessPercentileRank_ = fitnessPercentileRank; } // methods public int getGender() { return gender_; } public void setGender(int gender) { this.gender_ = gender; } // takes a 1 or 0 as an argument, flips 1 to 0, 0 to 1 private static int flip(int one_or_zero){ switch(one_or_zero){ case 0: one_or_zero = 1; break; case 1: one_or_zero = 0; break; } return one_or_zero; } public PackedChromosome getMaternalChromosome(){ return this.maternal_; } public PackedChromosome getPaternalChromosome(){ return this.paternal_; } // returns a packedChromosome representing a gamete for this Individual // Two gametes are joined to create a new Individual using the // Individual (packedChromosome, packedChromsome) constructor public PackedChromosome getGamete(boolean isSeqStored){ int numCrossovers = Crossover.getNumCrossovers(); int numLetters = maternal_.getNumLetters(); int startChr = Stats.nextInt(2); int currentChr = startChr; PackedChromosome gamete = null; PackedChromosome gameteChr = null; PackedChromosome copyChr = null; PackedBlock pbTmp; switch ( startChr ){ case (Consts.MATERNAL): gamete = new PackedChromosome(maternal_); gameteChr = maternal_; copyChr = paternal_; break; case (Consts.PATERNAL): gamete = new PackedChromosome(paternal_); gameteChr = paternal_; copyChr = maternal_; break; } if (numCrossovers == 0) {return gamete;} if (numCrossovers > maternal_.getNumBlocks()) numCrossovers = maternal_.getNumBlocks(); // create an array to hold crossover locations // since crossover occurs between pairs of locations, // last crossover position is the end of the chromosome if (numCrossovers == 0) {return gamete;} // create an array to hold crossover locations // since crossover occurs between pairs of locations, // last crossover position is the end of the chromosome int [] crossovers = new int[numCrossovers+1]; for (int x = 0; x < numCrossovers; x++){ try { crossovers[x] = Crossover.getRandomCrossover(); } catch ( Exception e ){ e.printStackTrace(); } } crossovers[numCrossovers] = numLetters - 1; // sort crossovers, since we will be taking position // pairs from index 0 to numLetters-1 Arrays.sort(crossovers); // randomly determine which chromosome to start with // 0: maternal, 1: paternal int endIdx = -1; int blockIdx = 0; // Crossover loop for (int x = 0; x <= numCrossovers; x++){ // in case crossover occurred within the same block if (x > 0 && IndividualCreator.getPosBlock(crossovers[x]) == IndividualCreator.getPosBlock(crossovers[x-1]) && crossovers[x] != numLetters-1){ blockIdx--; endIdx = crossovers[x]; int offset = endIdx - gamete.getBlock(blockIdx).getStartIdx(); if (currentChr == startChr){ pbTmp = BitPack.CrossBlocks(isSeqStored, gamete.getBlock(blockIdx), copyChr.getBlock(blockIdx), offset); pbTmp.computeFitness(); gamete.setBlock(pbTmp, blockIdx); currentChr = flip(currentChr); blockIdx++; continue; } else { pbTmp = BitPack.CrossBlocks(isSeqStored, gamete.getBlock(blockIdx), gameteChr.getBlock(blockIdx), offset); pbTmp.computeFitness(); 
gamete.setBlock(pbTmp, blockIdx); currentChr = flip(currentChr); blockIdx++; continue; } } endIdx = crossovers[x]; if (currentChr == startChr) { // last crossover is last position in genome // at this point we are done crossing over and the rest // of the gamete is composed of the starting chromosome // so break out if (endIdx == crossovers[numCrossovers]) break; if (gamete.blocks_.size() == blockIdx) break; // continue to loop through blocks because no blocks need to be changed // current chromosome has already been created while (gamete.getBlock(blockIdx).getEndIdx() < endIdx) { // find block where crossover occurs blockIdx++; } // create a new block w/ part the maternal and part paternal // offset is where crossover occurs within block int offset = endIdx - gamete.getBlock(blockIdx).getStartIdx(); if (gamete.getBlock(blockIdx) != copyChr.getBlock(blockIdx)) { pbTmp = BitPack.CrossBlocks(isSeqStored, gamete.getBlock(blockIdx), copyChr.getBlock(blockIdx), offset); pbTmp.computeFitness(); gamete.setBlock(pbTmp, blockIdx); currentChr = flip(currentChr); } blockIdx++; } else { if (gamete.blocks_.size() == blockIdx) break; while (gamete.getBlock(blockIdx).getEndIdx() < endIdx) { // find block where crossover occurs and // copy blocks leading up to it if // current chromosome not the starting chromosome if(IndividualCreator.isAlleleShared()){ gamete.setBlock(copyChr.getBlock(blockIdx),blockIdx); }else{ gamete.setBlock(new PackedBlock(copyChr.getBlock(blockIdx)),blockIdx); } blockIdx++; } // create a new block w/ part the maternal and part paternal int offset = endIdx - gamete.getBlock(blockIdx).getStartIdx(); // check and see if next crossover occurs in same block if (gamete.getBlock(blockIdx) != copyChr.getBlock(blockIdx)) { pbTmp = BitPack.CrossBlocks(isSeqStored, copyChr.getBlock(blockIdx), gamete.getBlock(blockIdx), offset); pbTmp.computeFitness(); gamete.setBlock(pbTmp, blockIdx); currentChr = flip(currentChr); } blockIdx++; } } return gamete; } public float [] getFitness(){ return fitness_; } // perform random base substitutions on maternal and paternal chromosomes public synchronized void randomMutation(int numMutations, int gen){ if ( gen == mutationGen_) return; mutationGen_ = gen; for (int x = 0; x < numMutations; x++){ switch (Stats.nextInt(2)){ case 0: maternal_.randomMutation(); break; case 1: paternal_.randomMutation(); } } } // Compute fitness for an Individual by examining each locus block // which has two alleles. 
Use principles of dominance to determine // fitness for each locus according to the alleles present public void computeFitness(){ // fitness = maternal_.getFitness() + paternal_.getFitness(); //SAM 0628: Fitness is now an array, for each environment fitness_ = new float[FitnessCalc.getNumEnv()]; Vector mBlocks = maternal_.getBlocks(); Vector pBlocks = paternal_.getBlocks(); // fitness of maternal and paternal alleles for a particular locus //SAM 0628: Fitness is now an array, for each environment float [] mFitness; float [] pFitness; // loop through loci (locus blocks) for (int x = 0; x < mBlocks.size(); x++){ // get maternal and paternal alleles PackedBlock mBlock = mBlocks.elementAt(x); PackedBlock pBlock = pBlocks.elementAt(x); // SAM 0628: getFitness() now returns a float [] // compute fitness for maternal and paternal alleles mFitness = mBlock.getFitness(); pFitness = pBlock.getFitness(); // coefficients used to calculate fitness for two alleles float maxCoeff = mBlock.getMaxCoefficient(); float minCoeff = mBlock.getMinCoefficient(); float meanCoeff = mBlock.getMeanCoefficient(); // calculate fitness for locus and add to total fitness // for individual for (int j = 0; j < FitnessCalc.getNumEnv(); j++){ if ( Dominance.isDominanceWeighted()){ if (mBlock.isCDS()){ fitness_[j] += Math.abs(mFitness[j]-pFitness[j])*Dominance.getDominanceWeight() + Math.min(mFitness[j],pFitness[j]); } else { fitness_[j] += (mFitness[j]+pFitness[j])/2; } } else { fitness_[j] += Math.max(mFitness[j], pFitness[j])*maxCoeff + Math.min(mFitness[j], pFitness[j])*minCoeff + (mFitness[j]+pFitness[j])/2*meanCoeff; } } } } // create a new individual from two chromosomes // 06/28: assigns an environment to the individual public Individual (PackedChromosome maternal, PackedChromosome paternal, int environment){ this.environment_ = environment; this.gender_ = Stats.nextInt(2); this.maternal_ = new PackedChromosome(maternal); this.paternal_ = new PackedChromosome(paternal); this.computeFitness(); } // create a new individual from two chromosomes // used by Creator class to create first individual public Individual (PackedChromosome maternal, PackedChromosome paternal){ // gender assigned randomly this.gender_ = Stats.nextInt(2); this.maternal_ = new PackedChromosome(maternal); this.paternal_ = new PackedChromosome(paternal); this.computeFitness(); } // make an exact copy of an individual // this constructor is used only at beginning of simulation // to make copies of the "adam" individual to start the initial population public Individual (Individual i){ // gender assigned randomly this.gender_ = Stats.nextInt(2); maternal_ = new PackedChromosome(i.maternal_); paternal_ = new PackedChromosome(i.paternal_); // this constructor only used at beginning of population // initiation //SAM 0628: Individual fitness is now a float[] this.fitness_ = new float [FitnessCalc.getNumEnv()]; environment_ = Stats.nextInt(FitnessCalc.getNumEnv()); } public void printFitness(){ System.out.println( Arrays.toString(fitness_) ); } } IndividualCreator.java // Creator.java // Copyright 2010 Andrew McSweeny // All rights reserved // 6/28 modified to take number of environments package individual; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Vector; import dominance.DomStruct; import
tools.Consts; import tools.Seq; import bitpack.PackedBlock; import bitpack.PackedChromosome; import gene.Exon; import gene.Gene; /* * This class is used to create the first Individual object which is then copied * to create the starting population for the simulation. This class is necessary * to divide the chromosome sequence into locus blocks. This division into locus * blocks allows faster computation of fitness for individuals, since fitness only * needs to be recomputed within blocks where mutation occurs. */ public class IndividualCreator { /* Fields */ static int numExonicPositions_ = 0; static int numCdsPositions_ = 0; // when filled, provides a probability distribution for dominance patterns // See dominance.Dominance for more information static Vector dominanceProbabilities_; // The first individual created, used as a template for all first // individuals created static Individual adam_; // maximum number of bp to read from .FA file containing contig used to represent // chromosome, default is maximum value of integer (does not stop reading loop) private static int maxSize_ = Consts.MAX_INT; // string representation of chromosome; public static String strChr_; // chromosome is first read in as a character array static char [] chromosome_; static boolean isSeqStored_; static int numBlocks_; // these arrays are same size as sequence // for any given position in bp, tells you which block this position // corresponds to, and what the offset within said block this position // corresponds to (i.e. offset = 0, first base in block) public static int [] posBlock_; public static int [] posOffset_; // number of letters in chromosome contig private static int chrSize_; // Used when creating the first individual. This array alternates between // 0's and 1's to indicate which positions are within the coding portion of a gene: // (The protein, coding portions of CDS exons and introns between CDS exons) // TODO: rename isCDS_ to isGenic_ public static boolean isCDS_[]; // Used when filling the fitness tables in the FitnessCalc // class. The selection coefficients for possible substitutions at given // bp position depend on whether the position is CDS-exonic or untranslated/intergenic public static boolean isExonic_[]; public static String [] CDS_ ; // Array is filled with 0 for non-MRI positions, 1 for GC-rich MRI positions, // and -1 for GC-poor MRI positions public static byte MRI_[]; // frame (phase) for each 0-based bp position in genome public static int [] frame_; // complete codon for each 0-based bp position in exonic sequences public static String [] codons_; // number of environments public static int numEnv_; // Hash of Gene of objects with Gene Name as the key static HashMap genesHash_; private static boolean isAlleleShared_; private static int numGenes_; private static boolean isDominanceWeighted_; /* Methods */ // Reads exon positions from a .txt file. Exon positions are used // to determine starting and ending position of each gene. The entire // chromosome contig sequence is broken into locus blocks. The blocks // are generally alternating intergenic-genic-intergenic-genic etc... 
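// Illustrative layout only (positions below are hypothetical, not taken from any real input):
//   idx 0..4999        non-genic   -> block 0
//   idx 5000..7249     genic       -> block 1  (first codon of a gene through its last codon, introns included)
//   idx 7250..11999    non-genic   -> block 2
// Very long non-genic stretches may additionally be split at maxBlockSize (see createIndividual below).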
public static int getNumBlocks(){ return numBlocks_; } public static boolean isAlleleShared(){ return isAlleleShared_; } public static int getNumEnv(){ return numEnv_; } public static String [] getCodons(){ return codons_; } public static void readExons(String exonFileName, int size){ // contains information for each gene in the file containing exons // including the bp start genesHash_ = new HashMap (); isCDS_ = new boolean[size]; // initialize array so that every position is initially non-exonic isExonic_ = new boolean[size]; posBlock_ = new int[size]; posOffset_ = new int[size]; frame_ = new int[size]; codons_ = new String [size]; Arrays.fill(frame_, -1); Arrays.fill(isExonic_, false); String geneName = null; int exonNum = 0; try { // Open exon file BufferedReader in = new BufferedReader(new FileReader(exonFileName)); String line; String strStrand; int intStrand = 0; while(in.ready()){ line = in.readLine(); String [] tokens = line.split("\t"); // position tokens are the start and end of the exon String [] positionTokens = (new String(tokens[2])).split("\\.\\."); // on the left of the underscore is the genename String [] geneTokens = (new String (tokens[3])).split("_"); // on the right is exon number written as ex1, ex2, etc String [] exonTokens = (new String (geneTokens[1])).split("ex"); // here is the actual exon number exonNum = new Integer(exonTokens[1]); // genename geneName = geneTokens[0]; // get start and end of exon in bp // note, this does not correspond to position within // any String in Java, as positions in file assume // first base has position 1, Java strings are zero-based int start = new Integer(positionTokens[0]); int end = new Integer(positionTokens[1]); // if exon is outside boundaries of chromosomal sequence, // assume it is erroneous, goto next exon if ( start > size || end > size) break; // phase for current exon // Phase 0 means that the exon begins with a complete codon // phase 1 means that the first two bases (5-prime) of the exon // constitute the second and third bases of an interrupted codon. // Phase 2 means the first base of the exon consists of the last // base of an interrupted codon. 
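// Worked example (illustrative): for an exon annotated with phase 1, the first two exon bases
// complete a codon whose first base is the last base of the previous CDS exon; that
// boundary-spanning codon is stitched together in the phase == 1 and phase == 2 cases below.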
int phase = new Integer(tokens[4]); // strand of exon -- "plus" or "minus" strStrand = new String (tokens[1]); // the real start of the exon in the String or character array // representing the chromosome being evaluated (the index) int idxStart; // index of end int idxEnd; // the actual sequence of the exon String exonSeq; //TODO: account for possible invalid gene lengths (ie not divisible by 3) if (strStrand.equals("plus") ){ intStrand = Consts.PLUS; idxStart = Seq.bpToIdx(start); idxEnd = Seq.bpToIdx(end); // read sequence of exon exonSeq = strChr_.substring(idxStart, idxEnd+1); for (int i=idxStart, j=0; i <= idxEnd; i++, j++){ frame_[i] = (phase+j) %3; } for (int i=idxStart; i <= idxEnd; i++){ if (frame_[i] == 0 && i <= idxEnd-2){ StringBuffer thisTriplet = new StringBuffer(); thisTriplet.append(chromosome_[i]); thisTriplet.append(chromosome_[i+1]); thisTriplet.append(chromosome_[i+2]); codons_[i] = thisTriplet.toString(); codons_[i+1] = thisTriplet.toString(); codons_[i+2] = thisTriplet.toString(); } if (i == idxStart && phase == 1){ StringBuffer thisTriplet = new StringBuffer(); thisTriplet.append(chromosome_[genesHash_.get(geneName).getExon(exonNum-1).getIdxEnd()]); thisTriplet.append(chromosome_[i]); thisTriplet.append(chromosome_[i+1]); codons_[genesHash_.get(geneName).getExon(exonNum-1).getIdxEnd()] = thisTriplet.toString(); codons_[i] = thisTriplet.toString(); codons_[i+1] = thisTriplet.toString(); } if (i == idxStart && phase == 2){ StringBuffer thisTriplet = new StringBuffer(); thisTriplet.append(chromosome_[genesHash_.get(geneName).getExon(exonNum-1).getIdxEnd()-1]); thisTriplet.append(chromosome_[genesHash_.get(geneName).getExon(exonNum-1).getIdxEnd()]); thisTriplet.append(chromosome_[i]); codons_[genesHash_.get(geneName).getExon(exonNum-1).getIdxEnd()-1] = thisTriplet.toString(); codons_[genesHash_.get(geneName).getExon(exonNum-1).getIdxEnd()] = thisTriplet.toString(); codons_[i] = thisTriplet.toString(); } } } else { intStrand = Consts.MINUS; idxStart = Seq.bpToIdx(end); idxEnd = Seq.bpToIdx(start); exonSeq = new StringBuffer( strChr_.substring( idxStart,idxEnd+1)).toString(); exonSeq = Seq.reverseComplement(exonSeq); String test = Seq.reverseComplement("TACTGA"); for (int i=idxEnd, j=0; i >= idxStart; i--, j++){ frame_[i] = (phase+j) %3; } for (int i=idxEnd; i >= idxStart; i--){ if (frame_[i] == 0 && i >= idxStart+2){ StringBuffer thisTriplet = new StringBuffer(); thisTriplet.append(Seq.baseComp(chromosome_[i])); thisTriplet.append(Seq.baseComp(chromosome_[i-1])); thisTriplet.append(Seq.baseComp(chromosome_[i-2])); codons_[i] = thisTriplet.toString(); codons_[i-1] = thisTriplet.toString(); codons_[i-2] = thisTriplet.toString(); } if (frame_[i] == 0 && i == idxStart+1){ StringBuffer thisTriplet = new StringBuffer(); thisTriplet.append(Seq.baseComp(chromosome_[i])); thisTriplet.append(Seq.baseComp(chromosome_[i-1])); thisTriplet.append(Seq.baseComp(chromosome_[genesHash_.get(geneName).getExon(exonNum+1).getIdxEnd()])); codons_[i] = thisTriplet.toString(); codons_[i-1] = thisTriplet.toString(); codons_[genesHash_.get(geneName).getExon(exonNum+1).getIdxEnd()] = thisTriplet.toString(); } if (frame_[i] == 0 && i == idxStart){ StringBuffer thisTriplet = new StringBuffer(); thisTriplet.append(Seq.baseComp(chromosome_[i])); thisTriplet.append(Seq.baseComp(chromosome_[genesHash_.get(geneName).getExon(exonNum+1).getIdxEnd()])); thisTriplet.append(Seq.baseComp(chromosome_[genesHash_.get(geneName).getExon(exonNum+1).getIdxEnd()-1])); codons_[i] = thisTriplet.toString(); 
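// the other two bases of this boundary-spanning codon lie in the adjacent exon (minus strand),
// so the same codon string is recorded at those positions as well: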
codons_[genesHash_.get(geneName).getExon(exonNum+1).getIdxEnd()] = thisTriplet.toString(); codons_[genesHash_.get(geneName).getExon(exonNum+1).getIdxEnd()-1] = thisTriplet.toString(); } } } // Create a new entry in the hash containing Gene objects // Each Gene object is stored using its name as the key // in the hash. // If there is no key present yet for the gene this // exon belongs to, create a new entry in the hash containing Gene objects if (genesHash_.get(geneName)==null){ Gene g = new Gene(geneName,intStrand); genesHash_.put(geneName, g); } // Create a new Exon object to be added to the corresponding // Gene object Exon e = new Exon(start,end,exonNum,phase,exonSeq); // mark true in the boolean array for every position that is exonic // this array is checked when computing fitness for (int i = e.getIdxStart(); i<=e.getIdxEnd(); i++){ isExonic_[i] = true; numExonicPositions_++; } // add the current Exon to the Gene with the same name genesHash_.get(geneName).addExon(exonNum,e); } // get a list of gene names from the hash ArrayList keys = new ArrayList(genesHash_.keySet()); numGenes_ = keys.size(); // iterate through gene names Iterator it = keys.iterator(); while (it.hasNext()){ String gn = it.next(); // compute the start and end of each Gene, now that all exons have // been added to the gene genesHash_.get(gn).computeIndexes(); for (int i = genesHash_.get(gn).getIdxStart(); i <= genesHash_.get(gn).getIdxEnd(); i++){ // TODO: rename isCDS_ to isGenic_ // set isCDS_ to true from beginning of first codon, through all // exons and introns, up until the last codon isCDS_[i] = true; numCdsPositions_++; } } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println("Exception occured while reading gene file: Current Gene="+geneName+" Current Exon="+exonNum); } } public static HashMap getGenes(){ return genesHash_; } public static int getPosBlock(int pos) { return posBlock_[pos]; } public static int getPosOffset(int pos) { return posOffset_[pos]; } // read file containing locations of G+C rich MRI regions public static void readMRI(String MRIFileName, int size){ // size = number of letters in chromosome contig sequence MRI_ = new byte[size]; try { BufferedReader in = new BufferedReader(new FileReader(MRIFileName)); String line; while(in.ready()){ line = in.readLine(); String [] tokens = line.split("\t"); // get start and end of MRI region int start = Seq.bpToIdx( new Integer(tokens[0]).intValue()); int end = Seq.bpToIdx( new Integer(tokens[1]).intValue()); // if we're only using a small piece of the contig // that the MRI file was designed for, check to make sure // we really need to mark the current entry if (end < size){ if ( tokens[3].equals("GC_rich")){ // mark all positions that are GC rich +1 for (int x = start ; x <= end; x++) MRI_[x] = 1; } else { // mark GC poor positions -1 for (int x = start ; x <= end; x++) MRI_[x] = -1; } // remaining positions in MRI_ remain 0 } } } catch (IOException e) { e.printStackTrace(); } } public static PackedChromosome getMaternalChromsome(){ return adam_.getMaternalChromosome(); } public static int getFrame(int pos) { return frame_[pos]; } // One of the most important functions in program. Creates the first individual! 
// First individual has chromosome properly divided into CDS and nonCDS loci public static void createIndividual(int maxBlockSize){ int startIdx = 0; int endIdx = 0; int blockNumber = 0; // creates a new packedChromosome to which blocks will be // added PackedChromosome pc = new PackedChromosome(chrSize_); PackedChromosome pcWithSeq = new PackedChromosome(chrSize_); int x = 0; // state of each position in chromosome sequence // true when CDS (and for introns in between), false when non-CDS boolean state = isCDS_[x]; // STATE is false when intergenic int id = 0; // this loop uses the isCDS_ array to divide the chromosome into CDS_ // and non-CDS locus blocks while (x < chrSize_){ int offset = 0; int intergenicSize = 0; // loop until the CDS status of the base changes between true/false while(x < chrSize_ && isCDS_[x] == state ){ // posBlock array gives packedblock number for every position in genome posBlock_[x] = blockNumber; // offset of chromosome position within its respective block posOffset_[x] = offset; offset++; x++; intergenicSize++; // if intergenic and the block has reached the maximum block size, break // (intergenic regions can be very large and it is sometimes more efficient // computationally to break them into multiple blocks) if (state == false && intergenicSize == maxBlockSize) break; } startIdx=endIdx; endIdx=x; String seq = strChr_.substring(startIdx, endIdx); // create a new packedBlock based on the starting and ending indexes, // sequence, and whether the block is CDS or non-CDS // (isSeqStored_ is forwarded to the PackedBlock constructor, so a single call covers both cases) PackedBlock pb; PackedBlock pbWithSeq; pb = new PackedBlock(isSeqStored_,seq,blockNumber,startIdx,state); pb.setStartIdx(startIdx); pb.setEndIdx(endIdx-1); // add the new packedBlock to the packedChromosome, will use // two copies of this packedChromosome to create new Individual pc.addBlock(pb); blockNumber++; // if end of chromosome hasn't been reached, flip state between true/false if (x < chrSize_) state = isCDS_[x]; } numBlocks_=blockNumber; // set the total number of blocks the new packedChromosome contains pc.setNumBlocks(blockNumber); pcWithSeq.setNumBlocks(blockNumber); System.out.println("Number of blocks : " + blockNumber); System.out.println("Number of genes : " + numGenes_); System.out.println("Number of exonic bp : " + numExonicPositions_); // create new individual from two copies of the new packedChromosome adam_ = new Individual (pc,pc); } // returns total number of letters in Individual's chromosome public static int getChrSize() { return chrSize_; } // get String representing chromosome public static String getStrChr() { return strChr_; } // get the default individual, two copies of contig for a chromosome with no // mutations public static Individual getIndividual(){ return adam_; } // init function overloaded for situations where user wants to truncate // the contig being read in from the file to a maximum size public static void init ( String chrFileName, int maxSize, boolean isSeqStored, int numEnv, boolean isAlleleShared, boolean isDominanceWeighted){ maxSize_ = maxSize; init (chrFileName, isSeqStored, numEnv, isAlleleShared, isDominanceWeighted); } /** * Reads nucleotide data from chrFileName into the string strChr * @param chrFileName filename containing a single contig */ public static void init ( String chrFileName , boolean isSeqStored, int numEnv, boolean isAlleleShared, boolean isDominanceWeighted) { StringBuffer sbSeq =
new StringBuffer(); isDominanceWeighted_ = isDominanceWeighted; isAlleleShared_ = isAlleleShared; isSeqStored_ = isSeqStored; numEnv_ = numEnv; try { BufferedReader in = new BufferedReader(new FileReader(chrFileName)); String line = null; int counter = 0; while(in.ready()) { line = in.readLine(); // skip the FASTA header line; accumulate sequence lines up to maxSize_ letters if ( line.charAt(0) != '>'){ if ( counter + line.length() > maxSize_){ sbSeq.append(line.substring(0, maxSize_ - counter)); break; } sbSeq.append(line); counter += line.length(); } } in.close(); in = null; strChr_ = sbSeq.toString(); chromosome_ = sbSeq.toString().toCharArray(); chrSize_ = sbSeq.length(); } catch (IOException ioe){ ioe.printStackTrace(); } } } SubStruct.java // SubStruct.java // Copyright 2010 Andrew McSweeny // All rights reserved // Unchanged since 0.45 package substitution; public class SubStruct { public String startingBase_; public String endingBase_; double probability_; public String getStartingBase() { return startingBase_; } public String getEndingBase() { return endingBase_; } public double getProbability() { return probability_; } SubStruct( String startingBase, String endingBase, double probability) { startingBase_ = startingBase; endingBase_ = endingBase; probability_ = probability; } } Substitution.java // Substitution.java // Copyright 2010 Andrew McSweeny // All rights reserved package substitution; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.Iterator; import java.util.Vector; import engine.Stats; public class Substitution { // Vector of substitutions and their probabilities // e.g. P(A->T)=0.039 // probabilities should sum to 1 static Vector mutationProbabilities_; // read probabilities of all possible substitutions from a file public static void init(String mutationFileName){ mutationProbabilities_ = new Vector(); try { BufferedReader in = new BufferedReader(new FileReader(mutationFileName)); String line; while(in.ready()){ line = in.readLine(); String [] tokens = line.split("\t"); String startingBase = tokens[0]; String endingBase = tokens[1]; double probability = (new Double(tokens[2])).doubleValue(); SubStruct mp = new SubStruct(startingBase, endingBase, probability); mutationProbabilities_.add(mp); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } // generate a random substitution, based on the probability distribution // for possible substitutions. returns a two character string // e.g. "AC" represents the substitution A->C public static String getPair(){ float prob = Stats.nextFloat(); // cumulative probability Iterator i = mutationProbabilities_.iterator(); float cumProb = 0.0f; SubStruct m = null; while (i.hasNext()){ m = i.next(); cumProb += m.getProbability(); if (cumProb >= prob) { break; } } return new String (m.getStartingBase() + m.getEndingBase()); } } Clock.java // Clock.java // Copyright 2010 Andrew McSweeny // All rights reserved package tools; public class Clock { // Class containing two clocks used to record the amount of time different // portions of the program are taking as well as for estimating how long the // program will take to complete (See "verbose" option in Sim readConfigFile method).
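// Typical usage (illustrative sketch only, not taken from the original sources):
//   Clock.startClock();
//   ... run one generation ...
//   long ms = Clock.getStepTime();   // milliseconds since startClock()
//   ... run another generation ...
//   ms = Clock.getStepTime();        // milliseconds since the previous getStepTime() call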
static long current; static long start; static long elapsed; static long current2; static long start2; static long elapsed2; // Start the first clock public static void startClock() { start = System.currentTimeMillis(); } // First call: get the elapsed time in milliseconds since the clock was started, // subsequent calls: get the elapsed time in milliseconds since the last call public static long getStepTime(){ current = System.currentTimeMillis(); elapsed = current - start; start = current; return elapsed; } // Start the second clock public static void startClock2() { start2 = System.currentTimeMillis(); } // First call: get the elapsed time in milliseconds since the clock was started, // subsequent calls: get the elapsed time in milliseconds since the last call public static long getStepTime2(){ current2 = System.currentTimeMillis(); elapsed2 = current2 - start2; start2 = current2; return elapsed2; } } Consts.java // Consts.java // Copyright 2010 Andrew McSweeny // All rights reserved // Unchanged since 0.45 package tools; public final class Consts { public static final int PAIRING_WITHIN_ENVIRONMENTS = 0; public static final int PAIRING_BETWEEN_ENVIRONMENTS = 1; public static final int THIRDMATINGTYPE = 2; public static final int PLUS = 0; public static final int MINUS = 1; public static final int MATERNAL = 0; public static final int PATERNAL = 1; public static final int DOMINANT = 2; public static final int RECESSIVE = 1; public static final int INTERGENIC = 0; public static final int MALE = 0; public static final int FEMALE = 1; public static final int MAX_INT = 2147483647; public static final int A = 0; public static final int C = 1; public static final int T = 2; public static final int G = 3; private Consts(){ //prevents the native class from //calling this constructor throw new AssertionError(); } } EasyClock.java // Clock.java // Copyright 2010 Andrew McSweeny // All rights reserved package tools; public class EasyClock { // Class containing two clocks used to record the amount of time different // portions of the program are taking as well as for estimating how long the // program will take to complete (See "verbose" option in Sim readConfigFile method). long current; long start; long elapsed; // Start the first clock public void startClock() { start = System.currentTimeMillis(); } // First call: get the elapsed time in milliseconds since the clock was started, // subsequent calls: get the elapsed time in milliseconds since the last call public int getStepTime(){ current = System.currentTimeMillis(); elapsed = current - start; start = current; return (int)elapsed; } } Seq.java // Sequence.java // Copyright 2010 Andrew McSweeny // All rights reserved package tools; public class Seq { // Converts 1-based positions from data files to indexes in arrays public static int bpToIdx(int pos){ return pos-1; } public static char baseComp(char base){ switch(Character.toUpperCase(base)){ case 'A': return 'T'; case 'C': return 'G'; case 'T': return 'A'; case 'G': return 'C'; default: return '\n'; } } public static String reverseComplement(String query){ StringBuffer result = new StringBuffer(query); String StrResult = new String (result.reverse()); StrResult = StrResult.replace('A', 't'); StrResult = StrResult.replace('T', 'A'); StrResult = StrResult.replace('t', 'T'); StrResult = StrResult.replace('C', 'g'); StrResult = StrResult.replace('G', 'C'); StrResult = StrResult.replace('g', 'G'); return StrResult; } }
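The short sketch below is not part of the original sources; it is an illustrative driver (the class name SeqDemo is hypothetical) showing how the Seq helpers above behave, assuming tools.Seq from this listing is on the classpath.

// SeqDemo.java (illustrative sketch only; SeqDemo is a hypothetical class, not part of the simulation)
import tools.Seq;

class SeqDemo {
    public static void main(String[] args) {
        // data files use 1-based positions; Java arrays and Strings are 0-based
        System.out.println(Seq.bpToIdx(1));                    // prints 0
        // complement of a single base
        System.out.println(Seq.baseComp('A'));                 // prints T
        // reverse complement of a short sequence
        System.out.println(Seq.reverseComplement("TACTGA"));   // prints TCAGTA
    }
}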