#!/usr/local/perl @pop_hap=('0'); #population_haplotypes #this program started in Oct 2013 by Alexei Fedorov to make computer modeling of the total SNP difference between pairs of individuals from the same population that have the same frequecy of haplotypes. #assume that each gene has 3 haplotypes A, B, and C with the frequencies of 0.15; 0.35; and 0.50. haplotype A has 5 SNPs; B - has 10 SNPs; C has 15 SNPs. #rand generator assignes two haplotypes for each person #October 02, 2013. in this version relatives among individuals are introduced It could be siblings, cousines, common grate-grand-parent, etc. #by default the first three individuals will have common ancestry with the last three individuals $probability = 1.0; # this $probability measures the distance for ancestry in percentage. #for siblings $probability = 0.50; for cousins = 0.25; for second cousins =0.125; etc $N =92; #$N is the number of persons in the population %snp = ( 'A' => 550, 'B' => 400, 'C' => 150, 'D' => 450 ); #hash for number of SNPs in haplotypes; @pair_dif =(); #2D array of differences between 2 individuals $g =5000; #number of loci in equillibrium in the modeling genomes; $N_relative_pairs = 3; # how many pairs of relatives the program creates $cycle =0; $fileOUT = 'ResultPopModel_new_' . $g .'_' . $probability; open(OUT, ">$fileOUT"); print OUT "loci $g \t population sise $N \t relationship probablility $probability \t One parent mode \n"; while ($cycle < $g) { $cycle++; @pop_hap=('0'); print $cycle, "\n"; @array = &rand_array; for $n (1..$N) { $h1 = &haplotype(@array); if($n <=$N_relative_pairs) {${'common1'.$n} = $h1;} $h2 = &haplotype(@array); if($n <=$N_relative_pairs) {${'common2'.$n} = $h2;} if ($n>($N - $N_relative_pairs)) { $ind = $N-$n+1; #$ind stands for individual $rand =rand; if ($rand < $probability) {$h1 = ${'common1'.$ind};} #print "rand $rand \n"; # $rand =rand; if ($rand < $probability) {$h2 = ${'common2'.$ind};} #should be activated for siblings/cousins } $h = $h1 . $h2; push(@pop_hap, $h); # print "person $n, $h \n"; $h=''; } for $x (1..($N-1)){ for $y ($x..$N){ $dif =0; $man1 = $pop_hap[$x]; $man2 = $pop_hap[$y]; @character =split('', $man2); if ($man1 =~/$character[0]/) {$man1=~s/$character[0]//; $character[0] ='';} if ($man1 =~/$character[1]/) {$man1=~s/$character[1]//; $character[1] ='';} $man1.=$character[0] . $character[1]; @hap_dif =split('', $man1); foreach $z (@hap_dif) { $dif+=$snp{$z}; } $pair_dif[$x][$y] +=$dif; if ($cycle == $g) { print OUT "pair\t$x\t$y\t$pair_dif[$x][$y]\n"; } } } } sub rand_array { @percentage = (); for $q (1..4) { push(@percentage, rand); } @rand = sort {$a <=> $b} @percentage; #print @rand, "\n"; return @rand; } sub haplotype { $r = rand; if ($r<$_[0]) {$haplotype = "A";} elsif ($r<$_[1]) {$haplotype = "B";} elsif ($r<$_[2]) {$haplotype = "C";} else {$haplotype = "D";} return ($haplotype); }