#!/usr/local/perl #we modified the main program by removing processing the likelihoods so we get faster program to count dif in two individuals from same population #$chunk =$ARGV[3]*100000 +1; $chunk = $ARGV[2]*1000000 +1; $dir = 'LWK_JPT_Rare_April20_Chunks'; mkdir "./$dir", 0750 unless -d "./$dir"; open(OUT1, ">./$dir/$ARGV[0]_$ARGV[1]_$ARGV[2]Aa")|| die "Can't open 1_1 output : $!\n"; # open(OUT2, ">./$dir/$ARGV[0]_$ARGV[2]_$ARGV[3]_B")|| die "Can't open 2_2 output : $!\n"; $str = localtime(); print OUT1 "$str\n"; #@pair_id = (); $l_N = 0; @id =(); @b =(); open(IN, "20111108_1000genomes_samples2.txt") || die "Can't open 20111108 : $!\n"; while() { $l_N++; if ($l_N > 26) { if ($_ =~/^($ARGV[0])/){ # if ($_ =~/^($ARGV[0])/ || /^($ARGV[1])/){#i modified this program by adding || /^($ARGV[1]) soi can make a table of two populations. $c++; @a=split(/\t/, $_); push(@pop1, $a[2]); } if ($_ =~/^($ARGV[1])/){ # if ($_ =~/^($ARGV[0])/ || /^($ARGV[1])/){#i modified this program by adding || /^($ARGV[1]) soi can make a table of two populations. $d++; @b=split(/\t/, $_); push(@pop2, $b[2]); } } } # this part to Print test #for $n(0..$#pop1){ #print "$n\t$pop1[$n] \n"} #for $y(0..$#pop2){ #print "$y\t$pop2[$y] \n" #} open (FILE, "zcat /home/ahmed/LWK_JPT_RareSNPs_Table.txt.gz |") || die "can't open head100_1000genome_testfile :$!\n"; while (){ $L_N2++;#print "$l_N\n"; if ($L_N2 == 1) { @e=split(/\t/, $_); for $s(0..$#e){print "$e[$s]\n"; for $y(0..$#pop1){#print "$pop1[$y]\n"; if ($pop1[$y] eq $e[$s]){#print "equal\n"; push (@col_pop1, $s); } } } for $z(0..$#e){print "$e[$s]\n"; for $w(0..$#pop2){#print "$pop1[$y]\n"; if ($pop2[$w] eq $e[$z]){#print "equal\n"; push (@col_pop2, $z); } } } }last; } #Print test "it should print 89 IDs for GBR for example" for $n(0..$#col_pop1){ #print "$n\t$col_pop1[$n] \n" } for $n(0..$#col_pop2){ #print "$n\t$col_pop2[$n] \n" } # open (FILE1, "zcat /home/ahmed/LWK_JPT_RareSNPs_Table.txt.gz |") || die "Can't open RareSNPs table file : $!\n"; $line_N = 0; while (){ $line_N++; if ($line_N == 1){ chop $_; @ID = split(/\t/, $_); $n = 1; # to test that we are getting the IDs line foreach (@ID) { print "$n\t$_\n";$n++; } last; } } #new edit to test for two individuals. #@pair_id = ('NA19328', 'NA19312');#TWo GBR individual to test for $x (0..$#ID) { for $y (0..$#ID) { if ($ID[$x] eq $ID[$y]) { push(@columns, $x); # #end of the edit. } } } #for $n(0..$#columns){ #print "ahmed$columns[$n]\n";} # # open (FILE3, "zcat /home/ahmed/LWK_JPT_RareSNPs_Table.txt.gz |") || die "Can't open zipped file chr21 : $!\n"; $line_N = 0; #$w, $z two individuals from the analyzed population(e.g. GBR) @diff = (); while (){ $line_N++; if ($line_N >= $chunk + 1000000) {last;} if ($line_N > 1 && $line_N >= $chunk) { #if ($line_N > 1) {print "$line_N LWK_FIN\n";} if ($line_N%1000 ==0 ) {print "$line_N LWK_JPT\n";} if ($line_N > 1){ # print "my god\n"; @e = split(/\t/, $_); # print $e[1]."\n"; for $w (0..$#col_pop1){#print "ID1\t$col_pop1[$w]\n"; for $z (0..$#col_pop2){#print "ID2\t$col_pop2[$z]\n"; #if ($w < $z){ # @e = split(/\t/, $_); # 1.To calculate Differneces between each two individuals. if ($e[$col_pop1[$w]] =~/^(\d)\|(\d)\:(.+)\:(.+)\,(.+)\,(.+)/){ $p1 = $1 + $2; # print $p1."\n"; } if ($e[$col_pop2[$z]] =~/^(\d)\|(\d)\:(.+)\:(.+)\,(.+)\,(.+)/){ $p2 = $1 + $2; # print $p2."\n"; } #to calcualte total differences # $diff[$w][$z] += abs ($p1 - $p2); # $my = abs ($p1 - $p2); # print $my."\n"; # End of 1. # to calcualte shared rare SNPS. if ($p1 == 1 && $p2 == 1){$one_one_common[$w][$z]++; # print "found common \n"; } if ($p1 == 2 && $p2 == 2){$two_two_common[$w][$z]++;} if (($p1 == 2 && $p2 == 1) || ($p1 == 1 && $p2 == 2)){$two_one_common[$w][$z]++;} # } } } } } } for $w (0..$#col_pop1) { for $z (0..$#col_pop2) { # if ($w < $z) { #to print total differences : # print OUT1 "$columns[$w]\t$columns[$z]\t$diff[$w][$z]\n"; # to Print Shared rare SNPs.: $Total = $one_one_common[$w][$z] + ($two_two_common[$w][$z]* 2) + ($two_one_common[$w][$z]); print OUT1 "$col_pop1[$w]\t$ID[$col_pop1[$w]]\t$col_pop2[$z]\t$ID[$col_pop2[$z]]\tOne_One:\t$one_one_common[$w][$z]\tTwo_Two:\t$two_two_common[$w][$z]\tTwo_One:\t$two_one_common[$w][$z]\t $Total\n"; #} } } $str2 = localtime(); print OUT1 "$str2\n";