#!/usr/local/perl #we modified the main program by removing processing the likelihoods so we get faster program to count dif in two individuals from same population #$chunk =$ARGV[3]*100000 +1; $chunk = $ARGV[2]*1000000 +1; $dir = 'InterpopulationDifferences'; #print "$chunk \n"; mkdir "./$dir", 0750 unless -d "./$dir"; open(OUT1, ">./$dir/$ARGV[0]_$ARGV[1]_$ARGV[2]_A")|| die "Can't open 1_1 output : $!\n"; # open(OUT2, ">./$dir/$ARGV[0]_$ARGV[2]_$ARGV[3]_B")|| die "Can't open 2_2 output : $!\n"; #$str = localtime(); #print OUT1 "$str\n"; #@pair_id = (); #new addition to make arrays of teh IDS for each population 10_15_2013, $ARGV[1] for example will be LWK_IDs.txt open (POP1, "$ARGV[0]") || die "Can't open POP1 : $!\n"; while (){ chomp $_; @e=split(/\t/, $_); push (@pop1, $e[0]); } for $n(0..$#pop1){ print "$pop1[$n]"; } open (POP2, "$ARGV[1]") || die "Can't open POP2 : $!\n"; while (){ chomp $_; @e2=split(/\t/, $_); push (@pop2, $e2[0]); } #here we open the main file that has the heterozygous differences only for the two populations combined. open (FILE1, "JPT_CHB_Genome_HET.txt") || die "Can't open File1 : $!\n"; $line_N = 0; while (){ $line_N++; if ($line_N == 1){ chop $_; @ID = split(/\t/, $_); # $n = 1; foreach (@ID) { print "$n\t$_\n";$n++; } last; } } #for $n(0..$#pop1){ #print " $pop1[$n]\n" #} #new edit to test for two individuals. #@pair_id = ('NA19328', 'NA19312');#TWo GBR individual to test for $x (0..$#pop1) { for $y (0..$#ID) { if ($pop1[$x] eq $ID[$y]) { # print "$pop1[$x]$ID[$y]MYMYMY\n"; push(@columns1, $y); # #end of the edit. } } } #for $n(0..$#columns1){ # print "$columns1[$n]\n"; #} for $x (0..$#pop2) { for $y (0..$#ID) { if ($pop2[$x] eq $ID[$y]) { push(@columns2, $y); # #end of the edit. } } } for $n(0..$#columns2){ print "$columns2[$n]\n"; } open (FILE, "JPT_CHB_Genome_HET.txt") || die "Can't open zipped file chr21 : $!\n"; $line_N = 0; #$w, $z two individuals from the analyzed population(e.g. GBR) @diff = (); while (){ $line_N++;print "$line_N\n"; if ($line_N >= $chunk + 1000000) {last;} if ($line_N > 1 && $line_N >= $chunk) { # print $line_N "\n"; # print $line_N."\n";_ # if ($line_N > 1){ # print "my god\n"; @e = split(/\t/, $_); # print $e[1]."\n"; for $w (0..$#columns1){ for $z (0..$#columns2){ # if ($w < $z){ # @e = split(/\t/, $_); if ($e[$columns1[$w]] =~/^(\d)\|(\d)\:(.+)\:(.+)\,(.+)\,(.+)/){ $p1 = $1 + $2; # print $p1."\n"; } if ($e[$columns2[$z]] =~/^(\d)\|(\d)\:(.+)\:(.+)\,(.+)\,(.+)/){ $p2 = $1 + $2; # print $p2."\n"; } $diff[$w][$z] += abs ($p1 - $p2); # $my = abs ($p1 - $p2); # } #print "$columns1[$w]\t$ID[$columns1[$w]]\t$columns2[$z]\t$ID[$columns2[$z]]\t$diff[$w][$z]\n"; # print $my."\n"; # } } } } } for $w (0..$#columns1) { for $z (0..$#columns2) { # if ($w < $z) { print OUT1 "$columns1[$w]\t$ID[$columns1[$w]]\t$columns2[$z]\t$ID[$columns2[$z]]\t$diff[$w][$z]\n"; # } } } #$str2 = localtime(); #print OUT1 "$str2\n";