#!/usr/local/perl #program created by Alexei Fedorov in Nov 2014. Its goal to calculate locations of shared vrGVs for two individuals #command line for starting the program: perl 2individualsGenomeDif_vrGVs.pl Arg1 Arg2 Arg3 #Arg1 is a fragment of the output filename; Arg2 is identifier of individual #1 from the 1092 sequenced genomes; Arg3 is the indentifier of the second individual #example of the command line: nohup perl GenomeDiff_2ind_vrGV_v3.pl CHS_CHS NA18548 NA18567 & #the output file for this example will have the name: Table5S_CHS_CHS_NA18548_NA18567 $c=0; @b =(); open(OUT, ">TableS5_$ARGV[0]_$ARGV[1]_$ARGV[2]"); open(IN, "1000genomes_ID"); while() { @b=split(/\t/, $_); } print "$#b \n"; @pair_id = ($ARGV[1], $ARGV[2]); @columns2 =(); for $x (0..$#b) { for $y (0..$#pair_id) { if ($b[$x] eq $pair_id[$y]) {push(@columns2, $x); print "$x \t $b[$x] \t $#columns2 \n";} } } print OUT "columns2 $columns2[0] \t $columns2[1] \n"; $match =0; #open (FILE, "zcat /home/asahama/ALL.chr12.phase1_release_v3.20101123.snps_indels_svs.genotypes.vcf.gz |") || die "could not open file \n"; for $L (1..22) { $name = '/home/afedorov/rareSNP_chr'.$L.'.gz'; open (FILE, "zcat $name |") || die "could not open file \n"; $line_N =0; #$w, $z two individuals from the analyzed population(e.g. GBR) while (){ $line_N++; if ($line_N > 30) { $count = () = $_ =~ /\|1/g; $count2 = () = $_ =~ /1\|/g; if ($count + $count2 >4 ){next;} @e=split(/\t/, $_); if ($e[$columns2[0]] =~/^(\d)\|(\d)/){$p1 = $1 + $2;} else {print "problem with the entry $e[$columns2[0]] \n";} if ($e[$columns2[1]] =~/^(\d)\|(\d)/){$p2 = $1 + $2;} else {print "problem with the entry $e[$columns2[1]] \n";} if ($p1 >0 && $p2 >0) {print OUT "CHR$e[0]\t$e[1]\t$e[2]\t$e[$columns2[0]]\t$e[$columns2[1]]\t$e[3]\t$e[4]\n"; $match++; } } } } print OUT "number of matches of vrGVs is $match \n";