#!/bin/perl #-------------------------------------------------------- # # Porthi # Ver 1.07 SPL 07.09.05 # # This is a program for creating orthologous introns # see Saxonov et al NAR (2000)28,N1,185-190. # # Made from Peidd4 # # # assuming DOS-style text file formats (0D0A - EOL string) # for UNIXlike style set $txt_file_style=1 # # call Porthi input_file_name [>out_put_file_name] # #-------------------------------------------------------- # last minute change : no proceedings use strict; my $ifn; my $arg_err=0; if (@ARGV ==1) {$ifn =$ARGV[0]} # else while(<>) {my $ifn =chomp}; else {$arg_err=1;} #$ifn='/cygdrive/d/cygwin/usr/home/spl/DATA/h4d500q.txt'; # test file first 500 lines of homo2004.dEID. #$ifn='/cygdrive/d/cygwin/usr/home/spl/h4d10l.txt'; #$ifn='/cygdrive/d/cygwin/usr/home/spl/h4d14.txt'; #$ifn='/cygdrive/d/cygwin/usr/home/spl/DATA/h4d18.txt'; my $txt_file_style=1;# =0 DOS-like style; =1 UNIX-like style my $c_e=0; my $c_l=0; my $c_line=''; my $c_entry_found=0; my @c_filds; my @c_err; my $c_sq=''; my $last_id=''; my $err_flg=0; my $prev_f_line=''; my $eol_str; if ($txt_file_style==0) {$eol_str="\r\n";} else {$eol_str="\n";} my $acc_indent=' '; my $debug_c_sub=0; my $c_err_flg=0; my $err_c=0; my @intron_phase; my @intron_size; my $intr_sum; my @exon_size; my $ex_sum; my @splice_arr; my @introns; my @exons; my @splices; my $TCAGN='TCAGN'; my $tcagn='tcagn'; my $t_1=time; my $c_sq2=''; my @c_term_stat=(0,0,0); my @glob_term_stat=(0,0,0); my @glob_phase_stat=(0,0,0,0); # 0 1 2 1+2 my @glob_exon_stat=(0,0); #^ ^closed #!--open my $glob_closed_exon_entry_num=0; my $aux_sum_ph=0; my @exon_len_stat=qw/0*202/; my @intr_len_stat=qw/0*202/; my $glob_exon_len_sum=0; my $glob_intr_len_sum=0; my $in_temp_file_num=0; my $in_temp_file_query=''; my $in_temp_file_sbjct=''; my $i1_line=''; my $i2_line=''; my $SQ1=''; my $SQc=''; my $SQ2=''; my $c_start1=0; my $c_start2=0; my %cast_hash; my $p1FN=''; my $p2FN=''; my $d1FN=''; my $d2FN=''; my $TEMPORARY_DIR=''; my $O3FN=''; my $ORTHDBFN=''; #s07 my %O3_hash; my %p1_hash; my %p2_hash; my %d1_hash; my %d2_hash; #statistics my $temp_files_read=0; my $total_orth_introns_found=0; my $strict_phase_total_orth_introns_found=0; my %introns_qual_stat_hash; my $c_common_in_output3=0; my $common_intron_discrep_c=0; my $temp_file_content=0; my $orth_intron_found=0; #print 'ifn=',$ifn,$eol_str; #open (OUT, ">test") || die "Can't open: test $!",$eol_str; if ($arg_err == 0) {open (IFV, $ifn) || die "Can't open: $ifn $!",$eol_str;} #$/ = "\n>"; if ($arg_err != 0) {$ifn='[not defined]'} &write_header; if ($arg_err != 0) {die 'invalid number of parameters in program call',$eol_str;} &init_cat_hash; &reading_main_file; &chech_cat_hash_and_open_files; &reading_p1_file; &reading_p2_file; &reading_d1_file; &reading_d2_file; &reading_output3_file; &handle_output3_file; &print_statistics_2; #&print_statistics; close(ORTHDBFV); exit; exit; while () { $c_line=$_; $c_l++; #print $c_line; #debug #&press_any_key; if (($c_line =~/^>/ ) && ($err_flg==0)) {if (($c_line =~/^> \d+_/) && ($err_flg==0)) {if ($c_entry_found==1) {&handle_entry_acc($prev_f_line);} {$c_e++; $c_entry_found=1;$err_flg=0;$prev_f_line=$c_line} } else {#$err_flg=1; print 'unexpected > in line ',$c_l,$eol_str;} # strange compilation error dependend on line formatting here } else {if ($c_entry_found==1) {&add_row_sq_line($c_line);} } }; # end of IFV &handle_entry_acc($prev_f_line); print 'total lines read ',$c_l,$eol_str; print 'number of entires=',$c_e,' number of invalid format=',$err_c,$eol_str; printf "total phase 0: %6g phase 1: %6g phase 2: %6g",@glob_term_stat; print $eol_str; printf "total free phase 0: %6g phase 1: %6g phase 2: %6g phase 1+2: %6g", @glob_phase_stat; print $eol_str; $aux_sum_ph=$glob_phase_stat[0]+$glob_phase_stat[1]+$glob_phase_stat[2]-$glob_phase_stat[3]; print "total free phase sum (ph0+ph1+ph2-ph(1+2))=",$aux_sum_ph,$eol_str; print 'number of invalid CDS format=',$c_e-$err_c-$aux_sum_ph,$eol_str; print 'total open exons=',$glob_exon_stat[0],' closed=',$glob_exon_stat[1],$eol_str; print 'total entries with closed exons=',$glob_closed_exon_entry_num,$eol_str; &print_exon_len_stat; &print_intr_len_stat; print 'exec time=',get_hour_min_sec_time_elapsed_line(time),$eol_str; # $t_1=time; # &tst_cir; print 'tst OK',$eol_str; # print 'tst_cir time=',get_hour_min_sec_time_elapsed_line(time),$eol_str; # # $t_1=time; # &surplaceaa; # print 'surplace time=',get_hour_min_sec_time_elapsed_line(time),$eol_str; # # $t_1=time; # &tst_cir_while; print 'tst OK',$eol_str; # print 'tst_cir_while time=',get_hour_min_sec_time_elapsed_line(time),$eol_str; print 'END of report',$eol_str; #close(OUT); close(IFV); exit; #---- END OF MAIN ----- #---- lib ----- sub init_cat_hash { %cast_hash=("pEID1"=>undef, "pEID2"=>undef, "dEID1"=>undef, "dEID2"=>undef, "TEMPORARY"=>"", "OUTPUT3FILE"=>undef, "ORTHOLOG_DB"=>undef) #s07 } #---------------------- sub chech_cat_hash_and_open_files { if (defined $cast_hash{"pEID1"}) {$p1FN=$cast_hash{"pEID1"}; open (p1FV, $p1FN) || die "Can't open: $p1FN $!",$eol_str;} if (defined $cast_hash{"pEID2"}) {$p2FN=$cast_hash{"pEID2"}; open (p2FV, $p2FN) || die "Can't open: $p2FN $!",$eol_str;} if (defined $cast_hash{"dEID1"}) {$d1FN=$cast_hash{"dEID1"}; open (d1FV, $d1FN) || die "Can't open: $d1FN $!",$eol_str;} if (defined $cast_hash{"dEID2"}) {$d2FN=$cast_hash{"dEID2"}; open (d2FV, $d2FN) || die "Can't open: $d2FN $!",$eol_str;} if (defined $cast_hash{"TEMPORARY"}) {$TEMPORARY_DIR=$cast_hash{"TEMPORARY"}; if ($TEMPORARY_DIR eq "") {$TEMPORARY_DIR="."} print '$TEMPORARY_DIR=',$TEMPORARY_DIR,$eol_str; # &press_any_key; open (temp_DIRV, $TEMPORARY_DIR) || die "Can't open dir: $TEMPORARY_DIR $!",$eol_str;} if (defined $cast_hash{"OUTPUT3FILE"}) {$O3FN=$cast_hash{"OUTPUT3FILE"}; open (O3FV, $O3FN) || die "Can't open: $O3FN $!",$eol_str;} if (defined $cast_hash{"ORTHOLOG_DB"}) {$ORTHDBFN=$cast_hash{"ORTHOLOG_DB"}; open (ORTHDBFV, ">".$ORTHDBFN) || die "Can't open: $ORTHDBFN $!",$eol_str;} #s07 } #---------------------- sub reading_main_file { my @c_f; my @c_f2; while () {if (substr($_,0,1) ne '*') {if (/\w+/) { print $_; # &press_any_key; $_=del_eol($_); # @c_f= split /\*/,$_; # @c_f2=split /=/,$c_f[0]; # if (undef($c_f2[1])) {$c_f2[1]='';} if (/\*/) {$_=$`} if (/=/) {$c_f2[0]=$`;$c_f2[1]=$'} # if (undef($c_f2[1])) {$c_f2[1]='';} # print 'item:',$c_f2[0],' value:',$c_f2[1],$eol_str; if (exists $cast_hash{$c_f2[0]}) {$cast_hash{$c_f2[0]}=$c_f2[1]} # &press_any_key; }}} # print %cast_hash; # &press_any_key; return } #---------------------- sub reading_output3_file { my @c_f; while () { print $_; # &press_any_key; $_=del_eol($_); @c_f= split; # @c_f2=split /=/,$c_f[0]; # if (undef($c_f2[1])) {$c_f2[1]='';} if (defined($c_f[7])) {$O3_hash{$c_f[7]}=$_;} # print '$_=',$_,$eol_str; # &press_any_key; } close O3FV; return } #---------------------- sub handle_output3_file { my @c_f; my $c_key; my $c_line; my $c_temp_file_name; print '=== STARTING INDIVIDUAL GENE PAIRS ===',$eol_str; while (($c_key,$c_line)=each %O3_hash) { # print "$c_key=>$c_line<<<",$eol_str; # print "$c_line<<<",$eol_str; # &press_any_key; @c_f= split /\s+/,$c_line; # if (undef($c_f2[1])) {$c_f2[1]='';} $c_temp_file_name=$TEMPORARY_DIR.'/temporary_'.$c_key; # if (defined($c_f[7])) {$O3_hash{$c_f[7]}=$_} #assume unique CIP number # if (defined($c_f[7])) {$O3_hash{$c_f[7]}=$_} #assume unique CIP number # print $c_temp_file_name,$eol_str; print '/temporary_'.$c_key,$eol_str; my $open_succ=open (TFV, $c_temp_file_name) || warn "Can't open: $c_temp_file_name $!",$eol_str; $c_common_in_output3=$c_f[0]; print 'common in 3=',$c_common_in_output3,$eol_str; # &press_any_key; $orth_intron_found=0; if ($open_succ) { $temp_file_content=1; &reading_temporary_file_head; while ($temp_file_content==1) { &reading_temporary_file_2; if ($temp_file_content==1) { # &handle_intron_line_2; &handle_intron_line_tab_2;#&handle_intron_line_tab; } } $temp_files_read++; print 'found=',$orth_intron_found,$eol_str; # &press_any_key; if ($c_common_in_output3!=$orth_intron_found) {$common_intron_discrep_c++} close TFV; } # &press_any_key; } return } #---------------------- sub reading_p1_file { my @c_f; while () {if (substr($_,0,1) eq '>') { # print $_; # &press_any_key; $_=del_eol($_); @c_f= split /\s+/; if (defined($c_f[1])) {/intron$/;$_=$'; /$/;$_=$`; $p1_hash{$c_f[1]}=$_ } # print "f>>$c_f[1]<') { # print $_; # &press_any_key; $_=del_eol($_); @c_f= split /\s+/; if (defined($c_f[1])) {/intron$/;$_=$'; /$/;$_=$`; $p2_hash{$c_f[1]}=$_ } # print ">>>$c_f[1]<<<",$eol_str; # print '$_=',$_,$eol_str; # &press_any_key; } } my $p_count =keys %p2_hash; print 'p2 num=',$p_count,$eol_str; close p2FV; return } #---------------------- sub reading_d1_file { my @c_f; while () {if (substr($_,0,1) eq '>') { # print $_; # &press_any_key; $_=del_eol($_); @c_f= split /\s+/; if (defined($c_f[1])) {/intron$/;$_=$'; /$/;$_=$`; $d1_hash{$c_f[1]}=$_ } # print ">>>$c_f[1]<<<",$eol_str; # print '$_=',$_,$eol_str; # &press_any_key; } } my $d_count =keys %d1_hash; print 'd1 num=',$d_count,$eol_str; close d1FV; return } #---------------------- sub reading_d2_file { my @c_f; while () {if (substr($_,0,1) eq '>') { # print $_; # &press_any_key; $_=del_eol($_); @c_f= split /\s+/; if (defined($c_f[1])) {/intron$/;$_=$'; /$/;$_=$`; $d2_hash{$c_f[1]}=$_ } # print ">>>$c_f[1]<<<",$eol_str; # print '$_=',$_,$eol_str; # &press_any_key; } } my $d_count =keys %d2_hash; print 'd2 num=',$d_count,$eol_str; close d2FV; return } #------------------ sub reading_temporary_file_head { if (defined($c_line=)) { $c_l++; #print $c_line; #debug #&press_any_key; if ($c_line =~/(\d+)\s+query is (\w+)\s+subject is (\w+)/) {$in_temp_file_num=$1; $in_temp_file_query=$2; $in_temp_file_sbjct=$3; # print $in_temp_file_num,$eol_str; # print $in_temp_file_query,$eol_str; # print $in_temp_file_sbjct,$eol_str; } else {print 'invalid first line'} }; # end of handle first line } #------------------ sub reading_temporary_file_2 { my $c_frag1=''; my $c_frag2=''; my $first_SQ_line_flg=0; my $last_SQ_line_flg=0; my $c_i1_line=''; my $c_i2_line=''; my $c_common=''; my $c_frag=''; my $c_frag_len=0; my $line_off=0; my $aux_l=0; my $buff1=''; $SQ1='';$SQc='';$SQ2=''; $i1_line=''; $i2_line=''; while (($first_SQ_line_flg==0) && defined($buff1=) ) #NB! the above line assume short circle avaluation { # print ' search Query',$eol_str; #print 'line_len_before=',length($buff1),$eol_str; $c_l++; $buff1=&del_eol($buff1); if (defined($c_line=)) {$c_l++} else {$temp_file_content=0} if ($temp_file_content!=0) { $c_line=&del_eol($c_line); # print 'line_len_after=',length($c_line),$eol_str; # print 'c_line=',$c_line; #debug # &press_any_key; if ($c_line =~/Query:(\s+)(\d+)(\s+)((\w|-)+)/) {$c_start1=$2; $c_frag1=$4; $first_SQ_line_flg=1; $line_off=length($1)+length($2)+length($3)+6; $c_i1_line=$buff1; # print $c_start1,$eol_str; # print $c_frag1,$eol_str; # print $first_SQ_line_flg,$eol_str; # print $line_off,$eol_str; # &press_any_key; } else {$buff1=$c_line} } # end if $temp } #while if (defined($buff1)) { # print 'exited search Query, buff1=',$buff1,$eol_str; if ($c_frag1=~/\s*(\d+)/) {$c_frag1=$`; $last_SQ_line_flg=1} $c_frag_len=length($c_frag1); # print 'frag_len=',$c_frag_len,$eol_str; if (defined($c_line=)) {$c_l++; #common SQ line $c_common=substr($c_line,$line_off,$c_frag_len); # print 'common=',$c_common,$eol_str; } else {print 'invalid first SQ line 1',$eol_str} # print 'debug stop';$eol_str; # &press_any_key; if (defined($c_line=)) {$c_l++; #second line $c_frag2=substr($c_line,$line_off,$c_frag_len); # print $c_frag2,$eol_str; } else {print 'invalid first SQ line 2',$eol_str} if ($first_SQ_line_flg==1) {if ($c_line =~/Sbjct:(\s+)(\d+)(\s+)((\w|-)+)/) {$c_start2=$2} else {print 'invalid first SQ line 2',$eol_str} } if (defined($c_i2_line=)) {$c_l++} $c_i1_line=substr($c_i1_line,$line_off,$c_frag_len); $c_i2_line=substr($c_i2_line,$line_off,$c_frag_len); $SQ1=$SQ1.$c_frag1; $SQc=$SQc.$c_common; $SQ2=$SQ2.$c_frag2; $i1_line=$i1_line.$c_i1_line; $i2_line=$i2_line.$c_i2_line; # print 'frag2=',$c_frag2,$eol_str; # print 'i2 =',$c_i2_line,$eol_str; #print 'no go forther',$eol_str; #&press_any_key; if ($last_SQ_line_flg==0) {while (($last_SQ_line_flg==0) && defined($c_line=)) {if ($last_SQ_line_flg==0) { $c_i1_line=''; $c_i2_line='';$line_off=0; $c_l++; if (defined($c_line=)) {$c_l++} if (defined($c_i1_line=)) {$c_l++} # print 'i1=',$c_i1_line,$eol_str; #debug # &press_any_key; if (defined($c_line=)) {$c_l++} # print 'line_len_before=',length($c_line),$eol_str; $c_line=&del_eol($c_line); # print 'line_len_after=',length($c_line),$eol_str; # print $c_line; #debug # &press_any_key; if ($c_line =~/Query:(\s+)(\d+)(\s+)((\w|-)+)/) {print 'temporary file format error: unexpected Query line' } else {$c_frag1=$c_line; $first_SQ_line_flg=0;$line_off=0;} # print $c_line; #debug # &press_any_key; if ($c_frag1=~/\s*(\d+)/) {$c_frag1=$`; $last_SQ_line_flg=1} $c_frag_len=length($c_frag1); # print 'frag_len=',$c_frag_len,$eol_str; if (defined($c_line=)) {$c_l++; #common SQ line $c_common=substr($c_line,$line_off,$c_frag_len); # print 'common=',$c_common,$eol_str; } else {print 'invalid first SQ line'} # &press_any_key; if (defined($c_line=)) {$c_l++; #second line $c_frag2=substr($c_line,$line_off,$c_frag_len); # print 'frag2=',$c_frag2,$eol_str; } else {print 'invalid first SQ line'} # if ($first_SQ_line_flg==1) # {if ($c_line =~/Sbjct:(\s+)(\d+)(\s+)((\w|-)+)/) # {$c_start2=$2} # } if (defined($c_i2_line=)) {$c_l++} # print 'i2=',$c_i2_line,$eol_str; #debug # print 'last SQ flg=',$last_SQ_line_flg,$eol_str; # &press_any_key; $c_i1_line=substr($c_i1_line,$line_off,$c_frag_len); $c_i2_line=substr($c_i2_line,$line_off,$c_frag_len); $SQ1=$SQ1.$c_frag1; $SQc=$SQc.$c_common; $SQ2=$SQ2.$c_frag2; $i1_line=$i1_line.$c_i1_line; $i2_line=$i2_line.$c_i2_line; } # if $last_SQ_line_flg==0 }; # end of TFV } # if last_SQ } else {$temp_file_content=0} # $temp_file_content=0; #??? # $aux_l=length($i1_line); #extending $i1_line # if ($aux_l<=length($SQ1)) # {for (my $j1=$aux_l+1; $j1<=length($SQ1); $j1++) # {substr($i1_line,$j1-1,1)=' ';} # } # $aux_l=length($i2_line); #extending $i1_line # if ($aux_l<=length($SQ1)) # {for (my $j1=$aux_l+1; $j1<=length($SQ1); $j1++) # {substr($i2_line,$j1-1,1)=' ';} # } # print 'i1:',$eol_str,$i1_line,$eol_str; # &press_any_key; # print 'sq1:',$eol_str,$SQ1,$eol_str; # &press_any_key; # print 'sq common:',$eol_str,$SQc,$eol_str; # &press_any_key; # print 'sq2:',$eol_str,$SQ2,$eol_str; # &press_any_key; # print 'i2:',$eol_str,$i2_line,$eol_str; # &press_any_key; return } #------------------ #------------------ #------------------ sub handle_intron_line_tab_2 # for porthi ver 1.06 or greater {my $c_i1_pos=0; my $c_p1=0; my $c_p2=0; my $aux1=''; my $aux2=''; my $aux_p=0; my $aux_p_i=0; my $left_val=0; my $right_val=0; my $left_val_x=0; my $right_val_x=0; my $aux_val=0; my $AAstr='ABCDEFGHIJKLMNOPQRSTUVWYZ'; my $half_thresh=3; my $half_thresh_x=1; my $intron_qual=0; my $ind1; my $ind2; my $d_shift1; my $d_shift2; my $c_orth_intron_found=0; my $intron_qual_word=''; my $gap_lt=0; my $gap_rt=0; # print " l r qual ind1 ind2 m1 m2",$eol_str; for (my $c_i1_pos=0; $c_i1_pos=0) {$left_val=$left_val+1} if (substr($SQc,$aux_p_i,1) eq '+') {$left_val=$left_val+0.5} if (substr($SQc,$aux_p_i,1) eq " "){ if(substr($SQ1,$aux_p_i,1) eq '-' or substr($SQ2,$aux_p_i,1) eq '-'){ $gap_lt=$gap_lt+1; } } } $aux_p=$c_i1_pos+4; if ($aux_p>=length($SQ1)) {$aux_p=length($SQ1)-1} $right_val=0;$gap_rt=0; for ($aux_p_i=$c_i1_pos; $aux_p_i<=$aux_p; $aux_p_i++) { if (index($AAstr,substr($SQc,$aux_p_i,1))>=0) {$right_val=$right_val+1} if (substr($SQc,$aux_p_i,1) eq '+') {$right_val=$right_val+0.5} elsif (substr($SQc,$aux_p_i,1) eq " "){ if(substr($SQ1,$aux_p_i,1) eq '-' or substr($SQ2,$aux_p_i,1) eq '-'){ $gap_rt=$gap_rt+1; } } } # handle XXXX if (substr($SQ1,$c_i1_pos,1) eq 'X') { $aux_p=$c_i1_pos-4; if ($aux_p<0) {$aux_p=0} $left_val_x=0; for ($aux_p_i=$aux_p; $aux_p_i<=$c_i1_pos; $aux_p_i++) {if (substr($SQ1,$aux_p_i,1) eq 'X') {$left_val_x=$left_val_x+1} } $aux_p=$c_i1_pos+4; if ($aux_p>=length($SQ1)) {$aux_p=length($SQ1)-1} $right_val_x=0; for ($aux_p_i=$c_i1_pos; $aux_p_i<=$aux_p; $aux_p_i++) {if (substr($SQ1,$aux_p_i,1) eq 'X') {$right_val_x=$right_val_x+1} } } # end handle XXXX $intron_qual=0; if($gap_rt>3 or $gap_lt>3){$intron_qual=4} else{ if ($left_val<$half_thresh) {$intron_qual++} if ($right_val<$half_thresh) {$intron_qual++} if ($intron_qual == 2) { if (($left_val_x>=$half_thresh_x) && ($right_val_x>=$half_thresh_x)) {$intron_qual=3} } } $introns_qual_stat_hash{$intron_qual}++; # print '$c_i1_pos=',$c_i1_pos,' $c_start1=',$c_start1,' $c_start2=',$c_start2,' ',$aux1,' ',$aux2,$eol_str; ($ind1,$ind2)=&snooping_protein_files($c_p1,$c_p2); ($d_shift1,$d_shift2)=&snooping_dna_files($c_p1,$c_p2); # printf "%3.1f %3.1f %4g %4g %4g %4g %4g", # $left_val,$right_val,$intron_qual,$ind1,$ind2, # $ind1+$d_shift1,$ind2+$d_shift2; # print $eol_str; # print $in_temp_file_query,$eol_str; # print $in_temp_file_sbjct,$eol_str; if (($intron_qual==0) || ($intron_qual==1) || ($intron_qual==3)) { if ($intron_qual==0) {$intron_qual_word='good'} if ($intron_qual==1) {$intron_qual_word='poor'} if ($intron_qual==3) {$intron_qual_word='XXXX'} #if ($intron_qual==4) {$intron_qual_word='Alt_exon'} printf "i_%-3g %-16s i_%-3g %-16s %-4s", $ind1+$d_shift1, $in_temp_file_query, $ind2+$d_shift2, $in_temp_file_sbjct, $intron_qual_word; print $eol_str; printf ORTHDBFV "INTRON_%-3g %-16s INTRON_%-3g %-16s %-4s", $ind1+$d_shift1, $in_temp_file_query, $ind2+$d_shift2, $in_temp_file_sbjct, $intron_qual_word; #s07 print ORTHDBFV $eol_str;#s07 } # printf "l=%3.1f r=%3.1f qual=%1g ind1=%3g ind2=%3g m_ind1=%3g m_ind2=%3g", # $left_val,$right_val,$intron_qual,$ind1,$ind2, # $ind1+$d_shift1,$ind2+$d_shift2;print $eol_str; # print 'l=',$left_val,' r=',$right_val,' qual=',$intron_qual, # ' ind1=',$ind1,' ind2=',$ind2, # ' m_ind1=',$ind1+$d_shift1, # ' m_ind2=',$ind2+$d_shift2,$eol_str; # &press_any_key; } #endif in2 } #endif in1 if (substr($SQ1,$c_i1_pos,1) ne '-') {$c_p1++} if (substr($SQ2,$c_i1_pos,1) ne '-') {$c_p2++} } #end for # print 'found=',$c_orth_intron_found,$eol_str; # &press_any_key; $orth_intron_found=$orth_intron_found+$c_orth_intron_found; } #------------------ sub reading_temporary_file { my $c_frag1=''; my $c_frag2=''; my $first_SQ_line_flg=1; my $last_SQ_line_flg=0; my $c_i1_line=''; my $c_i2_line=''; my $c_common=''; my $c_frag=''; my $c_frag_len=0; my $line_off=0; my $aux_l=0; $SQ1='';$SQc='';$SQ2=''; $i1_line=''; $i2_line=''; if (defined($c_line=)) { $c_l++; #print $c_line; #debug #&press_any_key; if ($c_line =~/(\d+)\s+query is (\w+)\s+subject is (\w+)/) {$in_temp_file_num=$1; $in_temp_file_query=$2; $in_temp_file_sbjct=$3; #print $in_temp_file_num,$eol_str; # print $in_temp_file_query,$eol_str; # print $in_temp_file_sbjct,$eol_str; } else {print 'invalid first line'} }; # end of handle first line #&press_any_key; while () {if ($last_SQ_line_flg==0) { $c_i1_line=''; $c_i2_line='';$line_off=0; $c_line=$_; $c_l++; if (defined($c_line=)) {$c_l++} if (defined($c_i1_line=)) {$c_l++} if (defined($c_line=)) {$c_l++} # print 'line_len_before=',length($c_line),$eol_str; $c_line=&del_eol($c_line); # print 'line_len_after=',length($c_line),$eol_str; # print $c_line; #debug # &press_any_key; if ($c_line =~/Query:(\s+)(\d+)(\s+)((\w|-)+)/) {$c_start1=$2; $c_frag1=$4; $first_SQ_line_flg=1; $line_off=length($1)+length($2)+length($3)+6; # print $c_start,$eol_str; # print $c_frag1,$eol_str; # print $first_SQ_line_flg,$eol_str; # print $line_off,$eol_str; } else {$c_frag1=$c_line; $first_SQ_line_flg=0;$line_off=0;} # &press_any_key; if ($c_frag1=~/\s*(\d+)/) {$c_frag1=$`; $last_SQ_line_flg=1} $c_frag_len=length($c_frag1); # print 'frag_len=',$c_frag_len,$eol_str; if (defined($c_line=)) {$c_l++; #common SQ line $c_common=substr($c_line,$line_off,$c_frag_len); # print $c_common,$eol_str; } else {print 'invalid first SQ line'} # &press_any_key; if (defined($c_line=)) {$c_l++; #second line $c_frag2=substr($c_line,$line_off,$c_frag_len); # print $c_frag2,$eol_str; } else {print 'invalid first SQ line'} if ($first_SQ_line_flg==1) {if ($c_line =~/Sbjct:(\s+)(\d+)(\s+)((\w|-)+)/) {$c_start2=$2} } if (defined($c_i2_line=)) {$c_l++} $c_i1_line=substr($c_i1_line,$line_off,$c_frag_len); $c_i2_line=substr($c_i2_line,$line_off,$c_frag_len); $SQ1=$SQ1.$c_frag1; $SQc=$SQc.$c_common; $SQ2=$SQ2.$c_frag2; $i1_line=$i1_line.$c_i1_line; $i2_line=$i2_line.$c_i2_line; } # if $last_SQ_line_flg==0 }; # end of TFV $temp_files_read++; close TFV; $aux_l=length($i1_line); #extending $i1_line if ($aux_l<=length($SQ1)) {for (my $j1=$aux_l+1; $j1<=length($SQ1); $j1++) {substr($i1_line,$j1-1,1)=' ';} } $aux_l=length($i2_line); #extending $i1_line if ($aux_l<=length($SQ1)) {for (my $j1=$aux_l+1; $j1<=length($SQ1); $j1++) {substr($i2_line,$j1-1,1)=' ';} } # print 'i1:',$eol_str,$i1_line,$eol_str; # &press_any_key; # print 'sq1:',$eol_str,$SQ1,$eol_str; # &press_any_key; # print 'sq common:',$eol_str,$SQc,$eol_str; # &press_any_key; # print 'sq2:',$eol_str,$SQ2,$eol_str; # &press_any_key; # print 'i2:',$eol_str,$i2_line,$eol_str; return } #------------------ #------------------ sub print_statistics { print '=== STATISTICS ===',$eol_str; print 'temporary files read=',$temp_files_read,$eol_str; print 'total orthologous introns found=',$total_orth_introns_found,$eol_str; for (my $c_qu=0; $c_qu<=2; $c_qu++) { print 'total qualitily ',$c_qu,'=',$introns_qual_stat_hash{$c_qu},$eol_str; } print 'common intron discrepancy num=',$common_intron_discrep_c,$eol_str; print 'exec time=',get_hour_min_sec_time_elapsed_line(time),$eol_str; } #------------------ sub print_statistics_2 # for porthi ver 1.06 or greater { print '=== STATISTICS ===',$eol_str; print 'temporary files read=',$temp_files_read,$eol_str; print 'total orthologous introns found=',$total_orth_introns_found,$eol_str; print 'strict phase orthologous introns found=', $strict_phase_total_orth_introns_found,$eol_str; for (my $c_qu=0; $c_qu<=3; $c_qu++) { print 'total qualitily ',$c_qu,'=',$introns_qual_stat_hash{$c_qu},$eol_str; } print 'common intron discrepancy num=',$common_intron_discrep_c,$eol_str; print 'exec time=',get_hour_min_sec_time_elapsed_line(time),$eol_str; } #------------------ sub snooping_protein_files #(c_p1,c_p2) {my $c_p1_pos=@_[0]; my $c_p2_pos=@_[1]; my $c_intron_rec1=''; my $c_intron_rec2=''; my @aux; my $ind1=0; my $ind2=0; my $intron_num1=0; my $intron_num2=0; my $c_line_pos1=$c_start1+$c_p1_pos; my $c_line_pos2=$c_start2+$c_p2_pos; #print $c_line_pos1,' ',$c_line_pos2,$eol_str; # &press_any_key;; # print '>>',$in_temp_file_query,'<<',$eol_str; if (exists $p1_hash{$in_temp_file_query}) {$c_intron_rec1=$p1_hash{$in_temp_file_query}; $c_intron_rec1=~/position:/;$c_intron_rec1=$'; #print $c_intron_rec1,"\t",$eol_str,"HELLLOWWWW\n"; # &press_any_key; @aux=split/,/,$c_intron_rec1; for (my $i=0; $i<=$#aux; $i++) {if ($aux[$i]==$c_line_pos1) {$ind1=$i+1}} } else {print 'no in pEID1',$eol_str} if (exists $p2_hash{$in_temp_file_sbjct}) {$c_intron_rec2=$p2_hash{$in_temp_file_sbjct}; $c_intron_rec2=~/position:/;$c_intron_rec2=$'; # print $c_intron_rec2,$eol_str; # &press_any_key; @aux=split/,/,$c_intron_rec2; for (my $i=0; $i<=$#aux; $i++) {if ($aux[$i]==$c_line_pos2) {$ind2=$i+1}} } else {print 'no in pEID2',$eol_str} return $ind1,$ind2; } #------------------ sub snooping_dna_files { my $c_intron_rec1=''; my $c_intron_rec2=''; my $d_intron_num1_shift=0; my $d_intron_num2_shift=0; if (exists $d1_hash{$in_temp_file_query}) {$c_intron_rec1=$d1_hash{$in_temp_file_query}; $c_intron_rec1=~/phase:/;$c_intron_rec1=$'; $c_intron_rec1=~/\d+/; # print '>>>',$`,'<<<',$eol_str; # print $c_intron_rec1,$eol_str; # &press_any_key; $d_intron_num1_shift=length($`); } else {print 'no in dEID1',$eol_str} if (exists $d2_hash{$in_temp_file_sbjct}) {$c_intron_rec2=$d2_hash{$in_temp_file_sbjct}; $c_intron_rec2=~/phase:/;$c_intron_rec2=$'; $c_intron_rec2=~/\d+/; # print $c_intron_rec2,$eol_str; # &press_any_key; $d_intron_num2_shift=length($`); } else {print 'no in dEID2',$eol_str} return $d_intron_num1_shift,$d_intron_num2_shift; } #------------------ sub write_header {my $now_str=''; print '---------- PORTHI ver 1.07 --------- '; $now_str=localtime(time); print $now_str,$eol_str; print 'input file name=',$ifn,$eol_str; print $eol_str; } #-------------- sub handle_entry_acc #($c_line) {my $phases; my $intron_sizes; my $exon_sizes; my $splices; my ($line)=@_; $c_err_flg=0; #print 'i: ',$line; #&press_any_key; $line=&purge_eol_sp($line); #print 'a ',$line; #&press_any_key; $debug_c_sub++; # if ($line =~/^> \d+_/) if ($line =~/^> \w+/) {#last_min_mark print $debug_c_sub,' ',$&,' sq_len=',length ($c_sq),$eol_str; $last_id=$&; } # repeated if, correct it. if ($' !~/intron$phase:/) {handle_err('invalid intron record');} else {$line=$'; #print $line; &press_any_key; if ($' !~/size:/) {handle_err('invalid intron record size');} else {$line=$'; $phases=$`; #if (substr ($`,length($`)-1,1) eq ',') {chop($`);} $phases=&chop_if_chr($phases,','); @intron_phase=split / */,$phases; # join in cygwin perl give an extra glue at the end of the result #print '>>>',join "-",@intron_phase,'<<<',$eol_str; #print $line; &press_any_key; if ($line !~/intr_sum:/) {handle_err('invalid intron record size sum')} else {$line=$'; $intron_sizes=$`; $intron_sizes=&chop_if_chr($intron_sizes,','); @intron_size=split /,/,$intron_sizes; #print '>>>',join "-",@intron_size,'<<<',$eol_str; #print $line; #&press_any_key; if ($line !~/$;/) {handle_err('invalid intron record size sum val');} else {$line=$'; $intr_sum=$`; #print '>>>',$intr_sum,'<<<',$eol_str; #print $line; &press_any_key; if ($' !~/exon$size:/) {handle_err('invalid exon record');} else {$line=$'; #print $line; &press_any_key; if ($line !~/ex_sum:/) {handle_err('invalid exon record size sum');} else {$line=$'; $exon_sizes=$`; $exon_sizes=&chop_if_chr($exon_sizes,','); @exon_size=split /,/,$exon_sizes; #print '>>>',join "-",@exon_size,'<<<',$eol_str; #print $line; &press_any_key; if ($line !~/$;/) {handle_err('invalid exon record size sum val');} else {$line=$'; $ex_sum=$`; #print '>>>',$ex_sum,'<<<'; #print $line; &press_any_key; if ($' !~/{splice:/) {handle_err('invalid splice record');} else {$line=$'; $splices=&chop_if_chr($line,'}'); @splice_arr=split /,/,$splices; #print '>>>',join "-",@splice_arr,'<<<',$eol_str; #print $line; &press_any_key; &handle_sq; } } } } } } } } $c_entry_found=0; $c_sq=''; $err_c+=$c_err_flg; } #-------------- sub handle_sq { @exons=(); @introns=(); &convert_to_TCAGcl; #if ($last_id eq '> 4_NT_004321') {print 'convert pass',$eol_str}; if (&check_exon_intron_sequence == 0) {&get_exon_intron_len; #if ($last_id eq '> 4_NT_004321') {print 'get exon_intro_len pass',$eol_str}; &test_exon_intron_len; #if ($last_id eq '> 4_NT_004321') {print 'tst exon_intro_len pass',$eol_str}; &get_CDS_sq; &get_term_codon_stat; &add_glob_codon_stat; #last_min_mark &print_term_codon_stat0; @exons=(); @introns=(); &get_exon_intron_len2; &test_exon_CDS; } else {handle_err('invalid exon-intron-exon structure');} #print @exons; } #-------------- sub convert_to_TCAGcl_ori # U->T, non-TCAG -> N {my $not_TCAGN='BDEFHIJKLMOPQRSUVWXYZ'; my $not_tcagn='bdefhijklmopqrsuvwxyz.'; # include '.' to handle '..' for unknown intron in EID specification # presume will not interfere with unxpected '.' in sequence my $last_i=length($c_sq)-1; # don't know wether calculated ones in circle header for (my $i=0; $i<=$last_i; $i++) {if (substr($c_sq,$i,1) eq 'U') {substr($c_sq,$i,1)='T';} if (substr($c_sq,$i,1) eq'u') {substr($c_sq,$i,1)='t';} if (index($not_TCAGN,substr($c_sq,$i,1)) >=0) {substr($c_sq,$i,1)='N';} if (index($not_tcagn,substr($c_sq,$i,1)) >=0) {substr($c_sq,$i,1)='n';} } } #-------------- sub convert_to_TCAGcl # U->T, non-TCAG -> N {#my $not_TCAGN='BDEFHIJKLMOPQRSUVWXYZ'; #my $not_tcagn='bdefhijklmopqrsuvwxyz.'; #my $NNNT= 'NNNNNNNNNNNNNNNTNNNNN'; #my $clnt= 'nnnnnnnnnnnnnnntnnnnnn'; # include '.' to handle '..' for unknown intron in EID specification # presume will not interfere with unxpected '.' in sequence $c_sq=~tr/BDEFHIJKLMOPQRSUVWXYZ/NNNNNNNNNNNNNNNTNNNNN/; $c_sq=~tr/bdefhijklmopqrsuvwxyz./nnnnnnnnnnnnnnntnnnnnn/; #print substr($c_sq,1,80); #&press_any_key; } #---------------- sub check_exon_intron_sequence # bit-map # left right # 00 00 all correct exon-intron-exon # 01 00 intron-exon # 00 01 exon-intron # 01 01 intron-exon-intron, others - other sequence defects {my $err_exon_intron_sequence=0; #print '!',substr($c_sq,0,1),'!'; #print 'this l ',$TCAGN,index($TCAGN,substr($c_sq,0,1)); if (index($TCAGN,substr($c_sq,0,1)) >=0) {$err_exon_intron_sequence=0;} else {if (index($tcagn,substr($c_sq,0,1))>=0) {$err_exon_intron_sequence=1;} else {$err_exon_intron_sequence=2;} } $err_exon_intron_sequence=$err_exon_intron_sequence*4; #print '#',$err_exon_intron_sequence,' ',substr($c_sq,-1,1),'#'; if (index($TCAGN,substr($c_sq,-1,1))>=0) {$err_exon_intron_sequence+=0;} else {if (index($tcagn,substr($c_sq,-1,1)) >=0) {$err_exon_intron_sequence+=1;} else {$err_exon_intron_sequence+=2;} } #print '#>>',$err_exon_intron_sequence,substr($c_sq,-1,1),' <<#'; return $err_exon_intron_sequence; } #-------------- sub get_exon_intron_len_ori # assuming right exon-intron-exon structure {my $exon_flg=1; my $last_pos=0; my $last_i=length($c_sq)-1; # don't know wether calculated ones in circle header for (my $i=0; $i<=$last_i; $i++) {if (index($TCAGN,substr($c_sq,$i,1))>=0) {if ($exon_flg==0) # new exon goes {push @introns,$i-$last_pos; $last_pos=$i; $exon_flg=1;} } if (index($tcagn,substr($c_sq,$i,1))>=0) {if ($exon_flg==1) # new intron goes {push @exons, $i-$last_pos; $last_pos=$i; $exon_flg=0;} } } push @exons, length($c_sq)-$last_pos; #handle last exon } #-------------- sub get_exon_intron_len_6sec_vs_ori # assuming right exon-intron-exon structure {my $exon_flg=1; my $last_pos=0; my $last_i=length($c_sq)-1; # don't know wether calculated ones in circle header for (my $i=0; $i<=$last_i; $i++) {if (substr($c_sq,$i,1)=~tr/TCAGN/TCAGN/>0) {if ($exon_flg==0) # new exon goes {push @introns,$i-$last_pos; $last_pos=$i; $exon_flg=1;} } else { if (substr($c_sq,$i,1)=~tr/tcagn/tcagn/>0) {if ($exon_flg==1) # new intron goes {push @exons, $i-$last_pos; $last_pos=$i; $exon_flg=0;} } } } push @exons, length($c_sq)-$last_pos; #handle last exon } #-------------- sub get_exon_intron_len_ord # assuming right exon-intron-exon structure # assuming right TCAGN encoding {my $exon_flg=1; my $last_pos=0; my $last_i=length($c_sq)-1; # don't know wether calculated ones in circle header for (my $i=0; $i<=$last_i; $i++) { #if ($last_id eq '> 4_NT_004321') # {if (($i % 1000) == 0) {print 'c_pos=',$i,$eol_str}}; if (ord(substr($c_sq,$i,1))1, 11-20->2, etc {my $ind=0; my $c_len=@_[0]; if ($c_len > 0) {$ind=($c_len-1)/10+1; if ($ind > 200) {$ind=201} $exon_len_stat[$ind]++; $exon_len_stat[0]++; $glob_exon_len_sum=$glob_exon_len_sum+$c_len; } } #------------------------- sub add_intr_len_stat #(c_len) # in 100 bp bins starting from 1: 1-100->1, 11-200->2, etc {my $ind=0; my $c_len=@_[0]; if ($c_len > 0) {$ind=($c_len-1)/100+1; if ($ind > 200) {$ind=201} $intr_len_stat[$ind]++; $intr_len_stat[0]++; $glob_intr_len_sum=$glob_intr_len_sum+$c_len; } } #------------------------- sub print_exon_len_stat # in 10 bp bins starting from 1: 1-10->1, 11-20->2, etc {print ' exon length statistics',$eol_str; print ' from to count',$eol_str; my $i=1; for ($i=1;$i<=200; $i++) {printf " %4g %4g %8g",($i-1)*10+1,$i*10, $exon_len_stat[$i]; print $eol_str; # if (($i % 20 ) eq 0) {&press_any_key;} } $i=201; printf " %4g >%4g %8g",($i-1)*10+1,($i-1)*10+1,$exon_len_stat[$i]; print $eol_str; print ' mean exon length='; printf "%10.3f",$glob_exon_len_sum/$exon_len_stat[0]; print $eol_str; } #------------------------- sub print_intr_len_stat # in 100 bp bins starting from 1: 1-100->1, 101-200->2, etc {print ' intron length statistics',$eol_str; print ' from to count',$eol_str; my $i=1; for ($i=1;$i<=200; $i++) {printf " %5g %5g %8g",(($i-1)*100)+1,$i*100, $intr_len_stat[$i]; print $eol_str; # if (($i % 20 ) eq 0) {&press_any_key;} } $i=201; printf " %4g >%4g %8g",(($i-1)*100)+1,(($i-1)*100)+1,$intr_len_stat[$i]; print $eol_str; print ' mean intron length='; printf "%10.3f",$glob_intr_len_sum/$intr_len_stat[0]; print $eol_str; } #-------------- sub test_exon_CDS # assuming direct @exons and @introns store {my $c_e_start=0; my $closed_exon_exist_flg=0; #print "@exons",$eol_str; for (my $i=0;$i<=$#exons; $i++) {$c_sq2=substr($c_sq,$c_e_start,$exons[$i]); #print $c_e_start,$exons[$i],$eol_str; #print $c_sq2,$eol_str; #&press_any_key; &get_term_codon_stat; #print "@c_term_stat",$eol_str; if ($i!=$#exons) {$c_e_start=$c_e_start+$exons[$i]+$introns[$i];} #last_min_mark &print_term_codon_stat3($i+1); #print '$c_intr_len=',$c_intr_len,$eol_str; #print '$c_intr_len=',$c_intr_len,' decl:',$intron_size[$i],$eol_str; #&press_any_key; if (($c_term_stat[0] !=0) and ($c_term_stat[1] !=0) and ($c_term_stat[2] !=0)) {$glob_exon_stat[1]++; $closed_exon_exist_flg=1;} else {$glob_exon_stat[0]++;} } if ($closed_exon_exist_flg ==1 ) {$glob_closed_exon_entry_num++;} } #-------------- sub tst_cir # assuming right exon-intron-exon structure {my $exon_flg=1; my $sum1=0; my $last_i=3600000; # don't know wether calculated ones in circle header for (my $i=0; $i<=$last_i; $i++) { $sum1=$sum1+index($TCAGN,substr($c_sq,1,1))+index($TCAGN,substr($c_sq,2,1)); } print 'sum1=',$sum1,' ',chr(0x61),chr(0x62),$eol_str; } #-------------- sub tst_cir_while # assuming right exon-intron-exon structure {my $exon_flg=1; my $sum1=0; my $last_i=3600000; # don't know wether calculated ones in circle header my $i=0; while ( $i<$last_i ) { $sum1=$sum1+index($TCAGN,substr($c_sq2,1,1))+index($TCAGN,substr($c_sq2,2,1)); $i++; } print 'sum1=',$sum1,' ',chr(0x61),chr(0x62),$eol_str; } #-------------- sub surplaceaa {$c_sq2=$c_sq; $c_sq2=~tr/TCAGNtcagn/AAAAAaaaaa/; } #-------------- sub test_exon_intron_len {my $c_intr_sum=0; my $sq_intr_sum; $sq_intr_sum=0; my $c_intr_len=0; for (my $i=$#intron_size; $i>=0; $i--) {$c_intr_len=pop @introns; #print '$c_intr_len=',$c_intr_len,$eol_str; if (not defined($c_intr_len)) {$c_intr_len=0;} #print '$c_intr_len=',$c_intr_len,' decl:',$intron_size[$i],$eol_str; #&press_any_key; $sq_intr_sum+=$c_intr_len; if ($intron_size[$i] ne 'u') {if ($intron_size[$i] != $c_intr_len) {handle_err('invalid intron len'); #&press_any_key; } } } if ($intr_sum != -1) {if ($intr_sum != $sq_intr_sum) {handle_err('invalid intron len sum');} } my $c_exon_sum=0; my $sq_exon_sum=0; my $c_exon_len; for (my $i=$#exon_size; $i>=0; $i--) {$c_exon_len=pop @exons; if (not defined($c_exon_len)) {$c_exon_len=0;} $sq_exon_sum+=$c_exon_len; if ($exon_size[$i] != $c_exon_len) {handle_err('invalid exon len'); #&press_any_key; } } if ($ex_sum != $sq_exon_sum) {handle_err('invalid exon len sum');} } #-------------- sub handle_err #($c_message) {print $acc_indent,@_,$eol_str; $c_err_flg=1; } #-------------- sub add_row_sq_line #($loc_l) {my $loc_l=$_[0]; # deleteing 0D0A and spaces $c_sq.=&purge_eol_sp($loc_l); #if ($last_id eq '> 4_NT_004321') # {print 'c_sq len=',length($c_sq),$eol_str;} } #-------------- sub chop_if_chr #(string;chr) {my $loc_string=$_[0]; #print 'char=',ord($_[1]),' string=',$loc_string,' substr=',ord(substr ($loc_string,-1,1)); if (substr ($loc_string,-1,1) eq $_[1]) {chop($loc_string);} #print 'after last=',ord(substr ($loc_string,-1,1)); return $loc_string; } #-------------- sub del_trail_spaces #($loc_l) {$_=$_[0]; s/ +$//;$_=$_} #deleting trailing spaces #-------------- sub tst1 {my $line='qwerty'.chr(13).chr(10); print $line; print length($line); chomp($line); $line=&chop_if_chr($line,chr(13));#chop($line); # deleteing 0D0A print 'after ',$line,' ',length($line); } #-------------- sub del_eol #($loc_l); {my $loc_string=$_[0]; chomp($loc_string); $loc_string=&chop_if_chr($loc_string,chr(13)); #chop($line); # deleteing 0D0A return $loc_string; } #-------------- sub tst2 {my $line='qwerty '.chr(13).chr(10); print $line; print length($line); $line=&purge_eol_sp($line); print 'after ',$line,' ',length($line); } #-------------- sub tst3 {my $line='qwerty '.chr(13).chr(10); print $line; print length($line); $line=&purge_eol($line); print 'after ',$line,' ',length($line); } #-------------- sub tst4 {my $line='qwerty '.chr(10); print $line; print length($line); #$line=&purge_eol_sp($line); $line=&purge_eol_sp($line); print 'after ',$line,' ',length($line); } #-------------- sub purge_eol #($loc_l) {my $loc_l=$_[0]; print 'i: ',$loc_l,' ',length($loc_l); chomp($loc_l); print 'chomp: ',$loc_l,' ',length($loc_l); $loc_l=&chop_if_chr($loc_l,chr(13)); #print 'chopic: ',$loc_l,' ',length($loc_l); } #-------------- sub purge_eol_sp #($loc_l) {my $loc_l=$_[0]; chomp($loc_l); $loc_l=&chop_if_chr($loc_l,chr(13)); $loc_l=&del_trail_spaces($loc_l); } #-------------- sub press_any_key { read STDIN,my $a_key,1; $a_key='';} #-------------- sub get_hour_min_sec_time_elapsed_line($c_time) {my $t_2=time-$t_1; my $out_str; return $out_str=sprintf ("%6d h %2d m %2d s", $t_2 / 3600, ($t_2 % 3600)/60,$t_2 % 60); } #---- end of lib ----- # #---- Rewiew history ---------------------------- # v1.00 first version, principal error known 05.03.05 # v1.01 first version, intron number output # v1.02 first version, pre-mRNA intron number output 09.03.05 # v1.03 statistics output 14.03.05 # v1.04 more then 1 BLAST output in temporary file 27.03.05 # v1.05 new table output format 27.03.05 # v1.06 conserved current version 1.05 06.06.05 # v1.07 stem ORTHOLOG_GB file ORTHDBFV handle 07.09.05 #------------------------------------------------ #---- known problems # 1. No inter-file data consistency check # 2. No suppresson of pEID format errors (but (i) # there are exists special program Peidd4 for format check # for old pEID format (ii) nor format error found today # 3. Only common introns considered. # # 4. NO consistency check of output3 line and gene pair output # 5. NO FOOL PROTECTION for output file. It is of user's to # direct output to the proper place. # 6. Little or no time-course of data processing # 7. No specific keys to enhance program flexibility # 8. No user menu supplied # #---- end of prog ----