#!/usr/bin/perl -w
# Serge Saxonov, Iraj Daizadeh, Alexei Fedorov and Walter Gilbert
# Department of Molecular and Cellular Biology
# Harvard University
# Cambridge, MA 02138
# Reference: Serge Saxonov, Iraj Daizadeh, Alexei Fedorov, and Walter Gilbert
#       Nucleic Acids Res. 2000 Jan 1;28(1):185-190
#SPL spl notes 
#SPL spl reduction in Sept 2004
#SPL ver 1.03 20.10.04
#SPL ver 1.04 21.10.04
#SPL ver 1.05 22.10.04
#SPL ver 1.06 01.11.04
#SPL ver 1.07 07.11.04
#SPL ver 1.08 12.11.04
#SPL ver 1.09 17.11.04
#SPL ver 1.10 19.11.04
#SPL ver 1.11 19.11.04
#SPL ver 1.12 02.12.04
#SPL ver 1.13 11.12.04
#SPL ver 1.14 16.12.04
#SPL ver 1.15 17.12.04
#SPL ver 1.16 17.12.04
#SPL ver 1.17 15.01.05
#SPL ver 1.18 19.08.05
#SPL ver 1.19 04.09.05
#SPL ver 1.20 11.09.05
#SPL ver 1.21 13.09.05
#SPL ver 1.22 15.09.05
#SPL ver 1.23 18.09.05
#SPL ver 1.24 25.10.05
#SPL ver 1.25 01.11.05
#SPL ver 1.26 08.11.05
#SPL ver 1.27 18.11.05
#SPL ver 1.28 25.12.05
#SPL ver 1.29 26.12.05
#SPL ver 1.30 28.12.05
#SPL ver 1.31 01.01.06
#SPL ver 1.32 02.01.06
#SPL ver 1.33 05.01.06
#SPL ver 1.34 29.06.06
#SPL ver 1.35 11.07.06
#SPL ver 1.36 19.07.06
#SPL ver 1.37 23.08.06
#SPL ver 1.37e 01.09.06
#SPL ver 1.38e 01.09.06
#SPL ver 1.39e 01.09.06
#SPL ver 1.40e 13.09.06
#SPL ver 1.41e 21.09.06
#SPL ver 1.42e 21.09.06
#SPL ver 1.43e 21.09.06
#SPL ver 1.44e 21.09.06
#SPL ver 1.45e 21.09.06
#SPL ver 1.46e 07.10.06
#SPL ver 1.47e 18.10.06
#SPL ver 1.48e 19.10.06
#SPL ver 1.49e 20.10.06
#SPL ver 1.50e 20.10.06
#SPL ver 1.51e 20.10.06
#SPL ver 1.52e 20.10.06
#SPL ver 1.53e 08.04.07
#SPL ver 1.54e 18.07.07
#SPL ver 1.55e 26.07.07  
#SPL ver 1.55ee 05.08.07  

## check_translations.pl --  The program to parse the files produced by first_parse.pl
##                           check that the translations are present and correspond in 
##                           length to that specified in the CDS join feature. For each 
##                           CDS join, it outputs the information about the locus and the 
##                           CDS join in a separate entry in $prefix.RAWchecked. STDIN
##                           is used for input.
my $c_ver='1.56e';
my $txt_file_style=1;# =0 DOS-like style; =1 UNIX-like style     #SPL
my $eol_str;                                                     #SPL 
my @gene_blocks;
my @CDS_blocks;
my $coding_gene_flg=0; #a19 
my $intronless_c=0; #SPL counter of intronless CDS; 
if ($txt_file_style==0) {$eol_str="\r\n";} else {$eol_str="\n";} #SPL 

#&test_expand_1nucl; 
#&test_diff_num;
#exit; exit;


($#ARGV != -1) || die "Usage: check_translations.pl prefix\n";
$prefix = $ARGV[0];
$modeCDS= $ARGV[1];
open(OUT, ">$prefix.RAC") || die;#SPL ORI open(OUT, ">$prefix.RAWchecked") || die;
open(INP_F,"$prefix.RAW") || die "unable to open $prefix.RAW"; #SPL
open(TRACEF, ">>${prefix}.tEID") || die "unable open ${prefix}.tEID";
open(STATF, ">>${prefix}.sEID") || die "unable open ${prefix}.sEID"; #a19



&trace("modeCDS=$modeCDS",0); #SPL

$real_cds_counter = 0;
$cds_counter = 0;
$cds_feature_counter = 0;
$gene_counter = 0; #SPL new counter due to new spec
$coding_gene_counter=0; #a19
$OK_gene_counter = 0; #SPL new counter due to new spec, passed verification 
$non_alt_gene_counter=0; #SPL counter of genes without alternative splicing; 
$alt_gene_counter=0;     #SPL counter of genes with    alternative splicing; 
$alt_isoforms_c=0;       #a19 total number of alternatively spliced isoforms 
$mRNA_match_controvers_c=0; #SPL counter of contraversal mRNA assignment events 
$skipped_cds = 0; 
$mRNA_assign_err_c=0;       #number of CDS with no mRNA assignment 
$CDS_format_error_c=0; 

$null_CDS_gene_counter=0;
$mRNA_match_controvers_gene_c=0;
$err_get_mRNA_CDS_c=0;
$aux_c1=0;
$aux_c3=0;
$aux_c4=0;
my $aux_gene_name='???';
my $gene_counter_in_contig=0;
my $locus_overlap_c=0;
my $locus_overlap_c_in_genes=0;
my $locus_overlapped_c=0; #a19 numer of genes paticipating in overlap
my $CDS_non_overlap_event_c=0;
my $CDS_non_overlap_c=0; 
my $CDS_gene_name_mismatch_c=0; 
my $CDS_gene_name_mismatch_locus_c=0; 
my $c_CDS=0; 
my $in_gene_CDS_gene_name_mismatch_c=0; 
my $dismissed_alt_gene_c=0; 
my $total_improvements_in_second_round=0; 
my $final_improvements_in_second_round=0; 
my $number_fails_in_second_round=0; 
my $alt_mRNA_while_CDS_intronless_c=0; 
my $spliced_mRNA_while_CDS_intronless_c1=0; 
my $all_mRNA_equ_c=0; my $prot_id=''; 
my $no_protein_id_check_phase=0; 
my $CDS_exon_num_err_c=0; # number of errors in calculating CDS_exon_num 
my $CDS_exon_num_equ1_err_c=0; # same when erroneous value is 1 
my $aux_CDS_exon_num_diff_c=0; 
my $CDS_exon_num_equ21_err_defpid_c=0; 
my $CDS_exon_num_equ21_err_nopid_c=0; 
my $CDS_exon_num_err_defpid_c=0; 
my $CDS_exon_num_err_nopid_c=0; 
my $corr_intronless_c=0; 
my $spliced_mRNA_while_CDS_intronless_defpid_c1=0; 
my $corr_intronless_defpid_c=0; 
my $CDS_exon_num_equ0_c=0; 
my $spliced_mRNA_avail_flg=0; #a19 only declaration 
my $deb_c=0; 

my $CDS_pseudo_c=0;
my $corr_intronless_CDS_pseudo_c=0;
my $corr_spliced_CDS_pseudo_c=0;
my $corr_intronless_CDS_pseudo_defpid_c=0;
my $corr_spliced_CDS_pseudo_defpid_c=0;
my $pseudo_in_alt_gene_c=0;
my $pseudo_flg=0;
my $rejected_mRNA_recs_due_to_err=0;

# declaration of alternative exons counts part
my $total_alt_mRNA_gene_c=0;
my $total_OK_mRNA_in_alt_mRNA_gene_c=0;
my $total_irremovable_exons_c=0;
my $total_alternative_exons_c=0;
my $total_diff_alt_c=0;
my $total_skipped_exon_c=0;
my $total_skipped_internal=0;
my $total_trunc_class_num=0;
my $total_others_num=0;
my $total_retained_intron_c=0;
my $total_retained_intron_in_trunc_c=0;
my $total_retained_intron_others_c=0;
my $total_retained_intron_others_cc=0;


$/ = "";


while($cur_entry = <INP_F>){
    ($id) = ($cur_entry =~ /\nLOCUS\s+(\S+)/);
#$deb_c++;
#print 'deb_c=',$deb_c,$eol_str;&press_any_key;
#print $cur_entry;&press_any_key;

#print 'id:>>>',$id,'<<<';&press_any_key;
    (defined($id)) || die "Couldn't resolve the LOCUS line in $cur_entry\n";
    &do_checks2($id, $cur_entry);
}

&trace("total number of genes=$gene_counter",0); #SPL
&trace("total number of OK genes=$OK_gene_counter",0); 
&trace("total number of genes without alt splicing=$non_alt_gene_counter",0); 
&trace("total number of genes with    alt splicing=$alt_gene_counter",0); 
if ($dismissed_alt_gene_c>=0) {
   &trace("total number of  locus  overlap  events   =$locus_overlap_c",0); 
   &trace("total number of  locus  overlapped paires =$locus_overlap_c_in_genes",0); 
   &trace("total number of nonoverlapped CDS events  =$CDS_non_overlap_event_c",0); 
   &trace("total number of locuses with nonoverlapped CDS=$CDS_non_overlap_c",0); 
   &trace("total number of events of CDS gene name mismatch=$CDS_gene_name_mismatch_c",0); 
   &trace("total number of locuses with CDS gene name mismatch=$CDS_gene_name_mismatch_locus_c",0); 
   &trace("total number of dismissed alt splice gene name counter=$dismissed_alt_gene_c",0); 
   my $aux_cc=$alt_gene_counter-$dismissed_alt_gene_c; 
   &trace("corrected total number of genes with alt splicing=$aux_cc",0); 
}  
#SPL if ($modeCDS ne 'nowi') 
   {&trace("total number of controversial mRNA assignment=$mRNA_match_controvers_c",0); #SPL
    &trace("total number of genes with controversial mRNA assignment=$mRNA_match_controvers_gene_c",0); #SPL
    &trace("total_improvements_in_second_round=$total_improvements_in_second_round",0);#SPL
    &trace("final_improvements_in_second_round=$final_improvements_in_second_round",0);#SPL
    &trace("number of fails in second round=$number_fails_in_second_round",0);#SPL
    &trace("number of events when all controversial mRNA assign equ=$all_mRNA_equ_c",0);
   }
&trace("total number of CDS features=$cds_feature_counter",0);    #SPL
&trace("total number of CDS to store=$cds_counter",0);    #SPL
#no further &trace("number of intronless CDS=$intronless_c",0); #SPL
&trace("corr_intronless_defpid_c=$corr_intronless_defpid_c",0);
&trace("number of spliced mRNA for intronless CDS=$alt_mRNA_while_CDS_intronless_c",0);#SPL
&trace("number of intronless CDS while spliced mRNA available=$spliced_mRNA_while_CDS_intronless_c1",0);
&trace("spliced_mRNA_while_CDS_intronless_defpid_c1=$spliced_mRNA_while_CDS_intronless_defpid_c1",0);
&trace("number of mRNA assignment errors=$mRNA_assign_err_c",0); #SPL
&trace("CDS format errors=$CDS_format_error_c",0); #SPL
&trace("no_protein_id_total=$no_protein_id_check_phase",0);
&trace("CDS_exon_num_err_c=$CDS_exon_num_err_c",0); 
&trace("CDS_exon_num_equ1_err_c=$CDS_exon_num_equ1_err_c",0);
&trace("aux_CDS_exon_num_diff_c=$aux_CDS_exon_num_diff_c",0);
&trace("CDS_exon_num_equ21_err_defpid_c=$CDS_exon_num_equ21_err_defpid_c",0);
&trace("CDS_exon_num_equ21_err_nopid_c=$CDS_exon_num_equ21_err_nopid_c",0);
&trace("CDS_exon_num_err_defpid_c=$CDS_exon_num_err_defpid_c",0);
&trace("CDS_exon_num_err_nopid_c=$CDS_exon_num_err_nopid_c",0);
&trace("corr_intronless_c=$corr_intronless_c",0);
&trace("CDS_exon_num_equ0_c=$CDS_exon_num_equ0_c",0);
&trace("null_CDS_gene_counter=$null_CDS_gene_counter",4);
&trace("err_get_mRNA_CDS_c=$err_get_mRNA_CDS_c",4);

&trace("number of CDS with two or more exons: ".$aux_c1,4);
&trace("aux err assignment: ".$aux_c3,4);
&trace("!!! err           : ".$aux_c4,4);
&trace("CDS with pseudo tag: ".$CDS_pseudo_c,0);
&trace("spliced CDS with pseudo tag:".$corr_spliced_CDS_pseudo_c,0);
&trace("intronless_CDS with pseudo tag:".$corr_intronless_CDS_pseudo_c,0);
&trace("spliced CDS with pseudo tag defined protein_id:".$corr_spliced_CDS_pseudo_defpid_c,0);
&trace("intronless_CDS with pseudo tag defined protein_id:".$corr_intronless_CDS_pseudo_defpid_c,0);
&trace("pseudo gene in alternatively spliced gene:".$pseudo_in_alt_gene_c,0);
&trace("rejected mRNA records due to format errors:".$rejected_mRNA_recs_due_to_err,0);
&trace("  --- alternative mRNA exons count ---",0);
&trace("total number of genes with alternative mRNA................... ".$total_alt_mRNA_gene_c,0);
&trace("total number of correct mRNA in genes with alternative mRNA... ".$total_OK_mRNA_in_alt_mRNA_gene_c,0);
&trace("total number of constitutive exons............................ ".$total_irremovable_exons_c,0);
&trace("total number of alternative exons in all mRNAs................ ".$total_alternative_exons_c,0);
&trace("total number of different alternative exons................... ".$total_diff_alt_c,0);
&trace("total number of skipped exons................................. ".$total_skipped_exon_c,0);
&trace("total number of skipped-internal exons........................ ".$total_skipped_internal,0);
&trace("total number of islands with truncated exons.................. ".$total_trunc_class_num,0);
&trace("total number of other islands................................. ".$total_others_num,0);
&trace("total number of islands with retained introns................. ".$total_retained_intron_c,0);
&trace("total number of islands with retained introns and truncated",0);
&trace("                exons......................................... ".$total_retained_intron_in_trunc_c,0);
&trace("total number of other islands with retained introns........... ".$total_retained_intron_others_c,0);
&trace("total number of retained introns other islands................ ".$total_retained_intron_others_cc,0);
&trace("version $c_ver",0);
close TRACEF; #SPL
# STAT file output #a19
&trace_stat("STATISTICS OF GENOMIC EID",10); #a19
&trace_stat("",0); #a19
&trace_stat("A.General:",0); #a19
&trace_stat("Total number of Gene blocks in GenBank........................ $gene_counter",0); #a19
&trace_stat("Total number of protein coding genes.......................... $coding_gene_counter",0); #a19
&trace_stat("Total number of protein coding genes having introns within",0); #a19
&trace_stat("      CDS region.............................................. $OK_gene_counter",0); #a19 
&trace_stat("Total number of genes without alt splicing.................... $non_alt_gene_counter",0); #SPL 
&trace_stat("Total number of genes with alternative splicing............... $alt_gene_counter",0); #a19 
&trace_stat("Total number of alternatively spliced isoforms................ $alt_isoforms_c",0); #a19 
&trace_stat("Total number of overlapped protein coding genes in EID........ $locus_overlapped_c",0); #a19 
#&trace("Total number of intronless protein-coding genes =$locus_overlap_c_in_genes",0); #SPL 
&trace_stat("  --- alternative mRNA exons count ---",0);
&trace_stat("total number of genes with alternative mRNA................... ".$total_alt_mRNA_gene_c,0);
&trace_stat("total number of correct mRNA in genes with alternative mRNA... ".$total_OK_mRNA_in_alt_mRNA_gene_c,0);
&trace_stat("total number of constitutive exons............................ ".$total_irremovable_exons_c,0);
&trace_stat("total number of alternative exons in all mRNAs................ ".$total_alternative_exons_c,0);
&trace_stat("total number of different alternative exons................... ".$total_diff_alt_c,0);
&trace_stat("total number of skipped exons................................. ".$total_skipped_exon_c,0);
&trace_stat("total number of skipped-internal exons........................ ".$total_skipped_internal,0);
&trace_stat("total number of islands with truncated exons.................. ".$total_trunc_class_num,0);
&trace_stat("total number of other islands................................. ".$total_others_num,0);
&trace_stat("total number of islands with retained introns................. ".$total_retained_intron_c,0);
&trace_stat("total number of islands with retained introns and truncated",0);
&trace_stat("                exons......................................... ".$total_retained_intron_in_trunc_c,0);
&trace_stat("total number of other islands with retained introns........... ".$total_retained_intron_others_c,0);
&trace_stat("total number of retained introns other islands................ ".$total_retained_intron_others_cc,0);

close STATF; #a19
#print 'before exit',$eol_str; &press_any_key;
#&press_any_key;
exit; #SPL

sub do_checks{
    
    my $output = "";
    my @genes;
    my $header;
    @entrylines = split(/\n/, $cur_entry);
    ### get the header info
    for($k = 1; ;$k++){
        $header .= "$entrylines[$k]\n";
        if($entrylines[$k] =~ /^     source          /){
            last;
        }
    }
    while(1){
        $k++;
        if($entrylines[$k] !~ /^                    /){
            last;
        }
        else{
            $header .= "$entrylines[$k]\n";
        }
    }
    ### finished getting the header info
    #SPL entire header including only first source field
    $i = 2;        ## this will keep track of where in @entrylines we currently are
    CDS:while(&get_cds){
    #&print_cds_line; #SPL
    #print "$cdsline"; #SPL
    #print "$cds_info"; #SPL
        $output = &check_cds($cds_info);
        if($output){
            push(@genes, $output);
        }
    }
    foreach $gene (@genes){
        ++$cds_counter;
        print OUT "\$ID $cds_counter\n";
        print OUT "${header}_HEADER_END_";
        print OUT "$gene\n\n";
    }
}
    

sub do_checks2{        
#SPL new procedure making RAWchecked file 
#SPL     consisting the block of gene, all mRNA of the gene, 
#SPL     and one CDS of that gene
#SPL     no translation, no mutual mRNA and CDS check
#SPL     since no sequence availble now due to program chart
    
    my $output = "";
    my @genes;
    my $header=''; #s11
    my $cds_c_in_gene=0; #SPL numbering of the CDS in gene 
    my $CDS_c_str=''; #SPL letter numbering of the CDS in gene 
    my $err_code=0;
    my $c_locus_flg=0;
    @entrylines = split(/\n/, $cur_entry);
    ### get the header info
    &trace($entrylines[0],0);
    &trace($entrylines[1],0);
#    print $entrylines[0], $eol_str; &press_any_key;
#    print $entrylines[1], $eol_str; &press_any_key;
#    print $#entrylines,  $eol_str; &press_any_key;
    for($k = 1; $k<=$#entrylines;$k++){
#    print 'k=',$k,  $eol_str; &press_any_key;
        $header .= "$entrylines[$k]\n";
        if($entrylines[$k] =~ /^     source          /){
            last;
        }
    }
    while($k <= $#entrylines){
        $k++;
        if($entrylines[$k] !~ /^                    /){
            last;
        }
        else{
            $header .= "$entrylines[$k]\n";
        }
    }
    ### finished getting the header info
    #SPL entire header including only first source field
    $i = 2;        ## this will keep track of where in @entrylines we currently are
    @gene_blocks=();
    &get_gene_block;
    #&print_cds_line; #SPL
#    &print_cds_line($gene_block); #SPL
#    &press_any_key; #SPL
#    print $header,"\n";
#    print 'gene_block size=',$#gene_blocks; #&press_any_key;#SPL
#    @gene_blocks are going separately for each contig;
#    &press_any_key;
    $gene_counter_in_contig=0;
    $old_gene_num=0;
    $old_gene_num3=0; #for CDS comparison in single gene in ..CDS02
    foreach $gene_block (@gene_blocks){
#       print $gene_block,"\n"; &press_any_key;
        $c_locus_flg=0;
        $in_gene_CDS_gene_name_mismatch_c=0;
        @CDS_blocks=();
        my $op_res=($gene_block=~/\/(gene|locus_tag)=\"([\040-\176]+)\"/);
        if ($op_res) {$aux_gene_name=$2}
#EMBL_GB
            else {$op_res=($gene_block=~/\/(gene|locus_tag)=\s*([\041-\176]+)/);
                  if ($op_res) {$aux_gene_name=$2}
                 }      
        $coding_gene_flg=0; #a19
        $pseudo_flg=0; #s15
        if ($modeCDS eq 'nowi') {&get_mRNA_CDS0} 
#                           else {&get_mRNA_CDS}
                           elsif ($modeCDS eq 'gb149') {&get_mRNA_CDS_GB149}
                           else {&get_mRNA_CDS_GB149}
#     print ">>>\n";
#     print $gene_block,"\n";
#     print "<<<";
#     &press_any_key;
        $cds_c_in_gene=0;  
        $CDS_c_str=''; 
        $gene_counter++;
        $gene_counter_in_contig++;
        if ($coding_gene_flg==1) {$coding_gene_counter++} #a19
        if ($#CDS_blocks>=0) {
           $OK_gene_counter++;
#           print ">>>>$#CDS_blocks<<<<\n"; &press_any_key;#SPL debug 
           if ($#CDS_blocks==0) {$non_alt_gene_counter++} 
           elsif ($#CDS_blocks>0){$alt_gene_counter++;
           #SPL now count only CDS which has corresponding mRNA
           $alt_isoforms_c=$alt_isoforms_c+$#CDS_blocks+1; #a19
           if ($pseudo_flg==1) {$pseudo_in_alt_gene_c++} #s15
           } else {$null_CDS_gene_counter++}
        } else {$err_get_mRNA_CDS_c++}
        if ($#CDS_blocks>=0) {
        $c_CDS=0;
        foreach $CDS_block (@CDS_blocks){
        $c_CDS++;
#        if ($aux_gene_name eq 'GAGE1')
#        {print $CDS_block,"\n"; &press_any_key;}
            ++$cds_counter;
        $CDS_c_str=$OK_gene_counter; #previous $gene_counter;
        if ($#CDS_blocks>0) 
           {$CDS_c_str.=&get_CDS_c_str(++$cds_c_in_gene);
#            print $#CDS_blocks,' ',$cds_c_in_gene," $CDS_c_str\n"; 
           }
# check CDS over contig , check CDS gene names; can be encapsulated
  $dismissed_alt_gene_c=-1; #dummy param to out some statistics out_for_GB149_251205
#out_for_GB149_251205        
    if ($modeCDS ne 'gb149') {&scan_entire_contig_vs_CDS;} #deb 16.11.04
#out_for_GB149_251205        
    if ($modeCDS ne 'gb149') {$c_locus_flg=&check_CDS_gene_names($c_locus_flg); }
        print OUT "\$ID $CDS_c_str\n";
#        print OUT "\$ID $cds_counter\n";
        print OUT "${header}_HEADER_END_\n";
        print OUT "$CDS_block\n\n";
        }
        if ($#CDS_blocks>0) {
           if (($#CDS_blocks-$in_gene_CDS_gene_name_mismatch_c) <=0)
              {$dismissed_alt_gene_c++;
               &trace("dismissed alt splice gene $aux_gene_name",4);
              }
        }
        } #end if  
    }
}
    


#sub convert_CDS_block_EMBLGB_GB #($CDS_block)
#  {my $c_E_mRNA_rec='';
#   my $c_E_CDS_rec='';
#   my $c_E CDS_block=@_[0];
#   $c_E CDS_block=~/\015//g;      
#   my @c_E_CDS_lines=split '\n',$c_E CDS_block;
#    
#  } 


sub get_cds{
    my $line;
    undef($codon_start);

    while(++$i <= $#entrylines){

        if($entrylines[$i] =~ /CDS\s+join|CDS\s+complement\(join/){ #)
            $real_cds_counter++;
            $cds_info = $entrylines[$i];
            $parenind = ($entrylines[$i] =~ tr/\(/\(/) - ($entrylines[$i] =~ tr/\)/\)/);
            $cdsline = $entrylines[$i];
            while($parenind != 0){
                $line = $entrylines[++$i];
                $cds_info .= "\n$line";
                $parenind += ($line =~ tr/\(/\(/);
                $parenind -= ($line =~ tr/\)/\)/);
                $cdsline .= $line;
            }
    

            $codon_start = 0;
            while(++$i <= $#entrylines){
                $line = $entrylines[$i];
                #### ($line =~ /^\s*$/) && next;
                $cds_info .= "\n$line";
                if($line =~ /^                     \/codon_start=(\d)/){
                    $codon_start = $1 - 1;
                }
             elsif($line =~ /^                     \/translation=/){
               $translation = $line;
               $translation =~ s/\/translation=\"//;
               while($translation !~ /\"/){
                  $line = $entrylines[++$i];
                  $cds_info .= "\n$line";
                  $translation .= $line;
               }
               $translation =~ s/\s+//g;
               $translation =~ s/\"//;
               if(!defined($codon_start)){
                  die "No codon_start before translation:$id\n$cdsline\n";
               }
               return 1;

            }
            elsif(($line !~ /^\s*$/) && ($line !~ /^             /)){ ## Some entries don't have translations.
               $cds_info =~ s/\n.*$//;
               # $codon_start = 0;           ## So we just make up dummy values
               $translation = "";           ## for $translation and $codon_start.
               $i--;
               return 1;                 ## This will be caught later, when we
                                      ## check for length of $translation.

            }
         }   
         $skipped_cds++;
         return 0;
         
      }
   }
   return 0;
}

sub get_gene_block{
#SPL new subroutine to extract block of lines belonging to one gene feature
    my $line;
    my $gene_flg=0;      #SPL 1 = gene block available
    my $gene_line_flg=0; #SPL 1 = gene, mRNA, or CDS lines
    undef($codon_start);
    $gene_block='';
    while(++$i <= $#entrylines){

        if ($entrylines[$i] =~ /^\s{5}mRNA\s{1}|^\s{5}CDS\s{1}/){ 
           $gene_line_flg=1; $gene_flg=1;}
        elsif ($entrylines[$i] =~ /^\s{5}gene\s{1}/){ 
            $gene_line_flg=1;
            if ($gene_flg==1) {push @gene_blocks,$gene_block; 
                               $gene_block='';
                              }
            $gene_flg=1;
        } #SPL end if
        elsif ($entrylines[$i] =~ /^\s{5}\w+/) {$gene_line_flg=0}
        if (($gene_line_flg==1) && ($gene_flg==1)) {$gene_block.="$entrylines[$i]\n"}
    }#SPL end while
   if ($gene_flg==1) {push @gene_blocks,$gene_block}
#   print 'gene_flg=',$gene_flg,' in sub gene_blocks_size=',$#gene_blocks;
#   &press_any_key;
   return 0;
}



sub get_mRNA_CDS{
#SPL new subroutine to extract block of lines belonging to one CDS feature
#SPL including gene feature and all mRNA feature 
    my @temp_g_b=split("\n",$gene_block);
    #print '$#temp_g_b=',$#temp_g_b;
    #&press_any_key;
    my $cds_flg=0;       #SPL 1 = cds block available
    my $gene_line_flg=0; #SPL 1 = gene, mRNA, or CDS lines
    my $CDS_line_flg=0;  #SPL 1 = gene, mRNA, or CDS lines
    my $CDS_block='';
    my $CDS_flg=0;
    my $mRNA_flg=0;
    my $mRNA_line_flg=0;
    my $gene_rec_end="_GENE_REC_END_\n";
    my $mRNA_rec_end="_mRNA_REC_END_\n";
    my $aux_str;
    my $i=0;
    my $mRNA_assing_controvers_flg=0;
    my $gene_block=''; $mRNA_block='';
    # first gather gene feaure and all mRNA features
    while($i<= $#temp_g_b){
        #  print $temp_g_b[$i],"\n";
        #  &press_any_key;
        if ($temp_g_b[$i] =~ /^\s{5}gene\s{1}/){ $gene_line_flg=1}
        elsif ($temp_g_b[$i] =~ /^\s{5}mRNA\s{1}/) { 
           $mRNA_line_flg=1;$gene_line_flg=0}
        elsif ($temp_g_b[$i] =~ /^\s{5}CDS\s{1}/) {$gene_line_flg=0;
           $mRNA_line_flg=0} 
        if ($gene_line_flg==1) {$gene_block.="$temp_g_b[$i]\n"}
        if ($mRNA_line_flg==1) {$mRNA_block.="$temp_g_b[$i]\n"}
        $i++;
    }#SPL end while
    # second prepare long and short mRNA description stack
    my @temp_m_b=split("\n",$mRNA_block);
#    print 'check $#temp_m_b=',$#temp_m_b;
#    &press_any_key;
    my $mRNA_rec='';
    @long_mRNA_stack=(); 
    @mRNA_stack=(); 
    $i=0;
    while($i<= $#temp_m_b){
#          print $temp_m_b[$i],"\n";
#          &press_any_key;
        if ($temp_m_b[$i] =~ /^\s{5}mRNA\s{1}/){ 
           $mRNA_line_flg=1;
           if ($mRNA_flg==1) {push @long_mRNA_stack,$mRNA_rec;
                              push @mRNA_stack,&purge_feature_record($mRNA_rec);
                             }
           $mRNA_flg=1;
           $mRNA_rec=$temp_m_b[$i]."\n";
        }
        else {if ($mRNA_line_flg==1) {$mRNA_rec.=$temp_m_b[$i]."\n"}}
        $i++;
    }#SPL end while
    if ($mRNA_flg==1) {push @long_mRNA_stack,$mRNA_rec;
                       push @mRNA_stack,&purge_feature_record($mRNA_rec);}
    # third add CDS feature for every CDS feature
    $i=0;$CDS_flg=0;$CDS_line_flg=0;
    $c_CDS_num=0;
    my $CDS_exon_num=0;
    my $mRNA_num=0;
    my $mRNA_match_c=0;
    my $err_code=0;
    my $SPL_note_rec='                     /SPL_note="mRNA assignment controversial"';
    my $aux_rec='';
    my $aux_RNA_block='';
    while($i<= $#temp_g_b){
        if ($temp_g_b[$i] =~ /^\s{5}CDS\s{1}/){ $CDS_line_flg=1;
           $cds_feature_counter++;
           $coding_gene_flg=1; #a19
           if ($CDS_flg==1) 
              {
#           print  '>>',$CDS_block,'<<',$eol_str; &press_any_key;#s13
                ($mRNA_num,$CDS_exon_num,$mRNA_match_c,$err_code)=
                &check_mRNA_block_CDS_block_rel($CDS_block);
#           print '$mRNA_num=',$mRNA_num,' $CDS_exon_num=',$CDS_exon_num,
#                 ' $mRNA_match_c=',$mRNA_match_c,' $err_code=',$err_code,$eol_str;
#           &press_any_key; #s13
               if ($err_code==0) {
               if ($CDS_exon_num>1) {
               $aux_c1++;
               if ($mRNA_num>0) 
                  { $j=0;
                    foreach (@long_mRNA_stack)
                      {$j++; 
                       if ($j==$mRNA_num) 
                          {if ($mRNA_match_c>1) 
                              {$aux_rec=$SPL_note_rec."\n"}
                              else {$aux_rec="\n"}
                           $aux_str=$gene_rec_end.$_.$aux_rec.$mRNA_rec_end;
                           push @CDS_blocks,$gene_block.$aux_str.$CDS_block;
                           last}
                      }
                    if ($mRNA_match_c>1) {&trace("$mRNA_match_c matched mRNA found",4);
                          &trace($gene_block,0); 
                       $mRNA_match_controvers_c++;
                       $mRNA_assing_controvers_flg=1;
                       }
                  } else {&trace("no matched mRNA found",4);
                  $aux_RNA_block=$CDS_block;
                  $aux_RNA_block=~s/     CDS   /     mRNA  /;
                  $aux_RNA_block.='                     /SPLnote="DUMMY mRNA_NF"'."\n";
                  $aux_str=$gene_rec_end.$aux_RNA_block.$mRNA_rec_end;
                  push @CDS_blocks,$gene_block.$aux_str.$CDS_block;
                          &trace("mRNA considered equ CDS",4);
                          &trace($gene_block,0);  
                          $mRNA_assign_err_c++;
                         }
               } else {&trace("intronless CDS",4);
                       &trace($gene_block,0)  }
               } else {&trace("CDS format error",4);
                       &trace($gene_block,0); 
                       $CDS_format_error_c++;} #a19
   #print "$CDS_block\n";
   #&press_any_key;
               $CDS_block=''; 
              }
           $c_CDS_num++;
           $CDS_flg=1;
        }
        elsif ($temp_g_b[$i] =~ /^\s{5}\w+\s{1}/){ $CDS_line_flg=0}
        if ($CDS_line_flg==1) {$CDS_block.="$temp_g_b[$i]\n"}
        $i++;
    }#SPL end while
   if ($CDS_flg==1) 
      {
#           print  '>>',$CDS_block,'<<',$eol_str; &press_any_key;#s13
       ($mRNA_num,$CDS_exon_num,$mRNA_match_c,$err_code)=&check_mRNA_block_CDS_block_rel($CDS_block);
#           print '$mRNA_num=',$mRNA_num,' $CDS_exon_num=',$CDS_exon_num,
#                 ' $mRNA_match_c=',$mRNA_match_c,' $err_code=',$err_code,$eol_str;
#           &press_any_key; #s13
       if ($err_code==0) {
       if ($CDS_exon_num>1) {
       $aux_c1++;
       if ($mRNA_num>0) 
          { $j=0;
            foreach (@long_mRNA_stack)
              {$j++; 
               if ($j==$mRNA_num) 
                  {if ($mRNA_match_c>1) 
                      {$aux_rec=$SPL_note_rec."\n"}
                      else {$aux_rec="\n"}
                   $aux_str=$gene_rec_end.$_.$aux_rec.$mRNA_rec_end;
                   push @CDS_blocks,$gene_block.$aux_str.$CDS_block;last}
              }
                    if ($mRNA_match_c>1) {&trace("$mRNA_match_c matched mRNA found",4);
                                          &trace($gene_block,0);
                       $mRNA_match_controvers_c++;
                       $mRNA_assing_controvers_flg=1;
                    }
          } else {&trace("no matched mRNA found",4);
                  $aux_RNA_block=$CDS_block;
                  $aux_RNA_block=~s/     CDS   /     mRNA  /;
                  $aux_RNA_block.='                     /SPLnote="DUMMY mRNA_NF"'."\n";
                  $aux_str=$gene_rec_end.$aux_RNA_block.$mRNA_rec_end;
                  push @CDS_blocks,$gene_block.$aux_str.$CDS_block;
                          &trace("mRNA considered equ CDS",4);
                  &trace($gene_block,0);
                  $mRNA_assign_err_c++;
                 }
       } else {&trace("intronless CDS",4); &trace($gene_block,0)  }
       } else {&trace("CDS format error",4); &trace($gene_block,0);
               $CDS_format_error_c++}
      }
   if ($mRNA_assing_controvers_flg) {$mRNA_match_controvers_gene_c++}
   if ($err_code!=0)    {$aux_c3++}
   if (($err_code==0) and ($CDS_exon_num>1) and ($mRNA_num>0)) {$err_code=0} 
      else {$err_code=1}  #this line has no meaning
   return $err_code;
}

sub get_mRNA_CDS0{
#SPL new subroutine to extract block of lines belonging to one CDS feature
#SPL including gene feature and all mRNA feature 
#SPL special version for handling all CDS without wings
#SPL produces dummy mRNA record to include into prefix.rac
    my @temp_g_b=split("\n",$gene_block);
    #print '$#temp_g_b=',$#temp_g_b;
    #&press_any_key;
    my $cds_flg=0;       #SPL 1 = cds block available
    my $gene_line_flg=0; #SPL 1 = gene, mRNA, or CDS lines
    my $CDS_line_flg=0;  #SPL 1 = gene, mRNA, or CDS lines
    my $CDS_block='';
    my $CDS_flg=0;
    my $mRNA_flg=0;
    my $mRNA_line_flg=0;
    my $gene_rec_end="_GENE_REC_END_\n";
    my $mRNA_rec_end="_mRNA_REC_END_\n";
    my $aux_str;
    my $i=0;
    my $mRNA_assing_controvers_flg=0;
    my $gene_block=''; $mRNA_block='';
    # first gather gene feaure and all mRNA features
    while($i<= $#temp_g_b){
        #  print $temp_g_b[$i],"\n";
        #  &press_any_key;
        if ($temp_g_b[$i] =~ /^\s{5}gene\s{1}/){ $gene_line_flg=1}
        elsif ($temp_g_b[$i] =~ /^\s{5}mRNA\s{1}/) { 
           $mRNA_line_flg=1;$gene_line_flg=0}
        elsif ($temp_g_b[$i] =~ /^\s{5}CDS\s{1}/) {$gene_line_flg=0;
           $mRNA_line_flg=0} 
        if ($gene_line_flg==1) {$gene_block.="$temp_g_b[$i]\n"}
        if ($mRNA_line_flg==1) {$mRNA_block.="$temp_g_b[$i]\n"}
        $i++;
    }#SPL end while
    # second prepare long and short mRNA description stack
    my @temp_m_b=split("\n",$mRNA_block);
#    print 'check $#temp_m_b=',$#temp_m_b;
#    &press_any_key;
    my $mRNA_rec='';
    @long_mRNA_stack=(); 
    @mRNA_stack=(); 
    $i=0;
    while($i<= $#temp_m_b){
#          print $temp_m_b[$i],"\n";
#          &press_any_key;
        if ($temp_m_b[$i] =~ /^\s{5}mRNA\s{1}/){ 
           $mRNA_line_flg=1;
           if ($mRNA_flg==1) {push @long_mRNA_stack,$mRNA_rec;
                              push @mRNA_stack,&purge_feature_record($mRNA_rec);
                             }
           $mRNA_flg=1;
           $mRNA_rec=$temp_m_b[$i]."\n";
        }
        else {if ($mRNA_line_flg==1) {$mRNA_rec.=$temp_m_b[$i]."\n"}}
        $i++;
    }#SPL end while
    if ($mRNA_flg==1) {push @long_mRNA_stack,$mRNA_rec;
                       push @mRNA_stack,&purge_feature_record($mRNA_rec);}
    # third add CDS feature for every CDS feature
    # replase RNA block by CDS block
    $i=0;$CDS_flg=0;$CDS_line_flg=0;
    $c_CDS_num=0;
    my $CDS_exon_num=0;
    my $mRNA_num=0;
    my $mRNA_match_c=0;
    my $err_code=0;
    my $aux_RNA_block='';
    while($i<= $#temp_g_b){
        if ($temp_g_b[$i] =~ /^\s{5}CDS\s{1}/){ $CDS_line_flg=1;
           $cds_feature_counter++;
           $coding_gene_flg=1; #a19
           if ($CDS_flg==1) 
              {$CDS_exon_num=&check_CDS_block($CDS_block);
               if ($CDS_exon_num>1) {
                  $aux_c1++;
                  $aux_RNA_block=$CDS_block;
                  $aux_RNA_block=~s/     CDS   /     mRNA  /;
                  $aux_RNA_block.='                     /SPLnote="DUMMY"'."\n";
                  $aux_str=$gene_rec_end.$aux_RNA_block.$mRNA_rec_end;
                  push @CDS_blocks,$gene_block.$aux_str.$CDS_block;
               } else {&trace("intronless CDS",4);
                       &trace($gene_block,0)  }
               $CDS_block=''; 
              }
           $c_CDS_num++;
           $CDS_flg=1;
        }
        elsif ($temp_g_b[$i] =~ /^\s{5}\w+\s{1}/){ $CDS_line_flg=0}
        if ($CDS_line_flg==1) {$CDS_block.="$temp_g_b[$i]\n"}
        $i++;
    }#SPL end while
   if ($CDS_flg==1) 
      {$CDS_exon_num=&check_CDS_block($CDS_block);
       if ($CDS_exon_num>1) {
          $aux_c1++;
          $aux_RNA_block=$CDS_block;
          $aux_RNA_block=~s/     CDS   /     mRNA  /;
          $aux_RNA_block.='                     /SPLnote="DUMMY"'."\n";#corr in 1.13
          $aux_str=$gene_rec_end.$aux_RNA_block.$mRNA_rec_end;
          push @CDS_blocks,$gene_block.$aux_str.$CDS_block;
       } else {&trace("intronless CDS",4); &trace($gene_block,0)  }
      }
   return $err_code;
}

sub get_mRNA_CDS02 #($c_gene_block)
#SPL new subroutine to extract block of lines belonging to one CDS feature
#SPL NOT including gene feature and all mRNA feature 
#SPL special version for handling all CDS without wings
#SPL to scan all CDS of the contig 
    {my $c_gene_block=$_[0];
    my @temp_g_b=split("\n",$c_gene_block);
    #print '$#temp_g_b=',$#temp_g_b;
    #&press_any_key;
    my $cds_flg=0;       #SPL 1 = cds block available
    my $gene_line_flg=0; #SPL 1 = gene, mRNA, or CDS lines
    my $CDS_line_flg=0;  #SPL 1 = gene, mRNA, or CDS lines
    my $CDS_block='';
    my $CDS_flg=0;
    my $mRNA_flg=0;
    my $mRNA_line_flg=0;
    my $gene_rec_end="_GENE_REC_END_\n";
    my $mRNA_rec_end="_mRNA_REC_END_\n";
    my $aux_str;
    my $i=0;
    my $mRNA_assing_controvers_flg=0;
    my $gene_block2=''; $mRNA_block2='';
    # first gather gene feaure and all mRNA features
    while($i<= $#temp_g_b){
        #  print $temp_g_b[$i],"\n";
        #  &press_any_key;
        if ($temp_g_b[$i] =~ /^\s{5}gene\s{1}/){ $gene_line_flg=1}
        elsif ($temp_g_b[$i] =~ /^\s{5}mRNA\s{1}/) { 
           $mRNA_line_flg=1;$gene_line_flg=0}
        elsif ($temp_g_b[$i] =~ /^\s{5}CDS\s{1}/) {$gene_line_flg=0;
           $mRNA_line_flg=0} 
        if ($gene_line_flg==1) {$gene_block2.="$temp_g_b[$i]\n"}
        if ($mRNA_line_flg==1) {$mRNA_block2.="$temp_g_b[$i]\n"}
        $i++;
    }#SPL end while
    # third add CDS feature for every CDS feature
    # replase RNA block by CDS block
    $i=0;$CDS_flg=0;$CDS_line_flg=0;
    $c_CDS_num=0;
    my $CDS_exon_num=0;
    my $mRNA_num=0;
    my $mRNA_match_c=0;
    my $err_code=0;
    $aux_CDS_block='';
    while($i<= $#temp_g_b){
        if ($temp_g_b[$i] =~ /^\s{5}CDS\s{1}/){ $CDS_line_flg=1;
           if ($CDS_flg==1) 
              {#$CDS_exon_num=&check_CDS_block($aux_CDS_block);
               push @CDS_blocks2,$gene_block2.$mRNA_rec_end.$aux_CDS_block;
#               print "$aux_CDS_block\n";&press_any_key;
               $aux_CDS_block=''; 
              }
           $c_CDS_num++;
           $CDS_flg=1;
        }
        elsif ($temp_g_b[$i] =~ /^\s{5}\w+\s{1}/){ $CDS_line_flg=0}
        if ($CDS_line_flg==1) {$aux_CDS_block.="$temp_g_b[$i]\n"}
        $i++;
    }#SPL end while
   if ($CDS_flg==1) 
      {# $CDS_exon_num=&check_CDS_block($aux_CDS_block);
       push @CDS_blocks2,$gene_block2.$mRNA_rec_end.$aux_CDS_block;
#               print "$aux_CDS_block\n";&press_any_key;
      }
   return $err_code;
}

sub get_mRNA_CDS_GB149{
#SPL new subroutine to extract block of lines belonging to one CDS feature
#SPL including gene feature and all mRNA feature 
    my @temp_g_b=split("\n",$gene_block);
    #print '$#temp_g_b=',$#temp_g_b;
    #&press_any_key;
    my $cds_flg=0;       #SPL 1 = cds block available
    my $gene_line_flg=0; #SPL 1 = gene, mRNA, or CDS lines
    my $CDS_line_flg=0;  #SPL 1 = gene, mRNA, or CDS lines
    my $CDS_block='';
    my $CDS_flg=0;
    my $mRNA_flg=0;
    my $mRNA_line_flg=0;
    my $gene_rec_end="_GENE_REC_END_\n";
    my $mRNA_rec_end="_mRNA_REC_END_\n";
    my $aux_str;
    my $i=0;
    my $mRNA_assing_controvers_flg=0;
    my $gene_block=''; $mRNA_block='';
    my $c_i2=0;
    # first gather gene feature and all mRNA features
    while($i<= $#temp_g_b){
        #  print $temp_g_b[$i],"\n";
        #  &press_any_key;
        if ($temp_g_b[$i] =~ /^\s{5}gene\s{1}/){ $gene_line_flg=1}
        elsif ($temp_g_b[$i] =~ /^\s{5}mRNA\s{1}/) { 
           $mRNA_line_flg=1;$gene_line_flg=0}
        elsif ($temp_g_b[$i] =~ /^\s{5}CDS\s{1}/) {$gene_line_flg=0;
           $mRNA_line_flg=0} 
        if ($gene_line_flg==1) {$gene_block.="$temp_g_b[$i]\n"}
        if ($mRNA_line_flg==1) {$mRNA_block.="$temp_g_b[$i]\n"}
        $i++;
    }#SPL end while
    # second prepare long and short mRNA description stack
    my @temp_m_b=split("\n",$mRNA_block);
#    print 'check $#temp_m_b=',$#temp_m_b;
#    &press_any_key;
    my $mRNA_rec='';
    @long_mRNA_stack=(); 
    @mRNA_stack=(); 
    $i=0;
    while($i<= $#temp_m_b){
#          print $temp_m_b[$i],"\n";
#          &press_any_key;
        if ($temp_m_b[$i] =~ /^\s{5}mRNA\s{1}/){ 
           $mRNA_line_flg=1;
           if ($mRNA_flg==1) {push @long_mRNA_stack,$mRNA_rec;
                              push @mRNA_stack,&purge_feature_record($mRNA_rec);
                             }
           $mRNA_flg=1;
           $mRNA_rec=$temp_m_b[$i]."\n";
        }
        else {if ($mRNA_line_flg==1) {$mRNA_rec.=$temp_m_b[$i]."\n"}}
        $i++;
    }#SPL end while
    if ($mRNA_flg==1) {push @long_mRNA_stack,$mRNA_rec;
                       push @mRNA_stack,&purge_feature_record($mRNA_rec);}
#    if ($aux_gene_name eq 'RNU12')
#       print "aux stop\n";&press_any_key;
#print 'lmRNA_stack>>',@long_mRNA_stack,'<<',$eol_str; &press_any_key;#exon
#print ' mRNA_stack>>',@mRNA_stack,'<<',$eol_str; &press_any_key;#exon
    # mark mRNA stack
      @mRNA_stack_errs=();
      my $c_mRNA_rec=''; my $c_mRNA_rec_i=0;
      foreach $c_mRNA_rec (@mRNA_stack)
        { #print ">>>$c_mRNA_rec <<<";&press_any_key; 
          $c_mRNA_rec=&expand_rec_1nuc2($c_mRNA_rec);
          my $mRNA_err=&get_feature_start_end4($c_mRNA_rec);
          push @mRNA_stack_errs,$mRNA_err;
          if ($mRNA_err) {$rejected_mRNA_recs_due_to_err++;
              &trace("rejected mRNA rec:",4);
              &trace($long_mRNA_stack[$c_mRNA_rec_i],0);  
              print "rejected mRNA rec:$long_mRNA_stack[$c_mRNA_rec_i]\n";
          }
          $c_mRNA_rec_i++;
        }
    # here calculating alternative exon count

      my $c_mRNA_ind1=0;
      my $c_mRNA_ind2=0;
      my $c_mRNA_rec1=''; 
      my $c_mRNA_rec1_aux=''; 
      my $c_mRNA_rec2=''; 
      my @irremovable_exons=();
      my @out_irr_array=();
      my @all_exons_arr=();
      my @out_all_exons_arr=();
      my $aux_rec1=''; my $aux_rec2='';
      my @aux_rec1_stack=();  my @aux_rec2_stack=();
      my $aux_first_OK_flg=1; # aux to find first OK mRNA rec
      foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
        { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          $c_mRNA_rec1=~s/[\s\n]//g; 
          $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
          while (my $c_res1=($c_mRNA_rec1=~m/\(/))
            {$c_mRNA_rec1=$';}
          $c_mRNA_rec1=~m/\)/;
          $c_mRNA_rec1=$`;
          @aux_rec1_stack=split/,/, $c_mRNA_rec1;
          if ($aux_first_OK_flg==1) 
             {$aux_first_OK_flg=0;
              @irremovable_exons=@aux_rec1_stack;
#print "start>@irremovable_exons<start\n";&press_any_key;
             }

          # calc all exon set
          if ($c_mRNA_ind1==0) {@all_exons_arr=@aux_rec1_stack}
          else {
          @out_all_exons_arr=@all_exons_arr;
          foreach (@aux_rec1_stack)
            { my $c_exon1=$_;
              my $aux_equ_flg=0;
              foreach (@all_exons_arr)
                { my $c_exon_all_arr=$_;
#print "$c_exon1 :: $c_exon_all_arr\n";&press_any_key;
                  if ($c_exon1 eq $c_exon_all_arr) {$aux_equ_flg=1}
                }
              if ($aux_equ_flg==0) {push @out_all_exons_arr,$c_exon1}
            } #end calc current all exons 
#print "all>@out_all_exons_arr<all\n";&press_any_key;
          @all_exons_arr=@out_all_exons_arr;
          } #end else 

          $c_mRNA_ind2=0;
          $aux_rec2='';
          foreach (@mRNA_stack)
            { $c_mRNA_rec2=$_;
              #print ">>>$c_mRNA_rec2 <<<";&press_any_key; 
#          print "$c_i2: s4>>>@mRNA_stack2 <<<s4";&press_any_key; 
#          $c_i2++; 
              if (($c_mRNA_ind2>$c_mRNA_ind1) && 
                  ($mRNA_stack_errs[$c_mRNA_ind2]==0))
                  { $c_mRNA_rec2=~s/[\s\n]//g; 
                    $c_mRNA_rec2=&expand_rec_1nuc2($c_mRNA_rec2);
                  
          while (my $c_res2=($c_mRNA_rec2=~m/\(/))
            {$c_mRNA_rec2=$';}
          $c_mRNA_rec2=~m/\)/;
          $c_mRNA_rec2=$`;
          @aux_rec2_stack=split/,/, $c_mRNA_rec2;
#print "2>@aux_rec2_stack<2\n";&press_any_key;
                    @out_irr_array=();
                    foreach $c_irr (@irremovable_exons)
                      {foreach $c_exon (@aux_rec2_stack)
                         { #print "$c_irr :: $c_exon",;
                          if ($c_irr eq $c_exon)
                             {#print " =";
                              push @out_irr_array, $c_irr}
                         # print "\n";&press_any_key;
                         }
                      } # end comparing exons
                    @irremovable_exons=@out_irr_array;
#print "c_st1>@out_irr_array<c_st1\n";&press_any_key;
#print "c_st>@irremovable_exons<c_st\n";&press_any_key;
                  } # if 2
              $c_mRNA_ind2++;
            } #end foreach2
          
        }$c_mRNA_ind1++;} #end foreach1
#print "st>@irremovable_exons<st\n";&press_any_key;
      my $c_irremovable_exons_c=$#irremovable_exons+1;
      my $c_alternative_exons_c=0;
      $c_mRNA_ind1=0;
      my $aux_irr_exon_flg=0;
      my $OK_mRNA_c=0;
      foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
        { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          $OK_mRNA_c++;
          $c_mRNA_rec1=~s/[\s\n]//g; 
          $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
          while ($c_res1=($c_mRNA_rec1=~m/\(/))
            {$c_mRNA_rec1=$';}
          $c_mRNA_rec1=~m/\)/;
          $c_mRNA_rec1=$`;
          @aux_rec1_stack=split/,/, $c_mRNA_rec1;
                    foreach $c_exon (@aux_rec1_stack)
                      {$aux_irr_exon_flg=0;
                         foreach $c_irr (@irremovable_exons)
                           { #print "$c_irr :: $c_exon",;
                            if ($c_irr eq $c_exon)
                             {#print " =";
                              $aux_irr_exon_flg=1;}
                         # print "\n";&press_any_key;
                           }
                       if ($aux_irr_exon_flg==0) {$c_alternative_exons_c++;}
                      } # end comparing exons
           # seems irrelevant position
        }$c_mRNA_ind1++;} #end foreach1
#print "gene=$aux_gene_name\n";
#print "irr_exon_c=$c_irremovable_exons_c\n";
#print "alt_exon_c0=$c_alternative_exons_c\n";
my $diff_alt_c=$#all_exons_arr+1-$c_irremovable_exons_c;
#print "alt_exon_c=$diff_alt_c\n";#&press_any_key;                  

    # calculating skipped exons
    my @rest_exons=(); #@all_exons_arr;
       foreach $c_exon (@all_exons_arr)
         {my $aux_exon_flg=0;
          foreach $c_irr (@irremovable_exons)
            { if ($c_exon eq $c_irr) {$aux_exon_flg=1}}
          if ($aux_exon_flg==0) {push @rest_exons,$c_exon}
         } #end foreach 1
#    print "all: @all_exons_arr\n"; #&press_any_key;
#    print "irr: @irremovable_exons\n";#&press_any_key;

#    print "rest: @rest_exons\n";

    my @skipped_exons=();
    my $aux_skip_exon_flg=1;
    foreach $c_rest (@rest_exons)
      {$aux_skip_exon_flg=0;
       $c_mRNA_ind1=0;
       $c_overlap_flg=0;
       foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
             { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
              $aux_c_flg=1;
              $c_mRNA_rec1=~s/[\s\n]//g; 
              $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
              while ($c_res1=($c_mRNA_rec1=~m/\(/))
                {$c_mRNA_rec1=$';}
                 $c_mRNA_rec1=~m/\)/;
                 $c_mRNA_rec1=$`;
                 @aux_rec1_stack=split/,/, $c_mRNA_rec1;
                 foreach $c_exon (@aux_rec1_stack)
                   {#print "$c_rest vs $c_exon\n"; 
                    if ($c_rest eq $c_exon) {$aux_c_flg=0;}
                        else 
                          {$c_overlap=&get_overlap_exon($c_rest,$c_exon);
#                           print "overlap=$c_overlap\n";
                           if ($c_overlap!=0) {$aux_c_flg=0;$c_overlap_flg=1}
                          } # end else
                  #  print "aux=$aux_c_flg\n";
                   } # end comparing exons
              if ($aux_c_flg==1) {$aux_skip_exon_flg=1};
#              print "aux=$aux_c_flg  aux_skip=$aux_skip_exon_flg\n";&press_any_key;
             } #end if good mRNA record
          $c_mRNA_ind1++;
        } #end foreach mRNA variant
        if ($c_overlap_flg==1) {$aux_skip_exon_flg=0}; # clear skip flag 
                                                       # if overlap but not 
                                                       # equality anywhere
        if ($aux_skip_exon_flg==1) {push @skipped_exons,$c_rest}
      } #end foreach rest
    my $skipped_exon_c=$#skipped_exons+1;
#    print "skipped_exon_c=$skipped_exon_c\n";

    # discriminate internal skipped exons and marginal skipped exons
    # this is improper section where only first marginal exons 
    # but in all RNA is counted 
    # stored temporary for debug
    my $c_exon_c=0;
    my $marginal_skipped_exons_c=0;

    foreach $c_skipped (@skipped_exons)
      {$aux_skip_exon_flg=0;
       $c_mRNA_ind1=0;
       my $marginal_skipped_exon_avail_flg=0;
       foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
             { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
              $aux_c_flg=0; #here indicate marginal exon
              $c_mRNA_rec1=~s/[\s\n]//g; 
              $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
              while ($c_res1=($c_mRNA_rec1=~m/\(/))
                {$c_mRNA_rec1=$';}
                 $c_mRNA_rec1=~m/\)/;
                 $c_mRNA_rec1=$`;
                 @aux_rec1_stack=split/,/, $c_mRNA_rec1;
                 $c_exon_c=0;
                 foreach $c_exon (@aux_rec1_stack)
                   {#print "$c_rest vs $c_exon\n"; 
                    if (($c_skipped eq $c_exon) and (($c_exon_c==0) 
                                    or ($c_exon_c==$#aux_rec1_stack)))
                         {$aux_c_flg=1;}
                  #  print "aux=$aux_c_flg\n";
                    $c_exon_c++;
                   } # end comparing exons
              if ($aux_c_flg==1) {$marginal_skipped_exon_avail_flg=1};
#              print "aux=$aux_c_flg  aux_skip=$aux_skip_exon_flg\n";&press_any_key;
             } #end if good mRNA record
          $c_mRNA_ind1++;
        } #end foreach mRNA variant
        if ($marginal_skipped_exon_avail_flg==1) {$marginal_skipped_exons_c++}
      } #end foreach rest
#    print "marginal_skipped_exon_c=$marginal_skipped_exons_c\n";

    # discriminate internal skipped exons and marginal skipped exons
    # this section searchs for all marginal skipped exon only 
    # in protruding RNA  
    # possibly invalid proc for nonoverlapped RNA?
     $c_exon_c=0;
     $marginal_skipped_exons_c=0;

       #search for left and right protruding mRNA
       my $left_end=999999999999;
       my $right_end=0;
       my $rightmost_left_end=0;
       my $leftmost_right_end=999999999999;
       $c_mRNA_ind1=0;
       foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
             { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
              $aux_c_flg=0; #here indicate marginal exon
              $c_mRNA_rec1=~s/[\s\n]//g; 
              $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
              while ($c_res1=($c_mRNA_rec1=~m/\(/))
                {$c_mRNA_rec1=$';}
                 $c_mRNA_rec1=~m/\)/;
                 $c_mRNA_rec1=$`;
                 @aux_rec1_stack=split/,/, $c_mRNA_rec1;
                 $c_exon_c=0;
                 foreach $c_exon (@aux_rec1_stack)
                   {#print "$c_rest vs $c_exon\n"; 
                    ($c_start1,$c_end1,$c_err1)=&get_feature_start_end2($c_exon);
                    if ($c_exon_c==0)
                       {if ($c_start1<$left_end) {$left_end=$c_start1} # seems unused later
                        if ($c_start1>$rightmost_left_end) {$rightmost_left_end=$c_start1} 
                       }
                    if ($c_exon_c==$#aux_rec1_stack)
                       {if ($c_end1>$right_end) {$right_end=$c_end1} # seems unused later
                        if ($c_end1<$leftmost_right_end) {$leftmost_right_end=$c_end1} 
                       }
                    $c_exon_c++;
                   } # end comparing exons
             } #end if good mRNA record
          $c_mRNA_ind1++;
        } #end foreach mRNA variant
#    print "$left_end $rightmost_left_end $leftmost_right_end $right_end\n";

    my @marginal_skipped_arr=();
#    print "skip: @skipped_exons\n";
    foreach $c_skipped (@skipped_exons)
      {$aux_skip_exon_flg=0;
       $c_mRNA_ind1=0;
       foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
             { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
              $c_mRNA_rec1=~s/[\s\n]//g; 
              $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
              while ($c_res1=($c_mRNA_rec1=~m/\(/))
                {$c_mRNA_rec1=$';}
                 $c_mRNA_rec1=~m/\)/;
                 $c_mRNA_rec1=$`;
                 @aux_rec1_stack=split/,/, $c_mRNA_rec1;
                 foreach $c_exon (@aux_rec1_stack)
                   {#print "$c_rest vs $c_exon\n"; 
                    if ($c_skipped eq $c_exon) 
                       {($c_start1,$c_end1,$c_err1)=&get_feature_start_end2($c_exon);
                        if (($c_start1>$leftmost_right_end) or
                            ($c_end1<$rightmost_left_end))
                         { push (@marginal_skipped_arr,$c_skipped)}
                       } #if current mRNA exon is skipped
                   } # end comparing exons
#              print "aux=$aux_c_flg  aux_skip=$aux_skip_exon_flg\n";&press_any_key;
             } #end if good mRNA record
          $c_mRNA_ind1++;
        } #end foreach mRNA variant
      } #end foreach skipped
#    print "marginal: @marginal_skipped_arr\n";
    $marginal_skipped_exons_c=&num_diff_items(@marginal_skipped_arr);
#    print "marginal_skipped_exon_c=$marginal_skipped_exons_c\n";

    # calculating rest exon by subtracting skipped exons
    my @out_rest_exons=(); 
       foreach $c_exon (@rest_exons)
         {my $aux_exon_flg=0;
          foreach $c_skip (@skipped_exons)
            { if ($c_exon eq $c_skip) {$aux_exon_flg=1}}
          if ($aux_exon_flg==0) {push @out_rest_exons,$c_exon}
         } #end foreach 1
#    print "rest: @out_rest_exons\n"; 
    @rest_exons=@out_rest_exons; 

#    # producing nonoverlapped classes
#    # first produce projection, nonchalance for comparing twice
#    050807 this block has a bug, taken under comment, 
#    see beneath another algorithm 
#    my @proj_arr=();
#    my @aux_proj_arr=();
#    my $c_exon_num=0;
#    my $no_overlap_flg=1;
#    my @rewritten_stack=();
#       if ($#rest_exons>=0) {$proj_arr[0]=$rest_exons[0];
#       @aux_proj_arr=@_proj_arr;
#          foreach (@rest_exons)
#            {   $c_exon=$_; #HERE MODIFIED ERROR JOINNIG PROC
#                            #BACKWARD INFLUENCE ON @rest_exons OTHERWISE
#                $c_exon_num=0; $no_overlap_flg=1;
#                @rewritten_stack=();
#                foreach $cc_exon (@proj_arr)
#                  { ($c_overlap,$c_rec)=&get_joint_exon($c_exon,$cc_exon); 
#                    print "$c_exon & $cc_exon=>$c_rec ($c_exon_num)\n";
#                     if ($c_overlap==1) {$aux_proj_arr[$c_exon_num]=$c_rec;
#                                         push @rewritten_stack,$c_exon_num;
#                                         $no_overlap_flg=0;
#                                        }
#                    $c_exon_num++;
#                  } #end foreach 2
#                if ($no_overlap_flg==1) {push @aux_proj_arr,$c_exon};
##            print "c_jnt_p: @aux_proj_arr\n"; &press_any_key;
#            @proj_arr=@aux_proj_arr;
#            if ($#rewritten_stack>=1) {
#            @aux_proj_arr=();
#            # get common exon
#            $c_exon=$proj_arr[$rewritten_stack[0]];
#            for (my $i=1; $i<=$#rewritten_stack;$i++)
#                { $cc_exon=$proj_arr[$rewritten_stack[$i]];
#                  ($c_overlap,$c_rec)=&get_joint_exon($c_exon,$cc_exon); 
#                  if ($c_overlap==1) {$c_exon=$c_rec;
#                                     } else {&trace("error in joining alg",4);}
#                } # for in joining rewritten exons 
#            # put common exon into arr
#            $c_exon_num=0; my $aux_c_exon_num=0;
#            print "rewritten_stack_depth=$#rewritten_stack",$eol_str;
#            print $rewritten_stack[0],$eol_str;
#            print $rewritten_stack[1],$eol_str;
#            foreach $cc_exon (@proj_arr)
#              { if ($c_exon_num==$rewritten_stack[0]) 
#                   {push @aux_proj_arr,$c_exon;$aux_c_exon_num++}
#                   else {if ($c_exon_num==$rewritten_stack[$aux_c_exon_num])
#                            {$aux_c_exon_num++}
#                            else {push @aux_proj_arr,$proj_arr[$c_exon_num]}
#                        } #end else 
#                $c_exon_num++;
#              }
#            } #end if rewritten stack
##            print "c_joint: @aux_proj_arr\n"; &press_any_key;
#                @proj_arr=@aux_proj_arr;
#            } #end foreach 1
#       } #end if non empty set
#    print "joint: @proj_arr\n";

    # producing nonoverlapped classes
    # first produce projection, nonchalance for comparing twice
    # alternative algoritm to produce projection
    my @proj_arr=();
    my @aux_proj_arr=();
    my $c_exon_num=0;
    my $overlap_flg=1;
    my $i1=0;
    my $i2=0;
    @aux_proj_arr=@rest_exons;
    @proj_arr=@rest_exons;

    if ($#rest_exons>0) {
    while ($overlap_flg==1)
      {$overlap_flg=0; $i1=0;
       while (($i1<$#aux_proj_arr) and ($overlap_flg==0))
         {$i2=$i1+1;
          while (($i2<=$#aux_proj_arr) and ($overlap_flg==0))
            {$c_exon=$aux_proj_arr[$i1];
             $cc_exon=$aux_proj_arr[$i2];
             ($c_overlap,$c_rec)=&get_joint_exon($c_exon,$cc_exon); 
#                    print "$c_exon & $cc_exon=>$c_rec \n";
             if ($c_overlap==1) 
                {$overlap_flg=1;
                 # copiing to proj_arr
                 @proj_arr=();
                 push @proj_arr,$c_rec;
                 for (my $j1=0; $j1<=$#aux_proj_arr; $j1++)
                   { if (($j1!=$i1) and ($j1!=$i2)) 
                        {push @proj_arr,$aux_proj_arr[$j1]}
                   }
                 @aux_proj_arr=@proj_arr;
                } #end if overlap found
             $i2++;
            } #end while second exon in pair 
          $i1++;
         } #end while first exon in pair 
      } #end while overlap available
      @proj_arr=@aux_proj_arr;
    } #end if more then single element in rest_exons
#    print "joint: @proj_arr\n";

#    # second sequentially produce every nonoverlapped class for demonstration 
#    my $class_num=0;  #{1 start}
#    print "out_rest demo: @out_rest_exons\n"; 
#    print "    rest demo: @rest_exons\n"; 
#      foreach $c_class (@proj_arr)
#        {$class_num++;
#         print "class $class_num: ";
#         foreach $c_exon (@rest_exons)
#           {print '(',"$c_exon ",')';
#            $c_overlap=&get_overlap_exon($c_class,$c_exon);
#            if ($c_overlap!=0) {print "$c_exon "}
#           }
#         print "\n";
#        } #end foreach 1

    # third sequentially produce every nonoverlapped class 
    # and check for truncated exons 
      $class_num=0;  #{1 start}
    my $trunc_class_num=0;
    my @out_rest_class=();
      foreach $c_class (@proj_arr)
        {$class_num++;
         my $c_5or3_OK_flg=1;
         @c_class_arr=();
         foreach $c_exon (@rest_exons)
           {$c_overlap=&get_overlap_exon($c_class,$c_exon);
            if ($c_overlap!=0) {push @c_class_arr,$c_exon }
           }
         foreach $c_exon (@c_class_arr)
           {$c_overlap=&get_5or3_exon($c_class,$c_exon);
            if ($c_overlap==0) {$c_5or3_OK_flg=0}
           }
         if ($c_5or3_OK_flg==1) {$trunc_class_num++}
            else {push @out_rest_class,$c_class}
        } #end foreach 1
    my $others_num=$#out_rest_class+1;
#    print "trunc=$trunc_class_num  others=$others_num\n";

    # calculating retained introns among trunc and others exons
    my $retained_intron_c=0;
    foreach $c_class (@proj_arr)
      {my $retained_intron_flg=0;
       $c_mRNA_ind1=0;
       foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
             { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
              $aux_c_flg=1;
              $c_mRNA_rec1=~s/[\s\n]//g; 
              $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
              while ($c_res1=($c_mRNA_rec1=~m/\(/))
                {$c_mRNA_rec1=$';}
                 $c_mRNA_rec1=~m/\)/;
                 $c_mRNA_rec1=$`;
                 @aux_rec1_stack=split/,/, $c_mRNA_rec1;
                 my $c_ex_i=0;
                 my $c_exon1='';
                 my $c_exon2='';
                 for ($c_ex_i=0; $c_ex_i<$#aux_rec1_stack;$c_ex_i++)
                   {$c_exon1=$aux_rec1_stack[$c_ex_i];
                    $c_exon2=$aux_rec1_stack[$c_ex_i+1];
                    $c_overlap=&get_overlap_exon($c_exon1,$c_class);
#                   print "overlap=$c_overlap\n";
                    if ($c_overlap!=0) {
                       $c_overlap=&get_overlap_exon($c_exon2,$c_class);
                       if ($c_overlap!=0) {$retained_intron_flg=1}
                                  # assume exon1 and exon2 do not overlap
                    } #end is first overlap
                  #  print "aux=$aux_c_flg\n";
                   } # end comparing exons to class for specific mRNA
#              print "aux=$aux_c_flg  aux_skip=$aux_skip_exon_flg\n";&press_any_key;
             } #end if good mRNA record
          $c_mRNA_ind1++;
        } #end foreach mRNA variant
        if ($retained_intron_flg==1) {$retained_intron_c++}
      } #end foreach class
#    print "retained_intron_c=$retained_intron_c\n";

    # calculating retained introns among others exons
    my $retained_intron_others_c=0;
    foreach $c_class (@out_rest_class)
      {my $retained_intron_others_flg=0;
       $c_mRNA_ind1=0;
       foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
             { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
              $aux_c_flg=1;
              $c_mRNA_rec1=~s/[\s\n]//g; 
              $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
              while ($c_res1=($c_mRNA_rec1=~m/\(/))
                {$c_mRNA_rec1=$';}
                 $c_mRNA_rec1=~m/\)/;
                 $c_mRNA_rec1=$`;
                 @aux_rec1_stack=split/,/, $c_mRNA_rec1;
                 $c_ex_i=0;
                 $c_exon1='';
                 $c_exon2='';
                 for ($c_ex_i=0; $c_ex_i<$#aux_rec1_stack;$c_ex_i++)
                   {$c_exon1=$aux_rec1_stack[$c_ex_i];
                    $c_exon2=$aux_rec1_stack[$c_ex_i+1];
                    $c_overlap=&get_overlap_exon($c_exon1,$c_class);
#                   print "overlap=$c_overlap\n";
                    if ($c_overlap!=0) {
                       $c_overlap=&get_overlap_exon($c_exon2,$c_class);
                       if ($c_overlap!=0) {$retained_intron_others_flg=1}
                                  # assume exon1 and exon2 do not overlap
                    } #end is first overlap
                  #  print "aux=$aux_c_flg\n";
                   } # end comparing exons to class for specific mRNA
#              print "aux=$aux_c_flg  aux_skip=$aux_skip_exon_flg\n";&press_any_key;
             } #end if good mRNA record
          $c_mRNA_ind1++;
        } #end foreach mRNA variant
        if ($retained_intron_others_flg==1) {$retained_intron_others_c++}
      } #end foreach class
#    print "retained_intron_c=$retained_intron_c (in others=$retained_intron_others_c)\n";


    # calculating retained introns among others exons
    # and calculating the number of such introns (not only islands)
    # for debug
    $retained_intron_others_c=0;
    my $retained_intron_others_cc=0;
    foreach $c_class (@out_rest_class)
      {my $retained_intron_others_flg=0;
       $c_mRNA_ind1=0;
       my @retained_intron_arr=();
       foreach  (@mRNA_stack)
        { $c_mRNA_rec1=$_;
#          print "s>>>@mRNA_stack <<<s";&press_any_key; 
#          print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
          if ($mRNA_stack_errs[$c_mRNA_ind1]==0)
             { #print ">>>$c_mRNA_rec1 <<<";&press_any_key; 
              $aux_c_flg=1;
              $c_mRNA_rec1=~s/[\s\n]//g; 
              $c_mRNA_rec1=&expand_rec_1nuc2($c_mRNA_rec1);
              while ($c_res1=($c_mRNA_rec1=~m/\(/))
                {$c_mRNA_rec1=$';}
                 $c_mRNA_rec1=~m/\)/;
                 $c_mRNA_rec1=$`;
                 @aux_rec1_stack=split/,/, $c_mRNA_rec1;
                 $c_ex_i=0;
                 $c_exon1='';
                 $c_exon2='';
                 for ($c_ex_i=0; $c_ex_i<$#aux_rec1_stack;$c_ex_i++)
                   {$c_exon1=$aux_rec1_stack[$c_ex_i];
                    $c_exon2=$aux_rec1_stack[$c_ex_i+1];
                    $c_overlap=&get_overlap_exon($c_exon1,$c_class);
#                   print "overlap=$c_overlap\n";
                    if ($c_overlap!=0) {
                       $c_overlap=&get_overlap_exon($c_exon2,$c_class);
                       if ($c_overlap!=0) {$retained_intron_others_flg=1;
                          ($c_start1,$c_end1,$c_err1)=&get_feature_start_end2($c_exon1);
                          ($c_start2,$c_end2,$c_err2)=&get_feature_start_end2($c_exon2);
                          push(@retained_intron_arr,$c_end1.'..'.$c_start2);
                       }
                                  # assume exon1 and exon2 do not overlap
                    } #end is first overlap
                  #  print "aux=$aux_c_flg\n";
                   } # end comparing exons to class for specific mRNA
#              print "aux=$aux_c_flg  aux_skip=$aux_skip_exon_flg\n";&press_any_key;
             } #end if good mRNA record
          $c_mRNA_ind1++;
        } #end foreach mRNA variant
        if ($retained_intron_others_flg==1) {$retained_intron_others_c++;
           $retained_intron_others_cc=$retained_intron_others_cc+
                                      &num_diff_items(@retained_intron_arr);
        }
      } #end foreach class
#    print "retained_intron_c=$retained_intron_c (in others=$retained_intron_others_c($retained_intron_others_cc))\n";

    #printing and store section
    $aux_out_str='const='.$c_irremovable_exons_c.'  alt='.$c_alternative_exons_c.'  ';
    $aux_out_str.='diff='.$diff_alt_c.'  skip='.$skipped_exon_c.'  ';
    $aux_val=$skipped_exon_c-$marginal_skipped_exons_c;
    $aux_out_str.='int='.$aux_val.'  ';
    $aux_out_str.='trunc='.$trunc_class_num.'  others='.$others_num.'  ';
    $aux_out_str.='ret_i='.$retained_intron_c.' (';
    $aux_out_str.=$retained_intron_c-$retained_intron_others_c.'/';
    $aux_out_str.=$retained_intron_others_c.'('.$retained_intron_others_cc.'))';
#    print "gene=$aux_gene_name  OK_mRNA_c=$OK_mRNA_c\n";
#    print $aux_out_str,"\n";
    if ($OK_mRNA_c>=2) {$total_alt_mRNA_gene_c++;
       $total_OK_mRNA_in_alt_mRNA_gene_c+=$OK_mRNA_c;
       $total_irremovable_exons_c+=$c_irremovable_exons_c;
       $total_alternative_exons_c+=$c_alternative_exons_c;
       $total_diff_alt_c+=$diff_alt_c;
       $total_skipped_exon_c+=$skipped_exon_c;
       $total_skipped_internal+=$skipped_exon_c-$marginal_skipped_exons_c;
       $total_trunc_class_num+=$trunc_class_num;
       $total_others_num+=$others_num;
       $total_retained_intron_c+=$retained_intron_c;
       $total_retained_intron_in_trunc_c+=$retained_intron_c-$retained_intron_others_c;
       $total_retained_intron_others_c+=$retained_intron_others_c;
       $total_retained_intron_others_cc+=$retained_intron_others_cc;
    }
    if ($OK_mRNA_c>=2) {
       &trace("gene=$aux_gene_name  OK_mRNA_c=$OK_mRNA_c",4);
       &trace($aux_out_str,4)}
    #&press_any_key;                  

    # third add CDS feature for every CDS feature
    $i=0;$CDS_flg=0;$CDS_line_flg=0;
    $c_CDS_num=0;
    my $CDS_exon_num=0;
    my $mRNA_num=0;
    my $mRNA_match_c=0;
    my $err_code=0;
    my $SPL_note_rec='                     /SPL_note="mRNA assignment controversial"';
    my $aux_rec='';
    my $aux_RNA_block='';
    while($i<= $#temp_g_b){
        if ($temp_g_b[$i] =~ /^\s{5}CDS\s{1}/){ $CDS_line_flg=1;
           $cds_feature_counter++;
           $coding_gene_flg=1; #a19
           if ($CDS_flg==1) 
              {
#           print  '>>',$CDS_block,'<<',$eol_str; &press_any_key;#s13
                ($mRNA_num,$CDS_exon_num,$mRNA_match_c,$err_code)=
                &check_mRNA_block_CDS_block_rel_gb149($CDS_block);
#           print '$mRNA_num=',$mRNA_num,' $CDS_exon_num=',$CDS_exon_num,
#                 ' $mRNA_match_c=',$mRNA_match_c,' $err_code=',$err_code,$eol_str;
#           &press_any_key; #s13
               if ($err_code==0) {
               if ($CDS_exon_num>1) {
               $aux_c1++;
               if ($mRNA_num>0) 
                  { $j=0;
                    foreach (@long_mRNA_stack)
                      {$j++; 
                       if (($j==$mRNA_num) && ($mRNA_stack_errs[$j-1]==0))
                          {if ($mRNA_match_c>1) 
                              {$aux_rec=$SPL_note_rec."\n"}
                              else {$aux_rec="\n"}
                           $aux_str=$gene_rec_end.$_.$aux_rec.$mRNA_rec_end;
                           push @CDS_blocks,$gene_block.$aux_str.$CDS_block;
                           last}
                      }
                    if ($mRNA_match_c>1) {&trace("$mRNA_match_c matched mRNA found",4);
                          &trace($gene_block,0); 
                       $mRNA_match_controvers_c++;
                       $mRNA_assing_controvers_flg=1;
                       }
                  } else {&trace("no matched mRNA found",4);
                  $aux_RNA_block=$CDS_block;
                  $aux_RNA_block=~s/     CDS   /     mRNA  /;
                  $aux_RNA_block.='                     /SPLnote="DUMMY mRNA_NF"'."\n";
                  $aux_str=$gene_rec_end.$aux_RNA_block.$mRNA_rec_end;
                  push @CDS_blocks,$gene_block.$aux_str.$CDS_block;
                          &trace("mRNA considered equ CDS",4);
                          &trace($gene_block,0);  
                          $mRNA_assign_err_c++;
                         }
               } else {&trace("intronless CDS",4);
                       &trace($gene_block,0)  }
               } else {&trace("CDS format error",4);
                       &trace($gene_block,0); 
                       $CDS_format_error_c++;} #a19
#   print "$CDS_block\n";
#   &press_any_key;
               $CDS_block=''; 
              }
           $c_CDS_num++;
           $CDS_flg=1;
        }
        elsif ($temp_g_b[$i] =~ /^\s{5}\w+\s{1}/){ $CDS_line_flg=0}
        if ($CDS_line_flg==1) {$CDS_block.="$temp_g_b[$i]\n"}
        $i++;
    }#SPL end while
   if ($CDS_flg==1) 
      {
#           print  '>>',$CDS_block,'<<',$eol_str; &press_any_key;#s13
       ($mRNA_num,$CDS_exon_num,$mRNA_match_c,$err_code)=
           &check_mRNA_block_CDS_block_rel_gb149($CDS_block);
#           print '$mRNA_num=',$mRNA_num,' $CDS_exon_num=',$CDS_exon_num,
#                 ' $mRNA_match_c=',$mRNA_match_c,' $err_code=',$err_code,$eol_str;
#           &press_any_key; #s13
       if ($err_code==0) {
       if ($CDS_exon_num>1) {
       $aux_c1++;
       if ($mRNA_num>0) 
          { $j=0;
            foreach (@long_mRNA_stack)
              {$j++; 
               if (($j==$mRNA_num) && ($mRNA_stack_errs[$j-1]==0))
                  {if ($mRNA_match_c>1) 
                      {$aux_rec=$SPL_note_rec."\n"}
                      else {$aux_rec="\n"}
                   $aux_str=$gene_rec_end.$_.$aux_rec.$mRNA_rec_end;
                   push @CDS_blocks,$gene_block.$aux_str.$CDS_block;last}
              }
                    if ($mRNA_match_c>1) {&trace("$mRNA_match_c matched mRNA found",4);
                                          &trace($gene_block,0);
                       $mRNA_match_controvers_c++;
                       $mRNA_assing_controvers_flg=1;
                    }
          } else {&trace("no matched mRNA found",4);
                  $aux_RNA_block=$CDS_block;
                  $aux_RNA_block=~s/     CDS   /     mRNA  /;
                  $aux_RNA_block.='                     /SPLnote="DUMMY mRNA_NF"'."\n";
                  $aux_str=$gene_rec_end.$aux_RNA_block.$mRNA_rec_end;
                  push @CDS_blocks,$gene_block.$aux_str.$CDS_block;
                          &trace("mRNA considered equ CDS",4);
                  &trace($gene_block,0);
                  $mRNA_assign_err_c++;
                 }
       } else {&trace("intronless CDS",4); &trace($gene_block,0)  }
       } else {&trace("CDS format error",4); &trace($gene_block,0);
               $CDS_format_error_c++}
      }
   if ($mRNA_assing_controvers_flg) {$mRNA_match_controvers_gene_c++}
   if ($err_code!=0)    {$aux_c3++}
   if (($err_code==0) and ($CDS_exon_num>1) and ($mRNA_num>0)) {$err_code=0} 
      else {$err_code=1}  #this line has no meaning
   return $err_code;
}

sub scan_entire_contig_vs_CDS{
    my $op_res=0;
    my $aux_gene_name2='???';
    my $mRNA_rec_end="_mRNA_REC_END_\n";
       $CDS_block=~m/$mRNA_rec_end/;
    my $c_CDS_block=$';
    my $c_start=0;
    my $c_end=0;
    my $c_err=0;
    my $c_start2=0;
    my $c_end2=0;
    my $c_err2=0;
    my $gene_counter_in_contig2=0;
    my $overlap=0;
    my $old_gene_num2=0;
    my $overlap_flg=0; # overlap of the gene with any other #a19
   
    my $c_CDS2=0; # this c_CDS2 numerates on every CDS in second gene
                  # while c_CDS indicates on every CDS except intronless 
                  # in first gene 

 my $aux_CDS_block=&purge_feature_record($c_CDS_block);
    $aux_CDS_block =~s/[\s\n]//g; 
    $aux_CDS_block=&expand_rec_1nuc($aux_CDS_block);
#    print "$aux_CDS_block\n";&press_any_key;
    ($c_start,$c_end,$c_err)=&get_feature_start_end2($aux_CDS_block);
    foreach $gene_block2 (@gene_blocks){
#       print "gb2>>>\n";
#       print $gene_block2,"\n"; 
#       print "<<<gb2";
#       &press_any_key;
        @CDS_blocks2=();
        my $op_res=($gene_block2=~/\/(gene|locus_tag)=\"([\040-\176]+)\"/);
        if ($op_res) {$aux_gene_name2=$2}
#EMBL_GB
            else {$op_res=($gene_block2=~/\/(gene|locus_tag)=\s*([\041-\176]+)/);
                  if ($op_res) {$aux_gene_name2=$2}
                 }      
        &get_mRNA_CDS02($gene_block2); 
#        $cds_c_in_gene=0;  
#        $CDS_c_str=''; 
        $gene_counter_in_contig2++;
        if ($#CDS_blocks2>=0) {
           $c_CDS2=0;
           foreach $CDS_block2 (@CDS_blocks2){
             $c_CDS2++;
             $CDS_block2=~m/$mRNA_rec_end/;
             my $c_CDS_block2=$';
             my $aux_CDS_block2=&purge_feature_record($c_CDS_block2);
             $aux_CDS_block2 =~s/[\s\n]//g; 
             $aux_CDS_block2=&expand_rec_1nuc($aux_CDS_block2);
#    print "$aux_CDS_block2\n";&press_any_key;
             ($c_start2,$c_end2,$c_err2)=&get_feature_start_end2($aux_CDS_block2);
             if (($c_err == 0) and ($c_err2 == 0))
                {$overlap=&get_overlap($c_start,$c_end,$c_start2,$c_end2);
#     if ($overlap>0) {print "$aux_gene_name $aux_gene_name2 $overlap\n";
#                      &press_any_key;}
                 if (($overlap>0) and 
                    ($gene_counter_in_contig !=$gene_counter_in_contig2))
                    {$locus_overlap_c++;
                     $overlap_flg=1; #a19
                     if (($gene_counter_in_contig !=$old_gene_num) and 
                        ($gene_counter_in_contig2 !=$old_gene_num2))
                        {$locus_overlap_c_in_genes++;
                         $old_gene_num=$gene_counter_in_contig;
                         $old_gene_num2=$gene_counter_in_contig2;
                        }
#                     print "$aux_gene_name $aux_gene_name2 $overlap\n";
                     &trace("locus overlap: $aux_gene_name $aux_gene_name2 $overlap",4);
#         &press_any_key;
                 } 
                 if (($overlap==0) and 
                    ($gene_counter_in_contig ==$gene_counter_in_contig2))
                    {if ($#CDS_blocks2>=0 ){
                        $CDS_non_overlap_event_c++;
                        if ($gene_counter_in_contig2 !=$old_gene_num3) {
                           $CDS_non_overlap_c++;
                           $old_gene_num3=$gene_counter_in_contig2;
                        }
#         print "$c_start _ $c_end vs $c_start2 _ $c_end2 ";
#         print "$c_CDS vs $c_CDS2 ";
                     print "nonoverlapped CDS in $aux_gene_name2\n";
                     &trace("nonoverlapped CDS in $aux_gene_name2",4);
                     } # end if ($#CDS_blocks2>=0 )
                    } # end if overlap==0
                } #end end if c_err==0 
#            ++$cds_counter;
#        $CDS_c_str=$OK_gene_counter; #previous $gene_counter;
           } #end foreach $CDS_block2
        } #end if ($#CDS_blocks2>=0) 
    } #end foreach $gene_block2
    if ($overlap_flg==1) {$locus_overlapped_c++} #a19
} #end sub



sub get_overlap #(c_start,$c_end,$c_start2,$c_end2); ret overlap_len
    {my $c_ovarlap=0;
     my $auxl=0; $auxr=0;
     $c_start=$_[0];$c_end=$_[1];$c_start2=$_[2];$c_end2=$_[3];
     $auxl=&max($c_start,$c_start2); $auxr=&min($c_end,$c_end2);
     if ($auxl<=$auxr) {$c_ovarlap=$auxr-$auxl+1}
     return $c_ovarlap;
    }

sub get_overlap_exon #(rec1, rec2) ; ret 1 if overlap
    { my $a1; my $a2;my $err; my $b1; my $b2;
      my $c_overlap=0;
      ($a1,$a2,$err)= &get_feature_start_end2($_[0]); 
      ($b1,$b2,$err)= &get_feature_start_end2($_[1]); 
       $c_overlap=get_overlap($a1,$a2,$b1,$b2);
       if ($c_overlap>0) {$c_overlap=1};
       return $c_overlap;
    }

sub get_joint_exon #(rec1, rec2) ; ret 1 if overlap, joint exon rec
    { my $a1; my $a2;my $err; my $b1; my $b2;
      my $c_overlap=0;
      my $auxl=0; $auxr=0;
      my $joint_rec='';
      ($a1,$a2,$err)= &get_feature_start_end2($_[0]); 
      ($b1,$b2,$err)= &get_feature_start_end2($_[1]); 
       $c_overlap=get_overlap($a1,$a2,$b1,$b2);
       if ($c_overlap>0) {$c_overlap=1;
#       print "$a1,$a2,$b1,$b2\n";
       $auxl=&min($a1,$b1); $auxr=&max($a2,$b2);
       $joint_rec=$auxl."..".$auxr;
#       print "$joint_rec\n";
       } #end if
       return $c_overlap,$joint_rec;
    }

sub get_5or3_exon #(proj, member_of_class) ; ret 1 if 5', 2 if 3'
    { my $a1; my $a2;my $err; my $b1; my $b2;
      my $c_overlap=0;
      ($a1,$a2,$err)= &get_feature_start_end2($_[0]); 
      ($b1,$b2,$err)= &get_feature_start_end2($_[1]); 
       if ($a1==$b1) {$c_overlap=1};
       if ($a2==$b2) {$c_overlap=2};
       return $c_overlap;
    }

sub num_diff_items #(@c_arr); ret number_of_different_elements
    {my @sorted=sort(@_);
     my $num_c=0; 
     my $aux_flg=0;
     my $c_item='';
     if ($#sorted<1) {$num_c=$#sorted+1}
        else { $c_item=$sorted[0];$num_c=1; 
          foreach (@sorted) {if ($c_item ne $_) {$num_c++;$c_item=$_}
                   
          } #end foreach item
        } #end else 
     return $num_c;
    }

sub test_overlap
    {print &get_overlap(1,3,2,4)," 2 \n";
    }

sub test_joint_rec
    {&get_joint_exon('1..3','2..3'),"   \n";
    }

sub test_diff_num
    {print &num_diff_items('1..3','1..3','1..3','1..3','1..3','2..3','2..3','uuu'),"   \n";
    }

sub min #(va1,val2) ret min
    { my $min;
         if ($_[0]<$_[1]) {$min=$_[0]} else {$min=$_[1]}
      return $min;
    }

sub max #(va1,val2) ret max
    { my $max;
         if ($_[0]<$_[1]) {$max=$_[1]} else {$max=$_[0]}
      return $max;
    }

sub check_CDS_gene_names #(c_locus_flg)
    {
    my $c_locus_flg=$_[0];
    my $mRNA_rec_end="_mRNA_REC_END_\n";
    my $c_CDS_gene_name='ZZZ';
    $CDS_block=~m/$mRNA_rec_end/;
    my $aux_CDS_rec=$';

    my $CDS_name_op_res=($aux_CDS_rec=~/\/(gene|locus_tag)=\"([\040-\176]+)\"/);
    if ($CDS_name_op_res) {$c_CDS_gene_name=$2} 
#EMBL_GB
            else {$CDS_name_op_res=($aux_CDS_rec=~/\/(gene|locus_tag)=\s*([\041-\176]+)/);
                  if ($CDS_name_op_res) {$c_CDS_gene_name=$2}
                     else {$c_CDS_gene_name='ZZZ'}
                 }      
        if ($aux_gene_name ne $c_CDS_gene_name) 
           {$CDS_gene_name_mismatch_c++;
            $in_gene_CDS_gene_name_mismatch_c++;
            &trace("CDS gene name mismatch in $aux_gene_name",4);
            if ($c_locus_flg==0) {
               $CDS_gene_name_mismatch_locus_c++;
               $c_locus_flg=1;
               }
           }
   return $c_locus_flg;
   }

sub check_mRNA_block_CDS_block_rel #($CDS_block)
    {
#SPL new subroutine to mRNA_block for relation to specific CDS 
    my $CDS_block=$_[0];
    my $cds_flg=0;       #SPL 1 = cds block available
    my $aux_CDS_block='';
    my $err_code=0;
    my $mRNA_rec='';
    my $match_found_flg=0;
    my $c_match_found_flg=0;
    my $longest_mRNA=0;
    my $longest_mRNA_ind=0; 
    my $mRNA_st=0;
    my $mRNA_en=0;
    my $mRNA_match_c=0;
    my $c_pseudo_flg=0;
    #...
    # purge CDS feature
#    print "CDS_block>>>\n";
#    print "$CDS_block";
#    print "<<<CDS_block\n";
#    &press_any_key;
    if ($CDS_block =~ /                     \/pseudo/) 
       {$CDS_pseudo_c++; $c_pseudo_flg=1}; #s15
    if ($c_pseudo_flg==1) {$pseudo_flg=1}; #s15 
    ($prot_id) = ($CDS_block =~ /                     \/protein_id=\"(.*)\"/); 
    if (defined($prot_id)) {} else 
       {&trace("no protein_id in check phase", 4);
        $no_protein_id_check_phase++;
       } 
    $aux_CDS_block=&purge_feature_record($CDS_block);
    $aux_CDS_block =~s/[\s\n]//g; 
    $aux_CDS_block=&expand_rec_1nuc($aux_CDS_block);
#    print "aux_CDS_block>>>\n";
#    print "$aux_CDS_block";
#    print "<<<aux_CDS_block\n";
#    &press_any_key;
    my $CDS_exon_num=($aux_CDS_block =~s/(\d+\.\.\d+)/$1/g); 
    my $corr_CDS_exon_num=($aux_CDS_block =~s/,/,/g); 
    my $corr_CDS_exon_num2=($aux_CDS_block =~s/(\d+\.\.[<>]?\d+)/$1/g); 
    $corr_CDS_exon_num++;
    if ($corr_CDS_exon_num!=$CDS_exon_num) {
       $CDS_exon_num_err_c++;
       if (($corr_CDS_exon_num==2) && ($CDS_exon_num==1)) {
          $CDS_exon_num_equ1_err_c++;
          if (defined($prot_id)) {$CDS_exon_num_equ21_err_defpid_c++;}
             else {$CDS_exon_num_equ21_err_nopid_c++;}
       }
       if (defined($prot_id)) {$CDS_exon_num_err_defpid_c++;}
           else {$CDS_exon_num_err_nopid_c++;}
    }
    if ($corr_CDS_exon_num!=$corr_CDS_exon_num2) {
       $aux_CDS_exon_num_diff_c++;
    }
    if ($CDS_exon_num==0) {$CDS_exon_num_equ0_c++}
#    print '$CDS_exon_num=', $CDS_exon_num,' ',$corr_CDS_exon_num,
#          ' ',$corr_CDS_exon_num2, "\n";
#    &press_any_key;
    if ($CDS_exon_num==1) {$intronless_c++}
    if ($corr_CDS_exon_num==1) {$corr_intronless_c++;
       if (defined($prot_id)) {$corr_intronless_defpid_c++}
    }
#    print "corr_CDS_exon_num=$corr_CDS_exon_num\n";
    if (($corr_CDS_exon_num==1) && ($c_pseudo_flg==1))
       {$corr_intronless_CDS_pseudo_c++;
        if (defined($prot_id)) {$corr_intronless_CDS_pseudo_defpid_c++;}
       }
    if (($corr_CDS_exon_num>1) && ($c_pseudo_flg==1))
       {$corr_spliced_CDS_pseudo_c++;
        if (defined($prot_id)) {$corr_spliced_CDS_pseudo_defpid_c++;}
       }
    if ($corr_CDS_exon_num==1) { #old :$CDS_exon_num
       $aux_CDS_block =~/(\d+)\.\.[<>]?(\d+)/;
       my $CDS_exon_st=$1;
       my $CDS_exon_en=$2;
       $i=0; $mRNA_match_c=0; $spliced_mRNA_avail_flg=0;
       foreach $mRNA_rec (@mRNA_stack) {
         $i++;
         $c_match_found_flg=0;
         $mRNA_rec =~s/[\s\n]//g; 
         $mRNA_rec=&expand_rec_1nuc($mRNA_rec);
         while (my $c_res=($mRNA_rec=~/(\d+)\.\.[<>]?(\d+)/g))
           {if (($1<=$CDS_exon_st) && ($2>=$CDS_exon_en))
               {$c_match_found_flg=1;$mRNA_match_c++;last} 
           }
#         print "$mRNA_rec $c_match_found_flg $CDS_exon_st $CDS_exon_en\n"; &press_any_key;
         if ($c_match_found_flg==1) {$match_found_flg=1}
         if ($c_match_found_flg==1) 
            {($mRNA_st,$mRNA_en,$err_code)=&get_feature_start_end2($mRNA_rec);
             if ($longest_mRNA<($mRNA_en-$mRNA_st+1)) 
                {$longest_mRNA=$mRNA_en-$mRNA_st+1;
                 $longest_mRNA_ind=$i;}
            }
         if ($c_match_found_flg==1) {                      #02.12.04
             my $mRNA_exon_num=($mRNA_rec=~s/,/,/g);            #26.12.04
#         print "mRNA_exon_num=$mRNA_exon_num\n";
             if ($mRNA_exon_num>=1) {$alt_mRNA_while_CDS_intronless_c++;
                $spliced_mRNA_avail_flg=1;
             } #02.12.04
         }                                                               #02.12.04
       } #end for each
       if ($spliced_mRNA_avail_flg==1) {$spliced_mRNA_while_CDS_intronless_c1++;
           if (defined($prot_id)) {$spliced_mRNA_while_CDS_intronless_defpid_c1++}
       }
    } elsif ($corr_CDS_exon_num>=1)
    {
     my $CDS_st;
     my $CDS_end;
    ($CDS_st,$CDS_end,$err_code)=&get_feature_start_end2($aux_CDS_block);
    $aux_CDS_block =~/\d+\.\./; $aux_CDS_block='..'.$';
#    print "aux_CDS_block>>>\n";
#    print "$aux_CDS_block";
#    print "<<<aux_CDS_block\n";
#    &press_any_key;
    $aux_CDS_block =~s/\.\.[<>]?\d+\)*$/\.\./; 
#    print "aux_CDS_block>>>\n"; #s13
#    print "$aux_CDS_block";     #s13
#    print "<<<aux_CDS_block\n"; #s13
#    &press_any_key;             #s13
#     print 'new CDS $#mRNA_stack=',$#mRNA_stack,"\n";
    $i=0; $mRNA_match_c=0; @stack_OK1_mRNA=();@stack_OK2_mRNA=();
    foreach $mRNA_rec (@mRNA_stack) {
      $i++;
      $mRNA_rec =~s/[\s\n]//g; 
#print '$mRNA_rec=',$mRNA_rec,$eol_str;&press_any_key; #s13
      if (index($mRNA_rec,$aux_CDS_block) >=0) {
         push (@stack_OK1_mRNA,$i);
         $mRNA_match_c++;
         ($mRNA_st,$mRNA_en,$err_code)=&get_feature_start_end2($mRNA_rec);
         if ($longest_mRNA<($mRNA_en-$mRNA_st+1)) 
             {$longest_mRNA=$mRNA_en-$mRNA_st+1;
              $longest_mRNA_ind=$i;}
       }#end if index
    } #end foreach
#print '$mRNA_match_c=',$mRNA_match_c,'curr $err_code=',$err_code,$eol_str;&press_any_key; #s13
    if ($mRNA_match_c>1) {# scan all OK1 mRNA
       $mRNA_match_c2=0;
       foreach $i (@stack_OK1_mRNA) {
         $mRNA_rec=$mRNA_stack[$i-1];
         $mRNA_rec =~s/[\s\n]//g; 
         $mRNA_rec=&expand_rec_1nuc($mRNA_rec);
         my $left_end_OK=0;
         my $right_end_OK=0;
         while (my $c_res=($mRNA_rec=~/(\d+)\.\.[<>]?(\d+)/g))
           {if (($1<=$CDS_st) && ($2>=$CDS_st))   {$left_end_OK=1;} 
            if (($1<=$CDS_end) && ($2>=$CDS_end)) {$right_end_OK=1;} 
         } #end while exon
         if (($left_end_OK==1) && ($right_end_OK==1)) 
            {$mRNA_match_c2++;push (@stack_OK2_mRNA,$i);}
       } #end stack
#    print "$mRNA_match_c  $mRNA_match_c2\n";
#    &press_any_key;
      if ($mRNA_match_c > $mRNA_match_c2) {
          if ($mRNA_match_c2>=1) {$total_improvements_in_second_round++;
             &trace("second round improvement in $aux_gene_name",4);
             print "second round improvement in $aux_gene_name\n";
          }
          if ($mRNA_match_c2==1) {$final_improvements_in_second_round++;}
          if ($mRNA_match_c2==0) {$number_fails_in_second_round++;
             &trace("second round failed in $aux_gene_name",0);
             print "second round failed in $aux_gene_name\n";
          }
      }
      if (($mRNA_match_c > $mRNA_match_c2) && ($mRNA_match_c2>1))
         {# re-search longest mRNA
         $longest_mRNA=0;
         $longest_mRNA_ind=0; 
         $i=0; $mRNA_match_c=0; 
         foreach $i (@stack_OK2_mRNA) {
           $mRNA_rec=$mRNA_stack[$i-1];
           $mRNA_rec =~s/[\s\n]//g; 
           $mRNA_rec=&expand_rec_1nuc($mRNA_rec);
           ($mRNA_st,$mRNA_en,$err_code)=&get_feature_start_end2($mRNA_rec);
           if ($longest_mRNA<($mRNA_en-$mRNA_st+1)) 
              {$longest_mRNA=$mRNA_en-$mRNA_st+1;
              $longest_mRNA_ind=$i;}
#         print ("CDS=$c_CDS_num mRNA= $i longest=$longest_mRNA ind=$longest_mRNA_ind");  
#         &press_any_key;
         } #end foreach
         $mRNA_match_c=$mRNA_match_c2; # to adjust to return
      } #end if less new matches
      if ($mRNA_match_c2==1) 
         {$longest_mRNA_ind=$stack_OK2_mRNA[0];
#          print "    ind=$longest_mRNA_ind\n";
          $mRNA_match_c=1;
         }
      if ($mRNA_match_c2>1)
         {# see about all mRNA equal
         my $all_mRNA_equ_flg=1;
         my $start_i=$stack_OK2_mRNA[0];
         my $start_mRNA_rec=$mRNA_stack[$start_i-1];
           $start_mRNA_rec =~s/[\s\n]//g; 
           $start_mRNA_rec=&expand_rec_1nuc($start_mRNA_rec);
#           print "$start_mRNA_rec\n";
         foreach $i (@stack_OK2_mRNA) {
#           print "start_i=$start_i   i=$i\n";
           $mRNA_rec=$mRNA_stack[$i-1];
           $mRNA_rec =~s/[\s\n]//g; 
           $mRNA_rec=&expand_rec_1nuc($mRNA_rec);
#           print "$mRNA_rec\n";
           if ($start_mRNA_rec ne $mRNA_rec) {$all_mRNA_equ_flg=0;}
#           $start_i=$i; #extra if all equ
#         print ("CDS=$c_CDS_num mRNA= $i longest=$longest_mRNA ind=$longest_mRNA_ind");  
#         &press_any_key;
         } #end foreach
#         &press_any_key;
         if ($all_mRNA_equ_flg==1) {&trace("third round: mRNA equivalent",4);
             print "    third round: mRNA equivalent\n";
             $mRNA_match_c=1; # to adjust to return
             $all_mRNA_equ_c++;
         } #end if all mRNA equ
      } #end if two or more matches
    } # end if ($mRNA_match_c>1) # scan all OK1 mRNA
    } else {$err_code=1;$aux_c4++;
      };#!!!
#print 'internal $err_code=',$err_code,$eol_str;&press_any_key;
   if ($longest_mRNA_ind==0) {
      &trace('mRNA assign event:',4);
      &trace('$corr_CDS_exon_num='. $corr_CDS_exon_num,4);
      &trace('CDS_block>>>',4);
      &trace($CDS_block,4);
      &trace('<<<CDS_block',4);
#      &trace('aux_CDS_block>>>',4);
#      &trace($aux_CDS_block,4);
#      &trace('<<<aux_CDS_block',4);
      &trace('long_mRNA_stack>>>',4);
      &trace("@long_mRNA_stack",4);
      &trace('<<<long_mRNA_stack',4);
   }
   if ($err_code==1) {
      &trace('--- mRNA assign event err_code=1:',4);
      &trace('$corr_CDS_exon_num='. $corr_CDS_exon_num,4);
      &trace('CDS_block>>>',4);
      &trace($CDS_block,4);
      &trace('<<<CDS_block',4);
#      &trace('aux_CDS_block>>>',4);
#      &trace($aux_CDS_block,4);
#      &trace('<<<aux_CDS_block',4);
   }
   return $longest_mRNA_ind,$corr_CDS_exon_num,$mRNA_match_c,$err_code;
}

sub check_mRNA_block_CDS_block_rel_gb149 #($CDS_block)
    {
#SPL new subroutine to mRNA_block for relation to specific CDS 
#specific to GB149 issue
    my $CDS_block=$_[0];
    my $cds_flg=0;       #SPL 1 = cds block available
    my $aux_CDS_block='';
    my $err_code=0;
    my $mRNA_rec='';
    my $match_found_flg=0;
    my $c_match_found_flg=0;
    my $longest_mRNA=0;
    my $longest_mRNA_ind=0; 
    my $mRNA_st=0;
    my $mRNA_en=0;
    my $mRNA_len=0;
    my $mRNA_match_c=0;
    my $c_pseudo_flg=0;
    my $mRNA_len2=0;
    #...
    # purge CDS feature
#    print "CDS_block>>>\n";
#    print "$CDS_block";
#    print "<<<CDS_block\n";
#    &press_any_key;
    $c_pseudo_flg=0;
    if ($CDS_block =~ /                     \/pseudo/) 
       {$CDS_pseudo_c++; $c_pseudo_flg=1}; #s13
    ($prot_id) = ($CDS_block =~ /                     \/protein_id=\"(.*)\"/); 
    if (defined($prot_id)) {} else 
       {&trace("no protein_id in check phase", 4);
        $no_protein_id_check_phase++;
       } 
    $aux_CDS_block=&purge_feature_record($CDS_block);
    $aux_CDS_block =~s/[\s\n]//g; 
    $aux_CDS_block=&expand_rec_1nuc2($aux_CDS_block);
    $aux_CDS_block=&rec_to_exon_order($aux_CDS_block);
#    print "aux_CDS_block>>>\n"; 
#    print "$aux_CDS_block";
#    print "<<<aux_CDS_block\n";
#    &press_any_key;
    $aux_CDS_block_store=$aux_CDS_block; #nov18
    my $CDS_exon_num=($aux_CDS_block =~s/(\d+\.\.\d+)/$1/g); 
    my $corr_CDS_exon_num=($aux_CDS_block =~s/,/,/g); 
    my $corr_CDS_exon_num2=($aux_CDS_block =~s/(\d+\.\.[<>]?\d+)/$1/g); 
    $corr_CDS_exon_num++;
    if ($corr_CDS_exon_num!=$CDS_exon_num) {
       $CDS_exon_num_err_c++;
       if (($corr_CDS_exon_num==2) && ($CDS_exon_num==1)) {
          $CDS_exon_num_equ1_err_c++;
          if (defined($prot_id)) {$CDS_exon_num_equ21_err_defpid_c++;}
             else {$CDS_exon_num_equ21_err_nopid_c++;}
       }
       if (defined($prot_id)) {$CDS_exon_num_err_defpid_c++;}
           else {$CDS_exon_num_err_nopid_c++;}
    }
    if ($corr_CDS_exon_num!=$corr_CDS_exon_num2) {
       $aux_CDS_exon_num_diff_c++;
    }
    if ($CDS_exon_num==0) {$CDS_exon_num_equ0_c++}
#    print '$CDS_exon_num=', $CDS_exon_num,' ',$corr_CDS_exon_num,
#          ' ',$corr_CDS_exon_num2, "\n";
#    &press_any_key;
    if ($CDS_exon_num==1) {$intronless_c++}
    if ($corr_CDS_exon_num==1) {$corr_intronless_c++;
       if (defined($prot_id)) {$corr_intronless_defpid_c++}
    }
#    print "corr_CDS_exon_num=$corr_CDS_exon_num\n";
    if (($corr_CDS_exon_num==1) && ($c_pseudo_flg==1))
       {$corr_intronless_CDS_pseudo_c++;
        if (defined($prot_id)) {$corr_intronless_CDS_pseudo_defpid_c++;}
       }
    if (($corr_CDS_exon_num>1) && ($c_pseudo_flg==1))
       {$corr_spliced_CDS_pseudo_c++;
        if (defined($prot_id)) {$corr_spliced_CDS_pseudo_defpid_c++;}
       }
    if ($corr_CDS_exon_num==1) { #old :$CDS_exon_num
       $aux_CDS_block =~/(\d+)\.\.[<>]?(\d+)/;
       my $CDS_exon_st=$1;
       my $CDS_exon_en=$2;
       $i=0; $mRNA_match_c=0; $spliced_mRNA_avail_flg=0;
       foreach $mRNA_rec (@mRNA_stack) {
         $i++;
         if ($mRNA_stack_errs[$i-1] ==0) {
         $c_match_found_flg=0;
         $mRNA_rec =~s/[\s\n]//g; 
         $mRNA_rec=&expand_rec_1nuc2($mRNA_rec);
         $mRNA_rec=&rec_to_exon_order($mRNA_rec);
         while (my $c_res=($mRNA_rec=~/(\d+)\.\.[<>]?(\d+)/g))
           {if (($1<=$CDS_exon_st) && ($2>=$CDS_exon_en))
               {$c_match_found_flg=1;$mRNA_match_c++;last} 
           }
#         print "$mRNA_rec $c_match_found_flg $CDS_exon_st $CDS_exon_en\n"; &press_any_key;
         if ($c_match_found_flg==1) {$match_found_flg=1}
         if ($c_match_found_flg==1) 
            {($mRNA_len,$err_code)=&get_feature_start_end3($mRNA_rec);
             if ($modeCDS eq 'genome') {
                 #getting pre-mRNA length (ver 1.36)
                ($mRNA_st,$mRNA_en,$err_code)=&get_feature_start_end2($mRNA_rec);
                 $err_code=0; # dummy clear err_code;
                 if ($mRNA_en>$mRNA_st) {$mRNA_len2=$mRNA_en-$mRNA_st+1}
                    else {$mRNA_len2=$mRNA_st-$mRNA_en+1};
#                 &trace("mRNA_len2=$mRNA_len2",0); #deb 1.36
#                 print "mRNA_len2=$mRNA_len2",$eol_str;
                 $mRNA_len=$mRNA_len2;
             }

             if ($longest_mRNA<$mRNA_len) 
                {$longest_mRNA=$mRNA_len;
                 $longest_mRNA_ind=$i;}
            }
         if ($c_match_found_flg==1) {                      #02.12.04
             my $mRNA_exon_num=($mRNA_rec=~s/,/,/g);            #26.12.04
#         print "mRNA_exon_num=$mRNA_exon_num\n";
             if ($mRNA_exon_num>=1) {$alt_mRNA_while_CDS_intronless_c++;
                $spliced_mRNA_avail_flg=1;
             } #02.12.04
         }                                                               #02.12.04
         }#end if OK mRNA_stack_err
       } #end for each
       if ($spliced_mRNA_avail_flg==1) {$spliced_mRNA_while_CDS_intronless_c1++;
           if (defined($prot_id)) {$spliced_mRNA_while_CDS_intronless_defpid_c1++}
       }
    } elsif ($corr_CDS_exon_num>=1)     
    {
     my $CDS_st;
     my $CDS_end;
     my $CDS_len;
#    print "aux_CDS_block>>>\n";
#    print "$aux_CDS_block";
#    print "<<<aux_CDS_block\n";
#    &press_any_key;
    ($CDS_st,$CDS_end,$err_code)=&get_feature_start_end2($aux_CDS_block);
    ($CDS_len,$err_code)=&get_feature_start_end3($aux_CDS_block);
    $aux_CDS_block =~/\d+\.\./; $aux_CDS_block='..'.$';
#    print "aux_CDS_block>>>\n";
#    print "$aux_CDS_block";
#    print "<<<aux_CDS_block\n";
#    &press_any_key;

#nov8 out old expression    $aux_CDS_block =~s/\.\.[<>]?\d+\)*$/\.\./; 
    my $last_ddot_pos=rindex($aux_CDS_block,'..');
    if ($last_ddot_pos) 
       {$aux_CDS_block=substr($aux_CDS_block,0,$last_ddot_pos+2)} 

#    print "aux_CDS_block>>>\n"; #s13
#    print "$aux_CDS_block";     #s13
#    print "<<<aux_CDS_block\n"; #s13
#    &press_any_key;             #s13
#     print 'new CDS $#mRNA_stack=',$#mRNA_stack,"\n";
    $i=0; $mRNA_match_c=0; @stack_OK1_mRNA=();@stack_OK2_mRNA=();
    foreach $mRNA_rec (@mRNA_stack) {
#    print "mRNA_rec>>>\n";
#    print "$mRNA_rec";
#    print "<<<mRNA_rec\n";
#    &press_any_key;
      $i++;
      if ($mRNA_stack_errs[$i-1] ==0) {
      $mRNA_rec =~s/[\s\n]//g; 
#print '$mRNA_rec=',$mRNA_rec,$eol_str;&press_any_key; #s13
      $mRNA_rec=&expand_rec_1nuc2($mRNA_rec);
#    print "mRNA_rec>>>\n";
#    print "$mRNA_rec";
#    print "<<<mRNA_rec\n";
#    &press_any_key;
      $mRNA_rec=&rec_to_exon_order($mRNA_rec);
      $mRNA_rec=~s/[<>]//g; # deleting <> signs
#    print "mRNA_rec>>>\n";
#    print "$mRNA_rec";
#    print "<<<mRNA_rec\n";
#    &press_any_key;
     my $CDS_block_ind=index($mRNA_rec,$aux_CDS_block);
     if ($CDS_block_ind >=0) {
         # here check first and last CDS exon
     my @aux_exons=split(',',$aux_CDS_block_store);
     my ($CDS_exon1_len,$err_code1);
     my ($CDS_exon2_len,$err_code2);
        ($CDS_exon1_len,$err_code1)=&get_feature_start_end3($aux_exons[0]);
        ($CDS_exon2_len,$err_code2)=&get_feature_start_end3($aux_exons[$#aux_exons]);
     my @aux_mRNA_exons=split(',',$mRNA_rec);
     my $mRNA_exon_ind1=0;
     my $aux_mRNA='';
     while (length($aux_mRNA)<$CDS_block_ind) {
        $aux_mRNA=$aux_mRNA.$aux_mRNA_exons[$mRNA_exon_ind1++].','
     } # end while no in first CDS exon
     $mRNA_exon_ind1--;
     my $mRNA_exon_ind2=1;
     my $aux_mRNA_rec=$aux_mRNA_exons[0];
     while (length($aux_mRNA_rec)<$CDS_block_ind+length($aux_CDS_block)) { #???
        $aux_mRNA_rec=$aux_mRNA_rec.','.$aux_mRNA_exons[$mRNA_exon_ind2++];
     } # end while no in last CDS exon
     $mRNA_exon_ind2--;
     my ($mRNA_exon1_len,$err_code3);
     my ($mRNA_exon2_len,$err_code4);
        ($mRNA_exon1_len,$err_code3)=
             &get_feature_start_end3($aux_mRNA_exons[$mRNA_exon_ind1]);
        ($mRNA_exon2_len,$err_code4)=
             &get_feature_start_end3($aux_mRNA_exons[$mRNA_exon_ind2]);
       if (($err_code1==0)&&($err_code2==0)&&($err_code3==0)&&($err_code4==0)&&
         ($CDS_exon1_len<=$mRNA_exon1_len)&&($CDS_exon2_len<=$mRNA_exon2_len))
         {
#    print "OK mRNA found\n"; #o28
#    &press_any_key;          #o28

         push (@stack_OK1_mRNA,$i);
         $mRNA_match_c++;
         ($mRNA_len,$err_code)=&get_feature_start_end3($mRNA_rec);
#print 'get3 $err_code=',$err_code,$eol_str;&press_any_key; #o29
             if ($modeCDS eq 'genome') {
                 #getting pre-mRNA length (ver 1.36)
                ($mRNA_st,$mRNA_en,$err_code)=&get_feature_start_end2($mRNA_rec);
                 $err_code=0; # dummy clear err
                 if ($mRNA_en>$mRNA_st) {$mRNA_len2=$mRNA_en-$mRNA_st+1}
                    else {$mRNA_len2=$mRNA_st-$mRNA_en+1};
#                 &trace("mRNA_len2=$mRNA_len2",0); #deb 1.36
#                 print "mRNA_len2=$mRNA_len2",$eol_str;
                 $mRNA_len=$mRNA_len2;
             }
         if ($longest_mRNA<$mRNA_len) 
             {$longest_mRNA=$mRNA_len;
              $longest_mRNA_ind=$i;}
       } #end if OK mRNA
       }#end if index
       } #end if OK mRNA_stack_errs
    } #end foreach
#print '$mRNA_match_c=',$mRNA_match_c,'curr $err_code=',$err_code,$eol_str;&press_any_key; #s13
# temporary out further check
#    if ($mRNA_match_c>1) {# scan all OK1 mRNA
#       $mRNA_match_c2=0;
#       foreach $i (@stack_OK1_mRNA) {
#         $mRNA_rec=$mRNA_stack[$i-1];
#         $mRNA_rec =~s/[\s\n]//g; 
#         $mRNA_rec=&expand_rec_1nuc($mRNA_rec);
#         $mRNA_rec=&rec_to_exon_order($mRNA_rec);
#    print "mRNA_rec->>>\n";
#    print "$mRNA_rec";
#    print "<<<-mRNA_rec\n";
#    &press_any_key;
#    print "aux_CDS_block->>>\n"; #s13
#    print "$aux_CDS_block";     #s13
#    print "<<<-aux_CDS_block\n"; #s13
#    &press_any_key;             #s13
#         my $left_end_OK=0;
#         my $right_end_OK=0;
##old         while (my $c_res=($mRNA_rec=~/(\d+)\.\.[<>]?(\d+)/g))
##old           {if (($1<=$CDS_st) && ($2>=$CDS_st))   {$left_end_OK=1;} 
##old            if (($1<=$CDS_end) && ($2>=$CDS_end)) {$right_end_OK=1;}  #o27 ???
##old         } #end while exon
##new sub for left and right end
#         my @aux_CDS_items2=split ",",$aux_CDS_block;
#         pop(@aux_CDS_items2); my $aux_CDS_block2=join ",",@aux_CDS_items2;
#         $aux_CDS_block2.=",";
#         $mRNA_rec=~/$aux_CDS_block2/;
#                   #it is impossible to use argument with single '(' within it
#         my $mRNA_left_aux=$`;
#         my $mRNA_right_aux=$';
#    print "mRNA_right_aux>>>\n";
#    print "$mRNA_right_aux";
#    print "<<<mRNA_right_aux\n";
#    &press_any_key;
#         my $first_cod_exon=0;
#         my $last_cod_exon=0;
#         if ($c_res=($mRNA_left_aux=~/complement\(\w+\:(\d+)$/))  #??? first compl
#            {$first_cod_exon=$1; if ($1>=$CDS_st) {$left_end_OK=1}} 
#            else {$mRNA_left_aux=~/(\d+)$/;
#                 $first_cod_exon=$1; if ($1<=$CDS_st) {$left_end_OK=1;}}
#         my @aux_CDS_items=split ",",$aux_CDS_block;
#         my $last_CDS_item=$aux_CDS_items[-1];
#         if ($c_res=($mRNA_right_aux=~/complement\(\w+\:(\d+)\.\.$/))
#            {$mRNA_right_aux=~/^(\d+)/;
#             $last_cod_exon=$1; if ($1<=$CDS_end) {$right_end_OK=1}} 
#            else {$mRNA_right_aux=~/^(\d+)/;
#                 $last_cod_exon=$1; if ($1>=$CDS_end) {$right_end_OK=1}}
#
#         if (($left_end_OK==1) && ($right_end_OK==1)) 
#            {$mRNA_match_c2++;push (@stack_OK2_mRNA,$i);}
#       } #end stack
##    print "$mRNA_match_c  $mRNA_match_c2\n";
##    &press_any_key;
#      if ($mRNA_match_c > $mRNA_match_c2) {
#          if ($mRNA_match_c2>=1) {$total_improvements_in_second_round++;
#             &trace("second round improvement in $aux_gene_name",4);
#             print "second round improvement in $aux_gene_name\n";
#          }
#          if ($mRNA_match_c2==1) {$final_improvements_in_second_round++;}
#          if ($mRNA_match_c2==0) {$number_fails_in_second_round++;
#             &trace("second round failed in $aux_gene_name",0);
#             print "second round failed in $aux_gene_name\n";
#          }
#      }
#      if (($mRNA_match_c > $mRNA_match_c2) && ($mRNA_match_c2>1))
#         {# re-search longest mRNA
#         $longest_mRNA=0;
#         $longest_mRNA_ind=0; 
#         $i=0; $mRNA_match_c=0; 
#         foreach $i (@stack_OK2_mRNA) {
#           $mRNA_rec=$mRNA_stack[$i-1];
#           $mRNA_rec =~s/[\s\n]//g; 
#           $mRNA_rec=&expand_rec_1nuc($mRNA_rec);
#           $mRNA_rec=&rec_to_exon_order($mRNA_rec);
#           ($mRNA_len,$err_code)=&get_feature_start_end3($mRNA_rec);
#           if ($longest_mRNA<$mRNA_len) 
#              {$longest_mRNA=$mRNA_len;
#              $longest_mRNA_ind=$i;}
##         print ("CDS=$c_CDS_num mRNA= $i longest=$longest_mRNA ind=$longest_mRNA_ind");  
##         &press_any_key;
#         } #end foreach
#         $mRNA_match_c=$mRNA_match_c2; # to adjust to return
#      } #end if less new matches
#      if ($mRNA_match_c2==1) 
#         {$longest_mRNA_ind=$stack_OK2_mRNA[0];
#          print "    ind=$longest_mRNA_ind\n";
#          $mRNA_match_c=1;
#         }
#      if ($mRNA_match_c2>1)
#         {# see about all mRNA equal
#         my $all_mRNA_equ_flg=1;
#         my $start_i=$stack_OK2_mRNA[0];
#         my $start_mRNA_rec=$mRNA_stack[$start_i-1];
#           $start_mRNA_rec =~s/[\s\n]//g; 
#           $start_mRNA_rec=&expand_rec_1nuc($start_mRNA_rec);
##           print "$start_mRNA_rec\n";
#         foreach $i (@stack_OK2_mRNA) {
##           print "start_i=$start_i   i=$i\n";
#           $mRNA_rec=$mRNA_stack[$i-1];
#           $mRNA_rec =~s/[\s\n]//g; 
#           $mRNA_rec=&expand_rec_1nuc($mRNA_rec);
#           $mRNA_rec=&rec_to_exon_order($mRNA_rec);
##           print "$mRNA_rec\n";
#           if ($start_mRNA_rec ne $mRNA_rec) {$all_mRNA_equ_flg=0;}
##           $start_i=$i; #extra if all equ
##         print ("CDS=$c_CDS_num mRNA= $i longest=$longest_mRNA ind=$longest_mRNA_ind");  
##         &press_any_key;
#         } #end foreach
##         &press_any_key;
#         if ($all_mRNA_equ_flg==1) {&trace("third round: mRNA equivalent",4);
#             print "    third round: mRNA equivalent\n";
#             $mRNA_match_c=1; # to adjust to return
#             $all_mRNA_equ_c++;
#         } #end if all mRNA equ
#      } #end if two or more matches
#    } # end if ($mRNA_match_c>1) # scan all OK1 mRNA
    } else {$err_code=1;$aux_c4++;
      };#!!!
##print 'internal $err_code=',$err_code,$eol_str;&press_any_key;
   if ($longest_mRNA_ind==0) {
      &trace('mRNA assign event:',4);
      &trace('$corr_CDS_exon_num='. $corr_CDS_exon_num,4);
      my $out_CDS_block='';
      if ($CDS_block=~/   \/translation/) {$out_CDS_block=$`} 
                                     else {$out_CDS_block=$CDS_block}
      if ($out_CDS_block=~/   \/db_xref/) {$out_CDS_block=$`} 
      &trace('CDS_block>>>',4);
      &trace($out_CDS_block,0);
      &trace('<<<CDS_block',4);
#      &trace('aux_CDS_block>>>',4);
#      &trace($aux_CDS_block,4);
#      &trace('<<<aux_CDS_block',4);
      &trace('long_mRNA_stack>>>',4);
      &trace("@long_mRNA_stack",0);
      &trace('<<<long_mRNA_stack',4);
   }
   if ($err_code==1) {
      &trace('--- mRNA assign event err_code=1:',4);
      &trace('$corr_CDS_exon_num='. $corr_CDS_exon_num,4);
      &trace('CDS_block>>>',4);
      &trace($CDS_block,0);
      &trace('<<<CDS_block',4);
#      &trace('aux_CDS_block>>>',4);
#      &trace($aux_CDS_block,4);
#      &trace('<<<aux_CDS_block',4);
   }
   return $longest_mRNA_ind,$corr_CDS_exon_num,$mRNA_match_c,$err_code;
}

sub check_CDS_block #($CDS_block)
#SPL new subroutine to mRNA_block for relation to specific CDS 
#SPL analog of check_mRNA_block_CDS_block_rel
#SPL for get_mRNA_CDS0
    {
    my $CDS_block=$_[0];
    my $cds_flg=0;       #SPL 1 = cds block available
    my $aux_CDS_block='';
    #...
    # purge CDS feature
#    print "CDS_block>>>\n";
#    print "$CDS_block";
#    print "<<<CDS_block\n";
#    &press_any_key;
    $aux_CDS_block=&purge_feature_record($CDS_block);
    $aux_CDS_block =~s/[\s\n]//g; 
    $aux_CDS_block=&expand_rec_1nuc2($aux_CDS_block);
#    print "gene=$aux_gene_name\n";
#    print "aux_CDS_block>>>\n";
#    print "$aux_CDS_block";
#    print "<<<aux_CDS_block\n";
#    &press_any_key;
    my $CDS_exon_num=($aux_CDS_block =~s/(\d+\.\.[<>]?\d+)/$1/g); 
#    print '$CDS_exon_num=', $CDS_exon_num,"\n";
#    &press_any_key;
    if ($CDS_exon_num==1) {$intronless_c++}
       elsif ($CDS_exon_num >1) {}
       else {
    &trace("gene=$aux_gene_name",0);
    &trace("aux_CDS_block>>>",0);
    &trace("$aux_CDS_block",0);
    &trace("<<<aux_CDS_block",0);
       }
   return $CDS_exon_num;
}

sub purge_feature_record #(record) return short_record
{
#SPL new subroutine: rest only coordinates 
    my $c_rec=$_[0];
    my @temp_m_b=split("\n",$c_rec);
    #print '$#temp_m_b=',$#temp_m_b;
    #&press_any_key;
    my $note_flg=0;      #SPL 1 = note lines available
    my $short_rec='';
    my $i=0;
    while($i<= $#temp_m_b){
          #print "pf $temp_m_b[$i]\n";
          #&press_any_key;
        if ($temp_m_b[$i] =~ /^\s{21}\/./){ last}
        else {$short_rec.=substr($temp_m_b[$i],21)."\n"}
        $i++;
    }#SPL end while
##   print 'short_rec>>>',"\n";
##   print $short_rec;
##   print '<<<short_rec',"\n";
##   &press_any_key;
   return $short_rec;
}

sub get_feature_start_end #($short_feature); return (start, end, err_code)
#SPL new subroutine: extracting feature start and end
#SPL no cross-reference allowed (no check)
    {my $aux_short_feature=$_[0];
        $aux_short_feature=~s/[\s\n]//g; 
     my $c_start=0;
     my $c_end=0;
#    print "aux_CDS_block>>>\n";
#    print "$aux_CDS_block";
#    print "<<<aux_CDS_block\n";
#    &press_any_key;
     my $item_num=($aux_short_feature =~s/(\d+\.\.[<>]?\d+)/$1/g); 
        my $i=0;
        while (my $c_res=($aux_short_feature=~/(\d+)\.\.[<>]?(\d+)/g))
              {$i++;
               if ($i==1) {$c_start=$1}
               if ($i==$item_num) {$c_end=$2}
              }
     my $err_code=0; if (($c_start<1) || ($c_end<$c_start)) {$err_code=1;}
     return ($c_start,$c_end, $err_code)
    } 

sub get_feature_start_end2 #($short_feature); return (start, end, err_code)
#SPL new subroutine: extracting feature start and end
#SPL no cross-reference allowed (no check)
#SPL assuming spaces and new lines signes already deleted
    {my $aux_short_feature=$_[0];
     my $c_start=0;
     my $c_end=0;
     my $item_num=($aux_short_feature =~s/(\d+\.\.[<>]?\d+)/$1/g); 
#    print "feature>>>\n";
#    print "$aux_short_feature";
#    print "<<<feature\n";
#    &press_any_key;
        my $i=0;
        while (my $c_res=($aux_short_feature=~/(\d+)\.\.[<>]?(\d+)/g))
              {$i++;
               if ($i==1) {$c_start=$1}
               if ($i==$item_num) {$c_end=$2}
              }
     my $err_code=0; if (($c_start<1) || ($c_end<$c_start)) {$err_code=1;}
     return ($c_start,$c_end, $err_code)
    } 

sub get_feature_start_end3 #($short_feature); return (exon_len_sum, err_code)
#SPL new subroutine: calculating exon_len_sum
#SPL cross-reference allowed (it is not obligatory to get mRNA_start and end
#SPL since it is problematic if cross-reference is present)
#SPL assuming spaces and new lines signes already deleted
    {my $aux_short_feature=$_[0];
     my $c_start=0;
     my $c_end=0;
     my $c_len=0;
     my $err_code=0;
     my $item_num=($aux_short_feature =~s/(\d+\.\.[<>]?\d+)/$1/g); 
#    print "feature>>>\n";
#    print "$aux_short_feature";
#    print "<<<feature\n";
#    &press_any_key;
        my $i=0;
        while (my $c_res=($aux_short_feature=~/(\d+)\.\.[<>]?(\d+)/g))
              {$i++;
               if (($1<1) || ($2<1)) {$err_code=1}
#ORI SPL               if (($1<1) || ($2<$1)) {$err_code=1;}
               if ($2<$1) {$c_len=$c_len+($1-$2+1)}
               else {$c_len=$c_len+($2-$1+1)};
              }
     return ($c_len, $err_code)
    } 

sub get_feature_start_end4 #($short_feature); return (err_code)
#SPL new subroutine: calculating exon_len_sum
#SPL cross-reference allowed (it is not obligatory to get mRNA_start and end
#SPL since it is problematic if cross-reference is present)
#SPL not assuming spaces and new lines signes already deleted
    {my $aux_short_feature=$_[0];
     my $c_start=0;
     my $c_end=0;
     my $c_ref='';
     my $prev_start=0;
     my $prev_end=0;
     my $auxl=0;
     my $auxr=0;
     my $err_code=0;
     $aux_short_feature=~s/[\s\n]//g; 
    my $c_res=($aux_short_feature=~m/^complement/);
    if ($c_res) {$c_res=($aux_short_feature=~s/^complement\(//);
                 $c_res=($aux_short_feature=~s/\)$//);
                } #end if c_res
       $c_res=($aux_short_feature=~s/^join\(//);
       if ($c_res) {$c_res=($aux_short_feature=~s/\)$//)}
     my @c_items=split (/\,/,$aux_short_feature);
     my $item_num=($aux_short_feature =~s/(\d+\.\.[<>]?\d+)/$1/g); 
#    print "feature>>>\n";
#    print "$aux_short_feature";
#    print "<<<feature\n";
#    &press_any_key;
        my $i=0;  my $prev_ref='';
           $c_res=($c_items[$i]=~/([A-Za-z0-9_.]+)\:/);
           if ($c_res) {$prev_ref=$1}
           $c_res=($c_items[$i]=~/(\d+)\.\.[<>]?(\d+)/);
           if ($c_res) {$prev_start=$1;$prev_end=$2} else {$err_code=2}
           if (($prev_start<1) || ($prev_end<$prev_start)) {$err_code=1}
           $i++;
        while (($i<=$#c_items) && ($err_code==0)) {
           $c_res=($c_items[$i]=~/([A-Za-z0-9_.]+)\:/);
           if ($c_res) {$c_ref=$1} else {$c_ref=''}
           $c_res=($c_items[$i]=~/(\d+)\.\.[<>]?(\d+)/);
           if ($c_res) {$c_start=$1;$c_end=$2} else {$err_code=2}
           if ($err_code==0) 
              {
               if (($c_start<1) || ($c_end<$c_start)) {$err_code=1;}
#               print "$prev_ref vs  $c_ref"; &press_any_key;
               if ($c_ref eq $prev_ref) {
                 $auxl=&max($prev_start,$c_start); $auxr=&min($prev_end,$c_end);
                 if ($auxl<=$auxr) {$err_code=3}
               } #end same reference
               $prev_ref=$c_ref; $prev_start=$c_start; $prev_end=$c_end;
              } #end if err_code == 0
           $i++;
        } #end while
#print "mRNA err_code=$err_code";&press_any_key;
     return ($err_code)
    } 



sub check_cds{
   ### at this point we have: $cdsline, $codon_start, $translation, $locusline, $defline
   my @exonsizes;
   my $j;
   my $exonsize;
   my $total_exon_size = 0;
   my @intronposition;
   my @intronnt;
   my @protein;
   my @introns;
   my @phase;
   my $intronno;
   my $exonno;


   # if(!$translation) {
   #      return "";
   # }
   # @protein = ("", split(//, $translation));
#   print '>>>'; #SPL
#   &print_cds_line;print "\n"; #SPL
#   print length($cdsline),"\n"; #SPL
   $cdsline =~ s/[\w\.]+\://g;
   $cdsline =~ s/\s+//g;
#   &print_cds_line;print "\n"; #SPL
   @intr_pairs = split(/\,/,$cdsline);
   $intronno = $#intr_pairs;
   $exonno = $intronno + 1;
   foreach $intr_pair (@intr_pairs){
#      print $intr_pair,$eol_str; #SPL exons anyway
      (@temp) = ($intr_pair =~ /(\d+)/g);
#      print "@temp,\n"; #SPL
      ($#temp == 0)  && (push(@temp, $temp[0]));
      ($#temp > 1) && die "strange cdsline: /$intr_pair/:$cdsline\n\t$entrylines[1]\n";
      push(@introns, @temp);
   }

   for($j = 0; $j < $#introns; $j += 2){
      $exonsize = $introns[$j + 1] - $introns[$j] + 1;
      push(@exonsizes, $exonsize);
      $total_exon_size += $exonsize;
   }
   if($cdsline =~ /CDScomplement\(join/){
      @exonsizes = reverse(@exonsizes);
   }   
   $exonsizes[0] -= $codon_start;
   $total_exon_size -= $codon_start;

   # $diff = $total_exon_size / 3 - $#protein;
   
   $diff = 0;
   if($diff < -1 || $diff > 1){
      return "";
   }

   return "\n$cds_info\n";
}

sub get_CDS_c_str #(c_num) ret new CDS_c_str;
    {my $c_num=$_[0];
     my $aux_str='';
     my $h26;
     my $l26;
     if ($c_num>0) {
        if ($c_num>26*27) {$aux_str='ZZ'} 
           else {$h26=($c_num-1)/26; $l26=($c_num-1) % 26;
           if ($h26<1) {$aux_str=chr($l26+65)}
              else {$aux_str=chr($h26+64).chr($l26+65)}  
           }
     }
    return $aux_str;
    }
sub test_get_CDS_c_str
    {print "1 ",&get_CDS_c_str(1),"\n";
     print "2 ",&get_CDS_c_str(2),"\n";
     print "26*27-1 ",&get_CDS_c_str(26*27-1),"\n";
     print "26*27 ",&get_CDS_c_str(26*27),"\n";
     print "999 ",&get_CDS_c_str(999),"\n";
     &press_any_key;
     return
    }
#--------------
sub trace #(c_message, line_indent)
    {my $auxs;
     if ($_[1]>=0) {$auxs=' 'x$_[1];}
     print TRACEF $auxs,$_[0],"\n";
    }

#--------------
sub trace_stat #(c_message, line_indent)
    {my $auxs;
     if ($_[1]>=0) {$auxs=' 'x$_[1];}
     print STATF $auxs,$_[0],"\n";
    }

#--------------
sub test_expand_1nucl 
    {my $CDS_rec='join(20,23..24)';$CDS_rec=&expand_rec_1nuc2($CDS_rec);
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(20,23)';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(complement(20),23))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(L2:20,23)';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(complement(L2:20),23)';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(20,complement(23))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(<11,complement(L2:23))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(<1,complement(L2:23..>24))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(complement(L2:1), 31)';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(complement(L2:<1),complement(L2:23))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(complement(L2:<1..10),complement(L2:23))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(complement(L2:<1),complement(L2:>23))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='join(complement(L2:<1..10),complement(L2:23..>31))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='complement(join((L2:<1..10),(L2:23..>31)))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
     $CDS_rec='complement(join((L2:<10),(L2:>31)))';$CDS_rec=&expand_rec_1nuc2($CDS_rec);    
     print "$CDS_rec\n";&press_any_key;
    }
#--------------
sub expand_rec_1nuc #(c_rec)
    # example: join(1..20,55) -> join(1..20,55..55)
    {
    my $c_rec=$_[0];
    my $c_res=($c_rec=~s/([^\0560-9]{1})(\d+),/$1$2\.\.$2,/g);
    $c_rec=~s/,(\d+)([^\0560-9]{1})/,$1\.\.$1$2/g;
                    #\056=period (point)
    # ignore the case of single nucleotide only 
    return $c_rec;
    }
#--------------
sub expand_rec_1nuc2 #(c_rec)
    # example: join(1..20,55) -> join(1..20,55..55)
    {
    my $c_rec=$_[0];
    my $c_res=($c_rec=~s/([^\0560-9<>]{1})(\d+),/$1$2\.\.$2,/g);
    $c_res=($c_rec=~s/([^\0560-9<>]{1})<(\d+)(\)?),/$1<$2\.\.$2$3,/g); #jan05
#    $c_res=($c_rec=~s/\((<{1})(\d+),/\($1$2\.\.$2,/g); #jan05
    $c_rec=~s/,(\d+)([^\0560-9]{1})/,$1\.\.$1$2/g;
                    #\056=period (point)
    # ignore the case of single nucleotide only 
    $c_res=($c_rec=~s/([^\0560-9<>]{1})(\d+)\)/$1$2\.\.$2\)/g);
    $c_res=($c_rec=~s/([^\0560-9<>]{1})>(\d+)\)/$1$2\.\.>$2\)/g);
#    $c_rec=~s/,(\d+)([^\0560-9]{1})/,$1\.\.$1$2/g;
                    #\056=period (point)
    return $c_rec;
    }
#--------------
sub test_rec_to_exon_order
    {my $CDS_rec='complement(AL:1..2)';
     $CDS_rec=&rec_to_exon_order($CDS_rec);
     print "$CDS_rec\n";&press_any_key;
    }
#--------------
sub rec_to_exon_order #(c_rec)
    # example: complement(join(1..20,55..56)) -> 
    #          join(c(55..56),c(1..20));
    #ASSUME purge_feature_record passed
    #ASSUME single use of complement and join operators
    #ASSUME valid syntax
    {
    my @c_f;
    my $c_ref='';
    my $c_rec=$_[0];
    my $c_res=($c_rec=~m/^complement/);
    if ($c_res) {$c_res=($c_rec=~s/^complement\(//);
                 $c_res=($c_rec=~s/\)$//);
                 $c_res=($c_rec=~s/^join\(//);
                 if ($c_res) {$c_res=($c_rec=~s/\)$//)}
                 @c_f=split /,/,$c_rec;
                 @c_f=reverse @c_f;
                 foreach $exon_rec (@c_f) {
                       $c_res=($exon_rec=~/(\d+)\.\.[<>]?(\d+)/);
                       #$exon_rec=$`.$2."\.\.".$1.$';
                       $exon_rec=$2."\.\.".$1;  #v. 1.53e
                 }
                 $c_rec=join ",", @c_f; 
    } elsif ($c_res=($c_rec=~m/^join/)) {
                 $c_res=($c_rec=~s/^join\(//);
                 $c_res=($c_rec=~s/\)$//);
                 @c_f=split /,/,$c_rec;
                 foreach $exon_rec (@c_f) {
                    $c_ref='';
                    if ($exon_rec=~m/^complement/) {
                       $exon_rec=~s/^complement\(//;
                       if ($exon_rec=~m/\:/)
                          {$c_ref=$`.':';$exon_rec=$';}
                       $c_res=($exon_rec=~/(\d+)\.\.[<>]?(\d+)/);
                       $exon_rec=$c_ref.$2."\.\.".$1;
                    }
                 }
             $c_rec=join ",", @c_f; 
      }
    return $c_rec;
    }
#--------------
sub print_by_page #(block_of_lines)
    {my $p_len=10;
     my $c_c=0;
     my $cp_c=1;
     my @block_of_lines=split ("\n",$_[0]);
     while ($c_c<=$#block_of_lines)
     {print $block_of_lines[$c_c],"\n";
      $c_c++;$cp_c++;
      if ($cp_c==$p_len) {$cp_c=1;&press_any_key;}
     }
    }

#-------------- small lib ---- 
#--------------
sub print_cds_line #(c_line)
    {my $c_line=$_[0];
     for (my $j=0;$j<=(length($c_line));$j++) 
         {if (ord(substr($c_line,$j,1))!=13) {print substr($c_line,$j,1)}}
    }
#--------------
sub chop_if_chr   #(string;chr)
    {my $loc_string=$_[0];
     #print 'char=',ord($_[1]),' string=',$loc_string,' substr=',ord(substr ($loc_string,-1,1));
     if (substr ($loc_string,-1,1) eq $_[1]) 
        {chop($loc_string);} 
     #print 'after last=',ord(substr ($loc_string,-1,1));

     return $loc_string;
    }
#--------------
sub del_trail_spaces #($loc_l)
    {$_=$_[0]; s/ +$//;$_=$_}  #deleting trailing spaces
#--------------
sub purge_eol #($loc_l)
    {my $loc_l=$_[0];
     print 'i: ',$loc_l,'   ',length($loc_l);
     chomp($loc_l); 
     print 'chomp: ',$loc_l,'   ',length($loc_l);
     $loc_l=&chop_if_chr($loc_l,chr(13));
     #print 'chopic: ',$loc_l,'   ',length($loc_l);
    }
#--------------
sub purge_eol_sp #($loc_l)
    {my $loc_l=$_[0];
     chomp($loc_l); $loc_l=&chop_if_chr($loc_l,chr(13));
     $loc_l=&del_trail_spaces($loc_l);
    }
#--------------
sub press_any_key
    { read STDIN,my $a_key,1; $a_key='';}
#---- end of lib -----
#------- REVISION HISTORY --------------------------
# ver 1.03 20.10.04 tracing, debug in intronless CDS 
# ver 1.04 21.10.04 minor debug    
# ver 1.05 22.10.04 more statistics
# ver 1.06 01.11.04 correction for CDS exon length=1
#                   represented by single nucleotide pos
# ver 1.07 07.11.04 flow chart correction
#                   error statistics correction
# ver 1.08 11.11.04 special subroutines for handling 
#                   all CDS without wings
# ver 1.09 17.11.04 store entries with stop codons,
#                   statistics for those events,
#                   scan for CDS overlap around contig
# ver 1.10 19.11.04 scan for CDS nonoverlap around gene,
#                   scan for CDS gene name contradiction,
#                   dismiss alt splicing after CDS gene name test
# ver 1.11 19.11.04 debug CDS nonoverlap around gene
#                   debug CDS gene name mismatch and statistics
# ver 1.12 02.12.04 debug check_mRNA_block_CDS_block_rel 
#                   (second and third round)
#                   SPL_note="mRNA assignment controversial"
# ver 1.13 11.12.04 mark CDS with not founf mRNA as 
#                   SPLnote="DUMMY mRNA NF"
# ver 1.14 16.12.04 additional statistics for intronless CDS 
# ver 1.15 17.12.04 additional statistics for no protein_id events
#                   statistics for correct CDS_exon_num, 
#                   corrected CDS_exon_num
# ver 1.16 17.12.04 debug mRNA_exon_num
# ver 1.17 15.01.05 no trace intronless_c since 
#                   corr_intronless_c is valid
# ver 1.18 19.08.05 statistics file introduced
#                   minor bugs fixed
# ver 1.19 04.09.05 correction type misprint effected
#                   to lost lines in first part of statistics 
#                   in tEID output
# ver 1.20 11.09.05 GB149
# ver 1.21 13.09.05 reference (nothing special)
# ver 1.22 15.09.05 PSEUDO
# ver 1.23 18.09.05 "locus_tag=" handling
# ver 1.24 25.10.05 GB149 mRNA assignment
# ver 1.25 01.11.05 --- " ---
# ver 1.26 08.11.05 --- " --- ----------- new version
# ver 1.27 18.11.05 --- " --- ----------- debug in first end last 
#                                         CDS exon
# ver 1.28 25.12.05 GB149 out contig_scan
#                   - " - out name mismatch 
#                   no output /translation in CDS block in .tEID
# ver 1.29 26.12.05 debug rec_to_exon_order
# ver 1.30 28.12.05 debug get_gene_block
# ver 1.31 01.01.06 new version of expand_rec_1nuc2,
#                   mRNA_stack_errs,
#                   corrected check_mRNA_block_CDS_block_rel_gb149,
#                   new get_feature_start_end4: getting format error  
# ver 1.32 02.01.06 statistics of rejected mRNA records
#                   no output from /db_xref till end of CDS block  
#                   in .tEID
#                   corrected get_feature_start_end4
# ver 1.33 05.01.06 debug expand_rec_1nuc2
# ver 1.34 29.06.06 fixing errors debugged in GB149 version
# ver 1.35 11.07.06 CDS and mRNA block: no leading 4 spaces
# ver 1.36 19.07.06 returning to longest pre-mRNA in GB149 subroutines
#                   under genome global selector
#                   Note. GB149 always longest mRNA  as exon sum
#                   Since it is impossble to calculate pre-mRNA
#                   length in the case external sequence reference       
# ver 1.37 23.08.06 fixing version after all the tests made 
# ver 1.37e 01.09.06  alternative exon count:starting 
# ver 1.38e 01.09.06  first working
# ver 1.39e 01.09.06  another definition of alternative exons
# ver 1.40e 13.09.06  skipped, truncated and other exons, started.
# ver 1.41e 21.09.06  skipped debugged, get_exon_overlap debugged
# ver 1.42e 21.09.06  exon projection 
# ver 1.43e 21.09.06  truncated and other exons, finished, skipped temp out 
# ver 1.44e 25.09.06  bug in truncated and other exons  
# ver 1.45e 25.09.06  truncated and other exons debug 
# ver 1.46e 07.10.06  retained intron among truncated exon and others classes
# ver 1.47e 18.10.06  (1) retained intron among others classes prototype 
#                         (in class number)
#                     (2) retained intron among others classes prototype
#                         (in intron number)
# ver 1.48e 19.10.06  marginal skipped exon number prototype
# ver 1.49e 20.10.06  debug previous ver, print individual stat,
#                     mRNA_ind1++ position correction
# ver 1.50e 20.10.06  debug previous ver, print total stat.
# ver 1.51e 20.10.06  brushing, out debug prints, total stat in stat file 
# ver 1.52e 20.10.06  minor modification
# ver 1.53e 08.04.07  modified [<>] signs handling in rec_to_exon_order:
#                     [<>] are omitted but mentioned in statistics as
#                     CDS_incomplete; 
#                     modified format of exon statistics
# ver 1.54e 18.07.07  corrected mRNA word in stat files
# ver 1.55e 26.07.07  damping invalid format in EMBL formatted to GenBank
#                     in line /gene=gene_name
#                     in gene record, that was made without double quotation
#                     which leads to empty gene_name 
#                     mark is EMBL_GB                      
# ver 1.56e 05.08.07  rewritten proc produce projection in exon count block
#                     since previous version has a bug
#
#------- KNOWN PROBLEMS ----------------------------
# 1. exon feature is not used 
# 2. Coordinate of the kind one-of-the-interval is not handled
#    and considered as a format error, example: (23.25)..35
#---------------------------------------------------