#!/usr/bin/perl

#this program was modified by Alexei Fedorov on Feb 09, 2005 to work with new human EID prepared by Shepelev
use strict;
use warnings;

# Splits an exon-intron database into separate exon, intron and mRNA files.
#
# Input : <prefix>.d<suffix>, a FASTA-like file whose records are separated
#         by "\n>". The first line of a record is its identifier; the
#         remaining lines are the sequence, in which runs of upper-case
#         letters are treated as exons and runs of lower-case letters or '.'
#         are treated as introns.
# Output: <prefix>.ex<suffix>    one "> EXON_<n><id>" entry per exon
#         <prefix>.intr<suffix>  one "> INTRON_<n><id>" entry per intron
#         <prefix>.mrna<suffix>  one ">{id}" entry per record holding all of
#                                its exons concatenated
#         output                 the unparsed remainder of a sequence that
#                                could not be split (written just before the
#                                script dies)

my $record_count = 0;

print "please print the prefix for the exon-intron database \n";
my $prefix = <STDIN>;
die "No database prefix was supplied on standard input\n"
    unless defined $prefix;
chomp $prefix;

print "please print the sufix for the type of exon-intron database EID or IUD \n";
my $suffix = <STDIN>;
die "No database suffix was supplied on standard input\n"
    unless defined $suffix;
chomp $suffix;

my $db_file     = $prefix . '.d'    . $suffix;
my $exon_file   = $prefix . '.ex'   . $suffix;
my $mrna_file   = $prefix . '.mrna' . $suffix;
my $intron_file = $prefix . '.intr' . $suffix;

# Open the input first so that a mistyped prefix does not truncate output
# files left over from an earlier run. Three-argument open keeps characters
# such as '>' or '|' in the user-supplied names from being read as a mode.
open(my $db_fh, '<', $db_file)
    or die "Can't open $db_file : $!\n";
open(my $intron_fh, '>', $intron_file)
    or die "Can't open: $intron_file $!\n";
open(my $mrna_fh, '>', $mrna_file)
    or die "Can't open: $mrna_file $!\n";
open(my $exon_fh, '>', $exon_file)
    or die "Can't open: $exon_file $!\n";
open(my $error_fh, '>', 'output')
    or die "Can't open: output $!\n";

{
    # Read one record at a time: each record ends where the next header
    # line begins.
    local $/ = "\n>";

    while (my $record = <$db_fh>) {
        $record_count++;

        # Remove the "\n>" separator if present. Unlike chop, this leaves
        # the final record intact when the file has no trailing newline.
        chomp $record;

        my ($id, @sequence_lines) = split /\n/, $record;
        $id = '' unless defined $id;

        # Only the first record still carries its leading '>'; for every
        # later record it was consumed as part of the separator.
        $id =~ s/\A>// if $record_count == 1;

        my $sequence = join '', @sequence_lines;
        $sequence =~ s/\s//g;

        my $mrna         = '';
        my $exon_fasta   = '';
        my $intron_fasta = '';
        my $segment_no   = 0;

        while (length $sequence) {
            $segment_no++;
            my $consumed = 0;

            if ($sequence =~ s/\A([A-Z]+)//) {
                my $exon = $1;
                $mrna       .= $exon;
                $exon_fasta .= '> EXON_' . $segment_no . $id . "\n" . $exon . "\n\n";
                $consumed = 1;
            }

            if ($sequence =~ s/\A([a-z.]+)//) {
                my $intron = $1;
                $intron_fasta .= '> INTRON_' . $segment_no . $id . "\n" . $intron . "\n\n";
                $consumed = 1;
            }

            # Neither pattern matched, so the sequence starts with a
            # character that is not a letter or '.'. Stop here: looping
            # again could never make progress.
            unless ($consumed) {
                print 'CHECK YOUR CURRENT SEQUENCE', $id, "\n";
                print {$error_fh} $sequence, "\n";
                close $error_fh;
                die "Record $record_count: sequence contains a character "
                    . "that is neither an exon nor an intron symbol";
            }
        }

        # Progress report every 1000 records.
        if ($record_count % 1000 == 0) {
            print "current item is $record_count \n", "\n\n";
        }

        print {$mrna_fh} '>', $id, "\n", $mrna, "\n\n";
        print {$intron_fh} $intron_fasta;
        print {$exon_fh} $exon_fasta;
    }
}

close $db_fh;

# Buffered write errors (e.g. a full disk) only surface at close time.
close $intron_fh or die "Can't close: $intron_file $!\n";
close $mrna_fh   or die "Can't close: $mrna_file $!\n";
close $exon_fh   or die "Can't close: $exon_file $!\n";
close $error_fh  or die "Can't close: output $!\n";