#!/usr/bin/perl # codonUsage.pl - Sam Shepard - 10.2009 # Takes a FASTA file, assuming it is a CDS, and measures the codon usage. # 2.2010 - SS # Does initialization of codon table using a recursive enumerate function. # Performs additional error-checking. # Perform input error-checking. if ( scalar(@ARGV) != 1 ) { die("Usage:\n\tperl $0 \n"); } open( IN, '<', $ARGV[0] ) or die("Cannot open $ARGV[0].\n"); # Define enumerate() sub-routine. Uses global vars, so dangerous. %codons = (); @letters = ('a','t','c','g'); sub enumerate($$) { my $level = shift; my $tail = shift; my $base; if ( $level == 0 ) { $codons{$tail} = 0; } else { for $base ( @letters ) { enumerate(($level-1),"$tail$base"); } } } # Initialize the hash for all codons to zero. enumerate(3,''); # Process data from all sequences in fasta file. $/ = '>'; $totalCodons = 0; while( $record = ) { @lines = split("\n", $record ); $header = shift(@lines); $seq = lc(join('',@lines)); chomp($seq); $length = length($seq); # Test for sequence purity. $count = ($seq =~ tr/actg//); if ( $length != $count ) { next; } for($i = 0; $i < $length - 2; $i += 3) { $codons{substr($seq,$i,3)}++; $totalCodons++; } } # Error check. if ( scalar(keys(%codons)) != 64 ) { die("Processing error.\n"); } # Print the results. foreach $codon ( keys(%codons) ) { $freq = $codons{$codon} / $totalCodons; $freq = sprintf("%.3f", $freq); print $codon,"\t",$freq,"\t",$codons{$codon},"\n"; }