#!/usr/bin/perl -w # Perl script: graph_islands.pl # # Authors: Jason M. Bechtel # Bioinformatics Lab, Department of Medicine # Medical College of Ohio, Toledo, Ohio, USA # written July, 2007 by Jason M. Bechtel # Contact afedorov@meduohio.edu # # LICENSE: GPL version 3 or later # If you use this code or incorporate it in another program, please cite our paper: # Bechtel JM, Wittenschlaeger T, Dwyer T, Song J, Arunachalam S, Ramakrishnan SK, Shepard S, Fedorov A: Genomic mid-range inhomogeneity correlates with an abundance of RNA secondary structures, BMC Genomics 2008, 9:284. # # This Perl script does the following: # - reads in the primary/complementary regions data (the output from one of the # 'MRI analyzer' scripts) # - generates "spike graphs" of the data # use GD::Graph::linespoints; use File::Basename; #require 'libgd-graph-save.pl'; if (scalar( @ARGV ) < 1) { print STDOUT "Usage: graph_islands.pl ...\n"; print STDOUT "\twhere is the name of a file containing locations of islands (in tab-delimited format),\n"; print STDOUT "\n"; exit 1; } # flush on every write select(STDOUT); $| = 1; for ( my $i=0; $i<=$#ARGV; $i++ ) { $infile = $ARGV[$i]; print STDOUT "Reading from \"$infile\"..."; @data = read_data_from_csv($infile) or die "Cannot read data from \"$infile\"!"; print STDOUT " done\n"; print STDOUT "Generating \"spike graph\" from $num_data data points..."; # calculate tick spacing based on size of dataset $num_x_ticks = 6; $num_locs = $num_data; $tick_spacing = round ( $num_locs / $num_x_ticks ); $main_title = 'Spike Graph of Content in Windows'; # default/dummy title # see if datafile specified any parameters if ( scalar keys %params >= 1 ) { if ( exists $params{content} && exists $params{winsize} && exists $params{upper_threshold} && exists $params{lower_threshold}) { if ( exists $params{filename} ) { $main_title = sprintf "\"%s\", %s-content, %d nt", basename($params{filename}), uc($params{content}), $params{winsize}; } else { $main_title = sprintf "%s-content, %d nt", uc($params{content}), $params{winsize}; } # scale x-axis (@data[*,0]) by $params{winsize} for $i (0 .. $#{$data[0]}) { $data[0][$i] *= $params{winsize}; $data[0][$i] /= 1000; # round to nearest whole number to keep it looking clean $data[0][$i] = round( $data[0][$i] ); } # scale y-axis (@data[*,1] and @data[*,2]) so points are centered vertically in their panels for $i (0 .. $#{$data[0]}) { for $j (1 .. 2) { if (defined($data[$j][$i])) { $data[$j][$i] /= 2; } } } } else { printf STDERR "Warning: datafile does not specify all island parameters; using dummy title...\n"; } } else { printf STDERR "Warning: datafile does not contain island parameters; using dummy title...\n"; } #$graph_height = 300; # this matches the default $graph_height = 100; $graph_width = 400; # this matches the default # widen the graph some to accommodate more information if ($num_locs > 100000) { $graph_width = 800; } elsif ($num_locs > 10000) { $graph_width = 600; } $my_graph = new GD::Graph::linespoints($graph_width,$graph_height); $my_graph->set( r_margin => 10, l_margin => 5, t_margin => 5, b_margin => 5, title => $main_title, axis_space => 8, # default = 4 #text_space => 2, # default = 8 transparent => 0, #no_axes => 1, zero_axis => 1, skip_undef => 1, markers => [ 10, 10 ], # use vertical lines for point markers #marker_size => 54, #marker_size => 9, marker_size => 4, dclrs => [ qw(blue red) ], bgclr => "white", # background of entire graph boxclr => "white", # background of graph area fgclr => "black", # axes #y_label => 'Presence of Island', #y_label_position => 1, #y_label => 'Region of content', y_max_value => 1, y_min_value => -1, y_tick_number => 2, # actually produces one tick at the zero axis y_label_skip => 2, x_label => 'Window position (kb)', #x_plot_values => 0, # do not plot the values along the axis x_label_position => 0.5, x_label_skip => $tick_spacing, x_ticks => 1, #x_tick_offset => 1, #x_tick_number => 'auto', #x_tick_number => 10, #x_min_value => 0, x_max_value => $num_locs, x_labels_vertical => 0, long_ticks => 0, tick_length => -4, ); $my_graph->set( 'y_number_format' => \&y_format ); $my_graph->set_x_label_font( ['helvetica', 'arial', gdMediumBoldFont], 14); $my_graph->set_y_label_font( ['helvetica', 'arial', gdMediumBoldFont], 14); $my_graph->set_x_axis_font( ['helvetica', 'arial', gdMediumBoldFont], 12); $my_graph->set_y_axis_font( ['helvetica', 'arial', gdMediumBoldFont], 12); if ( exists $params{primary_regions} && exists $params{complementary_regions} ) { $my_graph->set_legend( 'primary content'.' ('.$params{primary_regions}.')', 'complementary content'.' ('.$params{complementary_regions}.')' ); } else { $my_graph->set_legend( 'primary content', 'complementary content' ); } $my_graph->plot( \@data ); save_chart( $my_graph, $infile . "_graph" ); print STDOUT " done\n"; } ## Functions sub y_format { my $value = shift; my $ret; if ($value > 0) { $ret = sprintf("%s%%", round($params{upper_threshold}/$params{winsize} * 100)); } elsif ($value < 0) { $ret = sprintf("%s%%", round($params{lower_threshold}/$params{winsize} * 100)); } else { $ret = sprintf("%d", 0); } return $ret; } # implement our own round() function sub round { my($number) = shift; return int($number + (.5 * ($number <=> 0))); } # read in data from tab-delimited file # *** be sure to catch find_islands parameters sub read_data_from_csv { my $filename = shift; my @data = (); %params = (); $num_data = 0; $num_recs = 0; open( CSVFILE, $filename ) || return (); while () { chomp; my @row = split ( /\t/, $_, 3 ); # catch island parameters and store them so they can used in the graph if ( $row[0] =~ /(\w+):/ ) { $params{$1} = $row[1]; next; } last; } # here you could optionally calculate something based on the parameters # before reading through the entire file... seek(CSVFILE, 0, 0); while () { chomp; my @row = split ( /\t/, $_, 3 ); # ignore parameters if ( $row[0] =~ /(\w+):/ ) { next; } if ( $row[2] =~ /(\S+)\s.*/ ) { # ignore any extra data after the third field $row[2] = $1; } $num_recs++; for ( my $i = 0 ; $i <= $#row ; $i++ ) { undef $row[$i] if ( $row[$i] eq 'x' ); push @{ $data[$i] }, $row[$i]; } $num_data++; } close(CSVFILE); return @data; } # 'save_chart' function from Martien Verbruggen's 2002 book # "Graphics Programming with Perl", ISBN: 1930110022 # © 2002 Manning Publications Co. # obtained in 'verbruggen_src.tar.gz' from http://www.manning.com/verbruggen/ on 7/18/2007 # - writes the chart out as an image file in whatever format is supported. # + added preference for PNG format over GIF format sub save_chart { my $chart = shift or die "INTERNAL ERROR: save_chart: arg1 (chart) not specified!"; my $name = shift or die "INTERNAL ERROR: save_chart: arg2 (name) not specified!"; my $ext = $chart->export_format; # default format (GIF) # prefer PNG format if ($chart->gd->can('png')) { $ext = 'png'; } local (*OUT); open(OUT, ">$name.$ext") or die "Cannot open \"$name.$ext\" for write: $!"; binmode OUT; print OUT $chart->gd->$ext(); close OUT; }