package ScatterPlot; #use 5.008001; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use SCatterPlot ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. our %EXPORT_TAGS = ( 'all' => [ qw( ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw( ); our $VERSION = '0.01'; # constructor sub new { # declare the class name and assign it the input parameter my ($class_name) = @_; # create the new variable, its a hash my ($self) = []; # bless it to be an object within class $class_name bless ($self, $class_name); # return the hash and exit return $self; } # draw an ASCII plot sub draw { # declare local copy of self and sport, assigning with input paramters my ($self, $xy_points, $x_size, $y_size, $x_label, $y_label, $char, $type, $reg_calc) = @_; # default variables unless ($type) { $type = 'text'; } unless ($char) { $char = 'o'; } unless ($y_label) { $y_label = ''; } unless ($x_label) { $x_label = ''; } unless ($y_size) { $y_size = 26; } else { $y_size--; } unless ($x_size) { $x_size = 60; } unless ($xy_points) { my @xy_points = (); my $i_max = 20; for (my $i=0; $i<$i_max; $i++) { $xy_points[$i][0] = ($i - ($i_max - 1) / 2 ) * 6 / $i_max; $xy_points[$i][1] = ($xy_points[$i][0] + 2) * ($xy_points[$i][0] - 2) * $xy_points[$i][0]; } $xy_points = \@xy_points; } unless ($reg_calc) { $reg_calc = 0; } # find the number of points to plot my $num_points = @$xy_points; # loop thru the points and find min/max values my $x_min = $$xy_points[0][0]; my $x_max = $$xy_points[0][0]; my $y_min = $$xy_points[0][1]; my $y_max = $$xy_points[0][1]; for (my $j=1; $j<$num_points; $j++) { if (($$xy_points[$j][0]||$$xy_points[$j][0]==0) and ($$xy_points[$j][1]||$$xy_points[$j][1]==0)) { if ($$xy_points[$j][0] < $x_min) { $x_min = $$xy_points[$j][0]; } if ($$xy_points[$j][0] > $x_max) { $x_max = $$xy_points[$j][0]; } if ($$xy_points[$j][1] < $y_min) { $y_min = $$xy_points[$j][1]; } if ($$xy_points[$j][1] > $y_max) { $y_max = $$xy_points[$j][1]; } } } # calculate the scale and offset value my $slopish=($y_max-$y_min)/($x_max-$x_min+.0000000001); my $x_scale = 0; if ($x_max - $x_min != 0) { $x_scale = $x_size / ($x_max - $x_min); } my $x_offset = -$x_min; my $y_scale = 0; if ($y_max - $y_min != 0) { $y_scale = $y_size / ($y_max - $y_min); } #$y_scale*=int($slopish); my $y_offset = -$y_min; # clear the graph for (my $x=0; $x<$x_size; $x++) { for (my $y=0; $y<=$y_size; $y++) { $$self[$x][$y] = ' '; } } # draw the axes my $x_axis = int($x_scale * $x_offset); my $y_axis = int($y_scale * $y_offset); if (($y_axis >= 0) and ($y_axis < $y_size)) { for (my $x=0; $x<$x_size; $x++) { $$self[$x][$y_axis] = '-'; } } if (($x_axis >= 0) and ($x_axis < $x_size)) { for (my $y=0; $y<$y_size; $y++) { $$self[$x_axis][$y] = '|'; } } if (($x_axis >= 0) and ($x_axis < $x_size) and($y_axis >= 0) and ($y_axis < $y_size)) { $$self[$x_axis][$y_axis] = '+'; } # plot the points for (my $i=0; $i<$num_points; $i++) { my $x_pos = 0; if ($$xy_points[$i][0]||$$xy_points[$i][0]==0) { $x_pos = int($x_scale * ($$xy_points[$i][0] + $x_offset)); } if ($x_pos < 0) { $x_pos = 0; } elsif ($x_pos > $x_size - 1) { $x_pos = $x_size - 1; } my $y_pos = 0; if ($$xy_points[$i][1]||$$xy_points[$i][1]==0) { $y_pos = int($y_scale * ($$xy_points[$i][1] + $y_offset)); } if ($y_pos < 0) { $y_pos = 0; } elsif ($y_pos > $y_size - 1) { $y_pos = $y_size - 1; } if (($$xy_points[$i][0]||$$xy_points[$i][0]==0) and ($$xy_points[$i][1]||$$xy_points[$i][1]==0)) { $$self[$x_pos][$y_pos] = $char; } } ############################################### Louis my @xvalues; #Values from column 1 of the file my @yvalues; #Values from column 2 of the file my $correlation; #The "r" value my $slope; #Indicates rate of y/x my $intercept; #Regressed value of y at x=0 my $sign; #For printing the f(x)=mx+b equation my $countx=0; #For iterating through the file my @scatter_array; #Populate the two arrays containing the x and y values for (my $i=0; $i<$num_points; $i++) { if (($$xy_points[$i][0])||($$xy_points[$i][0] == 0)) { $xvalues[$i] = $$xy_points[$i][0]; } if (($$xy_points[$i][1])||($$xy_points[$i][1] == 0)) { $yvalues[$i] = $$xy_points[$i][1]; } } #Return the total from one array sub total () { my ($z_ref) = @_; my $total=0; my $value=0; foreach $value (@$z_ref) { $total+=$value; } return $total; } #Return the mean from one array [requires &total] sub mean () { my ($z_ref) = @_; my $total=&total($z_ref); return $total/(@$z_ref); } #Return the variance from one array [requires &mean] sub variance () { my ($z_ref) = @_; my $numerator=0; my $mean=&mean($z_ref); foreach my $value (@$z_ref) { $numerator+=($value-$mean)**2; } return $numerator/($#$z_ref); } #Return the standard deviation from one array [requires &variance] sub stdDeviation () { my ($z_ref) = @_; return sqrt(&variance($z_ref))+.000000001; #Avoiding divide by zero with the result } #Return the correlation between two arrays [requires &mean, &stdDeviation] sub correlation () { my ($x_ref, $y_ref) = @_; my $corrsub=0; my $meanx=&mean($x_ref); my $meany=&mean($y_ref); my $devx=&stdDeviation($x_ref); my $devy=&stdDeviation($y_ref); for (my $i=0; $i<@$x_ref; $i++) { $corrsub+=(($x_ref->[$i]-$meanx)/$devx)*(($y_ref->[$i]-$meany)/$devy); } if ($devy<.0001) { return 1; } else { return $corrsub/($#$x_ref); } } #Return the regressed slope of two arrays [requires &correlation, &stdDeviation] sub getSlope () { my ($x_ref, $y_ref) = @_; return &correlation($x_ref, $y_ref)*(&stdDeviation($y_ref)/&stdDeviation($x_ref)); } #Return the regressed y intercept of two arrays [requires &getSlope, &mean] sub getIntercept () { my ($x_ref, $y_ref) = @_; return &mean($y_ref)-(&getSlope($x_ref, $y_ref) * &mean($x_ref)); } $slope=&getSlope(\@xvalues, \@yvalues); $intercept=&getIntercept(\@xvalues, \@yvalues); $correlation=&correlation(\@xvalues, \@yvalues); my $correlation_type; my $correlation_strength; if ($correlation<0) { $correlation_type="negative"; } elsif ($correlation>0) { $correlation_type="positive"; } else { $correlation_type="nil"; } if (abs($correlation)>.99999) { $correlation_strength=" and perfectly correlated"; } elsif (abs($correlation)>0.99) { $correlation_strength=" and almost perfectly correlated"; } elsif (abs($correlation)>0.90) { $correlation_strength=" and very strongly correlated"; } elsif (abs($correlation)>0.70) { $correlation_strength=" and strongly correlated"; } elsif (abs($correlation)>0.50) { $correlation_strength=" and moderately correlated"; } elsif (abs($correlation)>0.30) { $correlation_strength=" and weakly correlated"; } elsif (abs($correlation)>0.20) { $correlation_strength=" and neglibily correlated"; } else { $correlation_type=""; $correlation_strength="not correlated"; } if ($intercept<0) { $sign="-"; }else{ $sign="+"; } # plot the regression line if ($reg_calc) { for (my $i=$x_min; $i< $x_max; $i+=(1/$x_scale)) { my $x_pos; $x_pos = int($x_scale * ($i + $x_offset)); if ($x_pos < 0) { $x_pos = 0; } elsif ($x_pos > $x_size - 1) { $x_pos = $x_size - 1; } my $y_pos = 0; $y_pos = int($y_scale * ($i*$slope +$intercept + $y_offset)); if ($y_pos < 0) { $y_pos = 0; } elsif ($y_pos > $y_size - 1) { $y_pos = $y_size - 1; } if ($$self[$x_pos][$y_pos] eq '.') { $$self[$x_pos][$y_pos] = '.'; } elsif ($$self[$x_pos][$y_pos] eq '*') { $$self[$x_pos][$y_pos] = '*'; } elsif ($$self[$x_pos][$y_pos] eq $char) { $$self[$x_pos][$y_pos] = '*'; } else { $$self[$x_pos][$y_pos] = '.'; } } } ############################################### Louis # add the axes limits # left label my $y_pos = 1; if ($y_axis < 1) { $y_pos = 1; } elsif ($y_axis > $y_size) { $y_pos = $y_size; } else { $y_pos = $y_axis; } my $label = sprintf("%0.1f ", $x_min); my $l = length($label); for (my $i=0; $i<$l; $i++) { $$self[$i][$y_pos] = substr($label, $i, 1); } # right label $label = sprintf(" %0.1f", $x_max); $l = length($label); my $x_label_pos = $x_size - $l; for (my $i=0; $i<$l; $i++) { $$self[$x_label_pos+$i][$y_pos] = substr($label, $i, 1); } # bottom label $label = sprintf("%0.1f", $y_min); $l = length($label); my $y_label_pos = $x_axis - int($l/2); if ($y_label_pos < 0) { $y_label_pos = 0; } elsif ($y_label_pos + $l > $x_size) { $y_label_pos = $x_size - $l; } for (my $i=0; $i<$l; $i++) { $$self[$y_label_pos+$i][0] = substr($label, $i, 1); } # top label $label = sprintf("%0.1f", $y_max); $l = length($label); $y_label_pos = $x_axis - int($l/2); if ($y_label_pos < 0) { $y_label_pos = 0; } elsif ($y_label_pos + $l > $x_size) { $y_label_pos = $x_size - $l; } for (my $i=0; $i<$l; $i++) { $$self[$y_label_pos+$i][$y_size-1] = substr($label, $i, 1); } # add the labels # x label $l = length($x_label); $x_label_pos = $x_size - $l; for (my $i=0; $i<$l; $i++) { $$self[$x_label_pos+$i][$y_pos+1] = substr($x_label, $i, 1); } # y label $l = length($y_label); $y_label_pos = $x_axis - int($l/2); if ($y_label_pos < 0) { $y_label_pos = 0; } elsif ($y_label_pos + $l > $x_size) { $y_label_pos = $x_size - $l; } for (my $i=0; $i<$l; $i++) { $$self[$y_label_pos+$i][$y_size] = substr($y_label, $i, 1); } # print the $self my $prefix = "\n"; my $newline = "\n"; my $postfix = "\n"; if (($type eq 'html') or ($type eq 'HTML')) { $prefix = "
\n";
$newline = "\n";
$postfix = "\n";
}
print $prefix;
for (my $y=$y_size; $y>=0; $y--) {
for (my $x=0; $x<$x_size; $x++) {
print $$self[$x][$y];
}
print $newline;
}
print $postfix;
if ($reg_calc) {
printf ("Linear regression forumula: f(x)=%gx +%.4f\n", $slope, $intercept);
printf (" R-value: %g\n", $correlation);
printf (" R^2-value: %g\n", $correlation**2);
printf ("\n\nSummary:\n");
printf("The function f(x)=mx+b represented by this data is: f(x)=%gx %s%.4f\n", $slope, $sign, abs($intercept));
printf("The x intercept is: %g\n", (-1)*(abs($intercept))/$slope);
printf("The correlation between the %g records in this set is: %g\n", scalar @xvalues, $correlation);
printf("This correlation is defined as %s%s.\n\n", $correlation_type, $correlation_strength);
printf("The coefficient of determination (r^2 value) of these records is: %g\n", $correlation**2);
printf("This means that %.1f%% of these records can be explained by the above equation.\n\n", 100*($correlation**2));
}
return 1;
}
1;
__END__
=head1 NAME
ScatterPlot - Perl extension for drawing ASCII scatter plots
=head1 SYNOPSIS
use ScatterPlot;
=head1 DESCRIPTION
This module will draw a quick and easy ASCII scatter plot. It has only two functions, new() and draw(). new() takes no arguments and creates a new ScatterPlot object. draw() can be called with no arguments to draw a sample test plot. You can call draw like this:
draw($xy_points);
where $xy_points is a reference to an array of (x,y) pairs. See the file ScatterPlot.pl for an example. The full call to draw is:
draw($xy_points, $x_size, $y_size, $x_label, $y_label, $char, $type, $reg_line);
where $xy_points is a reference to an array of (x,y) pairs, $x_size is an integer describing the width of the plot in characters, $y_size is an integer describing the height of the plot in characters, $x_label is a string for the horizontal axis label, $y_label is a string for the vertical axis lable, $char is the plot character, and $type is either 'text', 'html', or 'HTML'. If you are using CGI or sending the plot output to a web page, then use $type='html' or $type='HTML'.
The method draw() will automatically scale the plot to fit your data and draw the axes labels accordingly. The size of the output text will be $y_size lines of text, each of which is $x_size long in characters (plus line terminator). In text mode the plot begins with "\n" and ends with "\n", while in html mode the plot begins with "" and ends with "<\pre>". =head2 EXPORT none =head1 SEE ALSO The example file ScatterPlot.pl contains an example of how to use the ScatterPlot module. =head1 AUTHORS Les Hall, Einventor-66@comcast.netE Louis Romero, E louis_romero@hotmail.comE =head1 COPYRIGHT AND LICENSE Copyright 2007 by Les Hall This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut