#!/usr/bin/perl

# ===========================================================================
# APPLICATION: BLASTaid
# AUTHOR:      Todd Wylie
# CONTACT:     perldev@monkeybytes.org
# VERSION:     $Id: BLASTaid 3 2006-02-13 19:31:01Z Todd Wylie $
# ===========================================================================

use strict;
use warnings;
use Getopt::Long;
use IO::File;
use BLASTaid;

# ---------------------------------------------------------------------------
# COMMAND LINE OPTIONS
# ---------------------------------------------------------------------------
# --report  : Path/name of required WU-BLAST report to index.
# --index   : Path/name of index which will be created (required).
# --query   : Name of a single query whose report should be pulled from the WU-BLAST report.
# --list    : Path/name of user supplied list of queries.
my ($report, $index, $query, $list);

# GetOptions returns false when it meets an unknown option or an option
# that is missing its value (it has already warned about the problem on
# STDERR). Stop here in that case instead of carrying on with a partially
# populated set of arguments.
GetOptions(
           "report=s" => \$report,
           "index=s"  => \$index,
           "query=s"  => \$query,
           "list=s"   => \$list,
           )
    or _fatal_error_exit(
        code   => '3',
        error  => 'INVALID COMMAND LINE ARGUMENT',
        reason => 'USAGE: BLASTaid --report <file> --index <file> (opt: --query OR --list)',
                      );

# ---------------------------------------------------------------------------
# PARAMETER AND FILE VALIDATION
# ---------------------------------------------------------------------------
# Make sure the user supplies all required arguments or exit. Both --report
# and --index are mandatory. The test is "defined and non-empty" rather than
# plain truth so that a file literally named "0" is still accepted.
if (
    !( defined $report && length $report ) ||
    !( defined $index  && length $index  )
    ) {
    # The usage text lists both optional switches, matching the usage shown
    # for the --query/--list conflict error.
    _fatal_error_exit(
        code   => '0',
        error  => 'MISSING COMMAND LINE ARGUMENT',
        reason => 'USAGE: BLASTaid --report <file> --index <file> (opt: --query OR --list)',
                      );
}

# ---------------------------------------------------------------------------
# CALL BLASTAID MODULE AND CREATE OR LOAD THE INDEX
# ---------------------------------------------------------------------------
# --query and --list are mutually exclusive. This check (and the --list
# file check below) runs before the BLASTaid object is built, so that an
# argument mistake is reported without first creating or loading the index.
if (defined $list && defined $query) {
    _fatal_error_exit(
        code   => '1',
        error  => 'INCOMPATIBLE ARGUMENTS [--query & --list]',
        reason => 'USAGE: BLASTaid --report <file> --index <file> (opt: --query OR --list)',
                      );
}

# -f only establishes that the path is an existing plain file; it does not
# inspect the content, so the message says exactly that.
if (defined $list && !-f $list) {
    _fatal_error_exit(
        code   => '2',
        error  => 'LIST FILE ERROR',
        reason => 'The provided --list file does not exist or is not a plain file.',
                      );
}

# If index does not exist then it will be created. If it does exist,
# load it into the object. When neither --query nor --list is given,
# this is the only work the script performs.
my $blast = BLASTaid->new( report => $report, index => $index );

if (defined $query) {
    # Single query pull. The result is assigned to a scalar before printing
    # so that return_report is always called in scalar context.
    my $single_report = $blast->return_report( query => $query );
    print $single_report;
}
elsif (defined $list) {
    # A list of queries to return, one query name per line. The explicit
    # '<' mode makes IO::File open the path read-only without interpreting
    # any characters in the file name as an open mode.
    my $list_fh = IO::File->new($list, '<')
        or die "could not open $list: $!";

    while (my $wanted_query = <$list_fh>) {
        # Strip surrounding whitespace (including the newline and any
        # trailing CR from DOS-style files) and skip blank lines so that
        # an empty query name is never requested.
        $wanted_query =~ s/\A\s+//;
        $wanted_query =~ s/\s+\z//;
        next if $wanted_query eq '';

        my $string = $blast->return_report( query => $wanted_query );
        print $string;
    }
    $list_fh->close;
}

# FATAL ERROR EXIT:
# Generic error reporting for fatal errors in the BLASTaid application.
# Use this routine for *controlled* errors.
#
# Named arguments:
#   code   => application error code shown in the banner
#   error  => short error title
#   reason => longer explanation (typically the usage line)
#
# The banner is written to STDERR, because STDOUT is where this script
# prints the requested reports. The routine never returns: it terminates
# the process with a non-zero status.
sub _fatal_error_exit {
    my %arg  = @_;
    my $rule = "-" x 79;

    print STDERR "\nBLASTaid (exit code $arg{code})\n",
                 "$rule\n",
                 "$arg{error}\n",
                 "$arg{reason}\n",
                 "$rule\n\n";

    # A bare "exit" would report success (status 0) to the calling shell.
    # Use the application code as the process status when it is a valid
    # non-zero status (1..255); otherwise fall back to 1 so that a fatal
    # error can never look like a successful run.
    my $status = 1;
    if (
        defined $arg{code}            &&
        $arg{code} =~ /\A[0-9]+\z/    &&
        $arg{code} > 0                &&
        $arg{code} <= 255
        ) {
        $status = $arg{code};
    }
    exit $status;
}

__END__

# --------------------------------------------------------------------------
# P O D : (area below reserved for documentation)  
# ==========================================================================

=head1 NAME

BLASTaid - A simple interface for byte indexing a WU-BLAST multi-part report for faster access.


=head1 VERSION

This document describes BLASTaid version 0.0.1


=head1 SYNOPSIS

    BLASTaid --report <file> --index <file> (opt: --query OR --list)

    EX: BLASTaid --report "report.blast" --index "/tmp/report.index" --query "gi|71143145|ref|NM_006300.2|"
    EX: BLASTaid --report "report.blast" --index "/tmp/report.index" --list "queries.fof"
  
=head1 DESCRIPTION

This script is a simple interface to the BLASTaid.pm module and aids in accessing specific reports from longer, multi-part WU-BLAST (http://blast.wustl.edu/) alignment reports. Depending on parameters and starting input, BLAST reports may be several gigabytes in size. Extremely large files can prove to be problematic and rate-limiting in post-process analysis. BLASTaid takes a multi-part BLAST report and creates a byte index. Specific reports may be pulled directly from the larger set by jumping directly to the entry via the byte index. The index file need only be created one time per BLAST report; the script automatically uses a supplied index file if it already exists. A user may also loop through a list of queries in the report in a systematic way by invoking the --list option. Retrieval is always based around QUERY name, as this should be a unique value in the BLAST report. When BLASTaid makes QUERY names, it takes the first section of the BLAST report's "Query=" line before any white space.


=head1 INTERFACE 

The following options are supported:

=head2 --report

report: Path/name of the WU-BLAST report to parse and index. [REQUIRED]

=head2 --index

index: Path/name of the byte index file. If the file does not exist it will be written; if it already exists it will be loaded instead. [REQUIRED]

=head2 --query 

query: Specific query name (as it appears in the byte index file) used to return a single report. [OPTIONAL]

=head2 --list

list: Path/name of a file which lists a number of queries to return reports for. Should be one query per line, as follows. [OPTIONAL]

 gi|45238847|ref|NM_000945.3|
 gi|71143145|ref|NM_006300.2|
 gi|56121814|ref|NM_030957.2|
 ...


=head1 CONFIGURATION AND ENVIRONMENT

BLASTaid requires no configuration files or environment variables. This distribution supplies the perl script "BLASTaid", which is a command line interface for BLASTaid.pm's main functions. The default installation is /usr/bin/. If you are installing on a Windows based system, please manually place the BLASTaid script somewhere in your perl path.


=head1 DEPENDENCIES

This script requires the BLASTaid.pm module.


=head1 INCOMPATIBILITIES

None reported.


=head1 BUGS AND LIMITATIONS

No bugs have been reported. This script is limited to parsing WU-BLAST style reports--it does not currently support NCBI or other formats.

Please report any bugs or feature requests to
C<bug-blastaid@rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org>.


=head1 AUTHOR

Todd Wylie  

C<< <perldev@monkeybytes.org> >>  

L<< http://www.monkeybytes.org >>


=head1 LICENSE AND COPYRIGHT

Copyright (c) 2006, Todd Wylie C<< <perldev@monkeybytes.org> >>. All rights reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perlartistic.


=head1 DISCLAIMER OF WARRANTY

BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
NECESSARY SERVICING, REPAIR, OR CORRECTION.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

=head1 NOTE

This software was written using the latest version of GNU Emacs, the
extensible, real-time text editor. Please see
L<http://www.gnu.org/software/emacs> for more information and download
sources.


