#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';
use autodie;

use FindBin '$Bin';
use lib "$Bin/lib";

use Vnlog::Util qw(parse_options read_and_preparse_input ensure_all_legends_equivalent reconstruct_substituted_command);



# The option list mirrors "struct option longopts" in uniq.c from GNU
# coreutils, plus the vnlog-specific additions. Each entry is an option
# specification string: "long-name|short" optionally followed by an argument
# marker ("=i" / "=s" for a required argument, ":s" for an optional one)
my @specs =
  (
   # flags: these take no argument
   qw( count|c
       repeated|d
       ignore-case|i
       unique|u
       zero-terminated|z ),

   # options that take an argument
   qw( all-repeated|D:s
       skip-fields|f=i
       skip-chars|s=i
       check-chars|w=i
       group:s ),

   # vnlog-specific: name of the occurrence-count column
   qw( vnl-count=s ),

   qw( help|h )
  );


# Options this wrapper accepts on the commandline only in order to reject
# them. The key is the long option name; the value is the explanation shown to
# the user. The explanation ends in a newline so that die() doesn't append the
# script location to it
my %options_unsupported =
  ( 'zero-terminated' =>
    "vnlog is built on assuming a particular record separator\n" );

# The usage message handed to parse_options(). $0 is interpolated so that the
# message shows the name this script was invoked as
my $usage = <<EOF;
  $0 [options] < logfile

A very common usage takes no options: suppresses duplicate consecutive records.

The most common options are (from the GNU uniq manpage):

  -c, --count
         prefix lines by the number of occurrences

  -d, --repeated
         only print duplicate lines, one for each group

  -D     print all duplicate lines

  --all-repeated[=METHOD]
         like  -D,  but  allow  separating  groups with an empty line;
         METHOD={none(default),prepend,separate}

  -f, --skip-fields=N
         avoid comparing the first N fields (vnl-uniq extension: N<0 avoids
         comparing all but the LAST -N fields. To use ONLY the one last field,
         pass -f -1 or --skip-fields=-1)

  --group[=METHOD]
         show  all  items,  separating  groups  with  an  empty  line;
         METHOD={separate(default),prepend,append,both}

  -i, --ignore-case
         ignore differences in case when comparing

  -u, --unique
         only print unique lines

There's also one vnlog-specific option

  --vnl-count NAME
         Implies -c. Like -c, adds a column of occurrence counts, but gives this
         column an arbitrary name given on the commandline
EOF

# Parse the commandline against @specs. This yields a reference to the list of
# input filenames and a reference to the hash of parsed options, keyed on the
# long option name
my ($filenames,$options) = parse_options(\@ARGV, \@specs, 0, $usage);

# ---- Validate the parsed options --------------------------------------------
#
# Everything here runs before any input is opened, so a bad commandline fails
# early

# --vnl-count NAME implies -c. The name is tested for being a non-empty string
# rather than for truth, so that a column named "0" still turns counting on
if( defined $options->{'vnl-count'} &&
    length($options->{'vnl-count'}) > 0 )
{
    $options->{count} = 1;
}

# --all-repeated takes an optional METHOD. An empty string means that no METHOD
# was given, and is accepted
if( defined $options->{'all-repeated'}      &&
    length($options->{'all-repeated'}) > 0  &&
    $options->{'all-repeated'} ne 'none'    &&
    $options->{'all-repeated'} ne 'prepend' &&
    $options->{'all-repeated'} ne 'separate'  )
{
    die "--all-repeated=XXX was given. XXX may be ONLY one of 'none','prepend','separate'. Got '" . $options->{'all-repeated'} . "'";
}

# --group takes an optional METHOD too, and can't be combined with the options
# that select or count records
if( defined $options->{'group'} )
{
    if( length($options->{'group'}) > 0   &&
        $options->{'group'} ne 'separate' &&
        $options->{'group'} ne 'prepend'  &&
        $options->{'group'} ne 'append'   &&
        $options->{'group'} ne 'both' )
    {
        die "--group=XXX was given. XXX may be ONLY one of 'separate','prepend','append','both'. Got '" . $options->{'group'} . "'";
    }

    if( $options->{count}                  ||
        $options->{repeated}               ||
        defined $options->{'all-repeated'} ||
        $options->{unique} )
    {
        # The underlying uniq tool dictates this
        die "--group is mutually exclusive with -c/-d/-D/-u";
    }
}

# Only one input is supported. The test is "more than one" to agree with the
# error message: having no file on the commandline (data on standard input) is
# the documented common usage and must not be rejected here.
# NOTE(review): presumably parse_options() already substitutes a standard-input
# placeholder when no file is given, in which case this is equivalent to the
# previous "!= 1" test - confirm against Vnlog::Util
if(@$filenames > 1)
{
    die "At most one file may be given on the commandline: the input";
}

# Reject the options that are in @specs only so that they can be refused with
# an explanation
for my $key(keys %$options)
{
    if(exists $options_unsupported{$key})
    {
        my $keyname = length($key) == 1 ? "-$key" : "--$key";
        die("I don't support $keyname: $options_unsupported{$key}");
    }
}

# ---- Read the legend, write the output legend, hand off to uniq -------------

# NOTE(review): presumably this opens the input and reads past its legend,
# leaving the column names of the first input in $inputs->[0]{keys}; that is
# how the result is used below - confirm against Vnlog::Util
my $inputs = read_and_preparse_input($filenames);

if( defined $options->{'skip-fields'} &&
    $options->{'skip-fields'} < 0 )
{
    # I want to use only the last -N fields, so I skip the first Nfields-N
    # fields
    my $Nfields = @{$inputs->[0]{keys}};
    my $Nlast   = -$options->{'skip-fields'};

    # Asking for more trailing fields than exist would produce a negative
    # count of fields to skip. Report that here, in terms of what the user
    # typed, instead of handing a negative number on to uniq
    if( $Nlast > $Nfields )
    {
        die "--skip-fields=$options->{'skip-fields'} asks to compare only the last $Nlast fields, but the input has only $Nfields\n";
    }

    $options->{'skip-fields'} += $Nfields;
}

my $ARGV_new = reconstruct_substituted_command($inputs, $options, [], \@specs);

# The legend must reach the output before anything uniq writes. perl normally
# flushes its buffers before an exec, but perldoc -f exec notes that this
# isn't guaranteed everywhere, so turn on autoflush explicitly
$| = 1;

print '# ';
if($options->{count})
{
    # uniq -c puts the count first, so the count column leads the legend.
    # The name is tested for being a non-empty string rather than for truth,
    # so that a column named "0" is honored
    my $count_name = $options->{'vnl-count'};
    if( !defined $count_name || length($count_name) == 0 )
    {
        $count_name = 'count';
    }
    print "$count_name ";
}
say join(' ', @{$inputs->[0]{keys}});

# exec returns only if it failed. A plain "use autodie" doesn't cover exec, so
# the failure is checked by hand: otherwise the script would print a legend
# and exit successfully without ever running uniq
exec 'uniq', @$ARGV_new
  or die "Couldn't exec 'uniq': $!\n";



__END__

=head1 NAME

vnl-uniq - uniq a log file, preserving the legend

=head1 SYNOPSIS

 $ cat colors.vnl
 # color
 blue
 yellow
 yellow
 blue
 yellow
 orange
 orange

 $ < colors.vnl | vnl-sort | vnl-uniq -c
 # count color
       2 blue
       2 orange
       3 yellow

=head1 DESCRIPTION

  Usage: vnl-uniq [options] < logfile

This tool runs C<uniq> on a given vnlog dataset. C<vnl-uniq> is a wrapper around
the GNU coreutils C<uniq> tool. Since this is a wrapper, most commandline
options and behaviors of the C<uniq> tool are present; consult the L<uniq(1)>
manpage for detail. The differences from GNU coreutils C<uniq> are

=over

=item *

The input and output to this tool are vnlog files, complete with a legend

=item *

C<--zero-terminated> is not supported because vnlog assumes
newline-separated records

=item *

Only I<one> input is supported (a file on the cmdline or data on standard
input), and the output I<always> goes to standard output. Specifying the output
as a file on the commandline is not supported.

=item *

C<--vnl-count NAME> can be given to name the C<count> column. C<-c> is still
supported to add the default new column named C<count>, but if another name is
wanted, C<--vnl-count> does that. C<--vnl-count> implies C<-c>

=item *

In addition to the normal behavior of skipping fields at the start, C<-f> and
C<--skip-fields> can take a negative argument C<-N> to skip I<all but the last>
N fields. For instance, to use only the one last field, pass C<-f -1> or
C<--skip-fields=-1>.

=back

Past that, everything C<uniq> does is supported, so see that man page for
detailed documentation.

=head1 BUGS

This and the other C<vnl-xxx> tools that wrap coreutils are written specifically
to work with the Linux kernel and the GNU coreutils. None of these have been
tested with BSD tools or with non-Linux kernels, and I'm sure things don't just
work. It's probably not too effortful to get that running, but somebody needs to
at least bug me for that. Or better yet, send me nice patches :)

=head1 SEE ALSO

L<uniq(1)>

=head1 REPOSITORY

https://github.com/dkogan/vnlog/

=head1 AUTHOR

Dima Kogan C<< <dima@secretsauce.net> >>

=head1 LICENSE AND COPYRIGHT

Copyright 2019 Dima Kogan C<< <dima@secretsauce.net> >>

This library is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 2.1 of the License, or (at your option) any
later version.

=cut
