#!/usr/bin/python
#emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 noet:
"""Script to help maintaining package definitions within Debian blends
task files.

Often it becomes necessary to duplicate the same information about a
package within Debian packaging and multiple blends task files,
possibly of different blends.  This script automates:

- construction of entries
- injection into task files
- modification of existing entries if things changed
- removal of previously injected entries

Possible TODOs:
---------------

* For every package the same task file might be re-read/written (if
  entry changed/added) from disk.
  That allows to replace easily original entry for 'source' package
  (listed as Suggests:) with actual first listed binary package.
  This should be taken into consideration if current per-package
  handling gets changed

"""

"""
Configuration
-------------

Paths to the blends top directories, containing tasks directories are
specified in ~/.blends-inject.cfg file, e.g.::

 [debian-med]
 path = ~/deb/debian-med/

 [debian-science]
 path = ~/deb/debian-science/

Definitions of the fields for task files are by default looked up
within debian/blends, or in files provided on the command line.  Also, for
the "-a" mode of operation you should define a list of globs matching your
debian/blends files::

 [paths]
 all=~/deb/gits/pkg-exppsy/neurodebian/future/blends/*
     ~/deb/gits/*/debian/blends
     ~/deb/gits/pkg-exppsy/*/debian/blends
 # Python regular expression on which files to skip
 # Default is listed below
 #skip=.*[~#]$


Format of debian/blends
-----------------------

Example::

 ; If originally filed using project source name, and it is different
 ; from the primary (first) binary package name, keep 'Source' to be
 ; able to adopt previously included tasks entry
Source: brian

 ; Define the format on how entries should be handled.
 ; Possible values:
 ;   extended -- whenever package is not in Debian and additional
 ;               fields should be obtained from debian/*:
 ;               * License
 ;               * WNPP
 ;               * Pkg-Description
 ;               * Responsible
 ;               * Homepage
 ;               * Vcs-*
 ;   plain [default] -- only fields listed here should be mentioned.
 ;               Common use -- whenever package is already known to UDD.
 ;
 ;  By default, all fields specified previously propagate into following
 ;  packages as well.  If that is not desired, add suffix '-clean' to
 ;  the Format
Format: extended

Tasks: debian-science/neuroscience-modeling

 ; Could have Depends/Recommends/Suggests and Ignore
 ; All those define Pkg-Name field which is not included
 ; in the final "rendering" but is available as Pkg-Name item
Depends: python-brian
Pkg-URL: http://neuro.debian.net/pkgs/%(Pkg-Name)s.html
Language: Python, C
Published-Authors: Goodman D.F. and Brette R.
Published-Title: Brian: a simulator for spiking neural networks in Python
Published-In: Front. Neuroinform
Published-Year: 2008
Published-DOI: 10.3389/neuro.11.005.2008

 ; May be some previous entry should be removed, thus say so
Remove: python-brian-doc

 ;Tasks: debian-med/imaging-dev
 ;Why: Allows interactive development/scripting

 ; ; It should be possible to switch between formats,
 ; ; e.g. if some component is not yet in Debian
 ;Format: extended
 ;
 ; ; Now some bogus one but with customizations
 ;Tasks: debian-med/documentation
 ;Recommends: python-brian-doc
 ;Language:
 ;Remark: some remark
 ;

"""


import re, os, sys, tempfile, glob
from os.path import join, exists, expanduser, dirname, basename

from ConfigParser import ConfigParser
from optparse import OptionParser, Option

from copy import deepcopy
#from debian_bundle import deb822
from debian import deb822
#import deb822
from debian.changelog import Changelog

# all files we are dealing with should be UTF8, thus
# lets override
import codecs

def open(f, *args):
    """Open file `f` as UTF-8 text.

    Intentionally replaces the builtin ``open`` for this module: every
    file this script deals with is expected to be UTF-8 encoded.  Any
    additional positional arguments (e.g. the mode) are handed over to
    ``codecs.open`` untouched.
    """
    utf8 = dict(encoding='utf-8')
    return codecs.open(f, *args, **utf8)

__author__ = 'Yaroslav Halchenko'
__prog__ = os.path.basename(sys.argv[0])
__version__ = '0.0.7'
__copyright__ = 'Copyright (c) 2010 Yaroslav Halchenko'
__license__ = 'GPL'

# Fields which start the description of a new package
PKG_FIELDS = (
    'depends',
    'recommends',
    'suggests',
    'ignore',
    'remove',
)

# Prefixes of the "standard" blends/tasks fields, listed in the
# canonical order used when an entry gets sorted.  Fields not matching
# any of these prefixes do not get embedded into tasks files
BLENDS_FIELDS_PREFIXES = (
    'depends', 'recommends', 'suggests', 'ignore',
    'why', 'homepage', 'language', 'wnpp', 'responsible', 'license',
    'vcs-', 'pkg-url', 'pkg-description',
    'published-', 'x-', 'registration', 'remark',
)

# Additional fields which might come in handy (e.g. for filing wnpp
# bugs) but are not "standard", thus are placed in the trailer
CUSTOM_FIELDS_PREFIXES = (
    'author', 'pkg-name', 'pkg-source',
    'version', 'remove',
)

# Any field matching none of the prefixes above should cause an error
# for consistency
FIELDS_ORDER = BLENDS_FIELDS_PREFIXES + CUSTOM_FIELDS_PREFIXES

# Noise level; assigned in main() from the command line options
verbosity = None

def error(msg, exit_code=1):
    """Report a fatal problem and terminate the script.

    `msg` followed by a newline is written to stderr, after which the
    interpreter exits with status `exit_code`.
    """
    sys.stderr.write(''.join((msg, '\n')))
    raise SystemExit(exit_code)

def verbose(level, msg):
    """Print `msg` to stderr if `level` does not exceed global `verbosity`.

    The message is indented by `level` spaces, so more detailed messages
    appear nested under the less detailed ones.
    """
    if not (level <= verbosity):
        return
    indent = " " * level
    sys.stderr.write(''.join((indent, msg, '\n')))


def parse_debian_blends(f='debian/blends'):
    """Parse a debian/blends file into a list of package entries.

    All paragraphs of the file are flattened into a single sequence of
    (field, value) pairs, which is then traversed in order:

    - ``Source`` remembers the name of the source package;
    - ``Format`` selects how the following packages are handled; suffix
      ``-clean`` stops fields of the previous package from propagating
      into the next one;
    - ``Tasks`` defines the tasks for the following packages or, if a
      package is already being described, turns the subsequent fields
      into per-task customizations of that package;
    - any field listed in `PKG_FIELDS` starts a new package;
    - any other field is stored in the current package (or in its
      per-task customizations).

    Returns the unprocessed list of Deb822 entries.  Each entry carries
    two extra attributes: ``tasks`` (dict mapping a task name to a
    Deb822Dict with customizations) and ``format``.

    If a regular field is met while neither a package nor a source is
    known, `error` is called, which terminates the script.
    """
    # Linearize all the paragraphs since we are not using them.
    # iter_paragraphs is lazy, so consume it before closing the file
    items = []
    fobj = open(f)
    try:
        for p in deb822.Deb822.iter_paragraphs(fobj):
            items += p.items()
    finally:
        fobj.close()

    verbose(6, "Got items %s" % items)
    # Traverse and collect things
    format_ = 'plain'
    format_clean = False # do not propagate fields into a new pkg if True
    pkg, source = None, None
    pkgs = []
    tasks = []
    newtasks = False    # True if following fields are per-task tune-ups

    def new_pkg(prev_pkg, field, bname, sname, tasks):
        """Helper function to create a new package

        `field` is the dependency field (one of PKG_FIELDS) under which
        the package `bname` is listed; it is passed explicitly instead
        of being picked up from the enclosing loop.
        """
        if format_clean or prev_pkg is None:
            pkg = deb822.Deb822()
        else:
            pkg = deepcopy(prev_pkg)
            for k_ in PKG_FIELDS:   # prune older depends
                pkg.pop(k_, None)
        pkg['Pkg-Name'] = pkg[field] = bname.lower()
        if sname is not None:
            sname = sname.lower()
        pkg['Pkg-Source'] = sname
        pkgs.append(pkg)
        pkg.tasks = dict( (t.strip(), deb822.Deb822Dict()) for t in tasks )
        pkg.format = format_
        return pkg

    for k, v in items:

        kl = k.lower()

        if kl == 'source':
            source = v.strip()
        elif kl == 'format':
            format_ = v.strip()
            format_clean = format_.endswith('-clean')
            if format_clean:
                format_ = format_[:-6]
        elif kl == 'tasks':
            tasks = [x.strip() for x in v.split(',')]
            # either we need to provide tune-ups for the current package
            # or these are the tasks for the packages to come
            newtasks = pkg is not None
        elif kl in PKG_FIELDS: # new package
            if source is None and format_ != 'extended':
                source = v
            pkg = new_pkg(pkg, k, v, source, tasks)
            newtasks = False
        else:
            if pkg is None:
                # So we had just source?
                if source is None:
                    error("No package or source is known where to add %s" % (k,), 1)
                    # TODO: just deduce source from DebianMaterials
                # Since only source is available, it should be only
                # Suggest:-ed
                pkg = new_pkg(pkg, 'Suggests', source, source, tasks)
                newtasks = False

            if newtasks:
                # Add customization
                for t in tasks:
                    if t not in pkg.tasks:
                        pkg.tasks[t] = deb822.Deb822Dict()
                    pkg.tasks[t][k] = v
            else:
                # just store the key in the pkg itself
                pkg[k] = v

    return pkgs


def expand_pkgs(pkgs, topdir='.'):
    """Complete, in place, packages which have 'extended' format.

    For every such package the fields License, WNPP, Pkg-Description,
    Responsible, Homepage and Pkg-source which are absent (or empty) get
    filled in from the Debian materials found under `topdir`, if a
    non-empty value could be obtained.  Vcs-* fields of the source
    package are then merged into the package unconditionally.

    Packages of any other format are left untouched.
    """
    verbose(4, "Expanding content for %d packages" % len(pkgs))

    # (field, getter) pairs; getters are called lazily, only if the
    # field is not already provided
    fillers = (
        ('License', lambda dm, p: dm.get_license(p['Pkg-Name'])),
        ('WNPP', lambda dm, p: dm.get_wnpp()),
        ('Pkg-Description', lambda dm, p: dm.get_description(p['Pkg-Name'])),
        ('Responsible', lambda dm, p: dm.get_responsible()),
        ('Homepage', lambda dm, p: dm.source.get('Homepage', None)),
        ('Pkg-source', lambda dm, p: dm.source.get('Source', None)),
    )

    materials = None    # instantiated upon the first extended package
    for pkg in pkgs:
        if pkg.format != 'extended':
            continue
        # expanding, for that we need debian/control
        if materials is None:
            materials = DebianMaterials(topdir)
        for field, fetch in fillers:
            if not pkg.get(field, None):
                value = fetch(materials, pkg)
                if value:
                    pkg[field] = value
        # VCS fields
        pkg.update(materials.get_vcsfields())


def prefix_index(x, entries, strict=True, case=False, default=10000):
    """Return position of the first item in `entries` which prefixes `x`.

    Unless `case` is set, `x` is lowercased before matching (`entries`
    are used as given).  If nothing matches, IndexError is raised when
    `strict`, otherwise `default` is returned.
    """
    needle = x if case else x.lower()
    for position, prefix in enumerate(entries):
        if needle.startswith(prefix):
            break
    else:
        # went through all the entries without a match
        if strict:
            raise IndexError(
                "Could not find location for %s as specified by %s" %
                (needle, entries))
        position = default
    return position


def key_prefix_compare(x, y, order, strict=True, case=False):
    """cmp-style comparison of (key, value) pairs `x` and `y`.

    Pairs are ranked by the position of the first prefix in `order`
    which their key starts with (see `prefix_index`).  If `strict`, a
    key without a matching prefix makes `prefix_index` raise IndexError;
    otherwise such keys get the least priority.  Pairs of equal rank
    are compared directly.

    Unless `case` is set, matching is done in lowercase.
    """
    prefixes = order if case else [v.lower() for v in order]

    rank_x = prefix_index(x[0], prefixes, strict, case)
    rank_y = prefix_index(y[0], prefixes, strict, case)
    # equal ranks -- still unknown, so fall back to comparing the pairs
    return cmp(rank_x, rank_y) or cmp(x, y)


def group_packages_into_tasks(pkgs):
    """Given a list of packages (with .tasks) group them per each
    task and perform necessary customizations stored in .tasks

    For every (package, task) pair a deep copy of the package is made
    and updated with the customizations of that task.  Then

    - fields with a None or blank value are dropped;
    - remaining values are %-interpolated using the package itself as
      the mapping (e.g. ``%(Pkg-Name)s``);
    - fields are sorted according to FIELDS_ORDER; a field matching no
      known prefix makes `prefix_index` raise IndexError;
    - ``Pkg-Source`` and ``Pkg-Name`` are moved into the ``source`` and
      ``name`` attributes (None if the field is absent);
    - the first of PKG_FIELDS present is stored in the ``action``
      attribute (None if there is none).

    Returns a dict mapping a task name to the list of its entries.
    """
    # Time to take care about packages and tasks
    # Unroll pkgs into a collection of pkgs per known task
    tasks = {}
    for pkg in pkgs:
        # Lets just create deepcopies with tune-ups for each task
        for task, fields in pkg.tasks.items():
            pkg_ = deepcopy(pkg)
            pkg_.update(fields)

            # Perform string completions and removals.  Traverse a
            # snapshot since entries are removed along the way, and check
            # for None before interpolation, which would fail on it
            for k, v in list(pkg_.items()):
                if v is None or not v.strip():
                    pkg_.pop(k)
                else:
                    pkg_[k] = v % pkg_

            # Sort the fields according to FIELDS_ORDER. Unfortunately
            # silly Deb822* cannot create from list of tuples, so will do
            # manually.  Rank of each field is computed once; ties are
            # resolved by comparing the (key, value) pairs
            pkg__ = deb822.Deb822()
            for k, v in sorted(pkg_.items(),
                               key=lambda kv:
                               (prefix_index(kv[0], FIELDS_ORDER), kv)):
                pkg__[k] = v

            # Move Pkg-source/name into attributes.  They might have got
            # removed above if they carried no value
            pkg__.source = pkg__.pop('Pkg-Source', None)
            pkg__.name = pkg__.pop('Pkg-Name', None)
            # Store the action taken on the package for later on actions
            pkg__.action = None
            for f in PKG_FIELDS:
                if f in pkg__:
                    pkg__.action = f
                    break

            tasks.setdefault(task, []).append(pkg__)
    verbose(4, "Grouped %d packages into %d tasks: %s" %
            (len(pkgs), len(tasks), ', '.join(tasks.keys())))
    return tasks

def inject_tasks(tasks, config):
    """Add, update or remove entries of packages within task files.

    `tasks` maps "<blend>/<task>" to the list of prepared entries (as
    returned by `group_packages_into_tasks`), each carrying ``name``,
    ``source`` and ``action`` attributes.  `config` is a ConfigParser
    which provides option ``path`` in the section named after the blend;
    the task file is ``<path>/tasks/<task>``.

    For every package the task file is read, and the entry of the package
    is looked up by a dependency line mentioning its name or source:

    - identical existing entry -- file is left untouched;
    - differing existing entry -- replaced, or dropped if action is
      'remove' (together with our preceding marker comment, if any);
    - no existing entry -- appended, unless action is 'remove'.

    Only fields matching BLENDS_FIELDS_PREFIXES make it into the file.
    The file is rewritten once per changed package.
    """
    descr = ' ; Added by %s %s. [Please note here if modified manually]\n' % \
            (__prog__,  __version__)

    # Now go through task files and replace/add entries
    for task, pkgs in tasks.items():
        verbose(2, "Task %s with %d packages" % (task, len(pkgs)))
        blend, puretask = task.split('/')
        taskfile = expanduser(join(config.get(blend, 'path'), 'tasks', puretask))

        for pkg in pkgs:
            name, source = pkg.name, pkg.source
            action = getattr(pkg, 'action', None)
            msgs = {'Name': name.strip(), 'Action': None}

            # Create a copy of the pkg with only valid tasks
            # fields:
            # TODO: make it configurable?
            pkg = deepcopy(pkg)
            for k in list(pkg.keys()):  # snapshot: we remove while looping
                if prefix_index(k, BLENDS_FIELDS_PREFIXES,
                                strict=False, default=None) is None:
                    pkg.pop(k) # remove it from becoming present in
                               # the taskfile

            # Find either it is known to the task file already

            # Load entirely so we could simply manipulate
            fobj = open(taskfile)
            try:
                entries = fobj.readlines()
            finally:
                fobj.close()

            # We need to search by name and by source (if known)
            # We need to search for every possible type of dependency.
            # Names are escaped entirely since they might contain
            # characters special to regular expressions ('+', '.')
            names = [re.escape(n) for n in (name, source) if n]
            regexp_str = '^ *(%s) *: *(%s) *$' \
                         % ('|'.join(PKG_FIELDS), '|'.join(names))
            verbose(4, "Searching for presence in %s using regexp: '%s'"
                    % (taskfile, regexp_str))
            regexp = re.compile(regexp_str, re.I)
            istart = None               # where the known entry starts
            for i, e in enumerate(entries):
                if regexp.search(e):
                    verbose(4, "Found %s in position %i: %s" %
                            (name, i, e.rstrip()))
                    istart = i
                    break

            entry = pkg.dump()
            # Replace existing entry?
            if istart is not None:
                # Find the end of the entry: first blank line or the end
                # of the file
                iend = istart + 1
                while iend < len(entries) and entries[iend].strip() != '':
                    iend += 1

                # Lets not change file without necessity, if entry is
                # identical -- do nothing
                if u''.join(entries[istart:iend]) == entry:
                    # no changes -- just go to the next one
                    continue

                # Rewrite the entry.  Take our marker comment along if
                # it precedes the entry
                if istart > 0 and __prog__ in entries[istart-1]:
                    istart -= 1
                if action != 'remove':
                    entry = descr + entry
                    msgs['Action'] = 'Changed'
                else:
                    entry = ''
                    msgs['Action'] = 'Removed'
                # Empty lines around get pruned below
                entries_prior = entries[:istart]
                entries_post = entries[iend:]
            elif action != 'remove':  # or Append one
                msgs['Action'] = 'Added'
                entries_prior = entries
                entry = descr + entry
                entries_post = []

            if msgs['Action']:
                # Prepare for dumping
                # Prune spaces before
                while len(entries_prior) and entries_prior[-1].strip() == '':
                    entries_prior = entries_prior[:-1]
                if len(entries_prior) and not entries_prior[-1].endswith('\n'):
                    entries_prior[-1] += '\n' # assure present trailing newline
                # Prune spaces after
                while len(entries_post) and entries_post[0].strip() == '':
                    entries_post = entries_post[1:]
                if len(entries_post) and len(entry):
                    # only then trailing empty line
                    entry += '\n'
                output = ''.join(entries_prior + [ '\n' + entry ] + entries_post)
                fobj = open(taskfile, 'w') # then only overwrite
                try:
                    fobj.write(output)
                finally:
                    fobj.close()

                verbose(3, "%(Action)s %(Name)s" % msgs)


class DebianMaterials(object):
    """Extract selected information from an existing debian/

    debian/control is parsed lazily, upon the first access to `source`
    or `binaries`.
    """
    # Changelog line of the initial upload closing the WNPP bug
    _WNPP_RE = re.compile(r'^ *\* *Initial release.*closes:? #(?P<bug>[0-9]*).*', re.I)

    def __init__(self, topdir):
        #self.topdir = topdir
        self._debiandir = join(topdir, 'debian')
        self._source = None
        self._binaries = None
        # names of binary packages in the order of debian/control
        self._binary_names = []

    @property
    def source(self):
        """Source paragraph of debian/control (None if it has none)"""
        if self._source is None:
            self._assign_packages()
        return self._source

    @property
    def binaries(self):
        """Dict of binary package paragraphs of debian/control by name"""
        if self._binaries is None:
            self._assign_packages()
        return self._binaries

    def fpath(self, name):
        """Return path to the file `name` within debian/ directory"""
        return join(self._debiandir, name)

    def _assign_packages(self):
        """Parse debian/control, assigning source and binaries.

        Raises RuntimeError if the file cannot be read or parsed.
        """
        control_file = self.fpath('control')
        try:
            fobj = open(control_file)
            try:
                # iter_paragraphs is lazy -- consume it here so parsing
                # problems are reported by the handler below
                paragraphs = list(deb822.Deb822.iter_paragraphs(fobj))
            finally:
                fobj.close()
        except Exception as e:
            raise RuntimeError(
                  "Cannot parse %s file necessary for the extended package "
                  "entries. Error: %s" % (control_file, str(e)))
        self._binaries = {}
        self._binary_names = []
        self._source = None
        for v in paragraphs:
            if v.get('Source', None):
                self._source = v
            else:
                # Since it might be hash-commented out
                if 'Package' in v:
                    if v['Package'] not in self._binaries:
                        self._binary_names.append(v['Package'])
                    self._binaries[v['Package']] = v

    def get_license(self, package=None, first_only=True):
        """Return a license(s). Parsed out from debian/copyright if it is
        in machine readable format

        If `package` is given and debian/<package>.copyright exists, it
        is used instead of debian/copyright.  Paragraphs without Files,
        or with Files under debian/, are skipped.  Only the first line
        (short name) of each license is taken; an empty one is reported
        as 'custom'.

        Returns comma-separated license names (just the first one if
        `first_only`), an empty string if none was found, or None if the
        file could not be processed.
        """
        licenses = []
        copyright_file = self.fpath('copyright')
        if package:
            # may be package should carry custom copyright file
            copyright_file_ = self.fpath('%s.copyright' % package)
            if exists(copyright_file_):
                copyright_file = copyright_file_

        try:
            fobj = open(copyright_file)
            try:
                for p in deb822.Deb822.iter_paragraphs(fobj):
                    if 'Files' not in p or p['Files'].strip().startswith('debian/'):
                        continue
                    l = p['License']
                    # Take only the short version of first line
                    l = re.sub('\n.*', '', l).strip()
                    if not len(l):
                        l = 'custom'
                    if l not in licenses:
                        licenses.append(l)
                        if first_only:
                            break
            finally:
                fobj.close()
        except Exception:
            # Best effort: copyright might be absent or not be in the
            # machine readable format
            return None
        return ', '.join(licenses)

    def get_wnpp(self):
        """Search for a template changelog entry closing "Initial bug

        Returns the bug number (as a string) of the first changelog line
        matching "Initial release ... Closes: #NNN", or None.
        """
        fobj = open(self.fpath('changelog'))
        try:
            for l in fobj:
                rr = self._WNPP_RE.match(l)
                if rr:
                    return rr.groupdict()['bug']
        finally:
            fobj.close()
        return None

    def get_responsible(self):
        """Returns responsible, atm -- maintainer

        None is returned if debian/control has no source paragraph or no
        Maintainer field.
        """
        source = self.source
        if source is None:
            return None
        return source.get('Maintainer', None)

    def get_vcsfields(self):
        """Return Deb822 with Vcs-* fields of the source paragraph"""
        vcs = deb822.Deb822()
        # go through the property so debian/control gets parsed if it
        # was not accessed before
        source = self.source
        if source is not None:
            for f, v in source.items():
                if f.lower().startswith('vcs-'):
                    vcs[f] = v
        return vcs

    def get_description(self, pkg_name):
        """Some logic to extract description.

           If binary package matching pkg_name is found -- gets it description.
           If no binary package with such name, and name matches source name,
           obtain description of the first binary package listed in
           debian/control.  Otherwise `error` is called, which terminates
           the script.
        """
        binaries = self.binaries
        source = self.source
        if pkg_name in binaries:
            pass
        elif source is not None and self._binary_names \
                 and pkg_name.lower() == source['Source'].lower():
            pkg_name = self._binary_names[0]
        else:
            error("Name %s does not match any binary, nor source package in %s"
                  % (pkg_name, self._debiandir))
        return binaries[pkg_name]['Description']

def print_wnpp(pkgs, config, wnpp_type="ITP"):
    """Little helper to spit out formatted entry for WNPP bugreport

    Everything is based on the first package in `pkgs`; `config` is not
    used.  `wnpp_type` (e.g. "ITP", "RFP") is uppercased and used in the
    subject.  Fields of the template which are missing (or empty) in the
    package are rendered as UNKNOWN, so the entry could be completed
    by hand.

    The report (subject and body) is printed to stdout.
    """
    pkg = pkgs[0]                       # everything is based on the 1st one
    opts = dict(pkg.items())
    # Look the template fields up via the package itself, so its own key
    # matching rules apply, and provide a placeholder for absent ones
    for field in ('Pkg-Name', 'Version', 'Author', 'Homepage',
                  'License', 'Language', 'Pkg-Description'):
        opts[field] = pkg.get(field, None) or 'UNKNOWN'
    opts['WNPP-Type'] = wnpp_type.upper()
    # Only the first line of description goes into the subject
    opts['Pkg-Description-Short'] = re.sub('\n.*', '', opts['Pkg-Description'])

    subject = "%(WNPP-Type)s: %(Pkg-Name)s -- %(Pkg-Description-Short)s" % opts
    body = """*** Please type your report below this line ***

* Package name    : %(Pkg-Name)s
  Version         : %(Version)s
  Upstream Author : %(Author)s
* URL             : %(Homepage)s
* License         : %(License)s
  Programming Lang: %(Language)s
  Description     : %(Pkg-Description)s

""" % opts

    # Unfortunately could not figure out how to set the owner while
    # filing via reportbug, so the report is just printed out
    print("Subject: %s\n\n%s" % (subject, body))


def is_template(p):
    """Tell whether pkg definition `p` looks like a mere template.

    A definition is considered a skeleton (thus should not be
    processed) if none of the identifying fields -- source, homepage,
    etc. -- carries a value, even though the fields might be there.
    """
    identifying = ('vcs-browser', 'pkg-url', 'pkg-description',
                   'published-Title', 'pkg-name', 'homepage',
                   'author')
    return not any(f in p and p[f] != "" for f in identifying)


def main():
    """Command line entry point.

    Parses the options, loads the configuration file, and for every
    debian/blends file (given in the command line, found via paths.all
    globs in -a mode, or the default <topdir>/debian/blends):

    - parses it and expands 'extended' entries from Debian materials;
    - skips it if it matches paths.skip regexp or has templates only;
    - either prints a WNPP report (-w/--wnpp) or injects the entries
      into the tasks files of the configured blends.
    """
    global verbosity

    parser = OptionParser(
                usage="%s [OPTIONS] [blends_files]\n\n" % __prog__ + (__doc__ or ''),
                version="%prog " + __version__)

    parser.add_option(
        Option("-d", "--topdir", action="store",
               dest="topdir", default=None,
               help="Top directory of a Debian package. It is used to locate "
               "'debian/blends' if none is specified, and where to look for "
               "extended information."))

    parser.add_option(
        Option("-c", "--config-file", action="store",
               dest="config_file", default=join(expanduser('~'), '.%s.cfg' % __prog__),
               help="Configuration file with paths to the blends. "
               "Default: %default"))

    parser.add_option(
        Option("-v", "--verbosity", action="store", type="int",
               dest="verbosity", default=1, help="Noise level."))

    # We might like to create a separate 'group' of options for commands
    parser.add_option(
        Option("-w", action="store_true",
               dest="wnpp", default=False,
               help="Operate in WNPP mode: dumps cut-paste-able entry for WNPP bugreport"))

    parser.add_option(
        Option("--wnpp", action="store",
               dest="wnpp_mode", default=None,
               help="Operate in WNPP mode: dumps cut-paste-able entry for WNPP bugreport"))

    parser.add_option(
        Option("-a", action="store_true",
               dest="all_mode", default=False,
               help="Process all files listed in paths.all"))


    (options, infiles) = parser.parse_args()
    verbosity = options.verbosity

    if options.wnpp and options.wnpp_mode is None:
        options.wnpp_mode = 'ITP'

    # Load configuration
    default_skip = '.*[~#]$'
    config = ConfigParser(defaults={'skip': default_skip})
    config.read(options.config_file)

    if options.all_mode:
        if len(infiles):
            raise ValueError("Do not specify any files in -a mode.  Use configuration file, section paths, option all")
        if not config.has_option('paths', 'all'):
            error("Option 'all' in the section [paths] of %s is necessary "
                  "for -a mode." % options.config_file, 1)
        globs = config.get('paths', 'all').split()
        infiles = []
        for g in globs:
            infiles.extend(glob.glob(expanduser(g)))
        verbose(1, "Found %d files in specified paths" % len(infiles))

    if not len(infiles):
        infiles = [join(options.topdir or './', 'debian/blends')]     #  default one

    # Section [paths] is optional unless in -a mode, so fall back to
    # the default if it is absent
    if config.has_section('paths'):
        skip_re = re.compile(config.get('paths', 'skip'))
    else:
        skip_re = re.compile(default_skip)

    for blends_file in infiles:
        verbose(1, "Processing %s" % blends_file)
        if not exists(blends_file):
            error("Cannot find a file %s.  Either provide a file or specify top "
                  "debian directory with -d." % blends_file, 1)
        if skip_re.match(blends_file):
            verbose(2, "W: Skipped since matches paths.skip regexp")
            continue
        pkgs = parse_debian_blends(blends_file)
        if options.topdir is None:
            if dirname(blends_file).endswith('/debian'):
                topdir = dirname(dirname(blends_file))
            else:
                topdir = '.'            # and hope for the best ;)
        else:
            topdir = options.topdir

        expand_pkgs(pkgs, topdir=topdir)

        pkgs = [pkg for pkg in pkgs if not is_template(pkg)]
        if not len(pkgs):
            verbose(2, "W: Skipping since seems to contain templates only")
            continue
        if options.wnpp_mode is not None:
            print_wnpp(pkgs, config, options.wnpp_mode)
        else:
            # by default -- operate on blends/tasks files
            tasks = group_packages_into_tasks(pkgs)
            inject_tasks(tasks, config)


if __name__ == '__main__':
    main()

