/*
 * tagged collection - Experimental programs to test and study tagged collections
 *
 * Copyright (C) 2003--2008  Enrico Zini <enrico@enricozini.org>
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#define APPNAME PACKAGE
#else
#warning No config.h found: using fallback values
#define APPNAME __FILE__
#define VERSION "unknown"
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>

#include <stdio.h>

#include <stdlib.h>	// getenv

#include <wibble/exception.h>
#include <wibble/operators.h>

#include <tagcoll/stream/filters.h>
#include <tagcoll/stream/sink.h>
#include <tagcoll/stream/substitutions.h>
#include <tagcoll/stream/patcher.h>
#include <tagcoll/stream/expression.h>

#include <tagcoll/SmartHierarchy.h>

#include <tagcoll/coll/simple.h>
#include <tagcoll/Implications.h>
#include <tagcoll/patch.h>
#include <tagcoll/DerivedTags.h>
#include <tagcoll/coll/fast.h>

#include <tagcoll/input/stdio.h>
#include <tagcoll/TextFormat.h>

#include <algorithm>
#include <iostream>
#include <sstream>

#include "TagcollParser.h"
#include "Timing.h"

using namespace std;
using namespace tagcoll;
using namespace wibble::operators;


/**
 * Tell whether the given pathname refers to a directory.
 *
 * @param file  pathname to examine
 * @return true if stat() reports a directory, false for anything else
 * @throws wibble::exception::System if stat() fails on the pathname
 */
bool isdir(const std::string& file)
{
	struct stat info;
	const int res = stat(file.c_str(), &info);
	if (res == -1)
		throw wibble::exception::System("trying to stat file " + file);

	if (S_ISDIR(info.st_mode))
		return true;
	return false;
}

/**
 * Print every element of a set on its own line of standard output.
 *
 * @param items   the strings to print, in the set's iteration order
 * @param prefix  text written before each element (empty by default)
 */
void printItems(const std::set<std::string>& items, const std::string& prefix = "")
{
	std::set<std::string>::const_iterator cur = items.begin();
	const std::set<std::string>::const_iterator last = items.end();
	while (cur != last)
	{
		std::cout << prefix << *cur << std::endl;
		++cur;
	}
}

/**
 * Recursively print a hierarchy node and all its children to standard output.
 *
 * If the node has items, each is printed as "<prefix>: <item>"; otherwise the
 * prefix alone is printed.  Children are then printed with their tag appended
 * to the prefix, using '/' as a separator.
 *
 * @param node    the node to print
 * @param prefix  path accumulated so far (taken by value: it is extended
 *                locally before recursing)
 */
template<typename COLL>
void printNode(HierarchyNode<COLL>* node, string prefix = "")
{
	const std::set<string> nodeItems = node->getItems();
	if (nodeItems.empty())
		cout << prefix << endl;
	else
		printItems(nodeItems, prefix + ": ");

	// Make sure a non-empty prefix ends with the separator before child tags
	// get appended to it
	if (!prefix.empty() && *prefix.rbegin() != '/')
		prefix += '/';

	typename HierarchyNode<COLL>::iterator child = node->begin();
	while (child != node->end())
	{
		printNode(*child, prefix + (*child)->tag());
		++child;
	}
}

/**
 * Collect the items of a hierarchy node together with the items of all the
 * nodes below it.
 *
 * @param node  root of the subtree to visit
 * @return the union of the items found in the whole subtree
 */
template<typename COLL>
std::set<string> getItems(HierarchyNode<COLL>* node)
{
	typedef typename HierarchyNode<COLL>::iterator child_iterator;

	std::set<string> collected = node->getItems();

	for (child_iterator child = node->begin(); child != node->end(); ++child)
	{
		// Merge in everything found in the child's subtree
		const std::set<string> below = getItems(*child);
		collected |= below;
	}

	return collected;
}

template<typename OUT>
void readCollection(const string& file, const OUT& out)
{
	if (file == "-")
	{
		input::Stdio input(stdin, "<stdin>");
		textformat::parse(input, out);
	}
	else
	{
		input::Stdio input(file);
		textformat::parse(input, out);
	}
}


/**
 * Read a list of patches.
 *
 * @param file  "-" to read a patch from standard input, the pathname of a
 *              patch file, or the pathname of a directory: in the latter case
 *              every entry whose name does not start with '.' is parsed as a
 *              patch file and merged into the result
 * @return the patches that have been read
 * @throws wibble::exception::System if the pathname cannot be stat()ed or the
 *         directory cannot be opened
 */
PatchList<string, string> readPatches(const string& file)
{
	if (file == "-")
	{
		input::Stdio input(stdin, "<stdin>");
		return textformat::parsePatch(input);
	}

	if (!isdir(file))
	{
		input::Stdio input(file);
		return textformat::parsePatch(input);
	}

	// Scope guard: closes the directory stream on every exit path, including
	// when opening or parsing one of the entries throws
	struct DirGuard
	{
		DIR* handle;
		explicit DirGuard(DIR* h) : handle(h) {}
		~DirGuard() { if (handle != NULL) closedir(handle); }
	};

	DirGuard dir(opendir(file.c_str()));
	if (dir.handle == NULL)
		throw wibble::exception::System("reading directory " + file);

	PatchList<string, string> patches;
	while (struct dirent* entry = readdir(dir.handle))
	{
		// Skip ".", ".." and hidden entries
		if (entry->d_name[0] == '.')
			continue;
		input::Stdio input(file + '/' + entry->d_name);
		patches.addPatch(textformat::parsePatch(input));
	}
	return patches;
}

/**
 * Parse a list of derived tags.
 *
 * Every input line has the form "tag: expression".  The tag ends at the first
 * colon that is followed by a space or a tab; colons followed by anything
 * else (as in "facet::name") are part of the tag.  Blanks between the colon
 * and the expression are skipped, and the rest of the line is the expression.
 *
 * Lines missing either the tag or the expression are reported on standard
 * error and ignored.
 *
 * @param in      input to read characters from
 * @param output  receives one add(tag, expression) call per complete line
 */
void parseDerivedTags(input::Input& in, DerivedTags& output)
{
	string tag;
	string expr;

	enum {TAG, TAGCOLON, SEXPR, EXPR} state = TAG;
	int line = 1;
	int c;
	while ((c = in.nextChar()) != input::Input::Eof)
	{
		if (c == '\n')
		{
			// End of line: store what has been read and start afresh
			if (tag.size() > 0 && expr.size() > 0)
				output.add(tag, expr);
			else
				fprintf(stderr, "In derived tags file, ignoring incomplete line %d.\n", line);
			tag = string();
			expr = string();
			state = TAG;
			line++;
			continue;
		}

		const char ch = static_cast<char>(c);
		switch (state)
		{
			// Reading the tag name
			case TAG:
				if (ch == ':')
					state = TAGCOLON;
				else
					tag += ch;
				break;
			// After a colon in the tag name
			case TAGCOLON:
				switch (ch)
				{
					case ' ':
					case '\t':
						// The colon was the tag/expression separator
						state = SEXPR;
						break;
					case ':':
						tag += ch;
						break;
					default:
						// The colon was part of the tag name (e.g.
						// "facet::name"): keep it and go on reading the
						// tag.  Switching to EXPR here would cut the tag
						// short and put its remainder in the expression.
						tag += ':';
						tag += ch;
						state = TAG;
						break;
				}
				break;
			// Blanks before the expression
			case SEXPR:
				if (ch != ' ' && ch != '\t')
				{
					expr += ch;
					state = EXPR;
				}
				break;
			// Reading the expression
			case EXPR:
				expr += ch;
				break;
		}
	}

	// The last line may not be terminated by a newline: do not lose it
	if (!tag.empty() || !expr.empty())
	{
		if (!tag.empty() && !expr.empty())
			output.add(tag, expr);
		else
			fprintf(stderr, "In derived tags file, ignoring incomplete line %d.\n", line);
	}
}

void readDerivedTags(const string& file, DerivedTags& derivedTags)
{
	if (file == "-")
	{
		input::Stdio input(stdin, "<stdin>");
		parseDerivedTags(input, derivedTags);
	}
	else
	{
		input::Stdio input(file);
		parseDerivedTags(input, derivedTags);
	}
}


/**
 * Output iterator adapter that converts tags to strings.
 *
 * Every (items, tags) pair assigned to it is forwarded to the wrapped output
 * iterator with each tag replaced by its operator<< representation; the
 * items are passed through untouched.
 */
template<typename OUT>
class TagFormatter : public wibble::mixin::OutputIterator< TagFormatter<OUT> >
{
	OUT out;

	/// Render a value as a string using its stream insertion operator
	template<typename T>
	std::string format(const T& value)
	{
		std::ostringstream buf;
		buf << value;
		return buf.str();
	}

public:
	TagFormatter(const OUT& out) : out(out) {}

	/// Format the tags in data.second and pass the result downstream
	template<typename ITEMS, typename TAGS>
	TagFormatter<OUT>& operator=(const std::pair<ITEMS, TAGS>& data)
	{
		typedef typename TAGS::const_iterator tag_iterator;

		std::set<std::string> names;
		tag_iterator cur = data.second.begin();
		const tag_iterator last = data.second.end();
		while (cur != last)
		{
			names.insert(format(*cur));
			++cur;
		}

		*out = make_pair(data.first, names);
		++out;
		return *this;
	}
};

/**
 * Build a TagFormatter around the given output iterator, deducing its type.
 */
template<typename OUT>
TagFormatter<OUT> tagFormatter(const OUT& out)
{
	TagFormatter<OUT> formatter(out);
	return formatter;
}

// Break filtering into generic steps that are abstracted by a virtual function

/**
 * Abstract, type-erased stage of a filter chain.
 *
 * It can be used like an output iterator: dereferencing and incrementing are
 * no-ops returning the step itself, and assignment is a virtual call, with
 * one overload for each supported combination of item and tag containers.
 */
class FilterStep
{
public:
	virtual ~FilterStep() {}

	// Set of items, set of tags
	virtual FilterStep& operator=(
			const std::pair< std::set<std::string>, std::set<std::string> >& data) = 0;
	// Single item, set of tags
	virtual FilterStep& operator=(
			const std::pair< wibble::Singleton<std::string>, std::set<std::string> >& data) = 0;
	// Set of items, no tags
	virtual FilterStep& operator=(
			const std::pair< std::set<std::string>, wibble::Empty<std::string> >& data) = 0;
	// Single item, no tags
	virtual FilterStep& operator=(
			const std::pair< wibble::Singleton<std::string>, wibble::Empty<std::string> >& data) = 0;

	// Output iterator protocol: both operations just give back the step
	FilterStep& operator*() { return *this; }
	FilterStep& operator++() { return *this; }
};

/**
 * FilterStep implementation that hands every assigned pair over to a
 * statically typed output iterator.
 */
template<typename OUT>
class StringFilterStep : public FilterStep
{
	OUT out;

	/// Write one element to the wrapped iterator and advance it
	template<typename DATA>
	FilterStep& push(const DATA& data)
	{
		*out = data;
		++out;
		return *this;
	}

public:
	StringFilterStep(const OUT& out) : out(out) {}
	virtual ~StringFilterStep() {}

	virtual FilterStep& operator=(const pair< set<string>, set<string> >& data)
	{
		return push(data);
	}
	virtual FilterStep& operator=(const pair< wibble::Singleton<string>, set<string> >& data)
	{
		return push(data);
	}
	virtual FilterStep& operator=(const pair< set<string>, wibble::Empty<string> >& data)
	{
		return push(data);
	}
	virtual FilterStep& operator=(const pair< wibble::Singleton<string>, wibble::Empty<string> >& data)
	{
		return push(data);
	}
};

/**
 * Wrap an output iterator in a heap-allocated FilterStep.
 *
 * The caller owns the returned object and is responsible for deleting it.
 */
template<typename OUT>
FilterStep* filterStep(const OUT& out)
{
	FilterStep* step = new StringFilterStep<OUT>(out);
	return step;
}

/**
 * Copyable output iterator that forwards everything to a FilterStep.
 *
 * It only stores the pointer it is given: it never deletes the step.
 */
class FilterForwarder : public wibble::mixin::OutputIterator<FilterForwarder>
{
	FilterStep* out;

public:
	FilterForwarder(FilterStep* out) : out(out) {}

	/// Pass the pair to the step, then advance the step
	template<typename ITEMS, typename TAGS>
	FilterForwarder& operator=(const pair<ITEMS, TAGS>& data)
	{
		FilterStep& step = **out;
		step = data;
		++step;
		return *this;
	}
};

/**
 * Owner of a chain of heap-allocated FilterStep objects.
 *
 * Steps registered with manage() are deleted by the destructor, last
 * registered first.
 */
class FilterStepManager
{
	// Use a vector to be able to deallocate in reverse order
	std::vector<FilterStep*> m_managed;

	// The manager owns the steps it holds: a copy would delete them a second
	// time, so copying is disabled (declared but not defined)
	FilterStepManager(const FilterStepManager&);
	FilterStepManager& operator=(const FilterStepManager&);

public:
	FilterStepManager() {}

	~FilterStepManager()
	{
		// Deleting a null pointer is a no-op, so no check is needed
		for (std::vector<FilterStep*>::const_reverse_iterator i = m_managed.rbegin();
				i != m_managed.rend(); ++i)
			delete *i;
	}

	/**
	 * Take ownership of a step.
	 *
	 * Registering the same pointer more than once is harmless: it is stored,
	 * and later deleted, only once.
	 *
	 * @return fs itself, so that calls can be nested
	 */
	FilterStep* manage(FilterStep* fs)
	{
		// Avoid double insert
		if (std::find(m_managed.begin(), m_managed.end(), fs) == m_managed.end())
			m_managed.push_back(fs);
		return fs;
	}
};

// Construct the elaborate readers and writers in chunks, so that a virtual
// call (FilterStep) is only needed between one chunk and the next instead of
// between each and every filtering step.
//
// Each make* method returns a FilterStep: either a newly allocated one, which
// the caller must hand to a FilterStepManager, or (in some cases) the very
// step it was given, when no filter is needed at that stage.
class Tagcoll
{
	// Parsed command line, consulted to decide which filters to build
	wibble::commandline::TagcollParser& opts;
	// Collects the output when grouping is requested; written to stdout by
	// the destructor
	coll::Fast<string, string> grouper;

	// Owner of the steps of the writer chain
	FilterStepManager wman;
	// Head of the writer chain, built on first use by writer()
	FilterStep* m_writer;

	// Support structures for the input filter chain
	Implications<string> implications;
	DerivedTags derivedTags;
	stream::Substitutions<string> substitutions;
	PatchList<string, string> patches;

	// First stage of the reader chain: apply patches and/or substitutions,
	// depending on which of the in_patch and in_rename options are set.
	// Returns `out` itself when neither is set.
	FilterStep* makeReaderHead(FilterStep* out)
	{
		if (opts.in_patch->isSet())
			if (opts.in_rename->isSet())
				return filterStep(
						stream::patcher(patches,
							stream::substitute(substitutions,
								FilterForwarder(out))));
			else
				return filterStep(
						stream::patcher(patches,
							FilterForwarder(out)));
		else
			if (opts.in_rename->isSet())
				return filterStep(
							stream::substitute(substitutions,
								FilterForwarder(out)));
			else
				return out;
	}

	// Middle stage of the reader chain: add implied and/or derived tags,
	// depending on which of the in_extimpl and in_derived options are set.
	// Returns `out` itself when neither is set.
	FilterStep* makeReaderMain(FilterStep* out)
	{
		bool hasImpl = opts.in_extimpl->isSet();
		bool hasDerv = opts.in_derived->isSet();

		if (hasImpl)
			if (hasDerv)
				// Add derived tags computing them using the expanded tag set,
				// then adding further tags implicated by the derived tags
				return filterStep(
						addImplied(implications,
							addDerived(derivedTags,
								addImplied(implications, FilterForwarder(out)))));
			else
				return filterStep(addImplied(implications, FilterForwarder(out)));
		else
			if (hasDerv)
				return filterStep(addDerived(derivedTags, FilterForwarder(out)));
			else
				return out;
	}

	// Last stage of the reader chain: optionally remove unfaceted tags
	// (in_rmunfaceted) and filter tags using the negation of the in_rmtags
	// expression, then deliver to `out`.  Always allocates a new step.
	template<typename OUT>
	FilterStep* makeReaderTail(const OUT& out)
	{
		if (opts.in_rmunfaceted->boolValue())
			if (opts.in_rmtags->boolValue())
				return filterStep(
						stream::unfacetedRemover(
							stream::filterTagsByExpression(
								not Expression(opts.in_rmtags->stringValue()), out)));
			else
				return filterStep(stream::unfacetedRemover(out));
		else
			if (opts.in_rmtags->boolValue())
				return filterStep(
						stream::filterTagsByExpression(
							not Expression(opts.in_rmtags->stringValue()), out));
			else
				return filterStep(out);
	}

	// First stage of the writer chain.  When out_redundant is false, implied
	// and derived tags are removed from the output; when it is true they are
	// added.  Only the structures that are not empty take part in the chain;
	// `out` itself is returned when both are empty.
	FilterStep* makeWriterHead(FilterStep* out)
	{
		// Intermix implications and derived tags as seems best
		if (!opts.out_redundant->boolValue())
			if (derivedTags.empty())
				if (implications.empty())
					return out;
				else
					return filterStep(removeImplied(implications, FilterForwarder(out)));
			else
				if (implications.empty())
					return filterStep(removeDerived(derivedTags, FilterForwarder(out)));
				else
					// Expand implications, then remove derived tags computing
					// them using the expanded tag set
					return filterStep(
							addImplied(implications,
								removeDerived(derivedTags,
									removeImplied(implications,
										FilterForwarder(out)))));
		else
			if (derivedTags.empty())
				if (implications.empty())
					return out;
				else
					return filterStep(addImplied(implications, FilterForwarder(out)));
			else
				if (implications.empty())
					return filterStep(addDerived(derivedTags, FilterForwarder(out)));
				else
					// Expand implications, then add derived tags computing
					// them using the expanded tag set
					return filterStep(
							addImplied(implications,
								addDerived(derivedTags,
									addImplied(implications,
										FilterForwarder(out)))));
	}

	// Last stage of the writer chain.  out_itemsOnly takes precedence over
	// out_count; in every mode the data goes into `grouper` when out_group is
	// true, and is otherwise written ungrouped to stdout.
	FilterStep* makeWriterTail()
	{
		if (opts.out_itemsOnly->boolValue())
			if (opts.out_group->boolValue())
				return filterStep(stream::itemsOnly(inserter(grouper)));
			else
				return filterStep(stream::itemsOnly(stream::ungroupItems(textformat::StdioWriter(stdout))));
		else if (opts.out_count->boolValue())
			if (opts.out_group->boolValue())
				return filterStep(stream::tagCounter(tagFormatter(inserter(grouper))));
			else
				return filterStep(stream::tagCounter(tagFormatter(stream::ungroupItems(textformat::StdioWriter(stdout)))));
		else
			if (opts.out_group->boolValue())
				return filterStep(inserter(grouper));
			else
				return filterStep(stream::ungroupItems(textformat::StdioWriter(stdout)));
	}

public:
	// Load the support data (implications, derived tags, substitutions and
	// patches) named by the input options that are set
	Tagcoll(wibble::commandline::TagcollParser& opts)
		: opts(opts), m_writer(0)
	{
		if (opts.in_extimpl->isSet())
		{
			readCollection(opts.in_extimpl->stringValue(), inserter(implications));
			// Pack the structure for faster expansions
			implications.pack();
		}
		if (opts.in_derived->isSet())
			readDerivedTags(opts.in_derived->stringValue(), derivedTags);
		if (opts.in_rename->isSet())
			readCollection(opts.in_rename->stringValue(), substitutions.inserter());
		if (opts.in_patch->isSet())
			patches = readPatches(opts.in_patch->stringValue());
	}
	// Write to stdout whatever has been accumulated in the grouper
	~Tagcoll()
	{
		if (!grouper.empty())
			grouper.output(textformat::StdioWriter(stdout));
	}

	// Read one file ("-" for standard input) through the input filter chain
	// into `out`.  The chain is built for this call only: its steps are
	// deleted by the local manager on return.  The makers can return the
	// step they were given, which is why every result goes through manage(),
	// which ignores pointers it already holds.
	template<typename OUT>
	void readFile(const string& file, const OUT& out)
	{
		FilterStepManager m;
		FilterStep* reader =
			m.manage(makeReaderHead(
						m.manage(makeReaderMain(
								m.manage(makeReaderTail(out))))));

		readCollection(file, FilterForwarder(reader));
	}

	// Read all the remaining files named by opts.next() through the input
	// filter chain into `out`; read standard input if there are none
	template<typename OUT>
	void readAll(const OUT& out)
	{
		FilterStepManager m;
		FilterStep* reader =
			m.manage(makeReaderHead(
						m.manage(makeReaderMain(
								m.manage(makeReaderTail(out))))));
		
		if (opts.hasNext())
			while (opts.hasNext())
				readCollection(opts.next(), FilterForwarder(reader));
		else
			readCollection("-", FilterForwarder(reader));
	}

	// Output iterator feeding the writer chain.  The chain is created the
	// first time this is called and is owned by wman.
	FilterForwarder writer()
	{
		if (!m_writer)
			m_writer =
				wman.manage(makeWriterHead(
					wman.manage(makeWriterTail())));
		return FilterForwarder(m_writer);
	}

	// Read all the input and send it to the writer chain
	void copy()
	{
		readAll(writer());
	}

	// Filter the input items by `expr` (match inverted when misc_invert is
	// true), also writing them out unless misc_quiet is true.
	// Returns 0 if countItems ends up greater than zero, 1 otherwise
	// (presumably countingSink counts the items it receives -- TODO confirm).
	int grep(const std::string& expr)
	{
		int countItems = 0, countTags = 0;
		stream::ExpressionFilter::MatchType type =
			opts.misc_invert->boolValue() ?
			stream::ExpressionFilter::INVERTED : stream::ExpressionFilter::PLAIN;

		if (opts.misc_quiet->boolValue())
			readAll(stream::filterItemsByExpression(expr, type, stream::countingSink(countItems, countTags)));
		else
			readAll(stream::filterItemsByExpression(expr, type,
						teeFilter(writer(), stream::countingSink(countItems, countTags))));

		return countItems > 0 ? 0 : 1;
	}
};

int main(int argc, const char* argv[])
{
	wibble::commandline::TagcollParser opts;

	try {
		// Parse commandline.
		// If the parser took care of the action, we can exit just fine.
		if (opts.parse(argc, argv))
			return 0;
		Tagcoll tagcoll(opts);
		
		// Perform the correct operation
		if (opts.foundCommand() == opts.implications)
		{
			coll::Fast<string, string> coll;
			tagcoll.readAll(inserter(coll));

			Implications<string> newImpls;

			// Find tag implications
			newImpls.addFrom(coll);

			newImpls.pack();

			if (opts.out_redundant->boolValue())
				newImpls.outputFull(textformat::StdioWriter(stdout));
			else
				newImpls.output(textformat::StdioWriter(stdout));
		}
		else if (opts.foundCommand() == opts.hierarchy)
		{
			int flattenThreshold = 0;
			if (opts.hie_flatten->boolValue())
				flattenThreshold = opts.hie_flatten->intValue();

			coll::Fast<string, string> coll;
			tagcoll.readAll(inserter(coll));

			if (opts.hie_filter->boolValue())
				coll.removeTagsWithCardinalityLessThan(opts.hie_filter->intValue());

			// Default operation: build the smart hierarchy
			HierarchyNode< coll::Fast<string, string> >* root =
				smartHierarchyNode("_top", coll, flattenThreshold);
			printNode(root, "/");
		}
		else if (opts.foundCommand() == opts.cleanhierarchy)
		{
			int flattenThreshold = 0;
			if (opts.hie_flatten->boolValue())
				flattenThreshold = opts.hie_flatten->intValue();

			coll::Fast<string, string> coll;
			tagcoll.readAll(inserter(coll));

			if (opts.hie_filter->boolValue())
				coll.removeTagsWithCardinalityLessThan(opts.hie_filter->intValue());

			// Default operation: build the smart hierarchy
			HierarchyNode< coll::Fast<string, string> >* root =
				cleanSmartHierarchyNode("_top", coll, flattenThreshold);
			printNode(root, "/");
		}
		else if (opts.foundCommand() == opts.dischierarchy)
		{
			int flattenThreshold = 0;
			if (opts.hie_flatten->boolValue())
				flattenThreshold = opts.hie_flatten->intValue();

			coll::Fast<string, string> coll;
			tagcoll.readAll(inserter(coll));

			if (opts.hie_filter->boolValue())
				coll.removeTagsWithCardinalityLessThan(opts.hie_filter->intValue());

			// Default operation: build the smart hierarchy
			HierarchyNode< coll::Fast<string, string> >* root =
				discHierarchyNode("_top", coll, flattenThreshold);
			printNode(root, "/");
		}
		else if (opts.foundCommand() == opts.diff)
		{
			coll::Simple<string, string> merger1;
			tagcoll.readFile(opts.next(), inserter(merger1));

			coll::Simple<string, string> merger2;
			tagcoll.readFile(opts.next(), inserter(merger2));

			PatchList<string, string> newpatches;
			newpatches.addPatch(merger1, merger2);

			textformat::outputPatch(newpatches, stdout);
		}
		else if (opts.foundCommand() == opts.related)
		{
			string item = opts.next();
			coll::Simple<string, string> merger;
			tagcoll.readAll(inserter(merger));

			int maxdist = 0;
			if (opts.misc_distance->boolValue())
				maxdist = opts.misc_distance->intValue();

			// Split the items on commas
			string splititem;
			set<string> splititems;
			for (string::const_iterator c = item.begin();
					c != item.end(); c++)
				if (*c == ',')
				{
					if (!merger.hasItem(splititem))
					{
						cerr << "Item \"" << splititem << "\" does not exist in the collection" << endl;
						return 1;
					}
					splititems.insert(splititem);
					splititem = string();
				} else
					splititem += *c;
			if (!merger.hasItem(splititem))
			{
				cerr << "Item \"" << splititem << "\" does not exist in the collection" << endl;
				return 1;
			}
			splititems.insert(splititem);

			// Get the tagset as the intersection of the tagsets of all input items
			set<string>::const_iterator i = splititems.begin();
			std::set<string> ts = merger.getTagsOfItem(*i);
			for (++i; i != splititems.end(); i++)
				ts = ts & merger.getTagsOfItem(*i);

			if (ts.empty())
			{
				if (splititems.size() > 1)
					cerr << "The items " << item << " are unrelated: cannot find a barycenter to start computing relationships from." << endl;
				else
					cerr << "The items " << item << " has no tags attached." << endl;
				return 1;
			}

			// Build a full TagCollection
			coll::Fast<string, string> coll;
			merger.output(inserter(coll));

			printItems(coll.getItemsExactMatch(ts));

			if (maxdist)
				printItems(coll.getRelatedItems(ts, maxdist));
		}
		else if (opts.foundCommand() == opts.reverse)
		{
			string revnull;
			if (opts.misc_untaggedTag->boolValue())
				revnull = opts.misc_untaggedTag->stringValue();

			coll::Fast<string, string> coll;
			tagcoll.readAll(inserter(coll));
			coll.outputReversed(tagcoll.writer());
		}
		else if (opts.foundCommand() == opts.copy)
		{
			tagcoll.copy();
		}
		else if (opts.foundCommand() == opts.findspecials)
		{
			int flattenThreshold = 0;
			if (opts.hie_flatten->boolValue())
				flattenThreshold = opts.hie_flatten->intValue();

			coll::Fast<string, string> coll;
			tagcoll.readAll(inserter(coll));

			if (opts.hie_filter->boolValue())
				coll.removeTagsWithCardinalityLessThan(opts.hie_filter->intValue());

			// Default operation: build the smart hierarchy
			SmartHierarchyNode< coll::Fast<string, string> > root("_top", coll, flattenThreshold);

			std::set<string> seen;
			for (HierarchyNode< coll::Fast<string, string> >::iterator i = root.begin();
					i != root.end(); i++)
			{
				std::set<string> items = getItems(*i);

				// Find the items in this branch that are not present in
				// any of the previous ones
				std::set<string> newItems;
				if (!seen.empty())
				{
					for (std::set<string>::const_iterator j = items.begin();
							j != items.end(); j++)
					{
						std::set<string> tags = coll.getTagsOfItem(*j) & seen;
						if (tags.empty())
							newItems |= *j;
					}

					cout << (*i)->tag() << ": " << items.size() << " items, " <<
						newItems.size() << " special items:" << endl;

					int indent = (*i)->tag().size() + 2;
					for (std::set<string>::const_iterator j = newItems.begin(); j != newItems.end(); j++)
					{
						for (int ind = 0; ind < indent; ++ind)
							cout << ' ';
						cout << *j << endl;
					}
				}

				seen |= (*i)->tag();
			}
		}
		else if (opts.foundCommand() == opts.grep)
		{
			return tagcoll.grep(opts.next());
		}
		else if (opts.foundCommand() == opts.test)
		{
			Timing timing("test");

			coll::Fast<string, string> coll;
			tagcoll.readAll(inserter(coll));

			cerr << timing.partial() << ": read collection." << endl;

			{
				coll::Simple<string, string> t;
				coll.outputReversed(inserter(t));
			}
			cerr << timing.partial() << ": reversed." << endl;

			cerr << timing.total() << ": total." << endl;
		}
		else
			throw wibble::exception::BadOption(string("unhandled command ") +
						(opts.foundCommand() ? opts.foundCommand()->name() : "(null)"));

		return 0;
	} catch (wibble::exception::BadOption& e) {
		cerr << e.desc() << endl;
		opts.outputHelp(cerr);
		return 1;
	} catch (std::exception& e) {
		cerr << e.what() << endl;
		return 1;
	}
}

#include <tagcoll/TextFormat.tcc>
#include <tagcoll/Implications.tcc>
#include <tagcoll/SmartHierarchy.tcc>
#include <tagcoll/stream/filters.tcc>
#include <tagcoll/coll/simple.tcc>
#include <tagcoll/coll/fast.tcc>

// vim:set ts=4 sw=4:
