#!/usr/bin/env python
#******************************************************************************\
#* Copyright (C) 2000-2006 Martin Blais <blais@furius.ca>
#*
#* This program is free software; you can redistribute it and/or modify
#* it under the terms of the GNU General Public License as published by
#* the Free Software Foundation; either version 2 of the License, or
#* (at your option) any later version.
#*
#* This program is distributed in the hope that it will be useful,
#* but WITHOUT ANY WARRANTY; without even the implied warranty of
#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#* GNU General Public License for more details.
#*
#* You should have received a copy of the GNU General Public License
#* along with this program; if not, write to the Free Software
#* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#*
#*****************************************************************************/

"""sort-order <order-file> [<unsorted-file> ...]

Sort lines according to an arbitrary list of ordering string prefixes.

The idea is that sometimes we want a list of strings ordered according to an
arbitrary ordering.  This program will read a file that specifies a list of
prefixes and that will then sort the given input lines according to the order of
the arbitrary list of prefixes, by attempting to match the prefixes to the input
lines.

The ordering file is compulsory and must be specified. It simply contains one
string per line which is assumed to be a prefix for the lines to be sorted by.

Note that strings to be sorted which cannot be sorted because they don't match
any prefix from the ordering file will be placed at the end of the list with
special markers. There are options to alter this behaviour.

Use Case
--------

This script was originally created to sort a list of filenames in compilation
order.  A special target in the makefiles would simply generate to a file the
list of directories to be compiled, in compilation order. Then that file was
used as the ordering file on an arbitrary list of source files (we were
performing diffs for merge review, but we wanted the diffs to show up in
compilation order).

"""

__version__ = "Revision: 1.11 "
__author__ = "Martin Blais <blais@furius.ca>"
__depends__ = ['>=Python-2.3']



# stdlib imports
import sys


def sort_order(ordering, unsorted_input):
    """
    Accepts as input a sequence of ordering prefix strings, and a sequence of
    unordered input to sort, and return two lists: a list of the sorted inputs
    and a list of the unmatched inputs.
    """

    # For each of the strings to be sorted, look at each of the ordering
    # prefixes and look if the prefix matches the string.  If it does, prepend
    # the index of the ordering (Schwartzian transform) for sorting later.
    # Note: this is a SIMPLE O(nm) algorithm, we could do better.
    anno_input = []
    notfound = []
    for s in unsorted_input:
        # Look at all the orderings.
        for idx, orderstr in enumerate(ordering):
            # Match the ordering prefix to the given string to be sorted.
            if s.startswith(orderstr):
                anno_input.append( (idx, s) )
                break
        else:
            # If none of the ordering prefixes has matched the string, place the
            # string in a special "loser" list for handling later.
            notfound.append(s)

    # Sort items and remove the sorting index.
    anno_input.sort()
    sorted_input = map(lambda x: x[1], anno_input)

    return sorted_input, notfound


def main():
    import optparse
    parser = optparse.OptionParser(__doc__.strip(), version=__version__)

    parser.add_option('-i', '--ignore-unmatched', action='store_true',
                      help="""Ignore unmatched lines, don't output them.""")

    parser.add_option('-s', '--stderr-unmatched', action='store_true',
                      help="""Separate by outputting unsorted files to
                      stderr. Normally, markers are added to show the unmatched
                      lines."""
                      )

    opts, args = parser.parse_args()

    # Validate and unpack the arguments.
    if opts.ignore_unmatched and opts.stderr_unmatched:
        parser.error("You can either ignore or separate the unmatched lines, "
                     "but not both.")

    if len(args) == 0:
        parser.error("You need to specify an ordering file.")
    orderfn = args[0]
    sortfn = args[1:]

    try:
        # Read the file with the ordering prefixes.
        ordering = open(orderfn, 'r').readlines()

        # Read all the strings to be sorted.
        if len(sortfn) == 0:
            # From stdin if no filename is specified.
            unsorted_input = sys.stdin.readlines()
        else:
            # From the given filenames.
            unsorted_input = []
            for fn in sortfn:
                unsorted_input += open(fn, 'r').readlines()

    except IOError, e:
        raise SystemExit("Error: %s" % e)

    # Strip the whitespace off the inputs.
    ordering = map(str.strip, ordering)
    unsorted_input = map(str.strip, unsorted_input)

    sorted_input, notfound = sort_order(ordering, unsorted_input)

    # Output sorted items.
    for s in sorted_input:
        print s

    # Deal with unmatched items.
    if not opts.ignore_unmatched:
        # Figure out where to output the unmatched strings.
        unmatched_f = sys.stdout
        if opts.stderr_unmatched:
            unmatched_f = sys.stderr

        # Output the unmatched strings.
        notfound.sort() # (Sort them lexicographically, why not)
        for s in notfound:
            print >> unmatched_f, s


if __name__ == "__main__":
    main()

