#!/usr/bin/env python
#
# $Source: /home/blais/repos/cvsroot/conf/bin/fit-sizes,v $
# $Id: fit-sizes,v 1.5 2005/07/10 00:39:58 blais Exp $
#

"""fit-sizes [<options>] <file-or-dir> [<file-or-dir> ... ]

From a list of files or directories, select the files in a way that will fit as
close as possible a requested space, either by using given ordering, or by using
any ordering to fit as close as possible the allowed size.

In other words, given a fixed size for bundling files or directory contents,
what is the smallest amount of bundles we can fit the files in?

Output is lists of files, one list per line.

(For now only the simple algorithm is provided.)
"""

## Changes:
## 
##   2005-06-17: towo at geocities.com
##     * changed use of `du` to `os.stat()`. much faster, but no filesyztem block sizes.
##     * added _sortedBitBest algorithm from http://www.cs.arizona.edu/icon/oddsends/bpack/bpack.htm
##
##   2005-06-16: towo at geocities.com
##     * added -R option. now rejecting can be switched off.
##     * changed name to fitSizes.py so it can be imported as a module.
## 
##   2005-06-16: blais
##     * incorporated towi changes.

__version__ = "Revision: 1.5 "
__author__ = "Martin Blais <blais@furius.ca>"

__all__ = [ 'fit', 'bytesize_cb' ]

# From bestfit: http://www.student.lu.se/~nbi98oli/bestfit.html::
# 
#   This problem is also known as the 0-1 knapsack problem. The algorithm
#   implemented by bestfit solves instances of it optimally in theta(n*W)
#   time, where n is number of files to choose between and W the amount of
#   free space. This may sound bad but since W is number of blocks and not
#   bytes, this algorithm is not that inefficient. Try it yourself and
#   see.
#   
#   Due to the nature of this algorithm, bestfit uses a lot of memory -
#   approximately 1.5M per file specified on command line. As long as you
#   have enough swap space, this is usually not a problem since bestfit
#   doesn't use all at the same time.


import sys


import os, stat

def gather_size(fn):
    """Return the size of the given filename or directory, in bytes."""
    return os.stat(fn)[stat.ST_SIZE]


def _restrictOrder(fitsize, szlist):
    """fast and bad.
    szlist = [ (filename, size), ... ]
    returns list of buckets = [ [(f1,sz1),...], [(f,s),...], ...]
    """
    #szmap[fn] = dict(filesAndSizes) # creates { 'f1':zs1, 'f2':sz2, ... }
    buckets = []
    curbuck, cursz = [], long(0)
    for fn, sz in szlist:
        if cursz + sz > fitsize:
            buckets.append(curbuck)
            curbuck, cursz = [], long(0)
        curbuck.append( (fn, sz) )
        cursz += sz
    if curbuck != []:
        buckets.append(curbuck)
    return buckets


def _sortedBestFit(fitsize, szlist):
    """signature see _restrictOrder(). currently O(n*n), could be O(n*logn).
    inspired by: http://www.cs.arizona.edu/icon/oddsends/bpack/bpack.htm
    author: towi
    """
    def _snd(x,y): # normal sort by second element
        a,b = x[1], y[1]
        if a<b: return -1
        if a>b: return 1
        return 0
    def _put_in_best_bucket(bs, item):
        # scan all => O(n*n).
        # could be improved by sort bucket list by size, then binary search it => O(n*logn)
        fn, sz = item
        bestS = sz+1
        bestI = None
        for i in range(len(bs)):
            bsz = bs[i][0]
            if bsz+sz > fitsize: continue
            if bsz+sz > bestS:
                bestS = bsz+sz
                bestI = i
        if bestI == None:
            # introduce new bucket for item, too big
            bs.append( [ sz, [item] ])  # {*1*}
        else:
            # put item into best fitting bucket
            bs[bestI][0] = bestS        # {*2*}
            bs[bestI][1].append( item ) # {*3*}
    # sort by size, descending
    szlist.sort(_snd)
    szlist.reverse()
    # one bucket: [ bSize, [ item, item, ...] ], see {*1*}.
    #   a buckets needs to be mutable, see {*2*} and {*3*}.
    buckets = [ ] # initially no buckets. 
    for item in szlist:
        _put_in_best_bucket(buckets, item)
    return [ x[1] for x in buckets ] # throw away bucket size information

def fit(fitsize, filenames, restrict_order, reject_big=False):
    """Do the fitting operation.
    returns a list of lists (bins) with the files arranged in a bin
      that they fit up to the fitsize.
    arguments:
      fitsize -- bin size in bytes
      filename -- list of files
      restrict_order -- if true: simple algorithm that does not change the
        order of files. currently only 'true' is implemented.
      reject_big -- reject files too big for a bin with an exception
    """
    szlist = []
    for fn in filenames:
        sz = gather_size(fn)

        # reject single files that are larger than the requested size.
        if reject_big and sz > fitsize:
            raise SystemExit(
                "Error: file '%s' is larger than the requested size (%d > %d)" %
                (fn, sz, fitsize))

        szlist.append( (fn, sz) )

    if restrict_order:
        # only a simple naive algorithm is required.
        return _restrictOrder(fitsize, szlist)
    else:
        return _sortedBestFit(fitsize, szlist)
        raise SystemExit("Error: full algorithm not implemented.  "
                         "Use --restrict in the meantime.")


def complete(parser):
    "Programmable completion support. Script should work without it."
    try:
        import optcomplete
        optcomplete.autocomplete(parser)
    except ImportError:
        pass

def bytesize_cb(option, opt, value, parser):
    "optparse option callback handler for byte sizes."

    ovalue = value
    value = value.lower()
    mult = 1
    if value.endswith('b'): # strip final B for bytes, for all multipliers.
        value = value[:-1]

    if value.endswith('k'):
        value = value[:-1]
        mult = 1024
    elif value.endswith('m'):
        value = value[:-1]
        mult = 1024*1024
    elif value.endswith('g'):
        value = value[:-1]
        mult = 1024*1024*1024

    try:
        value = long( float(value) * mult )
    except ValueError:
        raise parser.error("Error in size specification '%s'." % ovalue)

    setattr(parser.values, option.dest, value)

def main():
    import optparse
    parser = optparse.OptionParser(__doc__.strip(), version=__version__)

    group = optparse.OptionGroup(parser, "Size",
                                 "Select the size to fit into. "
                                 "Default is CDROM size.")
    group.add_option('-s', '--size',
                     type='string', dest='size',
                     action='callback', callback=bytesize_cb, nargs=1,
                     help="Fit to given size.  Format can be followed "
                     "by K or MB, or GB for convenience.")
    group.add_option('-R', '--reject-big', action='store_true', default=False,
                     help='Reject files too large for the given binsize')
    group.add_option('-c', '--size-cdr-650', '--size-cdr', action='store_const',
                     dest='size', const=long(650*1024*1024),
                     help="Fit to 650MB CD-ROM")
    group.add_option('-7', '--size-cdr-700', action='store_const',
                     dest='size', const=long(700*1024*1024),
                     help="Fit to 700MB CD-ROM")
    group.add_option('-d', '--size-dvd', action='store_const',
                     dest='size', const=long(4.37*1024*1024*1024),
                     help="Fit to 4.37GB DVD-ROM")
    parser.add_option_group(group)

    parser.add_option('-r', '--restrict-ordering', action='store_true',
                      help="Strictly select the files in the given order.")

    complete(parser)
    opts, args = parser.parse_args()

    if not args:
        raise parser.error("No files to fit.")

    if not opts.size:
        opts.size = long(650*1024*1024)
        print >> sys.stderr, \
              'Warning: no size specified, using 650MB CD-ROM size.'
    assert type(opts.size) is type(long(1))

    buckets = fit(opts.size, args, opts.restrict_ordering, reject_big=opts.reject_big)
    for bucket in buckets:
        bucksz = 0
        for fn, sz in bucket:
            bucksz += sz
            print "'%s'" % fn,
        print

if __name__ == '__main__':
    main()