#!/usr/bin/env python
#******************************************************************************\
#* Copyright (C) 2003-2004 Martin Blais <blais@furius.ca>
#*
#* This program is free software; you can redistribute it and/or modify
#* it under the terms of the GNU General Public License as published by
#* the Free Software Foundation; either version 2 of the License, or
#* (at your option) any later version.
#*
#* This program is distributed in the hope that it will be useful,
#* but WITHOUT ANY WARRANTY; without even the implied warranty of
#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#* GNU General Public License for more details.
#*
#* You should have received a copy of the GNU General Public License
#* along with this program; if not, write to the Free Software
#* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#*
#*****************************************************************************/

"""random-tree-cp [<options>] <output-dir> [<in-file> ...]

Copy the given files in the specified directory, creating a random directory
structure in the output directory to place the files in.  The filename list can
be read from stdin.  This can be used to generate various test directories.

"""

__version__ = "Revision: 1.11 "
__author__ = "Martin Blais <blais@furius.ca>"

# stdlib imports
import os, sys, string
from os.path import *
import random
import shutil

letset = string.ascii_lowercase + string.digits

def ranname():
    """Return a random 5-character name.

    Each character is drawn independently and uniformly from the module-level
    `letset` alphabet, using the global `random` generator.  The names are
    used for the directories created in the output tree and as suffixes to
    disambiguate clashing file names.
    """
    # One random.random() draw per character; the index is always in
    # [0, len(letset)) because random.random() is in [0.0, 1.0).
    return "".join([letset[int(random.random() * len(letset))]
                    for _ in range(5)])

## #----------------------------------------------------------------------------
## #
## def rrcopy(f, dir):
##     r = random.random()
##     files = dircache.listdir( dir )
##     if r < opts.probability * ( 1 - float(len(files))/nbtotal ):
##         # keep in directory
##         try:
##             shutil.copy( f, dir )
##         except OSError:
##             sys.stderr.write( "Error: copying file " + f + "\n" )
##     else:
##         # go down
##         dirs = []
##         for x in files:
##             if isdir( join( dir, x ) ):
##                 dirs += [ x ]
##         if len( dirs ) == 0:
##             # create new directory and store
##             nn = join( dir, randirname() )
##             os.mkdir( nn )
##             shutil.copy( f, nn )
##         else:
##             dr = int( random.random() * len(dirs) )
##             dd = join( dir, dirs[dr] )
##             rrcopy( f, dd )



def _clear_directory(path):
    """Remove everything below `path`, leaving `path` itself in place."""
    # Walk bottom-up so that each directory is already empty when removed.
    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            os.remove(join(root, name))
        for name in dirs:
            full = join(root, name)
            # os.walk lists symlinks to directories under 'dirs' (without
            # descending into them); those must be unlinked, not rmdir'ed.
            if islink(full):
                os.remove(full)
            else:
                os.rmdir(full)


def _pick_directory(outdirs, target):
    """Pick one directory name from `outdirs`, favouring the emptiest ones.

    `outdirs` maps a directory name to its current number of entries.  Each
    directory is weighted by how far it is below `target` entries; directories
    at or above the target get weight zero.  If every directory is full, the
    choice falls back to a uniform pick so that copying can continue.
    """
    weighted = [(dn, max(0, target - entries))
                for dn, entries in outdirs.items()]
    total = sum([weight for _, weight in weighted])
    if total <= 0:
        return random.choice([dn for dn, _ in weighted])

    r = random.randint(0, total - 1)
    cumul = 0
    for dn, weight in weighted:
        cumul += weight
        if r < cumul:
            return dn
    # r < total == final cumul, so the loop above always returns.
    raise AssertionError("weighted directory selection failed")


def main():
    """Command-line entry point.

    Parses the options, prepares the destination directory (optionally
    emptying it with --delete), reads the list of input files from the
    command line or from stdin, then copies a random sample of at most
    --nb-files of them into the destination, creating randomly named
    sub-directories along the way with probability --create-probability.

    The parsed options are stored in the module-level global `opts`.
    Exits via SystemExit on usage errors or if the destination is not a
    directory.
    """
    import optparse
    parser = optparse.OptionParser(__doc__.strip(), version=__version__)

    parser.add_option('-n', '--nb-files', action='store', type='int',
                      default=30,
                      help="Total number of files to copy.")
    parser.add_option('-t', '--target', action='store', type='int',
                      default=5,
                      help="Target number of entries per created directory")

    parser.add_option('-k', '--create-probability', action='store',
                      type='float', default=0.8,
                      help="""'k' factor, probability of creating new dirs.""")

    parser.add_option('--delete', action='store_true',
                      help="Delete destination directory before filling in.")

    parser.add_option('-v', '--verbose', action='store_true',
                      help="Verbose output.")

    global opts
    opts, args = parser.parse_args()

    if not args:
        # parser.error() prints the message and exits by itself.
        parser.error("Error: requires destination directory.")

    dest = normpath(args[0])

    if opts.delete and exists(dest):
        if opts.verbose:
            print('Delete output directory %s' % dest)
        _clear_directory(dest)

    if not exists(dest):
        if opts.verbose:
            print('Creating output directory %s' % dest)
        os.makedirs(dest)
    if not isdir(dest):
        raise SystemExit("Error: destination must be a directory.")

    if len(args) > 1:
        infiles = args[1:]
    else:
        infiles = [line.strip() for line in sys.stdin]

    # remove dirs from input files, and restrict to existing files only
    infiles = [x for x in infiles if isfile(x)]

    # we implement an algorithm that tries to balance the number of files
    # equally among the created directories.
    outdirs = {}
    for root, dirs, files in os.walk(dest):
        outdirs[root] = len(dirs) + len(files)
    # Make sure the destination itself is always a candidate.
    outdirs.setdefault(dest, 0)

    # Never ask for more files than are available (random.sample would raise).
    nb_files = max(0, min(opts.nb_files, len(infiles)))

    for infn in random.sample(infiles, nb_files):
        # sample a directory according to the cumulative probability that is
        # inverse to the target number of entries in each directory.
        d = _pick_directory(outdirs, opts.target)

        if random.random() < opts.create_probability:
            parent = d
            d = join(parent, ranname())
            # Draw again in the (unlikely) event that the name is taken.
            while exists(d):
                d = join(parent, ranname())

            if opts.verbose:
                print('Creating directory %s' % d)
            os.mkdir(d)
            outdirs[d] = 0
            # The new sub-directory is itself an entry of its parent.
            outdirs[parent] += 1

        dfn = join(d, basename(infn))
        while exists(dfn):
            dfn = '%s.%s' % (dfn, ranname())
        if opts.verbose:
            print('Copying file "%s" to "%s"' % (infn, dfn))
        shutil.copy(infn, dfn)

        outdirs[d] += 1


if __name__ == '__main__':
    main()
