#!/usr/bin/env python #******************************************************************************\ #* Copyright (C) 2003-2004 Martin Blais #* #* This program is free software; you can redistribute it and/or modify #* it under the terms of the GNU General Public License as published by #* the Free Software Foundation; either version 2 of the License, or #* (at your option) any later version. #* #* This program is distributed in the hope that it will be useful, #* but WITHOUT ANY WARRANTY; without even the implied warranty of #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #* GNU General Public License for more details. #* #* You should have received a copy of the GNU General Public License #* along with this program; if not, write to the Free Software #* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #* #*****************************************************************************/ """random-tree-cp [] [ ...] Copy the given files in the specified directory, creating a random directory structure in the output directory to place the files in. The filename list can be read from stdin. This can be used to generate various test directories. """ __version__ = "Revision: 1.11 " __author__ = "Martin Blais " # stdlib imports import os, sys, string from os.path import * import random import shutil letset = string.ascii_lowercase + string.digits def ranname(): name = "" for i in range( 0, 5 ): r = int( random.random() * len(letset) ) name += letset[r] return name ## #---------------------------------------------------------------------------- ## # ## def rrcopy(f, dir): ## r = random.random() ## files = dircache.listdir( dir ) ## if r < opts.probability * ( 1 - float(len(files))/nbtotal ): ## # keep in directory ## try: ## shutil.copy( f, dir ) ## except OSError: ## sys.stderr.write( "Error: copying file " + f + "\n" ) ## else: ## # go down ## dirs = [] ## for x in files: ## if isdir( join( dir, x ) ): ## dirs += [ x ] ## if len( dirs ) == 0: ## # create new directory and store ## nn = join( dir, randirname() ) ## os.mkdir( nn ) ## shutil.copy( f, nn ) ## else: ## dr = int( random.random() * len(dirs) ) ## dd = join( dir, dirs[dr] ) ## rrcopy( f, dd ) def main(): import optparse parser = optparse.OptionParser(__doc__.strip(), version=__version__) parser.add_option('-n', '--nb-files', action='store', type='int', default=30, help="Total number of files to copy.") parser.add_option('-t', '--target', action='store', type='int', default=5, help="Target number of entries per created directory") parser.add_option('-k', '--create-probability', action='store', type='float', default=0.8, help="""'k' factor, probability of creating new dirs.""") parser.add_option('--delete', action='store_true', help="Delete destination directory before filling in.") parser.add_option('-v', '--verbose', action='store_true', help="Verbose output.") global opts; opts, args = parser.parse_args() if len(args) == 0: raise parser.error("Error: requires destination directory.") dest = normpath(args[0]) if opts.delete and exists(dest): if opts.verbose: print 'Delete output directory %s' % dest for root, dirs, files in os.walk(dest, topdown=False): for name in files: os.remove(os.path.join(root, name)) for name in dirs: os.rmdir(os.path.join(root, name)) if not exists(dest): if opts.verbose: print 'Creating output directory %s' % dest os.makedirs(dest) if not isdir(dest): raise SystemExit("Error: destination must be a directory.") if len(args) > 1: infiles = args[1:] else: infiles = map(str.strip, sys.stdin.readlines()) # remove dirs from input files, and restrict to existing files only infiles = [x for x in infiles if exists(x) and isfile(x)] # we implement an algorithm that tries to balance the number of files # equally among the created directories. outdirs = {} for root, dirs, files in os.walk(dest): entries = len(dirs) + len(files) outdirs[root] = entries for n, infn in enumerate(random.sample(infiles, opts.nb_files)): # sample a directory according to the cumulative probability that is # inverse to the target number of entries in each directory. totpdf = 0 for entries in outdirs.itervalues(): totpdf += opts.target - entries r = random.randint(0, totpdf - 1) s = 0 for dn, entries in outdirs.iteritems(): s += opts.target - entries if r < s: d = dn break else: assert False if random.random() < opts.create_probability: d = join(d, ranname()) outdirs[d] = 0 if opts.verbose: print 'Creating directory', d os.mkdir(d) dfn = join(d, basename(infn)) while exists(dfn): dfn = '%s.%s' % (dfn, ranname()) if opts.verbose: print 'Copying file "%s" to "%s"' % (infn, dfn) shutil.copy(infn, dfn) outdirs[d] += 1 if __name__ == '__main__': main()