#!/usr/bin/env python
"""
Find some jars and start a Clojure VM with them.

This is meant to be a simpler, more transparent and faster running
alternative to Leiningen's Clojure startup commands. The assumption is
that you have a few directories that contain all the jars that you
need to run, and you just want to start a Clojure VM with those.
Leiningen has been useful to the community, but unfortunately has
multiple problems:

- it downloads a lot of files from the internet, much of which we
  don't really need

- is very slow to start, by virtue of the fact that it is itself
  written in Clojure, and occasionally gets into a funked state.

- installs separate copies/caches of files in a private repo, which
  sometimes gets borked (and the recovery involves deleting the local
  repo and reinstalling)

- is not able to work offline properly (2012-03-31, I'm writing this
  on the train, pissed off I can't work on my project without internet
  access)

This script...

- Does not download anything for you: you download your jars yourself,
  you build them, resolve dependencies, put them wherever you like; I
  assume you know better. I'm not attempting to solve that problem, I
  just wanna run Clojure without unexpected magic. (However, the
  chasing of dependencies is a problem in Java, in general, which
  needs resolving, and that can be made independent of this script).

- Given a list of directories, it finds all the jar files anywhere
  under them. If multiple versions of a library are found, it selects
  the highest versioned one. The script ignores duplicates (i.e. you're
  allowed to be a little messy, real life is sometimes messy). Also,
  you can specify jar files directly.

- It also tries to identify projects with some .clj files in them and
  if the directory names under the .clj files correspond to the
  namespace directories, it automatically adds their source directories
  to the classpath. This works for the typical Leiningen project
  hierarchies and Clojure core projects.

- Finally, it starts a JVM with those libraries, invoking a REPL or a
  Swank for you, with debugging options if you want. It supports
  options for cdt.  You can also run as a main program.

The script is very transparent about what it does (use -v or -vv to
have it print full details), there's no hidden magic (in particular,
no Maven or Ant stuff). You'll probably enjoy using this if you're not
coming to Clojure from a Java background.

Note: this is not an attempt to replace the full functionality of
Leiningen, but an attempt to make running Clojure _very_ easy (when
you've already fetched all the jars you need).

To run a REPL, invoke one of these::

  streamlined [DIR] [DIR ...]  --repl
  streamlined [DIR] [DIR ...]  --swank
  streamlined [DIR] [DIR ...]  --nrepl

To start a main program, try this::

  streamlined [DIR] [DIR ...]  --main [NAMESPACE] -- [ARG1] [ARG2 ...]

"""
__author__ = 'Martin Blais <blais@furius.ca>'
__copyright__ = 'BSD License'

import sys, os, re, logging
from collections import defaultdict
from os.path import *
from subprocess import call


# Names of version-control metadata directories that are never searched.
_vcremove = '.svn', '.hg', '.git'

def find_files_regexp(roots, regexp):
    """Yield the absolute filenames under the roots which match the given regexp.

    Args:
      roots: An iterable of paths. A path that names a regular file is tested
        as-is (the whole path string is matched against the pattern). Any
        other path is walked recursively and the base name of each file found
        is matched against the pattern. Paths that do not exist yield nothing.
      regexp: A pattern string, or an already compiled regular expression
        object. It is applied with match(), i.e. anchored at the start.
    Yields:
      The absolute path of every matching file.
    """
    # Anything that already has a match() method is taken to be a compiled
    # pattern; plain strings (of any string type) do not have one.
    mre = regexp if hasattr(regexp, 'match') else re.compile(regexp)
    for root_ in roots:
        if isfile(root_):
            if mre.match(root_):
                yield abspath(root_)
            continue
        for root, dirs, files in os.walk(root_):
            # Prune in place, so that os.walk() does not descend into the
            # version-control directories.
            dirs[:] = [dn for dn in dirs if dn not in _vcremove]
            for fn in files:
                if mre.match(fn):
                    # 'root' as reported by os.walk() already starts with
                    # 'root_'; joining 'root_' again would duplicate it
                    # whenever a relative root is given.
                    yield abspath(join(root, fn))

def identify(fn):
    """Given a full jar filename, return the name and version of the library.

    The version is a dash-introduced run of digits and dots in the base name;
    whatever surrounds it (e.g. a '-sources' suffix) is glued together to form
    the name, so that 'foo-1.2-sources.jar' is identified as
    ('foo-sources', '1.2').

    Args:
      fn: A filename ending in '.jar'; only its base name is looked at.
    Returns:
      A (name, version) pair of strings. The version is None if the filename
      carries no version number.
    Raises:
      ValueError: If the filename does not end in '.jar'.
    """
    base = basename(fn)
    patterns = (
        # Original rule, tried first so that every name that was versioned
        # before is still split in exactly the same way.
        r'([^.]*)-([0-9.]+)(.*)\.jar$',
        # Fallback for library names which contain dots themselves, e.g.
        # 'tools.nrepl-0.2.0.jar'; here the version has to start with a digit.
        r'(.*?)-([0-9][0-9.]*)(.*)\.jar$',
    )
    for pattern in patterns:
        mo = re.match(pattern, base)
        if mo:
            return ''.join(mo.group(1, 3)), mo.group(2)

    mo = re.match(r'(.*)\.jar$', base)
    if mo is None:
        # An explicit error: an assert would vanish when running with -O.
        raise ValueError("Not a jar filename: '%s'" % fn)
    return mo.group(1), None

class JarFile(object):
    """A struct holding what is known about one jar file that was found."""

    def __init__(self, fn):
        """Gather the details of the jar file 'fn', which must exist on disk."""
        self.filename = fn
        # Library name and version string (or None), derived from the filename.
        self.name, self.version = identify(fn)
        self.mtime = getmtime(fn)
        self.size = getsize(fn)
        # The reason why this file must not be selected, or None if it may.
        self.ignored = None

    @staticmethod
    def key(self):
        """Return a sort key which orders jar files from lowest to highest version.

        Versions are compared numerically, component by component, so that
        '1.10' ranks above '1.9'. A jar without a version ranks above all the
        versioned ones. Ties are broken by modification time, newest last.
        """
        if self.version is None:
            return (1, (), self.mtime)
        # Empty components (from doubled dots) are skipped.
        numbers = tuple(int(part) for part in self.version.split('.')
                        if part.isdigit())
        return (0, numbers, self.mtime)

def find_jars(searchdirs):
    """Find all the jar files and select the desired versions.

    Args:
      searchdirs: A list of directories to search and/or jar filenames.
    Returns:
      A pair of
        jarmap: a dict of library name to the list of JarFile objects found
          for it, sorted with JarFile.key (the preferred one comes last);
        selected: the list of filenames to put on the classpath, one per
          library, ordered by library name.
    """
    jarre = re.compile(r'.*\.jar$')

    # Find all the jars of all the versions, grouped by library name.
    jarmap = defaultdict(list)
    for fn in find_files_regexp(searchdirs, jarre):
        jf = JarFile(fn)
        jarmap[jf.name].append(jf)
    jarmap = dict((name, sorted(foundlist, key=JarFile.key))
                  for (name, foundlist) in jarmap.items())

    for foundlist in jarmap.values():
        # Identify all the duplicate files: among the files of one library,
        # files of equal size are taken to be copies of each other and only
        # the most preferred of them is kept.
        seen_sizes = set()
        for jf in reversed(foundlist):
            if jf.size in seen_sizes:
                jf.ignored = 'duplicate'
            else:
                seen_sizes.add(jf.size)

        # Identify all the source-only files. The '-sources' suffix ends up in
        # the name: the version only ever consists of digits and dots, so
        # testing the version for it could never succeed.
        for jf in foundlist:
            if jf.name.endswith('-sources'):
                jf.ignored = 'sourcejar'

    # Select the versions we need. We pick the highest numbered version that is
    # not being ignored.
    selected = []
    for name, foundlist in sorted(jarmap.items()):
        for jf in reversed(foundlist):
            if jf.ignored is None:
                selected.append(jf.filename)
                break
        else:
            if all(jf.ignored == 'sourcejar' for jf in foundlist):
                # Leaving source jars out is expected, it is not a problem.
                logging.info("Ignoring source-only jar '%s'" % name)
            else:
                logging.warning("Could not find a suitable version for '%s'" % name)

    return jarmap, selected


def find_clojure(searchdirs, verbose):
    """Find all the Clojure files and associated root directories to add to the classpath.

    For every .clj/.cljs file found, the beginning of the file is searched for
    an (ns ...) form which names a namespace that corresponds to the trailing
    components of the file's path (with '_' in the path standing for '-' in
    the namespace). The directory above those components is a source root.

    Args:
      searchdirs: A list of directories to search and/or filenames.
      verbose: An integer verbosity level; at 2 or more, files in which no
        matching namespace is found are reported.
    Returns:
      A dict of source root directory to the list of namespaces found under
      it. Roots which look like test, samples or examples directories are
      left out.
    """
    cljre = re.compile(r'[^.].*\.(clj|cljs)$')

    # Find all the Clojure source files and the namespaces they declare.
    cljmap = defaultdict(list)
    for fn in find_files_regexp(searchdirs, cljre):
        fn = abspath(fn)

        # Only the top of the file is read; the file is closed right away.
        with open(fn) as infile:
            topoffile = infile.read(4096)

        # The path components, as they are on disk, and their namespace
        # spelling. The two are kept apart: the root directory has to be built
        # from the real names, or a parent directory with an underscore in its
        # name would yield a path that does not exist.
        pathparts = [c for c in splitext(fn)[0].split(os.sep) if c]
        nsparts = [re.escape(c.replace('_', '-')) for c in pathparts]

        # Attempt to find a namespace specification of the top directories in
        # it, trying the longest candidate namespace first.
        for i in range(len(nsparts) - 1):
            mre = r'\(ns.*[ \t]\b(%s)\b' % r'\.'.join(nsparts[i:])
            mo = re.search(mre, topoffile, re.DOTALL | re.M)
            if mo:
                namespace = mo.group(1)
                rootdir = os.sep + os.sep.join(pathparts[:i])
                break
        else:
            if verbose >= 2:
                logging.warning("Could not find namespace in '%s'" % fn)
            continue

        cljmap[rootdir].append(namespace)

    # Remove test directories, we won't need to include them.
    return dict((rootdir, namespaces)
                for (rootdir, namespaces) in cljmap.items()
                if not re.search(r'/(test|samples|examples)\b', rootdir))


# Bootstrap Clojure code for the VM.
#
# This is a template: main() flattens it to a single line and then substitutes
# the upper-case placeholders with str.replace():
#   CLASSPATH           the classpath string that is also passed to the JVM
#   WARN_ON_REFLECTION  'true' if --debug was given, 'nil' otherwise
#   STARTCODE           one of the _repl, _swank or _nrepl snippets below
# It is only used for the REPL, Swank and nREPL startup modes; with --main and
# --init main() does not pass this code to the JVM.
_bootstrap = """
(do
(def ^:dynamic *classpath* "CLASSPATH")
(set! *warn-on-reflection* WARN_ON_REFLECTION)
STARTCODE
)
"""

# Startup code for a plain REPL; main() uses this when no other mode is selected.
_repl = """
(do (clojure.core/require 'clojure.main) (clojure.main/repl))
"""

# Startup code for a Swank server; host and port (4005) are fixed here.
# Note: the port-file argument is gone in more recent versions of swank-clojure.
_swank = """
(do
  (clojure.core/require 'swank.swank)
  (import '[java.io File])
  (swank.swank/start-server :port 4005 :host \"localhost\")
  )
"""

# Startup code for an nREPL server; the port (4006) is fixed here.
_nrepl = """
(do
  (clojure.core/require 'clojure.tools.nrepl.server)
  (clojure.tools.nrepl.server/start-server :port 4006)
  )
"""
## (.get (first (clojure.tools.nrepl/start-server 4006))) ;; Blocking version.


def _make_option_parser():
    "Build the command-line parser; the module docstring serves as usage text."
    import optparse
    parser = optparse.OptionParser(__doc__.strip())

    parser.add_option('-v', '--verbose', action='count', default=0,
        help="Output all the gory details of what we're doing.")

    parser.add_option('-j', '--interpreter', action='store', default='java',
                      help="Specify a non-default Java executable to run.")

    parser.add_option('-c', '--classpath', action='append', default=[], metavar='PATH',
        help="Specify an additional path for the classpath option.")

    parser.add_option('-l', '--libpath', action='append', default=[], metavar='PATH',
        help="Specify an additional C library path for the JVM (not classpath).")

    # FIXME: add options for adding custom Java option, e.g. large stack size.
    # -Xss8192k

    parser.add_option('-g', '--debug', action='store_true',
        help="Enable development options (e.g. warn-on-reflection, CDT)")

    group = optparse.OptionGroup(parser, "Command set to run on startup.")
    group.add_option('--main', action='store', help=(
        "Run a clojure '-main' function from the given module name. "
        "Command-line arguments are provided after a -- argument."))
    group.add_option('-i', '--init', action='store', help=(
        "Load a file or resource (same as -i on clojure main)."))
    group.add_option('--repl', action='store_true', help="Start with a basic REPL.")
    group.add_option('--swank', action='store_true', help="Start with Swank server.")
    group.add_option('--nrepl', action='store_true', help="Start with nREPL server.")
    parser.add_option_group(group)
    return parser


def _split_clojure_args(argv):
    "Split the arguments at the first '--'; return (our arguments, Clojure's arguments)."
    if '--' not in argv:
        return argv, []
    i = argv.index('--')
    return argv[:i], argv[(i+1):]


def _clojure_main_options(opts, cljargs, classpath):
    "Return the list of arguments for clojure.main that selects what to run on startup."
    if opts.main:
        return ['-m', opts.main] + cljargs
    if opts.init:
        return ['-i', opts.init] + cljargs

    if opts.swank:
        startcode = _swank
    elif opts.nrepl:
        # Note: nrepl's start-server does not block.
        startcode = _nrepl
    else:
        # --repl, which is also the default.
        startcode = _repl

    # Build up some code to run at startup. The start code goes in last, so
    # that the other substitutions do not apply to it.
    lisp = _bootstrap.replace('\n', ' ')
    lisp = lisp.replace('CLASSPATH', classpath)
    lisp = lisp.replace('WARN_ON_REFLECTION', 'true' if opts.debug else 'nil')
    lisp = lisp.replace('STARTCODE', startcode)
    return ['-e', lisp]


def _jvm_options(opts):
    "Return the list of options for the JVM itself (debugging, native library path)."
    cmdopts = []
    if opts.debug:
        cmdopts.append('-Xdebug')
        cmdopts.append('-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=8030')
    if opts.libpath:
        # A single property holding all the paths: were the property given
        # once per path, only the last one would be in effect.
        cmdopts.append('-Djava.library.path=%s' % os.pathsep.join(opts.libpath))
    return cmdopts


def main():
    """Parse the command-line, find the jars and sources, and run the JVM.

    The positional arguments are the directories and files to search. The
    arguments that follow a '--' are handed over to the Clojure program when
    --main or --init is used.

    Returns:
      The exit status of the JVM, or 127 if the Java executable could not be
      started at all.
    """
    parser = _make_option_parser()

    # Split out arguments after the --, for main.
    rargs, cljargs = _split_clojure_args(sys.argv[1:])

    opts, args = parser.parse_args(args=rargs)
    logging.basicConfig(level=logging.INFO if opts.verbose else logging.WARNING,
                        format='%(levelname)-8s: %(message)s')

    # Pre-process the search directories.
    searchdirs = sorted(abspath(x) for x in args)
    if opts.verbose >= 2:
        print("Search directories and files:")
        print('')
        for dn in searchdirs:
            print('   %s' % dn)
        print('')

    # Check that all the explicitly specified files are there.
    for fn in searchdirs:
        if not isdir(fn) and not isfile(fn):
            logging.warning("File '%s' does not exist." % fn)

    # Find all the jar files and select the versions we want.
    jarmap, classpath = find_jars(searchdirs)
    classpath_set = set(classpath)

    if opts.verbose >= 2:
        print("Jar files found:")
        print('')
        for name, foundlist in sorted(jarmap.items()):
            print('  %s' % name)
            for jf in foundlist:
                print('    %1s  %-32s : %10s : %s    %s' % (
                    '*' if jf.filename in classpath_set else '',
                    jf.version,
                    jf.size,
                    jf.filename,
                    '(%s)' % jf.ignored.upper() if jf.ignored else ''))
            print('')

    # Find all the Clojure source files and the associated root directories.
    classpath_clj = find_clojure(searchdirs, opts.verbose)
    classpath.extend(sorted(classpath_clj))

    if opts.verbose >= 2:
        print("Clojure paths found:")
        print('')
        for rootdir, namespaces in sorted(classpath_clj.items()):
            print('  %s' % rootdir)
            for ns in namespaces:
                print('    %s' % ns)
            print('')

    # Add the explicitly specified paths to the classpath.
    classpath.extend(opts.classpath)
    if opts.verbose >= 1:
        print("Classpath:")
        print('')
        for fn in classpath:
            print('  %s' % fn)
        print('')

    # The JVM expects the platform's path separator between the entries.
    classpath = os.pathsep.join(classpath)
    cljmainopts = _clojure_main_options(opts, cljargs, classpath)

    # Finally, start the JVM.
    cmd = ([opts.interpreter] + _jvm_options(opts) +
           ['-classpath', classpath, 'clojure.main'] + cljmainopts)
    # Note: do we need this?: -Dclojure.compile.path=/home/blais/s/merced/classes
    if opts.verbose >= 2:
        print('Command:')
        print('')
        print('     %s' % ' '.join(cmd))
        print('')
    try:
        r = call(cmd)
    except OSError as exc:
        # Typically: the Java executable is not there or cannot be run.
        logging.error("Error starting Java; %s" % exc)
        return 127
    if r != 0:
        logging.error("Error starting Java; %s" % r)
    return r


if __name__ == '__main__':
    # Hand the status of the JVM over to the calling shell.
    sys.exit(main())
