#!/usr/bin/env python
"""
Extract the first page of each of the PDF files provided on the cmdline and join
them in a single output file. Use -r (or --last) in order to get the last pages.
"""

import os, tempfile, shutil, re
from subprocess import *
from os.path import *


def pdf_nbpages(fn):
    """Return the number of pages in the PDF file in 'fn'.

    The count is read from the ``Pages:`` line printed by the external
    ``pdfinfo`` program, which must be available on the PATH.

    Raises ValueError if the output of pdfinfo has no ``Pages:`` line (for
    example because pdfinfo could not read the file).
    """
    p = Popen(('pdfinfo', fn), stdout=PIPE, shell=False)
    out, _ = p.communicate()
    # Match on bytes: that is what communicate() returns on Python 3, and it
    # avoids decoding metadata (title, author...) of unknown encoding.
    # Anchoring at the start of a line keeps a metadata value that happens
    # to contain "Pages: <n>" from being mistaken for the page count.
    mo = re.search(br'^Pages:[ \t]+(\d+)', out, re.MULTILINE)
    if mo is None:
        raise ValueError("could not find the page count of %r in the output "
                         "of pdfinfo (exit status %s)" % (fn, p.returncode))
    return int(mo.group(1))


def main():
    """Extract one page from each input PDF and join them in one file.

    The input files are the positional command-line arguments. The first
    page of each is extracted with ``pdfnup`` (the last page when
    -r/-l/--reverse/--last is given) into a temporary directory, and the
    extracted pages are joined with ``pdfjoin`` into the file named by
    -o/--outfile (default 'output.pdf'). Zero-sized input files are skipped.

    Exits with a usage error when no input file is given or when no page
    could be extracted. Raises CalledProcessError when pdfnup or pdfjoin
    exits with a non-zero status.
    """
    import optparse
    # __doc__ is None when docstrings are stripped (python -OO); optparse
    # then falls back to its default usage message.
    usage = __doc__.strip() if __doc__ else None
    parser = optparse.OptionParser(usage)

    # '-l' is accepted as well because the usage text has long advertised it.
    parser.add_option('-r', '-l', '--reverse', '--last', action='store_true',
                      help="Get the last page instead of the first one")

    parser.add_option('-o', '--outfile', action='store',
                      default='output.pdf',
                      help="The name of the output filename.")

    opts, args = parser.parse_args()
    if not args:
        parser.error("no input PDF files given")

    tempdir = tempfile.mkdtemp(prefix='pdfhead.')
    try:
        tmpfiles = []
        for i, fn in enumerate(args):
            if getsize(fn) == 0:
                continue
            p = pdf_nbpages(fn) if opts.reverse else 1

            # Only the base name goes into the temporary file name: a
            # directory component in 'fn' would point into a subdirectory of
            # 'tempdir' that does not exist. The index keeps names unique.
            ofn = join(tempdir, '%04d-%s' % (i, basename(fn)))
            # check_call (unlike an assert) still reports a failing tool
            # when Python runs with -O.
            check_call(('pdfnup', '--nup', '1', '--pages', str(p),
                        '--outfile', ofn, fn),
                       shell=False)
            if exists(ofn):
                tmpfiles.append(ofn)

        if not tmpfiles:
            parser.error("no page could be extracted from the input files")

        check_call(['pdfjoin', '--outfile', opts.outfile] + tmpfiles,
                   shell=False)
    finally:
        shutil.rmtree(tempdir)

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

