#!/usr/bin/env python
#******************************************************************************\
#* Copyright (C) 2004 Martin Blais <blais@furius.ca>
#*
#* This program is free software; you can redistribute it and/or modify
#* it under the terms of the GNU General Public License as published by
#* the Free Software Foundation; either version 2 of the License, or
#* (at your option) any later version.
#*
#* This program is distributed in the hope that it will be useful,
#* but WITHOUT ANY WARRANTY; without even the implied warranty of
#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#* GNU General Public License for more details.
#*
#* You should have received a copy of the GNU General Public License
#* along with this program; if not, write to the Free Software
#* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#*
#*****************************************************************************/

"""rss-to-mail [<options>] <url> <email>

Fetch a RSS file from the internet, parse it, remove items that have already
been processed by this script before (they are recorded to a file each time) and
send the other/new issues by email.

This is meant to be a bridge between RSS and email publishing, to be run from a
cron.  The author likes to receive and accumulate some news sources by email.
This makes it easy to accumulate news from syndicated sources in a folder.

"""

# This is my crontab entry:
# 17 5,13,18 * * * PYTHONPATH=$HOME/p/conf/lib/python \
#      $HOME/p/conf/bin/rss-to-mail --host=smtp1.qc.sympatico.ca \
#      -f blais@furius.ca http://stuff.vandervossen.net/feeds/dailypython.rss blais@furius.ca \
#      >> rss-to-mail.log 2>&1

__version__ = "Revision: 1.6 "
__author__ = "Martin Blais <blais@furius.ca>"
__depends__ = ['Python-2.3', 'RSS.py']
__copyright__ = """Copyright (C) 2003-2004 Martin Blais <blais@furius.ca>.
This code is distributed under the terms of the GNU General Public License."""

# Note: the following code assumes that item URLs are unique within each RSS
# feed: an item is recognized as already sent by its feed URL plus item URL.


import sys, os
from os.path import *
import pwd
import textwrap
import datetime
import StringIO

from RSS import ns, TrackingChannel


# Default history file, kept directly under the user's home directory.  The
# HOME environment variable must be set, otherwise this fails at import time.
def_history_file = os.path.join(os.environ['HOME'], '.rsstomail_history')


# Keys used to look up the title and description in an item's data.
RSS10_TITLE = (ns.rss10, 'title')
RSS10_DESC = (ns.rss10, 'description')

# Shared wrapper for item descriptions: 60 columns, every line indented by
# three spaces.
wrapper = textwrap.TextWrapper(
    width=60, initial_indent=' ' * 3, subsequent_indent=' ' * 3)

def format_item(url, item_data):

    """Render a single RSS item as a plain-text section of the email body.

    'url' is the item's link and 'item_data' is the mapping holding the
    item's fields.  The section always contains a separator line, the title
    ("(none)" when the item has no title) and the URL.  The description,
    re-wrapped with the module-level 'wrapper', is appended only when it is
    present and non-empty.  Returns the section as one string.
    """

    title = item_data.get(RSS10_TITLE, "(none)")
    header = """============================================================
Title:  %s
URL:    %s
""" % (title, url)

    # Each chunk is followed by a newline of its own, which leaves a blank
    # line after the header and after the "Description:" label.
    chunks = [header, "\n"]

    desc = item_data.get(RSS10_DESC, None)
    if desc:
        label = """
Description:
"""
        chunks.extend([label, "\n", wrapper.fill(desc), "\n"])

    return "".join(chunks)

def format_histline(url, item_url):

    """Build the history entry that identifies one item of one feed.

    The entry is the feed URL and the item URL separated by ' @@ '.
    """

    fields = (url, item_url)
    return '%s @@ %s' % fields

def send_mail(subject, text, toaddr, host, fromaddr):

    """Send 'text' as a text/plain email through the SMTP server 'host'.

    'subject', 'fromaddr' and 'toaddr' fill the Subject, From and To headers
    of the message; 'fromaddr' and 'toaddr' are also handed to the server as
    the sender and the single recipient.

    Errors raised while connecting or sending are not caught here and
    propagate to the caller.  Once the connection has been opened it is
    closed again whether or not the send succeeds.
    """

    import smtplib
    from email.MIMEText import MIMEText

    # Create a text/plain message and fill in its headers.
    msg = MIMEText(text)
    msg['Subject'] = subject
    msg['From'] = fromaddr
    msg['To'] = toaddr

    # Giving the host to the constructor already opens the connection.  An
    # additional explicit s.connect() is not needed, and was found not to
    # work with the sympatico server.
    s = smtplib.SMTP(host)
    try:
        s.sendmail(fromaddr, [toaddr], msg.as_string())
    finally:
        # Do not leak the socket when sendmail() raises.
        s.close()
    

def main():
    import optparse
    parser = optparse.OptionParser(__doc__.strip(), version=__version__)

    parser.add_option('-H', '--history-file', action='store_true',
                      default=def_history_file,
                      help="Specify history file to use "
                      "(default: %s)" % def_history_file)
    parser.add_option('-a', '--all', '--no-history', action='store_true',
                      help="Send all items, do check nor write history")

    parser.add_option('--host', action='store',
                      help="hostname of smtp server to connect to.")
    parser.add_option('-f', '--from-address', action='store',
                      help="from field to use when sending the mail.")

    parser.add_option('-o', '--output-text', action='store_true',
                      help="Print out the email body on stdout.")

    opts, args = parser.parse_args()

    if len(args) != 2:
        raise parser.error("Error: you must specify a URL and email address.")
    url, emailaddr = args

    if not opts.host:
        try:
            opts.host = os.environ['SMTPSERVER']
        except KeyError:
            opts.host = 'localhost'
        print 'Using host:', opts.host

    if not opts.from_address:
        try:
            opts.from_address = os.environ['EMAIL']
        except KeyError:
            p = pwd.getpwuid(os.geteuid())
            user, name = p[0], p[4]
            opts.from_address = '%s <%s@%s>' % (name, user, 'localhost')
        print 'Using from-address:', opts.from_address

    #
    # Fetch and parse the RSS file.
    #
    #Create a tracking channel, which is a data structure that
    #Indexes RSS data by item URL
    tc = TrackingChannel()

    #Returns the RSSParser instance used, which can usually be ignored
    tc.parse(url)

    #
    # Read history file.
    #
    history = {}
    if exists(opts.history_file) and not opts.all:
        try:
            lines = open(opts.history_file, 'r').readlines()
            # use a hashmap for quick checking
            for l in lines:
                history[l.strip()] = True
        except IOError, e:
            raise SystemExit("Error: reading history file (%s)" % str(e))

    #
    # Choose only those items that have not already been seen.
    #
    valid_items = []
    for item in tc.listItems():
        #Each item is a (url, order_index) tuple
        histline = format_histline(url, item[0])
        if histline not in history:
            valid_items.append(item)

            # Update history.
            history[histline] = True

    # If nothing to do, exit.
    if len(valid_items) == 0:
        print "No items to send, not sending email."
        sys.exit(0)

    #
    # Format output text.
    #
    channelMD = tc.getMD((ns.rss10, "channel"))
    rss_title = channelMD[(ns.rss10, 'title')]
    rss_link = channelMD[(ns.rss10, 'link')]
    datestr = str(datetime.datetime.now().date())

    subject = "[RSS Update -- %s] %s" % (datestr, rss_title)

    text =  """

RSS Update for: %s
RSS URL:        %s
Date:           %s

""" % (rss_title, rss_link, datestr)

    for item in valid_items:

        #Get all the data for the item as a Python dictionary
        item_data = tc.getItem(item)

        text += format_item(item[0], item_data)

    #
    # Send the email, this does not return upon failure.
    #
    send_mail(subject, text, emailaddr, opts.host, opts.from_address)

    #
    # Write out new history file after succesfully sending the email notice.
    #
    if not opts.all:
        try:
            f = open(opts.history_file, 'w')
            for histline in history.keys():
                print >> f, histline
        except IOError, e:
            raise SystemExit("Error: writing history file (%s)" % str(e))

    print text

# Run the command-line program only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
