#!/usr/bin/env python
# vim: set ts=8 sw=4 sts=4 et ai:
# Walter Doekes 2011-01-21, OSSO B.V.
import sys, time
from MySQLdb import connect
from datetime import date, datetime, timedelta
from decimal import Decimal


SLEEP_TIME = 0.001 # sleep this many seconds every iteration to ease server load


def dump2csv(db_connection, table_name, date_column, until_date, dest_file_tpl):
    rows = describe_table(db_connection, table_name)
    first_date = min_column_value(db_connection, table_name, date_column)
    first_date = first_date.replace(minute=0, second=0)

    cursor = db_connection.cursor()
    period = timedelta(seconds=3600)
    last_date = first_date + period
    dest_file = None
    dest_file_name = ''

    while first_date < until_date:
        # Check if we need to open a new file
        if first_date.strftime(dest_file_tpl) != dest_file_name:
            if dest_file:
                dest_file.close()
            dest_file_name = first_date.strftime(dest_file_tpl)
            dest_file = open(dest_file_name, 'w')
            dest_file.write('%s\n' % (','.join(rows),))
            sys.stdout.write('+') ; sys.stdout.flush()

        # Query a bit of data and write to file
        cursor.execute("SELECT * FROM %s WHERE '%s' <= %s AND %s < '%s' ORDER BY %s" % (
            table_name, first_date.strftime('%Y-%m-%d %H:%M:%S'), date_column,
            date_column, last_date.strftime('%Y-%m-%d %H:%M:%S'), date_column,
        ))
        for row in cursor.fetchall():
            dest_file.write('%s\n' % (','.join(to_string(i) for i in row),))

        # Increase dates
        first_date, last_date = last_date, last_date + period
        #sys.stdout.write('.') ; sys.stdout.flush()
        time.sleep(SLEEP_TIME)

def prune(db_connection, table_name, date_column, until_date):
    # Simply use MySQL LIMIT here.. that should work just fine.
    limit = 1000
    cursor = db_connection.cursor()
    sys.stdout.write(' ...pruning') ; sys.stdout.flush()
    while True:
        # DELETE without ORDER BY is much faster than with, but it might not
        # work if you're using replication in certain scenarios. See:
        # http://dev.mysql.com/doc/refman/5.1/en/replication-features-limit.html
        cursor.execute("DELETE FROM %s WHERE %s < '%s' LIMIT %d" % (
            table_name, date_column, until_date.strftime('%Y-%m-%d %H:%M:%S'), limit,
        ))
        #cursor.execute("DELETE FROM %s WHERE %s < '%s' ORDER BY %s LIMIT %d" % (
        #    table_name, date_column, until_date.strftime('%Y-%m-%d %H:%M:%S'), date_column, limit,
        #))
        if cursor.rowcount < limit:
            break
        time.sleep(SLEEP_TIME)

def describe_table(db_connection, table_name):
    cursor = db_connection.cursor()
    cursor.execute('DESCRIBE %s;' % (table_name,))
    ret = []
    for row in cursor.fetchall():
        ret.append(row[0])
    return ret

def min_column_value(db_connection, table_name, column):
    cursor = db_connection.cursor()
    cursor.execute("SELECT MIN(%s) FROM %s WHERE %s >= '1971-01-01';" % (column, table_name, column))
    ret = cursor.fetchall()
    return ret[0][0]

def to_string(data):
    if isinstance(data, str):
        return '"%s"' % (data.replace('"', '""'),)
    if isinstance(data, unicode):
        return '"%s"' % (data.encode('utf-8').replace('"', '""'),)
    if isinstance(data, int) or isinstance(data, long):
        return str(data)
    if isinstance(data, datetime):
        return '"%s"' % (data.strftime('%Y-%m-%d %H:%M:%S'),)
    if isinstance(data, date): # must come after datetime
        return '"%s"' % (data.strftime('%Y-%m-%d'),)
    if isinstance(data, Decimal):
        return str(data)
    if data is None:
        return 'NULL'
    print type(data)
    print data
    assert False


if __name__ == '__main__':
    default_until_date = datetime(2030, 1, 1)
    default_filename = '%s.%s.%%Y%%m.csv'

    if len(sys.argv) == 1:
        print 'Usage: mysql2csv.py [--prune] HOST USER DATABASE TABLE DATE_COLUMN [UNTIL] \\'
        print '                    [FILENAME]'
        print 'Dumps all data from the MySQL table to a CSV-formatted file on stdout IN A NON-'
        print 'LOCKING FASHION. In most cases you can safely use this on a production database'
        print 'to dump old records that you will be pruning later on.'
        print ''
        print 'Supply the necessary HOST/USER/DATABASE to connect to, the password is prompted'
        print 'on stdin. The data from TABLE will be exported ordered by the date from'
        print 'DATE_COLUMN in ascending order.'
        print ''
        print 'The CSV file is rotated by the DATE_COLUMN date if strftime variables are used,'
        print 'the default filename being: %s' % (default_filename % ('DATABASE', 'TABLE'))
        print ''
        print 'The default UNTIL time is 2030-01-01 which should be far enough in the future'
        print 'to get "all" records. (It is used as "exclusive" date, meaning that no records'
        print 'from that date and onwards will be output.)'
        print ''
        print 'WARNING: if you supply --prune as first argument, it will continue to prune the'
        print 'dumped records from the database. Make sure this is really what you want.'
        print ''
        sys.exit(0)

    if sys.argv[1] == '--prune':
        PRUNE_DUMPED_DATA = True
        del sys.argv[1]
    else:
        PRUNE_DUMPED_DATA = False
    host = sys.argv[1]
    user = sys.argv[2]
    database, table, date_column = sys.argv[3], sys.argv[4], sys.argv[5]
    until_date = default_until_date
    if len(sys.argv) > 6:
        until_date = datetime.fromtimestamp(time.mktime(time.strptime(sys.argv[6], '%Y-%m-%d')))
    filename = None # use default, see below
    if len(sys.argv) > 7:
        filename = sys.argv[7]

    print 'MySQL server settings: %s@%s/%s' % (user, host, database)
    print 'Table to dump: %s (ordered by %s, from begin to %s)' % (table, date_column, until_date.strftime('%Y-%m-%d'))
    print 'Filename template: "%s"' % (filename, default_filename % (database, table))[filename is None]
    print 'Enter your MySQL password to begin dumping%s' % ('.', ' AND PRUNING!!!')[PRUNE_DUMPED_DATA]
    import getpass
    password = getpass.getpass()

    to_backup_and_prune = []
    to_backup_and_prune.append((database, table, date_column))

    for item in to_backup_and_prune:
        database, table, date_column = item
        print 'Processing %s.%s (ordered by %s)' % (database, table, date_column)
        conn = connect(host=host, user=user, passwd=password, db=database)
        filetpl = (filename, default_filename % (database, table))[filename is None]
        dump2csv(conn, table, date_column, until_date, filetpl)
        if PRUNE_DUMPED_DATA:
            prune(conn, table, date_column, until_date)
        conn.close()
        print
