#!/usr/bin/env python # vim: set ts=8 sw=4 sts=4 et ai: # Walter Doekes 2011-01-21, OSSO B.V. import sys, time from MySQLdb import connect from datetime import date, datetime, timedelta from decimal import Decimal SLEEP_TIME = 0.001 # sleep this many seconds every iteration to ease server load def dump2csv(db_connection, table_name, date_column, until_date, dest_file_tpl): rows = describe_table(db_connection, table_name) first_date = min_column_value(db_connection, table_name, date_column) first_date = first_date.replace(minute=0, second=0) cursor = db_connection.cursor() period = timedelta(seconds=3600) last_date = first_date + period dest_file = None dest_file_name = '' while first_date < until_date: # Check if we need to open a new file if first_date.strftime(dest_file_tpl) != dest_file_name: if dest_file: dest_file.close() dest_file_name = first_date.strftime(dest_file_tpl) dest_file = open(dest_file_name, 'w') dest_file.write('%s\n' % (','.join(rows),)) sys.stdout.write('+') ; sys.stdout.flush() # Query a bit of data and write to file cursor.execute("SELECT * FROM %s WHERE '%s' <= %s AND %s < '%s' ORDER BY %s" % ( table_name, first_date.strftime('%Y-%m-%d %H:%M:%S'), date_column, date_column, last_date.strftime('%Y-%m-%d %H:%M:%S'), date_column, )) for row in cursor.fetchall(): dest_file.write('%s\n' % (','.join(to_string(i) for i in row),)) # Increase dates first_date, last_date = last_date, last_date + period #sys.stdout.write('.') ; sys.stdout.flush() time.sleep(SLEEP_TIME) def prune(db_connection, table_name, date_column, until_date): # Simply use MySQL LIMIT here.. that should work just fine. limit = 1000 cursor = db_connection.cursor() sys.stdout.write(' ...pruning') ; sys.stdout.flush() while True: # DELETE without ORDER BY is much faster than with, but it might not # work if you're using replication in certain scenarios. See: # http://dev.mysql.com/doc/refman/5.1/en/replication-features-limit.html cursor.execute("DELETE FROM %s WHERE %s < '%s' LIMIT %d" % ( table_name, date_column, until_date.strftime('%Y-%m-%d %H:%M:%S'), limit, )) #cursor.execute("DELETE FROM %s WHERE %s < '%s' ORDER BY %s LIMIT %d" % ( # table_name, date_column, until_date.strftime('%Y-%m-%d %H:%M:%S'), date_column, limit, #)) if cursor.rowcount < limit: break time.sleep(SLEEP_TIME) def describe_table(db_connection, table_name): cursor = db_connection.cursor() cursor.execute('DESCRIBE %s;' % (table_name,)) ret = [] for row in cursor.fetchall(): ret.append(row[0]) return ret def min_column_value(db_connection, table_name, column): cursor = db_connection.cursor() cursor.execute("SELECT MIN(%s) FROM %s WHERE %s >= '1971-01-01';" % (column, table_name, column)) ret = cursor.fetchall() return ret[0][0] def to_string(data): if isinstance(data, str): return '"%s"' % (data.replace('"', '""'),) if isinstance(data, unicode): return '"%s"' % (data.encode('utf-8').replace('"', '""'),) if isinstance(data, int) or isinstance(data, long): return str(data) if isinstance(data, datetime): return '"%s"' % (data.strftime('%Y-%m-%d %H:%M:%S'),) if isinstance(data, date): # must come after datetime return '"%s"' % (data.strftime('%Y-%m-%d'),) if isinstance(data, Decimal): return str(data) if data is None: return 'NULL' print type(data) print data assert False if __name__ == '__main__': default_until_date = datetime(2030, 1, 1) default_filename = '%s.%s.%%Y%%m.csv' if len(sys.argv) == 1: print 'Usage: mysql2csv.py [--prune] HOST USER DATABASE TABLE DATE_COLUMN [UNTIL] \\' print ' [FILENAME]' print 'Dumps all data from the MySQL table to a CSV-formatted file on stdout IN A NON-' print 'LOCKING FASHION. In most cases you can safely use this on a production database' print 'to dump old records that you will be pruning later on.' print '' print 'Supply the necessary HOST/USER/DATABASE to connect to, the password is prompted' print 'on stdin. The data from TABLE will be exported ordered by the date from' print 'DATE_COLUMN in ascending order.' print '' print 'The CSV file is rotated by the DATE_COLUMN date if strftime variables are used,' print 'the default filename being: %s' % (default_filename % ('DATABASE', 'TABLE')) print '' print 'The default UNTIL time is 2030-01-01 which should be far enough in the future' print 'to get "all" records. (It is used as "exclusive" date, meaning that no records' print 'from that date and onwards will be output.)' print '' print 'WARNING: if you supply --prune as first argument, it will continue to prune the' print 'dumped records from the database. Make sure this is really what you want.' print '' sys.exit(0) if sys.argv[1] == '--prune': PRUNE_DUMPED_DATA = True del sys.argv[1] else: PRUNE_DUMPED_DATA = False host = sys.argv[1] user = sys.argv[2] database, table, date_column = sys.argv[3], sys.argv[4], sys.argv[5] until_date = default_until_date if len(sys.argv) > 6: until_date = datetime.fromtimestamp(time.mktime(time.strptime(sys.argv[6], '%Y-%m-%d'))) filename = None # use default, see below if len(sys.argv) > 7: filename = sys.argv[7] print 'MySQL server settings: %s@%s/%s' % (user, host, database) print 'Table to dump: %s (ordered by %s, from begin to %s)' % (table, date_column, until_date.strftime('%Y-%m-%d')) print 'Filename template: "%s"' % (filename, default_filename % (database, table))[filename is None] print 'Enter your MySQL password to begin dumping%s' % ('.', ' AND PRUNING!!!')[PRUNE_DUMPED_DATA] import getpass password = getpass.getpass() to_backup_and_prune = [] to_backup_and_prune.append((database, table, date_column)) for item in to_backup_and_prune: database, table, date_column = item print 'Processing %s.%s (ordered by %s)' % (database, table, date_column) conn = connect(host=host, user=user, passwd=password, db=database) filetpl = (filename, default_filename % (database, table))[filename is None] dump2csv(conn, table, date_column, until_date, filetpl) if PRUNE_DUMPED_DATA: prune(conn, table, date_column, until_date) conn.close() print