#!/usr/bin/env python # vim: set ts=8 sw=4 sts=4 et ai: # Copyright (C) 2011, OSSO B.V. # Walter Doekes # # This little script can be used to take a sampling of queries that # are run often and/or take a lot of time. # # Every time you run this, a single 'show processlist' is executed on # the local mysqld. If any queries are seen, they're cleaned up a bit # and printed to stdout. Run this as often as you feel comfortable # with; if run often/long enough you'll get statistically significant # data about which queries put the most load on your database. # # Example output sort: # $ grep -v '^\[' output.txt | sed -e 's/^[^:]*: //' | # sed -e 's/[0-9]\{5,\}//g' | sort | uniq -c | sort -n | # tail -n5 # import datetime, re, subprocess WS_RE = re.compile(r'(\s{2,}|\t)') def get_processlist(): if False: f = open('mysql-slow-queries-sample.input', 'r') data = f.read() f.close() else: devnull = open('/dev/null', 'w') proc = subprocess.Popen("echo 'show full processlist;' | mysql --defaults-file=/etc/mysql/debian.cnf -E", stdout=subprocess.PIPE, stderr=devnull, shell=True) (data, void) = proc.communicate() ret = proc.wait() if ret != 0: raise Exception() return data def processlist_to_dict(data): lines = data.strip().split('\n') if not lines: return line = lines.pop(0) while line.startswith('****'): line = lines.pop(0) in_info = False row = {} while not line.startswith('****'): if in_info: row['info'] = row['info'] + ' ' + WS_RE.sub(' ', line.strip()) else: key, value = line.split(':', 1) key = key.strip().lower() row[key] = WS_RE.sub(' ', value.strip()) if key == 'info': in_info = True if not lines: break line = lines.pop(0) yield row def relevant_rows(row_iter): for row in processlist_to_dict(get_processlist()): if row['info'] == 'NULL' or row['info'] == 'show full processlist': continue host = row['host'].split(':')[0] duration = row['time'] db = row['db'].split(':')[0] query = row['info'] yield {'host': host, 'duration': duration, 'db': db, 'query': query} def main(): rows = list(relevant_rows(processlist_to_dict(get_processlist()))) if rows: print '[%s]' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),) for row in rows: print '%s (%s, %ss): %s' % (row['host'], row['db'], row['duration'], row['query']) if __name__ == '__main__': main()