#!/usr/bin/env python """ Migraine, a Drupal test/production migration and backup tool, presented by Noosphere Networks. Copyright (C) 2007 Andrew Shearer http://shearersoftware.com/ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA. For more information and updates, see: http://shearersoftware.com/software/server-administration/migraine/ REQUIREMENTS Python 2.4 or higher, or Python 2.3 with added 'subprocess' module. MySQL 4 or higher. SETUP Configuration can be specified on the command line, but to avoid having to do so every time, create a config file and pass its name in the --config= parameter. In order for the usage examples below to work, please copy the migraine-example.ini file, rename it migraine.ini, and update it with your own settings. For the first run, you may have to adjust the config_tables or content_tables variables in the source code to add your own table names. Migraine will warn you when you try to run it if there are any such unknown tables. See comments within the source for guidance. Some kinds of CCK changes alter table schema in a way that makes the test server content schema incompatible with the prod server content schema. Migraine will attempt to detect and highlight the schema differences, but they must be manually resolved (either through MySQL commands, or through repeating the same CCK actions on the prod server). USAGE To back up test server: python migraine.py --dump-test --config=migraine.ini To back up prod server: python migraine.py --dump-prod --config=migraine.ini (Use --restore-test or --restore-prod to restore.) To migrate test to prod (preserving prod's content tables): python migraine.py --dump-test --config=migraine.ini python migraine.py --migrate-to-prod --config=migraine.ini To migrate prod to test (preserving test's config, but overwriting it with prod content): python migraine.py --dump-prod --config=migraine.ini python migraine.py --migrate-to-test --config=migraine.ini """ __author__ = 'Andrew Shearer' __version__ = '1.0a1' import getpass import os import re import subprocess import sys mysql_path = "mysql" mysqldump_path = "mysqldump" diff_path = "diff" diff_options = ['-u'] # Categorize Drupal's database tables by name. # The goal is to separate: # 1. Site building and configuration data (config_tables) # 2. Real content (content_tables) # # In addition, there are these table categories: # 3. Ephemeral data such as logs, where clearing is optional (temp_tables) # 4. Cache tables, which should always be cleared (cache_tables) # 5. Sequence tables (sequence_tables) # 6. Ignored tables (ignore_tables) # 1. Config tables. Data on test should be copied to prod. config_tables = "access actions actions_registry authmap blocks blocks_roles boxes contemplate filters flood imagecache_action imagecache_preset legal_conditions masquerade pageroute_routes profile_fields node_group node_group_fields node_type role variable view_argument view_exposed_filter view_filter view_sort view_tablefield view_view workflow_access workflow_actions workflow_fields workflow_states workflow_transitions workflow_type_map workflows zipcodes system tinymce_role tinymce_settings vocabulary vocabulary_node_types filter_formats menu permission node_field bc_blocks aggregator_category aggregator_category_feed aggregator_feed node_field_instance locales_meta locales_source locales_target" # 2. Content tables. Prod data should remain intact through migration. # (For reverse migration, prod data could be copied to test.) content_tables = "book comments contact content_field_description content_field_description_brief content_field_featured content_field_link content_field_link_userprofile content_field_reference_content content_type_blog_entry content_type_blog_post content_type_book content_type_event_info content_type_forum content_type_image content_type_news_article content_type_news_item content_type_page content_type_project_issue content_type_project_project content_type_resource_profile content_type_section_description content_type_usernode file_revisions files forum history legal_accepted location pageroute_pages profile_values node node_comment_statistics node_counter node_revisions nodefamily search_dataset search_index search_total url_alias workflow_node workflow_node_history workflow_scheduled_transition term_data term_hierarchy term_node term_relation term_synonym trackback_node trackback_received trackback_sent usernode users users_roles node_access content_type_sponsor content_field_logo content_type_faq faq_weights content_type_blogroll_entry content_type_marketplace wiki_access wiki_access_level wiki_moveref wiki_name project_comments project_issue_projects project_issue_state project_issues project_projects project_subscriptions" # 3. Temp/log tables. Should normally be left alone, but could be cleared. # No need to back up or copy between sites. temp_tables = "accesslog devel_queries devel_times sessions watchdog aggregator_item aggregator_category_item" # 4. Cache tables. Should be cleared. cache_tables = "cache cache_content cache_filter cache_menu cache_page cache_views cache_block" # 5. Sequence tables. Must be merged in a special way. sequence_tables = "sequences" # 6. Ignored tables. Any unimportant tables could be listed here to quiet # 'unknown table' warnings. ignore_tables = "" diffable = True # makes data output larger, but more suitable for diffs (ordered, one row per line) autoIncrementRE = re.compile(r' AUTO_INCREMENT=\d+') def dump_table(host, user, password, db, tablenames, filename, data = True, schema = True, tabfilename = None): print "Dumping to file '%s'..." % filename cmd = [mysqldump_path, '-h' + host, '-u' + user, '-p' + password, '--comments=FALSE'] # eliminate comments so test vs. prod database name doesn't show in output, which would # trigger a false diff positive cmd.extend(['--skip-opt', '--add-locks', '--quick', '--lock-tables', '--set-charset', '--disable-keys']) if schema: cmd.extend(['--add-drop-table', '--create-options']) if not data: cmd.append('--no-data') if not schema: cmd.append('--no-create-info') if data: if diffable: cmd.append('--order-by-primary') else: cmd.append('--extended-insert') cmd.append(db) if tabfilename: cmd.extend(['--xml']) tablenames = filter(None, tablenames) tablenames.sort() outputfile = open(filename, 'wb') for tablename in tablenames: proc = subprocess.Popen(cmd + [tablename], shell=False, #stdin=subprocess.PIPE, stdout=outputfile, close_fds=True) #proc.stdin.write(password + "\n") #proc.stdin.close() #result = proc.stdout.read() retval = proc.wait() if retval != 0: raise "Error: the mysqldump call failed, returning result code #%d" % retval outputfile.close() if schema and not data: # remove AUTO_INCREMENT=nn from table create statements, to create clean, # diff-ready schema output rawfile = open(filename, 'rb').read() rawfile = autoIncrementRE.sub('', rawfile) open(filename, 'wb').write(rawfile) def get_query_result(host, user, password, db, query): """ Return plain text resulting from running the given MySQL query. """ proc = subprocess.Popen([mysql_path, '-h' + host, '-u' + user, '-p' + password, '-D' + db, '--batch', '--skip-column-names', '--execute='+query], shell=False, stdout=subprocess.PIPE, close_fds=True) result = proc.stdout.read() retval = proc.wait() if retval != 0: raise "Error: mysql returned result code #%d" % retval return result def restore_table(host, user, password, db, filename): cmd = [mysql_path, '-h' + host, '-u' + user, '-p' + password, '-D' + db, '--batch'] retval = subprocess.Popen(cmd, stdin=open(filename, 'r'), close_fds = True).wait() if retval != 0: raise "Error: attempting to restore table returned result code #%d" % retval def dump_db(host, user, password, filepathprefix, db, noaction = 0): all_known_tables_dict = dict.fromkeys(filter(None, ' '.join( [config_tables, content_tables, temp_tables, cache_tables, sequence_tables, ignore_tables] ).split()), True) all_existing_tables_dict = {} existing_tables = get_query_result(host, user, password, db, 'SHOW TABLES').splitlines() for table in existing_tables: table = table.strip() if table not in all_known_tables_dict: print ("Warning: new, unknown table '%s' was not saved. " "It needs to be added to this script and categorized as " "config, content, temp, or cache." % table) else: all_existing_tables_dict[table] = True for tableinfo in [{'filename': 'config.sql', 'tables': config_tables}, {'filename': 'content_create.sql', 'tables': content_tables, 'data': False}, {'filename': 'content_data.sql', 'tables': content_tables, 'schema': False}, {'filename': 'temp_create.sql', 'tables': temp_tables, 'data': False}, {'filename': 'cache_create.sql', 'tables': cache_tables, 'data': False}, {'filename': 'sequences.sql', 'tables': sequence_tables, 'data': True} ]: tables = [table for table in tableinfo['tables'].split() if table and table in all_existing_tables_dict] if noaction: print ("Would have dumped %d table(s) to '%s'." % (len(tables), filepathprefix + tableinfo['filename'])) else: dump_table(host, user, password, db, tables, filepathprefix + tableinfo['filename'], data = tableinfo.get('data', True), schema = tableinfo.get('schema', True), tabfilename = tableinfo.get('tabfilename') and filepathprefix + tableinfo['tabfilename']) sequences = get_query_result(host, user, password, db, 'SELECT name, id FROM sequences ORDER BY name') open(filepathprefix + 'sequences.txt', 'w').write(sequences) def restore_db(host, user, password, filepathprefix, db, keepconfig = False, keepcontent = False, keeptemp = False, noaction = False, migrationtimestamp = False): fileinfolist = [{'filename': 'config.sql', 'tables': config_tables, 'enabled': not keepconfig}, {'filename': 'content_create.sql', 'tables': content_tables, 'enabled': not keepcontent}, {'filename': 'content_data.sql', 'tables': content_tables, 'enabled': not keepcontent}, {'filename': 'temp_create.sql', 'tables': temp_tables, 'enabled': not keeptemp}, {'filename': 'cache_create.sql', 'tables': cache_tables, 'enabled': True}, {'filename': 'sequences.sql', 'tables': sequence_tables, 'enabled': not keepcontent and not keeptemp and not keepconfig}] all_existing_tables_dict = {} existing_tables = get_query_result(host, user, password, db, 'SHOW TABLES').splitlines() for table in existing_tables: table = table.strip() all_existing_tables_dict[table] = True if keepcontent: temp_schemafile = filepathprefix + 'content_create_tempstate.sql' restore_schemafile = filepathprefix + 'content_create.sql' dump_table(host, user, password, db, [table for table in content_tables.split() if table and table in all_existing_tables_dict], temp_schemafile, data = False) temp_schema = open(temp_schemafile, 'r').read() restore_schema = open(restore_schemafile, 'r').read() if temp_schema != restore_schema: print >>sys.stderr, ("Error: Content schemas are different. " "Please make corresponding administrative changes manually on" " the destination site before attempting to migrate the" " database.") if diff_path: subprocess.call([diff_path] + diff_options + [temp_schemafile, restore_schemafile]) raise "Process could not be completed because of differing content schemas." for fileinfo in fileinfolist: if fileinfo.get('enabled', True): messagePrefix = 'Loading' if noaction: messagePrefix = "Would have loaded" print ("%s disk file '%s' into database '%s'." % (messagePrefix, filepathprefix + fileinfo['filename'], db)) if not noaction: restore_table(host, user, password, db, filepathprefix + fileinfo['filename']) # Merge sequences. if keepcontent or keeptemp or keepconfig: sequences = get_query_result(host, user, password, db, 'SELECT name, id FROM sequences ORDER BY name') open(filepathprefix + 'sequences_tempstate.txt', 'w').write(sequences) dbsequence = {} for sequenceline in sequences.split("\n"): sequenceline = sequenceline.strip() if sequenceline: col, number = sequenceline.split("\t") dbsequence[col] = int(number) importsequence = {} for sequenceline in open(filepathprefix + 'sequences.txt', 'r').readlines(): sequenceline = sequenceline.strip() if sequenceline: col, number = sequenceline.split("\t") importsequence[col] = int(number) importedtableslist = [] kepttableslist = [] for fileinfo in fileinfolist: tables = filter(None, fileinfo['tables'].split()) if fileinfo.get('enabled', True): importedtableslist.extend(tables) else: kepttableslist.extend(tables) importedtablesdict = dict.fromkeys(importedtableslist, True) kepttablesdict = dict.fromkeys(kepttableslist, True) for key in importsequence.keys(): if key not in dbsequence or importsequence[key] > dbsequence[key]: tablename = '_'.join(key.split('_')[:-1]) # work around Python 2.3's lack of rsplit is_imported = importedtablesdict.has_key(tablename) or importedtablesdict.has_key(key) is_kept = kepttablesdict.has_key(tablename) or kepttablesdict.has_key(key) if is_imported: # Insert or update this sequence number in the destination database. # Should use SQL string escaping to substitute sequence name here. # But lacking a real escaping function, and since sequence keys don't tend # to use special chars, we just raise an error if there are any. sequence_key_sql = re.match('([A-Za-z0-9_-]+)', key).group(1) if sequence_key_sql != key: raise ("Error: sequence key '%s' contains invalid characters. " "Sequence keys must currently consist of alphanumeric characters, " "underscores, or dashes." % key) sequence_number = int(importsequence[key]) if key in dbsequence: messagePrefix = 'Updating' if noaction: messagePrefix = 'Would have updated' print ("%s sequence '%s' to %d (was %r)." % (messagePrefix, key, sequence_number, dbsequence[key])) if not noaction: result = get_query_result(host, user, password, db, "UPDATE sequences SET id = %d WHERE name = '%s' AND id < %d" % (sequence_number, sequence_key_sql, sequence_number)) else: messagePrefix = 'Creating' if noaction: messagePrefix = 'Would have created' print ("%s sequence '%s' with initial value %d." % (messagePrefix, sequence_key_sql, sequence_number)) if not noaction: result = get_query_result(host, user, password, db, "INSERT INTO sequences (name, id) VALUES ('%s', %d)" % (sequence_key_sql, sequence_number)) elif not is_kept: print ("Error: unknown sequence '%s' (table '%s' is not known). " "Current database value %r. Value from import source is %r." % (key, tablename, importsequence.get(key), dbsequence.get(key))) if migrationtimestamp: # Save a Drupal variable named migraine_last_migrated to the target DB # with a value based on the current date, formatted like '2007-01-01 16:00:00' result = get_query_result(host, user, password, db, "DELETE FROM variable WHERE name = 'migraine_last_migrated';" "INSERT INTO variable (name, value) VALUES ('migraine_last_migrated'," " CONCAT('s:', LENGTH(DATE_FORMAT(NOW(), '%Y-%m-%d %k:%i:%s'))," " ':\"', DATE_FORMAT(NOW(), '%Y-%m-%d %k:%i:%s'), '\";'))"); class Usage(Exception): def __init__(self, msg): self.msg = msg def main(argv = None): import getopt if argv is None: argv = sys.argv test_db_commands = ['dump-test', 'restore-test', 'migrate-to-test'] prod_db_commands = ['dump-prod', 'restore-prod', 'migrate-to-prod'] all_commands = test_db_commands + prod_db_commands per_site_opts = ['host', 'db', 'user', 'dump-dir', 'dump-prefix'] all_config_options = (["noaction"] + per_site_opts + ['test-' + opt for opt in per_site_opts] + ['prod-' + opt for opt in per_site_opts]) try: try: opts, args = getopt.getopt(argv[1:], "hn", ["help", "config="] + all_commands + [opt + '=' for opt in all_config_options]) if args: raise Usage('unrecognized parameters: ' + args.join(', ')) except getopt.GetoptError, msg: raise Usage(msg) except Usage, err: print >>sys.stderr, err.msg print >>sys.stderr, "for help use --help" return 2 config = { 'host' : 'localhost', 'commands': 'dump-test dump-prod', 'dump-prefix': '', 'noaction': 0 } commands = [] for opt, value in opts: if opt.startswith('--'): opt = opt[2:] if opt in all_commands: commands.append(opt) elif opt == 'config': import ConfigParser parser = ConfigParser.ConfigParser() parser.read(value) for sec in parser.sections(): paramNamePrefix = {'defaults': '', 'prod': 'prod-', 'test': 'test-'}.get(sec) if paramNamePrefix is not None: for paramName in parser.options(sec): config[paramNamePrefix + paramName] = parser.get(sec, paramName) elif opt in all_config_options: config[opt] = value elif opt == 'help': help() else: raise Usage, "unrecognized option: " + opt elif opt == '-h': help() elif opt == '-n': config['noaction'] = 1 else: raise Usage, "unrecognized option: " + opt if not commands: commands = config.get('commands', '').split() for command in commands: if not command in all_commands: raise Usage, "an unrecognized default command was configured: " + command for opt in per_site_opts: for site_prefix in ('test-', 'prod-'): if config.get(site_prefix + opt) is None: config[site_prefix + opt] = config.get(opt) # Prompt for a password if one hasn't been specified in command line # or config, and there is a command that will need it. for command in commands: if command in prod_db_commands: if config.get('prod-db') is None: raise Usage, 'error: prod-db not specified' if config.get('prod-user') is None: raise Usage, 'error: prod-user not specified' if config.get('prod-password') is None: config['prod-password'] = getpass.getpass('Enter prod database server password: ') elif command in test_db_commands: if config.get('test-db') is None: raise Usage, 'error: test-db not specified' if config.get('test-user') is None: raise Usage, 'error: test-user not specified' if config.get('test-password') is None: config['test-password'] = getpass.getpass('Enter test database server password: ') for command in commands: if command in prod_db_commands: site_prefix = 'prod-' else: site_prefix = 'test-' if command == 'migrate-to-prod': file_site_prefix = 'test-' elif command == 'migrate-to-test': file_site_prefix = 'prod-' else: file_site_prefix = site_prefix params = {'host': config[site_prefix + 'host'], 'user': config[site_prefix + 'user'], 'password': config[site_prefix + 'password'], 'db': config[site_prefix + 'db'], 'filepathprefix': os.path.join(config.get(file_site_prefix + 'dump-dir') or '', config.get(file_site_prefix + 'dump-prefix') or ''), 'noaction': config['noaction']} if command == 'migrate-to-prod': params.update(dict(keepconfig = False, keepcontent = True, keeptemp = True, migrationtimestamp = True)) restore_db(**params) elif command == 'migrate-to-test': params.update(dict(keepconfig = True, keepcontent = False, keeptemp = True, migrationtimestamp = True)) restore_db(**params) elif command == 'restore-prod': params.update(dict(keepconfig = False, keepcontent = False, keeptemp = False)) restore_db(**params) elif command == 'restore-test': params.update(dict(keepconfig = False, keepcontent = False, keeptemp = False)) restore_db(**params) elif command in ('dump-test', 'dump-prod'): print "Dumping databases (%s)..." % command dump_db(**params) return 0 if __name__ == '__main__': sys.exit(main())