Revision: 9252 http://trac.macosforge.org/projects/calendarserver/changeset/9252 Author: cdaboo@apple.com Date: 2012-05-17 10:54:21 -0700 (Thu, 17 May 2012) Log Message: ----------- Tool to wipe out data from the DB directly without going through txdav store objects. Added Paths: ----------- CalendarServer/trunk/calendarserver/tools/obliterate.py Added: CalendarServer/trunk/calendarserver/tools/obliterate.py =================================================================== --- CalendarServer/trunk/calendarserver/tools/obliterate.py (rev 0) +++ CalendarServer/trunk/calendarserver/tools/obliterate.py 2012-05-17 17:54:21 UTC (rev 9252) @@ -0,0 +1,589 @@ +#!/usr/bin/env python +# -*- test-case-name: calendarserver.tools.test.test_calverify -*- +## +# Copyright (c) 2012 Apple Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +import time + +""" +This tool scans wipes out user data without using slow store object apis +that attempt to keep the DB consistent. Instead it assumes facts about the +schema and how the various table data are related. Normally the purge principal +tool should be used to "correctly" remove user data. This is an emergency tool +needed when data has been accidently migrated into the DB but no users actually +have access to it as they are not enabled on the server. +""" + +from calendarserver.tools.cmdline import utilityMain +from twext.enterprise.dal.syntax import Parameter, Delete, Select, Union,\ + CompoundComparison, ExpressionSyntax, Count +from twisted.application.service import Service +from twisted.internet.defer import inlineCallbacks, returnValue +from twisted.python import log +from twisted.python.text import wordWrap +from twisted.python.usage import Options +from twistedcaldav.stdconfig import DEFAULT_CONFIG_FILE +from txdav.common.datastore.sql_tables import schema, _BIND_MODE_OWN +import os +import sys +import uuid + +VERSION = "1" + +def usage(e=None): + if e: + print e + print "" + try: + ObliterateOptions().opt_help() + except SystemExit: + pass + if e: + sys.exit(64) + else: + sys.exit(0) + + +description = ''.join( + wordWrap( + """ + Usage: calendarserver_obliterate [options] [input specifiers] + """, + int(os.environ.get('COLUMNS', '80')) + ) +) +description += "\nVersion: %s" % (VERSION,) + + + +class ConfigError(Exception): + pass + + + +class ObliterateOptions(Options): + """ + Command-line options for 'calendarserver_obliterate' + """ + + synopsis = description + + optFlags = [ + ['verbose', 'v', "Verbose logging."], + ['fix-props', 'p', "Fix orphaned resource properties only."], + ['dry-run', 'n', "Do not make any changes."], + ] + + optParameters = [ + ['config', 'f', DEFAULT_CONFIG_FILE, "Specify caldavd.plist configuration path."], + ['data', 'd', "./uuids.txt", "Path where list of uuids to obliterate is."], + ['uuid', 'u', "", "Obliterate this user's data."], + ] + + + def __init__(self): + super(ObliterateOptions, self).__init__() + self.outputName = '-' + + + def opt_output(self, filename): + """ + Specify output file path (default: '-', meaning stdout). + """ + self.outputName = filename + + opt_o = opt_output + + + def openOutput(self): + """ + Open the appropriate output file based on the '--output' option. + """ + if self.outputName == '-': + return sys.stdout + else: + return open(self.outputName, 'wb') + + +# Need to patch this in if not present in actual server code +def NotIn(self, subselect): + # Can't be Select.__contains__ because __contains__ gets __nonzero__ + # called on its result by the 'in' syntax. + return CompoundComparison(self, 'not in', subselect) + +if not hasattr(ExpressionSyntax, "NotIn"): + ExpressionSyntax.NotIn = NotIn + + +class ObliterateService(Service, object): + """ + Service which runs, does its stuff, then stops the reactor. + """ + + def __init__(self, store, options, output, reactor, config): + super(ObliterateService, self).__init__() + self.store = store + self.options = options + self.output = output + self.reactor = reactor + self.config = config + + self.results = {} + self.summary = [] + self.totalHomes = 0 + self.totalCalendars = 0 + self.totalResources = 0 + self.attachments = set() + + + def startService(self): + """ + Start the service. + """ + super(ObliterateService, self).startService() + self.doObliterate() + + + @inlineCallbacks + def doObliterate(self): + """ + Do the work, stopping the reactor when done. + """ + self.output.write("\n---- Obliterate version: %s ----\n" % (VERSION,)) + if self.options["dry-run"]: + self.output.write("---- DRY RUN No Changes Being Made ----\n") + + try: + if self.options["fix-props"]: + yield self.obliterateOrphanedProperties() + else: + yield self.obliterateUUIDs() + + self.output.close() + except ConfigError: + pass + except: + log.err() + + self.reactor.stop() + + + @inlineCallbacks + def obliterateOrphanedProperties(self): + """ + Obliterate orphaned data in RESOURCE_PROPERTIES table. + """ + + # Get list of distinct resource_property resource_ids to delete + self.txn = self.store.newTransaction() + + ch = schema.CALENDAR_HOME + ca = schema.CALENDAR + co = schema.CALENDAR_OBJECT + ah = schema.ADDRESSBOOK_HOME + aa = schema.ADDRESSBOOK + ao = schema.ADDRESSBOOK_OBJECT + rp = schema.RESOURCE_PROPERTY + + rows = (yield Select( + [rp.RESOURCE_ID, ], + Distinct=True, + From=rp, + Where=(rp.RESOURCE_ID.NotIn( + Select( + [ch.RESOURCE_ID], + From=ch, + SetExpression=Union( + Select( + [ca.RESOURCE_ID], + From=ca, + SetExpression=Union( + Select( + [co.RESOURCE_ID], + From=co, + SetExpression=Union( + Select( + [ah.RESOURCE_ID], + From=ah, + SetExpression=Union( + Select( + [aa.RESOURCE_ID], + From=aa, + SetExpression=Union( + Select( + [ao.RESOURCE_ID], + From=ao, + ), + ), + ), + ), + ), + ), + ), + ), + ), + ), + ), + )) + ).on(self.txn)) + + if not rows: + self.output.write("No orphaned resource properties\n") + returnValue(None) + + resourceIDs = [row[0] for row in rows] + resourceIDs_len = len(resourceIDs) + t = time.time() + for ctr, resourceID in enumerate(resourceIDs): + self.output.write("%d of %d (%d%%): ResourceID: %s\n" % ( + ctr + 1, + resourceIDs_len, + ((ctr + 1) * 100 / resourceIDs_len), + resourceID, + )) + + yield self.removePropertiesForResourceID(resourceID) + + # Commit every 10 DELETEs + if divmod(ctr + 1, 10)[1] == 0: + yield self.txn.commit() + self.txn = self.store.newTransaction() + + yield self.txn.commit() + self.txn = None + + self.output.write("Obliteration time: %.1fs\n" % (time.time() - t,)) + + + @inlineCallbacks + def obliterateUUIDs(self): + """ + Obliterate specified UUIDs. + """ + if self.options["uuid"]: + uuids = [self.options["uuid"], ] + elif self.options["data"]: + if not os.path.exists(self.options["data"]): + self.output.write("%s is not a valid file\n" % (self.options["data"],)) + raise ConfigError + + uuids = open(self.options["data"]).read().split() + else: + self.output.write("One of --data or --uuid must be specified\n") + raise ConfigError + + t = time.time() + uuids_len = len(uuids) + for ctr, uuid in enumerate(uuids): + self.txn = self.store.newTransaction() + self.output.write("%d of %d (%d%%): UUID: %s - " % ( + ctr + 1, + uuids_len, + ((ctr + 1) * 100 / uuids_len), + uuid, + )) + result = (yield self.processUUID(uuid)) + self.output.write("%s\n" % (result,)) + yield self.txn.commit() + self.txn = None + + self.output.write("\nTotal Homes: %d\n" % (self.totalHomes,)) + self.output.write("Total Calendars: %d\n" % (self.totalCalendars,)) + self.output.write("Total Resources: %d\n" % (self.totalResources,)) + if self.attachments: + self.output.write("Attachments removed: %s\n" % (len(self.attachments,))) + #for attachment in self.attachments: + # self.output.write(" %s\n" % (attachment,)) + self.output.write("Obliteration time: %.1fs\n" % (time.time() - t,)) + + + @inlineCallbacks + def processUUID(self, uuid): + + # Get the resource-id for the home + ch = schema.CALENDAR_HOME + kwds = { "UUID" : uuid } + rows = (yield Select( + [ch.RESOURCE_ID, ], + From=ch, + Where=( + ch.OWNER_UID == Parameter("UUID") + ), + ).on(self.txn, **kwds)) + + if not rows: + returnValue("No home found") + homeID = rows[0][0] + self.totalHomes += 1 + + # Count resources + resourceCount = (yield self.countResources(uuid)) + self.totalResources += resourceCount + + # Remove revisions - do before deleting calendars to remove + # foreign key constraint + yield self.removeRevisionsForResourceID(homeID) + + # Look at each calendar and unbind/delete-if-owned + count = (yield self.deleteCalendars(homeID)) + self.totalCalendars += count + + # Remove properties + yield self.removePropertiesForResourceID(homeID) + + # Remove notifications + yield self.removeNotificationsForUUID(uuid) + + # Remove attachments + attachmentCount = (yield self.removeAttachments(homeID)) + + # Now remove the home + yield self.removeHomeForResourceID(homeID) + + returnValue("Home, %d calendars, %d resources%s - deleted" % ( + count, + resourceCount, + (", %d attachmensts" % (attachmentCount,)) if attachmentCount else "", + )) + + + @inlineCallbacks + def countResources(self, uuid): + ch = schema.CALENDAR_HOME + cb = schema.CALENDAR_BIND + co = schema.CALENDAR_OBJECT + kwds = { "UUID" : uuid } + rows = (yield Select( + [ + Count(co.RESOURCE_ID), + ], + From=ch.join( + cb, type="inner", on=(ch.RESOURCE_ID == cb.CALENDAR_HOME_RESOURCE_ID).And( + cb.BIND_MODE == _BIND_MODE_OWN)).join( + co, type="left", on=(cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID)), + Where=( + ch.OWNER_UID == Parameter("UUID") + ), + ).on(self.txn, **kwds)) + + returnValue(rows[0][0] if rows else 0) + + + @inlineCallbacks + def deleteCalendars(self, homeID): + + # Get list of binds and bind mode + cb = schema.CALENDAR_BIND + kwds = { "resourceID" : homeID } + rows = (yield Select( + [cb.CALENDAR_RESOURCE_ID, cb.BIND_MODE, ], + From=cb, + Where=( + cb.CALENDAR_HOME_RESOURCE_ID == Parameter("resourceID") + ), + ).on(self.txn, **kwds)) + if not rows: + returnValue(0) + + for resourceID, mode in rows: + if mode == _BIND_MODE_OWN: + yield self.deleteCalendar(resourceID) + else: + yield self.deleteBind(homeID, resourceID) + + returnValue(len(rows)) + + + @inlineCallbacks + def deleteCalendar(self, resourceID): + + # Delete the CALENDAR entry (will cascade to CALENDAR_BIND and CALENDAR_OBJECT) + if not self.options["dry-run"]: + ca = schema.CALENDAR + kwds = { + "ResourceID" : resourceID, + } + yield Delete( + From=ca, + Where=( + ca.RESOURCE_ID == Parameter("ResourceID") + ), + ).on(self.txn, **kwds) + + # Remove properties + yield self.removePropertiesForResourceID(resourceID) + + + @inlineCallbacks + def deleteBind(self, homeID, resourceID): + if not self.options["dry-run"]: + cb = schema.CALENDAR_BIND + kwds = { + "HomeID" : homeID, + "ResourceID" : resourceID, + } + yield Delete( + From=cb, + Where=( + (cb.CALENDAR_HOME_RESOURCE_ID == Parameter("HomeID")).And + (cb.CALENDAR_RESOURCE_ID == Parameter("ResourceID")) + ), + ).on(self.txn, **kwds) + + + @inlineCallbacks + def removeRevisionsForResourceID(self, resourceID): + if not self.options["dry-run"]: + rev = schema.CALENDAR_OBJECT_REVISIONS + kwds = { "ResourceID" : resourceID } + yield Delete( + From=rev, + Where=( + rev.CALENDAR_HOME_RESOURCE_ID == Parameter("ResourceID") + ), + ).on(self.txn, **kwds) + + + @inlineCallbacks + def removePropertiesForResourceID(self, resourceID): + if not self.options["dry-run"]: + props = schema.RESOURCE_PROPERTY + kwds = { "ResourceID" : resourceID } + yield Delete( + From=props, + Where=( + props.RESOURCE_ID == Parameter("ResourceID") + ), + ).on(self.txn, **kwds) + + + @inlineCallbacks + def removeNotificationsForUUID(self, uuid): + + # Get NOTIFICATION_HOME.RESOURCE_ID + nh = schema.NOTIFICATION_HOME + kwds = { "UUID" : uuid } + rows = (yield Select( + [nh.RESOURCE_ID, ], + From=nh, + Where=( + nh.OWNER_UID == Parameter("UUID") + ), + ).on(self.txn, **kwds)) + + if rows: + resourceID = rows[0][0] + + # Delete NOTIFICATION rows + if not self.options["dry-run"]: + no = schema.NOTIFICATION + kwds = { "ResourceID" : resourceID } + yield Delete( + From=no, + Where=( + no.NOTIFICATION_HOME_RESOURCE_ID == Parameter("ResourceID") + ), + ).on(self.txn, **kwds) + + # Delete NOTIFICATION_HOME (will cascade to NOTIFICATION_OBJECT_REVISIONS) + if not self.options["dry-run"]: + kwds = { "UUID" : uuid } + yield Delete( + From=nh, + Where=( + nh.OWNER_UID == Parameter("UUID") + ), + ).on(self.txn, **kwds) + + + @inlineCallbacks + def removeAttachments(self, resourceID): + + # Get ATTACHMENT paths + at = schema.ATTACHMENT + kwds = { "resourceID" : resourceID } + rows = (yield Select( + [at.PATH, ], + From=at, + Where=( + at.CALENDAR_HOME_RESOURCE_ID == Parameter("resourceID") + ), + ).on(self.txn, **kwds)) + + if rows: + self.attachments.update([row[0] for row in rows]) + + # Delete ATTACHMENT rows + if not self.options["dry-run"]: + at = schema.ATTACHMENT + kwds = { "resourceID" : resourceID } + yield Delete( + From=at, + Where=( + at.CALENDAR_HOME_RESOURCE_ID == Parameter("resourceID") + ), + ).on(self.txn, **kwds) + + returnValue(len(rows) if rows else 0) + + + @inlineCallbacks + def removeHomeForResourceID(self, resourceID): + if not self.options["dry-run"]: + ch = schema.CALENDAR_HOME + kwds = { "ResourceID" : resourceID } + yield Delete( + From=ch, + Where=( + ch.RESOURCE_ID == Parameter("ResourceID") + ), + ).on(self.txn, **kwds) + + + def stopService(self): + """ + Stop the service. Nothing to do; everything should be finished by this + time. + """ + # TODO: stopping this service mid-export should really stop the export + # loop, but this is not implemented because nothing will actually do it + # except hitting ^C (which also calls reactor.stop(), so that will exit + # anyway). + + + +def main(argv=sys.argv, stderr=sys.stderr, reactor=None): + """ + Do the export. + """ + if reactor is None: + from twisted.internet import reactor + options = ObliterateOptions() + options.parseOptions(argv[1:]) + try: + output = options.openOutput() + except IOError, e: + stderr.write("Unable to open output file for writing: %s\n" % (e)) + sys.exit(1) + + def makeService(store): + from twistedcaldav.config import config + config.TransactionTimeoutSeconds = 0 + return ObliterateService(store, options, output, reactor, config) + + utilityMain(options['config'], makeService, reactor) + +if __name__ == '__main__': + main() Property changes on: CalendarServer/trunk/calendarserver/tools/obliterate.py ___________________________________________________________________ Added: svn:executable + *
participants (1)
-
source_changes@macosforge.org