[CalendarServer-changes] [9252] CalendarServer/trunk/calendarserver/tools/obliterate.py

source_changes at macosforge.org source_changes at macosforge.org
Thu May 17 10:54:21 PDT 2012


Revision: 9252
          http://trac.macosforge.org/projects/calendarserver/changeset/9252
Author:   cdaboo at apple.com
Date:     2012-05-17 10:54:21 -0700 (Thu, 17 May 2012)
Log Message:
-----------
Tool to wipe out data from the DB directly without going through txdav store objects.

Added Paths:
-----------
    CalendarServer/trunk/calendarserver/tools/obliterate.py

Added: CalendarServer/trunk/calendarserver/tools/obliterate.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/obliterate.py	                        (rev 0)
+++ CalendarServer/trunk/calendarserver/tools/obliterate.py	2012-05-17 17:54:21 UTC (rev 9252)
@@ -0,0 +1,589 @@
+#!/usr/bin/env python
+# -*- test-case-name: calendarserver.tools.test.test_calverify -*-
+##
+# Copyright (c) 2012 Apple Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##
+import time
+
+"""
+This tool scans wipes out user data without using slow store object apis
+that attempt to keep the DB consistent. Instead it assumes facts about the
+schema and how the various table data are related. Normally the purge principal
+tool should be used to "correctly" remove user data. This is an emergency tool
+needed when data has been accidently migrated into the DB but no users actually
+have access to it as they are not enabled on the server.
+"""
+
+from calendarserver.tools.cmdline import utilityMain
+from twext.enterprise.dal.syntax import Parameter, Delete, Select, Union,\
+    CompoundComparison, ExpressionSyntax, Count
+from twisted.application.service import Service
+from twisted.internet.defer import inlineCallbacks, returnValue
+from twisted.python import log
+from twisted.python.text import wordWrap
+from twisted.python.usage import Options
+from twistedcaldav.stdconfig import DEFAULT_CONFIG_FILE
+from txdav.common.datastore.sql_tables import schema, _BIND_MODE_OWN
+import os
+import sys
+import uuid
+
+VERSION = "1"
+
+def usage(e=None):
+    if e:
+        print e
+        print ""
+    try:
+        ObliterateOptions().opt_help()
+    except SystemExit:
+        pass
+    if e:
+        sys.exit(64)
+    else:
+        sys.exit(0)
+
+
+description = ''.join(
+    wordWrap(
+        """
+        Usage: calendarserver_obliterate [options] [input specifiers]
+        """,
+        int(os.environ.get('COLUMNS', '80'))
+    )
+)
+description += "\nVersion: %s" % (VERSION,)
+
+
+
+class ConfigError(Exception):
+    pass
+
+
+
+class ObliterateOptions(Options):
+    """
+    Command-line options for 'calendarserver_obliterate'
+    """
+
+    synopsis = description
+
+    optFlags = [
+        ['verbose', 'v', "Verbose logging."],
+        ['fix-props', 'p', "Fix orphaned resource properties only."],
+        ['dry-run', 'n', "Do not make any changes."],
+    ]
+
+    optParameters = [
+        ['config', 'f', DEFAULT_CONFIG_FILE, "Specify caldavd.plist configuration path."],
+        ['data', 'd', "./uuids.txt", "Path where list of uuids to obliterate is."],
+        ['uuid', 'u', "", "Obliterate this user's data."],
+    ]
+
+
+    def __init__(self):
+        super(ObliterateOptions, self).__init__()
+        self.outputName = '-'
+
+
+    def opt_output(self, filename):
+        """
+        Specify output file path (default: '-', meaning stdout).
+        """
+        self.outputName = filename
+
+    opt_o = opt_output
+
+
+    def openOutput(self):
+        """
+        Open the appropriate output file based on the '--output' option.
+        """
+        if self.outputName == '-':
+            return sys.stdout
+        else:
+            return open(self.outputName, 'wb')
+
+
+# Need to patch this in if not present in actual server code
+def NotIn(self, subselect):
+    # Can't be Select.__contains__ because __contains__ gets __nonzero__
+    # called on its result by the 'in' syntax.
+    return CompoundComparison(self, 'not in', subselect)
+
+if not hasattr(ExpressionSyntax, "NotIn"):
+    ExpressionSyntax.NotIn = NotIn
+
+
+class ObliterateService(Service, object):
+    """
+    Service which runs, does its stuff, then stops the reactor.
+    """
+
+    def __init__(self, store, options, output, reactor, config):
+        super(ObliterateService, self).__init__()
+        self.store = store
+        self.options = options
+        self.output = output
+        self.reactor = reactor
+        self.config = config
+
+        self.results = {}
+        self.summary = []
+        self.totalHomes = 0
+        self.totalCalendars = 0
+        self.totalResources = 0
+        self.attachments = set()
+
+
+    def startService(self):
+        """
+        Start the service.
+        """
+        super(ObliterateService, self).startService()
+        self.doObliterate()
+
+
+    @inlineCallbacks
+    def doObliterate(self):
+        """
+        Do the work, stopping the reactor when done.
+        """
+        self.output.write("\n---- Obliterate version: %s ----\n" % (VERSION,))
+        if self.options["dry-run"]:
+            self.output.write("---- DRY RUN No Changes Being Made ----\n")
+
+        try:
+            if self.options["fix-props"]:
+                yield self.obliterateOrphanedProperties()
+            else:
+                yield self.obliterateUUIDs()
+
+            self.output.close()
+        except ConfigError:
+            pass
+        except:
+            log.err()
+
+        self.reactor.stop()
+
+
+    @inlineCallbacks
+    def obliterateOrphanedProperties(self):
+        """
+        Obliterate orphaned data in RESOURCE_PROPERTIES table.
+        """
+        
+        # Get list of distinct resource_property resource_ids to delete
+        self.txn = self.store.newTransaction()
+        
+        ch = schema.CALENDAR_HOME
+        ca = schema.CALENDAR
+        co = schema.CALENDAR_OBJECT
+        ah = schema.ADDRESSBOOK_HOME
+        aa = schema.ADDRESSBOOK
+        ao = schema.ADDRESSBOOK_OBJECT
+        rp = schema.RESOURCE_PROPERTY
+
+        rows = (yield Select(
+            [rp.RESOURCE_ID, ],
+            Distinct=True,
+            From=rp,
+            Where=(rp.RESOURCE_ID.NotIn(
+                Select(
+                    [ch.RESOURCE_ID],
+                    From=ch,
+                    SetExpression=Union(
+                        Select(
+                            [ca.RESOURCE_ID],
+                            From=ca,
+                            SetExpression=Union(
+                                Select(
+                                    [co.RESOURCE_ID],
+                                    From=co,
+                                    SetExpression=Union(
+                                        Select(
+                                            [ah.RESOURCE_ID],
+                                            From=ah,
+                                            SetExpression=Union(
+                                                Select(
+                                                    [aa.RESOURCE_ID],
+                                                    From=aa,
+                                                    SetExpression=Union(
+                                                        Select(
+                                                            [ao.RESOURCE_ID],
+                                                            From=ao,
+                                                        ),
+                                                    ),
+                                                ),
+                                            ),
+                                        ),
+                                    ),
+                                ),
+                            ),
+                        ),
+                    ),
+                ),
+            ))
+        ).on(self.txn))
+
+        if not rows:
+            self.output.write("No orphaned resource properties\n")
+            returnValue(None)
+
+        resourceIDs = [row[0] for row in rows]
+        resourceIDs_len = len(resourceIDs)
+        t = time.time()
+        for ctr, resourceID in enumerate(resourceIDs):
+            self.output.write("%d of %d (%d%%): ResourceID: %s\n" % (
+                ctr + 1,
+                resourceIDs_len,
+                ((ctr + 1) * 100 / resourceIDs_len),
+                resourceID,
+            ))
+
+            yield self.removePropertiesForResourceID(resourceID)
+
+            # Commit every 10 DELETEs
+            if divmod(ctr + 1, 10)[1] == 0:
+                yield self.txn.commit()
+                self.txn = self.store.newTransaction()
+
+        yield self.txn.commit()
+        self.txn = None
+
+        self.output.write("Obliteration time: %.1fs\n" % (time.time() - t,))
+
+
+    @inlineCallbacks
+    def obliterateUUIDs(self):
+        """
+        Obliterate specified UUIDs.
+        """
+        if self.options["uuid"]:
+            uuids = [self.options["uuid"], ]
+        elif self.options["data"]:
+            if not os.path.exists(self.options["data"]):
+                self.output.write("%s is not a valid file\n" % (self.options["data"],))
+                raise ConfigError
+
+            uuids = open(self.options["data"]).read().split()
+        else:
+            self.output.write("One of --data or --uuid must be specified\n")
+            raise ConfigError
+
+        t = time.time()
+        uuids_len = len(uuids)
+        for ctr, uuid in enumerate(uuids):
+            self.txn = self.store.newTransaction()
+            self.output.write("%d of %d (%d%%): UUID: %s - " % (
+                ctr + 1,
+                uuids_len,
+                ((ctr + 1) * 100 / uuids_len),
+                uuid,
+            ))
+            result = (yield self.processUUID(uuid))
+            self.output.write("%s\n" % (result,))
+            yield self.txn.commit()
+            self.txn = None
+
+        self.output.write("\nTotal Homes: %d\n" % (self.totalHomes,))
+        self.output.write("Total Calendars: %d\n" % (self.totalCalendars,))
+        self.output.write("Total Resources: %d\n" % (self.totalResources,))
+        if self.attachments:
+            self.output.write("Attachments removed: %s\n" % (len(self.attachments,)))
+            #for attachment in self.attachments:
+            #    self.output.write("    %s\n" % (attachment,))
+        self.output.write("Obliteration time: %.1fs\n" % (time.time() - t,))
+
+
+    @inlineCallbacks
+    def processUUID(self, uuid):
+
+        # Get the resource-id for the home
+        ch = schema.CALENDAR_HOME
+        kwds = { "UUID" : uuid }
+        rows = (yield Select(
+            [ch.RESOURCE_ID, ],
+            From=ch,
+            Where=(
+                ch.OWNER_UID == Parameter("UUID")
+            ),
+        ).on(self.txn, **kwds))
+
+        if not rows:
+            returnValue("No home found")
+        homeID = rows[0][0]
+        self.totalHomes += 1
+
+        # Count resources
+        resourceCount = (yield self.countResources(uuid))
+        self.totalResources += resourceCount
+
+        # Remove revisions - do before deleting calendars to remove
+        # foreign key constraint
+        yield self.removeRevisionsForResourceID(homeID)
+
+        # Look at each calendar and unbind/delete-if-owned
+        count = (yield self.deleteCalendars(homeID))
+        self.totalCalendars += count
+
+        # Remove properties
+        yield self.removePropertiesForResourceID(homeID)
+
+        # Remove notifications
+        yield self.removeNotificationsForUUID(uuid)
+
+        # Remove attachments
+        attachmentCount = (yield self.removeAttachments(homeID))
+
+        # Now remove the home
+        yield self.removeHomeForResourceID(homeID)
+
+        returnValue("Home, %d calendars, %d resources%s - deleted" % (
+            count,
+            resourceCount,
+            (", %d attachmensts" % (attachmentCount,)) if attachmentCount else "",
+        ))
+
+
+    @inlineCallbacks
+    def countResources(self, uuid):
+        ch = schema.CALENDAR_HOME
+        cb = schema.CALENDAR_BIND
+        co = schema.CALENDAR_OBJECT
+        kwds = { "UUID" : uuid }
+        rows = (yield Select(
+            [
+                Count(co.RESOURCE_ID),
+            ],
+            From=ch.join(
+                cb, type="inner", on=(ch.RESOURCE_ID == cb.CALENDAR_HOME_RESOURCE_ID).And(
+                    cb.BIND_MODE == _BIND_MODE_OWN)).join(
+                co, type="left", on=(cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID)),
+            Where=(
+                ch.OWNER_UID == Parameter("UUID")
+            ),
+        ).on(self.txn, **kwds))
+
+        returnValue(rows[0][0] if rows else 0)
+
+
+    @inlineCallbacks
+    def deleteCalendars(self, homeID):
+
+        # Get list of binds and bind mode
+        cb = schema.CALENDAR_BIND
+        kwds = { "resourceID" : homeID }
+        rows = (yield Select(
+            [cb.CALENDAR_RESOURCE_ID, cb.BIND_MODE, ],
+            From=cb,
+            Where=(
+                cb.CALENDAR_HOME_RESOURCE_ID == Parameter("resourceID")
+            ),
+        ).on(self.txn, **kwds))
+        if not rows:
+            returnValue(0)
+
+        for resourceID, mode in rows:
+            if mode == _BIND_MODE_OWN:
+                yield self.deleteCalendar(resourceID)
+            else:
+                yield self.deleteBind(homeID, resourceID)
+
+        returnValue(len(rows))
+
+
+    @inlineCallbacks
+    def deleteCalendar(self, resourceID):
+
+        # Delete the CALENDAR entry (will cascade to CALENDAR_BIND and CALENDAR_OBJECT)
+        if not self.options["dry-run"]:
+            ca = schema.CALENDAR
+            kwds = {
+                "ResourceID" : resourceID,
+            }
+            yield Delete(
+                From=ca,
+                Where=(
+                    ca.RESOURCE_ID == Parameter("ResourceID")
+                ),
+            ).on(self.txn, **kwds)
+
+        # Remove properties
+        yield self.removePropertiesForResourceID(resourceID)
+
+
+    @inlineCallbacks
+    def deleteBind(self, homeID, resourceID):
+        if not self.options["dry-run"]:
+            cb = schema.CALENDAR_BIND
+            kwds = {
+                "HomeID" : homeID,
+                "ResourceID" : resourceID,
+            }
+            yield Delete(
+                From=cb,
+                Where=(
+                    (cb.CALENDAR_HOME_RESOURCE_ID == Parameter("HomeID")).And
+                    (cb.CALENDAR_RESOURCE_ID == Parameter("ResourceID"))
+                ),
+            ).on(self.txn, **kwds)
+
+
+    @inlineCallbacks
+    def removeRevisionsForResourceID(self, resourceID):
+        if not self.options["dry-run"]:
+            rev = schema.CALENDAR_OBJECT_REVISIONS
+            kwds = { "ResourceID" : resourceID }
+            yield Delete(
+                From=rev,
+                Where=(
+                    rev.CALENDAR_HOME_RESOURCE_ID == Parameter("ResourceID")
+                ),
+            ).on(self.txn, **kwds)
+
+
+    @inlineCallbacks
+    def removePropertiesForResourceID(self, resourceID):
+        if not self.options["dry-run"]:
+            props = schema.RESOURCE_PROPERTY
+            kwds = { "ResourceID" : resourceID }
+            yield Delete(
+                From=props,
+                Where=(
+                    props.RESOURCE_ID == Parameter("ResourceID")
+                ),
+            ).on(self.txn, **kwds)
+
+
+    @inlineCallbacks
+    def removeNotificationsForUUID(self, uuid):
+
+        # Get NOTIFICATION_HOME.RESOURCE_ID
+        nh = schema.NOTIFICATION_HOME
+        kwds = { "UUID" : uuid }
+        rows = (yield Select(
+            [nh.RESOURCE_ID, ],
+            From=nh,
+            Where=(
+                nh.OWNER_UID == Parameter("UUID")
+            ),
+        ).on(self.txn, **kwds))
+
+        if rows:
+            resourceID = rows[0][0]
+
+            # Delete NOTIFICATION rows
+            if not self.options["dry-run"]:
+                no = schema.NOTIFICATION
+                kwds = { "ResourceID" : resourceID }
+                yield Delete(
+                    From=no,
+                    Where=(
+                        no.NOTIFICATION_HOME_RESOURCE_ID == Parameter("ResourceID")
+                    ),
+                ).on(self.txn, **kwds)
+
+            # Delete NOTIFICATION_HOME (will cascade to NOTIFICATION_OBJECT_REVISIONS)
+            if not self.options["dry-run"]:
+                kwds = { "UUID" : uuid }
+                yield Delete(
+                    From=nh,
+                    Where=(
+                        nh.OWNER_UID == Parameter("UUID")
+                    ),
+                ).on(self.txn, **kwds)
+
+
+    @inlineCallbacks
+    def removeAttachments(self, resourceID):
+
+        # Get ATTACHMENT paths
+        at = schema.ATTACHMENT
+        kwds = { "resourceID" : resourceID }
+        rows = (yield Select(
+            [at.PATH, ],
+            From=at,
+            Where=(
+                at.CALENDAR_HOME_RESOURCE_ID == Parameter("resourceID")
+            ),
+        ).on(self.txn, **kwds))
+
+        if rows:
+            self.attachments.update([row[0] for row in rows])
+
+            # Delete ATTACHMENT rows
+            if not self.options["dry-run"]:
+                at = schema.ATTACHMENT
+                kwds = { "resourceID" : resourceID }
+                yield Delete(
+                    From=at,
+                    Where=(
+                        at.CALENDAR_HOME_RESOURCE_ID == Parameter("resourceID")
+                    ),
+                ).on(self.txn, **kwds)
+        
+        returnValue(len(rows) if rows else 0)
+
+
+    @inlineCallbacks
+    def removeHomeForResourceID(self, resourceID):
+        if not self.options["dry-run"]:
+            ch = schema.CALENDAR_HOME
+            kwds = { "ResourceID" : resourceID }
+            yield Delete(
+                From=ch,
+                Where=(
+                    ch.RESOURCE_ID == Parameter("ResourceID")
+                ),
+            ).on(self.txn, **kwds)
+
+
+    def stopService(self):
+        """
+        Stop the service.  Nothing to do; everything should be finished by this
+        time.
+        """
+        # TODO: stopping this service mid-run should really stop the
+        # obliteration loop, but this is not implemented because nothing will
+        # actually do it except hitting ^C (which also calls reactor.stop(),
+        # so that will exit anyway).
+
+
+
+def main(argv=sys.argv, stderr=sys.stderr, reactor=None):
+    """
+    Do the export.
+    """
+    if reactor is None:
+        from twisted.internet import reactor
+    options = ObliterateOptions()
+    options.parseOptions(argv[1:])
+    try:
+        output = options.openOutput()
+    except IOError, e:
+        stderr.write("Unable to open output file for writing: %s\n" % (e))
+        sys.exit(1)
+
+    def makeService(store):
+        from twistedcaldav.config import config
+        config.TransactionTimeoutSeconds = 0
+        return ObliterateService(store, options, output, reactor, config)
+
+    utilityMain(options['config'], makeService, reactor)
+
+# Run the tool when this file is executed directly as a script
+if __name__ == '__main__':
+    main()


Property changes on: CalendarServer/trunk/calendarserver/tools/obliterate.py
___________________________________________________________________
Added: svn:executable
   + *
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20120517/48747047/attachment-0001.html>


More information about the calendarserver-changes mailing list