[CalendarServer-changes] [9360] CalendarServer/trunk/calendarserver/tools/migrate_verify.py

source_changes at macosforge.org source_changes at macosforge.org
Thu Jun 14 15:57:09 PDT 2012


Revision: 9360
          http://trac.macosforge.org/projects/calendarserver/changeset/9360
Author:   cdaboo at apple.com
Date:     2012-06-14 15:57:07 -0700 (Thu, 14 Jun 2012)
Log Message:
-----------
Tool to help verify that a migration was successful.

Added Paths:
-----------
    CalendarServer/trunk/calendarserver/tools/migrate_verify.py

Added: CalendarServer/trunk/calendarserver/tools/migrate_verify.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/migrate_verify.py	                        (rev 0)
+++ CalendarServer/trunk/calendarserver/tools/migrate_verify.py	2012-06-14 22:57:07 UTC (rev 9360)
@@ -0,0 +1,389 @@
+#!/usr/bin/env python
+# -*- test-case-name: calendarserver.tools.test.test_calverify -*-
+##
+# Copyright (c) 2012 Apple Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##
+from txdav.common.datastore.sql_tables import schema, _BIND_MODE_OWN
+from twext.enterprise.dal.syntax import Select, Parameter
+
+"""
+This tool takes a list of files paths from a file store being migrated
+and compares that to the results of a migration to an SQL store. Items
+not migrated are logged.
+"""
+
+from calendarserver.tools.cmdline import utilityMain
+from twisted.application.service import Service
+from twisted.internet.defer import inlineCallbacks, returnValue
+from twisted.python import log
+from twisted.python.text import wordWrap
+from twisted.python.usage import Options
+from twistedcaldav.stdconfig import DEFAULT_CONFIG_FILE
+import os
+import sys
+
+VERSION = "1"
+
+def usage(e=None):
+    if e:
+        print e
+        print ""
+    try:
+        MigrateVerifyOptions().opt_help()
+    except SystemExit:
+        pass
+    if e:
+        sys.exit(64)
+    else:
+        sys.exit(0)
+
+
+description = ''.join(
+    wordWrap(
+        """
+        Usage: calendarserver_migrate_verify [options] [input specifiers]
+        """,
+        int(os.environ.get('COLUMNS', '80'))
+    )
+)
+description += "\nVersion: %s" % (VERSION,)
+
+
+
+class ConfigError(Exception):
+    pass
+
+
+
+class MigrateVerifyOptions(Options):
+    """
+    Command-line options for 'calendarserver_migrate_verify'
+    """
+
+    synopsis = description
+
+    optFlags = [
+    ]
+
+    optParameters = [
+        ['config', 'f', DEFAULT_CONFIG_FILE, "Specify caldavd.plist configuration path."],
+        ['data', 'd', "./paths.txt", "List of file paths for migrated data."],
+    ]
+
+
+    def __init__(self):
+        super(MigrateVerifyOptions, self).__init__()
+        self.outputName = '-'
+
+
+    def opt_output(self, filename):
+        """
+        Specify output file path (default: '-', meaning stdout).
+        """
+        self.outputName = filename
+
+    opt_o = opt_output
+
+
+    def openOutput(self):
+        """
+        Open the appropriate output file based on the '--output' option.
+        """
+        if self.outputName == '-':
+            return sys.stdout
+        else:
+            return open(self.outputName, 'wb')
+
+
+class MigrateVerifyService(Service, object):
+    """
+    Service which runs, does its stuff, then stops the reactor.
+    """
+
+    def __init__(self, store, options, output, reactor, config):
+        super(MigrateVerifyService, self).__init__()
+        self.store = store
+        self.options = options
+        self.output = output
+        self.reactor = reactor
+        self.config = config
+
+        self.pathsByGUID = {}
+        self.badPaths = []
+        self.validPaths = 0
+        self.ignoreInbox = 0
+        self.ignoreDropbox = 0
+        self.missingGUIDs = []
+        self.missingCalendars = []
+        self.missingResources = []
+
+    def startService(self):
+        """
+        Start the service.
+        """
+        super(MigrateVerifyService, self).startService()
+        self.doMigrateVerify()
+
+
+    @inlineCallbacks
+    def doMigrateVerify(self):
+        """
+        Do the work, stopping the reactor when done.
+        """
+        self.output.write("\n---- Migrate Verify version: %s ----\n" % (VERSION,))
+
+        try:
+            self.readPaths()
+            yield self.doCheck()
+            self.output.close()
+        except ConfigError:
+            pass
+        except:
+            log.err()
+
+        self.reactor.stop()
+
+
+    def readPaths(self):
+        
+        self.output.write("-- Reading data file: %s\n" % (self.options["data"]))
+
+        datafile = open(os.path.expanduser(self.options["data"]))
+        total = 0
+        invalidGUIDs = set()
+        for line in datafile:
+            line = line.strip()
+            total += 1
+            segments = line.split("/")
+            while segments and segments[0] != "__uids__":
+                segments.pop(0)
+            if segments and len(segments) >= 6:
+                guid = segments[3]
+                calendar = segments[4]
+                resource = segments[5]
+
+                if calendar == "inbox":
+                    self.ignoreInbox += 1
+                    invalidGUIDs.add(guid)
+                elif calendar == "dropbox":
+                    self.ignoreDropbox += 1
+                    invalidGUIDs.add(guid)
+                elif len(segments) > 6:
+                    self.badPaths.append(line)
+                    invalidGUIDs.add(guid)
+                else:                
+                    self.pathsByGUID.setdefault(guid, {}).setdefault(calendar, set()).add(resource)
+                    self.validPaths += 1
+            else:
+                if segments and len(segments) >= 4:
+                    invalidGUIDs.add(segments[3])
+                self.badPaths.append(line)
+
+        # Remove any invalid GUIDs that actuall were valid
+        invalidGUIDs = [guid for guid in invalidGUIDs if guid not in self.pathsByGUID]
+
+        self.output.write("\nTotal lines read: %d\n" % (total,))
+        self.output.write("Total guids: valid: %d  invalid: %d  overall: %d\n" % (
+            len(self.pathsByGUID),
+            len(invalidGUIDs),
+            len(self.pathsByGUID) + len(invalidGUIDs),
+        ))
+        self.output.write("Total valid calendars: %d\n" % (sum([len(v) for v in self.pathsByGUID.values()]),))
+        self.output.write("Total valid resources: %d\n" % (self.validPaths,))
+        self.output.write("Total inbox resources: %d\n" % (self.ignoreInbox,))
+        self.output.write("Total dropbox resources: %d\n" % (self.ignoreDropbox,))
+        self.output.write("Total bad paths: %d\n" % (len(self.badPaths),))
+
+        self.output.write("\n-- Invalid GUIDs\n")
+        for invalidGUID in sorted(invalidGUIDs):
+            self.output.write("Invalid GUID: %s\n" % (invalidGUID,))
+
+
+        self.output.write("\n-- Bad paths\n")
+        for badPath in sorted(self.badPaths):
+            self.output.write("Bad path: %s\n" % (badPath,))
+
+    @inlineCallbacks
+    def doCheck(self):
+        """
+        Check path data against the SQL store.
+        """
+        
+        self.output.write("\n-- Scanning database for missed migrations\n")
+
+        # Get list of distinct resource_property resource_ids to delete
+        self.txn = self.store.newTransaction()
+        
+        total = len(self.pathsByGUID)
+        totalMissingCalendarResources = 0
+        count = 0
+        for guid in self.pathsByGUID:
+            
+            if divmod(count, 10)[1] == 0:
+                self.output.write(("\r%d of %d (%d%%)" % (
+                    count,
+                    total,
+                    (count * 100 / total),
+                )).ljust(80))
+                self.output.flush()
+
+            # First check the presence of each guid and the calendar count
+            homeID = (yield self.guid2ResourceID(guid))
+            if homeID is None:
+                self.missingGUIDs.append(guid)
+                continue
+            
+            # Now get the list of calendar names and calendar resource IDs
+            results = (yield self.calendarsForUser(homeID))
+            if results is None:
+                results = []
+            calendars = dict(results)
+            for calendar in self.pathsByGUID[guid].keys():
+                if calendar not in calendars:
+                    self.missingCalendars.append("%s/%s (resources: %d)" % (guid, calendar, len(self.pathsByGUID[guid][calendar])))
+                    totalMissingCalendarResources += len(self.pathsByGUID[guid][calendar])
+                else:
+                    # Now get list of all calendar resources
+                    results = (yield self.resourcesForCalendar(calendars[calendar]))
+                    if results is None:
+                        results = []
+                    results = [result[0] for result in results]
+                    db_resources = set(results)
+                    
+                    # Also check for split calendar
+                    if "%s-vtodo" % (calendar,) in calendars:
+                        results = (yield self.resourcesForCalendar(calendars["%s-vtodo" % (calendar,)]))
+                        if results is None:
+                            results = []
+                        results = [result[0] for result in results]
+                        db_resources.update(results)
+                    
+                    # Also check for split calendar
+                    if "%s-vevent" % (calendar,) in calendars:
+                        results = (yield self.resourcesForCalendar(calendars["%s-vevent" % (calendar,)]))
+                        if results is None:
+                            results = []
+                        results = [result[0] for result in results]
+                        db_resources.update(results)
+                    
+                    old_resources = set(self.pathsByGUID[guid][calendar])
+                    self.missingResources.extend(["%s/%s/%s" % (guid, calendar, resource,) for resource in old_resources.difference(db_resources)])
+
+            # Commit every 10 time through
+            if divmod(count + 1, 10)[1] == 0:
+                yield self.txn.commit()
+                self.txn = self.store.newTransaction()
+            
+            count += 1
+
+        yield self.txn.commit()
+        self.txn = None
+
+        self.output.write("\n\nTotal missing GUIDs: %d\n" % (len(self.missingGUIDs),))
+        for guid in sorted(self.missingGUIDs):
+            self.output.write("%s\n" % (guid,))
+
+        self.output.write("\nTotal missing Calendars: %d (resources: %d)\n" % (len(self.missingCalendars), totalMissingCalendarResources,))
+        for calendar in sorted(self.missingCalendars):
+            self.output.write("%s\n" % (calendar,))
+
+        self.output.write("\nTotal missing Resources: %d\n" % (len(self.missingResources),))
+        for resource in sorted(self.missingResources):
+            self.output.write("%s\n" % (resource,))
+                    
+
+    @inlineCallbacks
+    def guid2ResourceID(self, guid):
+        ch = schema.CALENDAR_HOME
+        kwds = { "GUID" : guid }
+        rows = (yield Select(
+            [
+                ch.RESOURCE_ID,
+            ],
+            From=ch,
+            Where=(
+                ch.OWNER_UID == Parameter("GUID")
+            ),
+        ).on(self.txn, **kwds))
+
+        returnValue(rows[0][0] if rows else None)
+
+
+    @inlineCallbacks
+    def calendarsForUser(self, rid):
+        cb = schema.CALENDAR_BIND
+        kwds = { "RID" : rid }
+        rows = (yield Select(
+            [
+                cb.CALENDAR_RESOURCE_NAME,
+                cb.CALENDAR_RESOURCE_ID,
+            ],
+            From=cb,
+            Where=(
+                cb.CALENDAR_HOME_RESOURCE_ID == Parameter("RID")
+            ).And(cb.BIND_MODE == _BIND_MODE_OWN),
+        ).on(self.txn, **kwds))
+
+        returnValue(rows)
+
+    
+    @inlineCallbacks
+    def resourcesForCalendar(self, rid):
+        co = schema.CALENDAR_OBJECT
+        kwds = { "RID" : rid }
+        rows = (yield Select(
+            [
+                co.RESOURCE_NAME,
+            ],
+            From=co,
+            Where=(
+                co.CALENDAR_RESOURCE_ID == Parameter("RID")
+            ),
+        ).on(self.txn, **kwds))
+
+        returnValue(rows)
+
+    
+    def stopService(self):
+        """
+        Stop the service.  Nothing to do; everything should be finished by this
+        time.
+        """
+
+
+
+def main(argv=sys.argv, stderr=sys.stderr, reactor=None):
+    """
+    Do the export.
+    """
+    if reactor is None:
+        from twisted.internet import reactor
+    options = MigrateVerifyOptions()
+    options.parseOptions(argv[1:])
+    try:
+        output = options.openOutput()
+    except IOError, e:
+        stderr.write("Unable to open output file for writing: %s\n" % (e))
+        sys.exit(1)
+
+    def makeService(store):
+        from twistedcaldav.config import config
+        config.TransactionTimeoutSeconds = 0
+        return MigrateVerifyService(store, options, output, reactor, config)
+
+    utilityMain(options['config'], makeService, reactor)
+
+if __name__ == '__main__':
+    main()


Property changes on: CalendarServer/trunk/calendarserver/tools/migrate_verify.py
___________________________________________________________________
Added: svn:executable
   + *
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20120614/a5e19f6a/attachment-0001.html>


More information about the calendarserver-changes mailing list