[CalendarServer-changes] [14970] CalendarServer/trunk/calendarserver/tools

source_changes at macosforge.org source_changes at macosforge.org
Wed Jul 15 10:25:11 PDT 2015


Revision: 14970
          http://trac.calendarserver.org//changeset/14970
Author:   cdaboo at apple.com
Date:     2015-07-15 10:25:10 -0700 (Wed, 15 Jul 2015)
Log Message:
-----------
Merge old event purge changes from -5.4-dev branch.

Modified Paths:
--------------
    CalendarServer/trunk/calendarserver/tools/gateway.py
    CalendarServer/trunk/calendarserver/tools/purge.py
    CalendarServer/trunk/calendarserver/tools/test/test_purge_old_events.py

Modified: CalendarServer/trunk/calendarserver/tools/gateway.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/gateway.py	2015-07-15 16:10:33 UTC (rev 14969)
+++ CalendarServer/trunk/calendarserver/tools/gateway.py	2015-07-15 17:25:10 UTC (rev 14970)
@@ -577,7 +577,7 @@
         cutoff = DateTime.getToday()
         cutoff.setDateOnly(False)
         cutoff.offsetDay(-retainDays)
-        eventCount = (yield PurgeOldEventsService.purgeOldEvents(self.store, cutoff, DEFAULT_BATCH_SIZE))
+        eventCount = (yield PurgeOldEventsService.purgeOldEvents(self.store, None, cutoff, DEFAULT_BATCH_SIZE))
         self.respond(command, {'EventsRemoved': eventCount, "RetainDays": retainDays})
 
 

Modified: CalendarServer/trunk/calendarserver/tools/purge.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/purge.py	2015-07-15 16:10:33 UTC (rev 14969)
+++ CalendarServer/trunk/calendarserver/tools/purge.py	2015-07-15 17:25:10 UTC (rev 14970)
@@ -29,7 +29,7 @@
 from pycalendar.datetime import DateTime
 
 from twext.enterprise.dal.record import fromTable
-from twext.enterprise.dal.syntax import Delete, Select, Union
+from twext.enterprise.dal.syntax import Delete, Select, Union, Parameter, Max
 from twext.enterprise.jobqueue import WorkItem, RegeneratingWorkItem
 from twext.python.log import Logger
 
@@ -37,9 +37,11 @@
 
 from twistedcaldav import caldavxml
 from twistedcaldav.config import config
+from twistedcaldav.dateops import parseSQLDateToPyCalendar, pyCalendarToSQLTimestamp
+from twistedcaldav.ical import Component, InvalidICalendarDataError
 
 from txdav.caldav.datastore.query.filter import Filter
-from txdav.common.datastore.sql_tables import schema, _HOME_STATUS_NORMAL
+from txdav.common.datastore.sql_tables import schema, _HOME_STATUS_NORMAL, _BIND_MODE_OWN
 
 log = Logger()
 
@@ -277,10 +279,11 @@
 
 class PurgeOldEventsService(WorkerService):
 
+    uuid = None
     cutoff = None
     batchSize = None
     dryrun = False
-    verbose = False
+    debug = False
 
     @classmethod
     def usage(cls, e=None):
@@ -293,8 +296,8 @@
         print("options:")
         print("  -h --help: print this help and exit")
         print("  -f --config <path>: Specify caldavd.plist configuration path")
+        print("  -u --uuid <uuid>: Only process this user(s) [REQUIRED]")
         print("  -d --days <number>: specify how many days in the past to retain (default=%d)" % (DEFAULT_RETAIN_DAYS,))
-        # print("  -b --batch <number>: number of events to remove in each transaction (default=%d)" % (DEFAULT_BATCH_SIZE,))
         print("  -n --dry-run: calculate how many events to purge, but do not purge data")
         print("  -v --verbose: print progress information")
         print("  -D --debug: debug logging")
@@ -312,11 +315,12 @@
 
         try:
             (optargs, args) = getopt(
-                sys.argv[1:], "Dd:b:f:hnv", [
+                sys.argv[1:], "Dd:b:f:hnu:v", [
                     "days=",
                     "batch=",
                     "dry-run",
                     "config=",
+                    "uuid=",
                     "help",
                     "verbose",
                     "debug",
@@ -329,6 +333,7 @@
         # Get configuration
         #
         configFileName = None
+        uuid = None
         days = DEFAULT_RETAIN_DAYS
         batchSize = DEFAULT_BATCH_SIZE
         dryrun = False
@@ -365,12 +370,19 @@
             elif opt in ("-f", "--config"):
                 configFileName = arg
 
+            elif opt in ("-u", "--uuid"):
+                uuid = arg
+
             else:
                 raise NotImplementedError(opt)
 
         if args:
             cls.usage("Too many arguments: %s" % (args,))
 
+        if uuid is None:
+            cls.usage("uuid must be specified")
+        cls.uuid = uuid
+
         if dryrun:
             verbose = True
 
@@ -380,68 +392,321 @@
         cls.cutoff = cutoff
         cls.batchSize = batchSize
         cls.dryrun = dryrun
-        cls.verbose = verbose
+        cls.debug = debug
 
         utilityMain(
             configFileName,
             cls,
-            verbose=debug,
+            verbose=verbose,
         )
 
 
     @classmethod
     @inlineCallbacks
-    def purgeOldEvents(cls, store, cutoff, batchSize, verbose=False, dryrun=False):
+    def purgeOldEvents(cls, store, uuid, cutoff, batchSize, debug=False, dryrun=False):
 
         service = cls(store)
+        service.uuid = uuid
         service.cutoff = cutoff
         service.batchSize = batchSize
         service.dryrun = dryrun
-        service.verbose = verbose
+        service.debug = debug
         result = yield service.doWork()
         returnValue(result)
 
 
     @inlineCallbacks
+    def getMatchingHomeUIDs(self):
+        """
+        Find all the calendar homes that match the uuid cli argument.
+        """
+        log.debug("Searching for calendar homes matching: '{}'".format(self.uuid))
+        txn = self.store.newTransaction(label="Find matching homes")
+        ch = schema.CALENDAR_HOME
+        if self.uuid:
+            kwds = {"uuid": self.uuid}
+            rows = (yield Select(
+                [ch.RESOURCE_ID, ch.OWNER_UID, ],
+                From=ch,
+                Where=(ch.OWNER_UID.StartsWith(Parameter("uuid"))),
+            ).on(txn, **kwds))
+        else:
+            rows = (yield Select(
+                [ch.RESOURCE_ID, ch.OWNER_UID, ],
+                From=ch,
+            ).on(txn))
+
+        yield txn.commit()
+        log.debug("  Found {} calendar homes".format(len(rows)))
+        returnValue(sorted(rows, key=lambda x: x[1]))
+
+
+    @inlineCallbacks
+    def getMatchingCalendarIDs(self, home_id, owner_uid):
+        """
+        Find all the owned calendars for the specified calendar home.
+
+        @param home_id: resource-id of calendar home to check
+        @type home_id: L{int}
+        @param owner_uid: owner UUID of home to check
+        @type owner_uid: L{str}
+        """
+        log.debug("Checking calendar home: {} '{}'".format(home_id, owner_uid))
+        txn = self.store.newTransaction(label="Find matching calendars")
+        cb = schema.CALENDAR_BIND
+        kwds = {"home_id": home_id}
+        rows = (yield Select(
+            [cb.CALENDAR_RESOURCE_ID, cb.CALENDAR_RESOURCE_NAME, ],
+            From=cb,
+            Where=(cb.CALENDAR_HOME_RESOURCE_ID == Parameter("home_id")).And(
+                cb.BIND_MODE == _BIND_MODE_OWN
+            ),
+        ).on(txn, **kwds))
+        yield txn.commit()
+        log.debug("  Found {} calendars".format(len(rows)))
+        returnValue(rows)
+
+
+    PurgeEvent = collections.namedtuple("PurgeEvent", ("home", "calendar", "resource",))
+
+    @inlineCallbacks
+    def getResourceIDsToPurge(self, home_id, calendar_id, calendar_name):
+        """
+        For the given calendar find which calendar objects are older than the cut-off and return the
+        resource-ids of those.
+
+        @param home_id: resource-id of calendar home
+        @type home_id: L{int}
+        @param calendar_id: resource-id of the calendar to check
+        @type calendar_id: L{int}
+        @param calendar_name: name of the calendar to check
+        @type calendar_name: L{str}
+        """
+
+        log.debug("  Checking calendar: {} '{}'".format(calendar_id, calendar_name))
+        purge = set()
+        txn = self.store.newTransaction(label="Find matching resources")
+        co = schema.CALENDAR_OBJECT
+        tr = schema.TIME_RANGE
+        kwds = {"calendar_id": calendar_id}
+        rows = (yield Select(
+            [co.RESOURCE_ID, co.RECURRANCE_MAX, co.RECURRANCE_MIN, Max(tr.END_DATE)],
+            From=co.join(tr, on=(co.RESOURCE_ID == tr.CALENDAR_OBJECT_RESOURCE_ID)),
+            Where=(co.CALENDAR_RESOURCE_ID == Parameter("calendar_id")).And(
+                co.ICALENDAR_TYPE == "VEVENT"
+            ),
+            GroupBy=(co.RESOURCE_ID, co.RECURRANCE_MAX, co.RECURRANCE_MIN,),
+            Having=(
+                (co.RECURRANCE_MAX == None).And(Max(tr.END_DATE) < pyCalendarToSQLTimestamp(self.cutoff))
+            ).Or(
+                (co.RECURRANCE_MAX != None).And(co.RECURRANCE_MAX < pyCalendarToSQLTimestamp(self.cutoff))
+            ),
+        ).on(txn, **kwds))
+
+        log.debug("    Found {} resources to check".format(len(rows)))
+        for resource_id, recurrence_max, recurrence_min, max_end_date in rows:
+
+            recurrence_max = parseSQLDateToPyCalendar(recurrence_max) if recurrence_max else None
+            recurrence_min = parseSQLDateToPyCalendar(recurrence_min) if recurrence_min else None
+            max_end_date = parseSQLDateToPyCalendar(max_end_date) if max_end_date else None
+
+            # Find events where we know the max(end_date) represents a valid,
+            # untruncated expansion
+            if recurrence_min is None or recurrence_min < self.cutoff:
+                if recurrence_max is None:
+                    # Here we know max_end_date is the fully expanded final instance
+                    if max_end_date < self.cutoff:
+                        purge.add(self.PurgeEvent(home_id, calendar_id, resource_id,))
+                    continue
+                elif recurrence_max > self.cutoff:
+                    # Here we know that there are instances newer than the cut-off
+                    # but they have not yet been indexed out that far
+                    continue
+
+            # Manually detect the max_end_date from the actual calendar data
+            calendar = yield self.getCalendar(txn, resource_id)
+            if calendar is not None:
+                if self.checkLastInstance(calendar):
+                    purge.add(self.PurgeEvent(home_id, calendar_id, resource_id,))
+
+        yield txn.commit()
+        log.debug("    Found {} resources to purge".format(len(purge)))
+        returnValue(purge)
+
+
+    @inlineCallbacks
+    def getCalendar(self, txn, resid):
+        """
+        Get the calendar data for a calendar object resource.
+
+        @param resid: resource-id of the calendar object resource to load
+        @type resid: L{int}
+        """
+        co = schema.CALENDAR_OBJECT
+        kwds = {"ResourceID" : resid}
+        rows = (yield Select(
+            [co.ICALENDAR_TEXT],
+            From=co,
+            Where=(
+                co.RESOURCE_ID == Parameter("ResourceID")
+            ),
+        ).on(txn, **kwds))
+        try:
+            caldata = Component.fromString(rows[0][0]) if rows else None
+        except InvalidICalendarDataError:
+            returnValue(None)
+
+        returnValue(caldata)
+
+
+    def checkLastInstance(self, calendar):
+        """
+        Determine the last instance of a calendar event. Try a "static" analysis of the data first,
+        and only if needed, do an instance expansion.
+
+        @param calendar: the calendar object to examine
+        @type calendar: L{Component}
+        """
+
+        # Is it recurring
+        master = calendar.masterComponent()
+        if not calendar.isRecurring() or master is None:
+            # Just check the end date
+            for comp in calendar.subcomponents():
+                if comp.name() == "VEVENT":
+                    if comp.getEndDateUTC() > self.cutoff:
+                        return False
+            else:
+                return True
+        elif calendar.isRecurringUnbounded():
+            return False
+        else:
+            # First test all sub-components
+            # Just check the end date
+            for comp in calendar.subcomponents():
+                if comp.name() == "VEVENT":
+                    if comp.getEndDateUTC() > self.cutoff:
+                        return False
+
+            # If we get here we need to test the RRULE - if there is an until use
+            # that as the end point, if a count, we have to expand
+            rrules = tuple(master.properties("RRULE"))
+            if len(rrules):
+                if rrules[0].value().getUseUntil():
+                    return rrules[0].value().getUntil() < self.cutoff
+                else:
+                    return not calendar.hasInstancesAfter(self.cutoff)
+
+        return True
+
+
+    @inlineCallbacks
+    def getResourcesToPurge(self, home_id, owner_uid):
+        """
+        Find all the resource-ids of calendar object resources that need to be purged in the specified home.
+
+        @param home_id: resource-id of calendar home to check
+        @type home_id: L{int}
+        @param owner_uid: owner UUID of home to check
+        @type owner_uid: L{str}
+        """
+
+        purge = set()
+        calendars = yield self.getMatchingCalendarIDs(home_id, owner_uid)
+        for calendar_id, calendar_name in calendars:
+            purge.update((yield self.getResourceIDsToPurge(home_id, calendar_id, calendar_name)))
+
+        returnValue(purge)
+
+
+    @inlineCallbacks
+    def purgeResources(self, events):
+        """
+        Remove every event in C{events} (callers pass at most batchSize
+        of them) and return how many were removed.
+        """
+
+        txn = self.store.newTransaction(label="Remove old events")
+        count = 0
+        last_home = None
+        last_calendar = None
+        for event in events:
+            if event.home != last_home:
+                home = (yield txn.calendarHomeWithResourceID(event.home))
+                last_home = event.home
+            if event.calendar != last_calendar:
+                calendar = (yield home.childWithID(event.calendar))
+                last_calendar = event.calendar
+            resource = (yield calendar.objectResourceWithID(event.resource))
+            yield resource.purge(implicitly=False)
+            log.debug("Removed resource {} '{}' from calendar {} '{}' of calendar home '{}'".format(
+                resource.id(),
+                resource.name(),
+                resource.parentCollection().id(),
+                resource.parentCollection().name(),
+                resource.parentCollection().ownerHome().uid()
+            ))
+            count += 1
+        yield txn.commit()
+        returnValue(count)
+
+
+    @inlineCallbacks
     def doWork(self):
 
+        if self.debug:
+            # Turn on debug logging for this module
+            config.LogLevels[__name__] = "debug"
+        else:
+            config.LogLevels[__name__] = "info"
+        config.update()
+
+        homes = yield self.getMatchingHomeUIDs()
+        if not homes:
+            log.info("No homes to process")
+            returnValue(0)
+
         if self.dryrun:
-            if self.verbose:
-                print("(Dry run) Searching for old events...")
-            txn = self.store.newTransaction(label="Find old events")
-            oldEvents = yield txn.eventsOlderThan(self.cutoff)
-            eventCount = len(oldEvents)
-            if self.verbose:
-                if eventCount == 0:
-                    print("No events are older than %s" % (self.cutoff,))
-                elif eventCount == 1:
-                    print("1 event is older than %s" % (self.cutoff,))
-                else:
-                    print("%d events are older than %s" % (eventCount, self.cutoff))
+            log.info("Purge dry run only")
+
+        log.info("Searching for old events...")
+
+        purge = set()
+        homes = yield self.getMatchingHomeUIDs()
+        for home_id, owner_uid in homes:
+            purge.update((yield self.getResourcesToPurge(home_id, owner_uid)))
+
+        if self.dryrun:
+            eventCount = len(purge)
+            if eventCount == 0:
+                log.info("No events are older than %s" % (self.cutoff,))
+            elif eventCount == 1:
+                log.info("1 event is older than %s" % (self.cutoff,))
+            else:
+                log.info("%d events are older than %s" % (eventCount, self.cutoff))
             returnValue(eventCount)
 
-        if self.verbose:
-            print("Removing events older than %s..." % (self.cutoff,))
+        purge = list(purge)
+        purge.sort()
+        totalEvents = len(purge)
 
+        log.info("Removing {} events older than {}...".format(len(purge), self.cutoff,))
+
         numEventsRemoved = -1
         totalRemoved = 0
         while numEventsRemoved:
-            txn = self.store.newTransaction(label="Remove old events")
-            numEventsRemoved = yield txn.removeOldEvents(self.cutoff, batchSize=self.batchSize)
-            yield txn.commit()
+            numEventsRemoved = (yield self.purgeResources(purge[:self.batchSize]))
             if numEventsRemoved:
                 totalRemoved += numEventsRemoved
-                if self.verbose:
-                    print("%d," % (totalRemoved,),)
+                log.debug("  Removed {} of {} events...".format(totalRemoved, totalEvents))
+                purge = purge[numEventsRemoved:]
 
-        if self.verbose:
-            print("")
-            if totalRemoved == 0:
-                print("No events were removed")
-            elif totalRemoved == 1:
-                print("1 event was removed in total")
-            else:
-                print("%d events were removed in total" % (totalRemoved,))
+        if totalRemoved == 0:
+            log.info("No events were removed")
+        elif totalRemoved == 1:
+            log.info("1 event was removed in total")
+        else:
+            log.info("%d events were removed in total" % (totalRemoved,))
 
         returnValue(totalRemoved)
 
@@ -671,20 +936,18 @@
             # Print table of results
             table = tables.Table()
             table.addHeader(("User", "Current Quota", "Orphan Size", "Orphan Count", "Dropbox Size", "Dropbox Count", "Managed Size", "Managed Count", "Total Size", "Total Count"))
-            table.setDefaultColumnFormats(
-                (
-                    tables.Table.ColumnFormat("%s", tables.Table.ColumnFormat.LEFT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                    tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
-                )
-            )
+            table.setDefaultColumnFormats((
+                tables.Table.ColumnFormat("%s", tables.Table.ColumnFormat.LEFT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+                tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+            ))
 
             totals = [0] * 8
             for user, data in sorted(byuser.items(), key=lambda x: x[0]):

Modified: CalendarServer/trunk/calendarserver/tools/test/test_purge_old_events.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/test/test_purge_old_events.py	2015-07-15 16:10:33 UTC (rev 14969)
+++ CalendarServer/trunk/calendarserver/tools/test/test_purge_old_events.py	2015-07-15 17:25:10 UTC (rev 14970)
@@ -642,33 +642,123 @@
         # Dry run
         total = (yield PurgeOldEventsService.purgeOldEvents(
             self._sqlCalendarStore,
+            None,
             DateTime(now, 4, 1, 0, 0, 0),
             2,
             dryrun=True,
-            verbose=False
+            debug=True
         ))
         self.assertEquals(total, 13)
 
         # Actually remove
         total = (yield PurgeOldEventsService.purgeOldEvents(
             self._sqlCalendarStore,
+            None,
             DateTime(now, 4, 1, 0, 0, 0),
             2,
-            verbose=False
+            debug=True
         ))
         self.assertEquals(total, 13)
 
         # There should be no more left
         total = (yield PurgeOldEventsService.purgeOldEvents(
             self._sqlCalendarStore,
+            None,
             DateTime(now, 4, 1, 0, 0, 0),
             2,
-            verbose=False
+            debug=True
         ))
         self.assertEquals(total, 0)
 
 
     @inlineCallbacks
+    def test_purgeOldEvents_home_filtering(self):
+
+        # Dry run
+        total = (yield PurgeOldEventsService.purgeOldEvents(
+            self._sqlCalendarStore,
+            "ho",
+            DateTime(now, 4, 1, 0, 0, 0),
+            2,
+            dryrun=True,
+            debug=True
+        ))
+        self.assertEquals(total, 13)
+
+        # Dry run
+        total = (yield PurgeOldEventsService.purgeOldEvents(
+            self._sqlCalendarStore,
+            "home",
+            DateTime(now, 4, 1, 0, 0, 0),
+            2,
+            dryrun=True,
+            debug=True
+        ))
+        self.assertEquals(total, 13)
+
+        # Dry run
+        total = (yield PurgeOldEventsService.purgeOldEvents(
+            self._sqlCalendarStore,
+            "home1",
+            DateTime(now, 4, 1, 0, 0, 0),
+            2,
+            dryrun=True,
+            debug=True
+        ))
+        self.assertEquals(total, 5)
+
+        # Dry run
+        total = (yield PurgeOldEventsService.purgeOldEvents(
+            self._sqlCalendarStore,
+            "home2",
+            DateTime(now, 4, 1, 0, 0, 0),
+            2,
+            dryrun=True,
+            debug=True
+        ))
+        self.assertEquals(total, 8)
+
+
+    @inlineCallbacks
+    def test_purgeOldEvents_old_cutoff(self):
+
+        # Dry run
+        cutoff = DateTime.getToday()
+        cutoff.setDateOnly(False)
+        cutoff.offsetDay(-400)
+
+        total = (yield PurgeOldEventsService.purgeOldEvents(
+            self._sqlCalendarStore,
+            "ho",
+            cutoff,
+            2,
+            dryrun=True,
+            debug=True
+        ))
+        self.assertEquals(total, 12)
+
+        # Actually remove
+        total = (yield PurgeOldEventsService.purgeOldEvents(
+            self._sqlCalendarStore,
+            None,
+            cutoff,
+            2,
+            debug=True
+        ))
+        self.assertEquals(total, 12)
+
+        total = (yield PurgeOldEventsService.purgeOldEvents(
+            self._sqlCalendarStore,
+            "ho",
+            cutoff,
+            2,
+            dryrun=True,
+            debug=True
+        ))
+        self.assertEquals(total, 0)
+
+
+    @inlineCallbacks
     def test_purgeUID(self):
         txn = self._sqlCalendarStore.newTransaction()
 
@@ -753,9 +843,10 @@
         # Remove old events first
         total = (yield PurgeOldEventsService.purgeOldEvents(
             self._sqlCalendarStore,
+            None,
             DateTime(now, 4, 1, 0, 0, 0),
             2,
-            verbose=False
+            debug=False
         ))
         self.assertEquals(total, 13)
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20150715/cfc125ab/attachment-0001.html>


More information about the calendarserver-changes mailing list