[CalendarServer-changes] [15629] CalendarServer/trunk

source_changes at macosforge.org source_changes at macosforge.org
Sat May 21 06:30:05 PDT 2016


Revision: 15629
          http://trac.calendarserver.org//changeset/15629
Author:   cdaboo at apple.com
Date:     2016-05-21 06:30:05 -0700 (Sat, 21 May 2016)
Log Message:
-----------
Optimize the inbox cleanup process by staggering the work and reducing the types of queries done to detect what needs to be cleaned.

Modified Paths:
--------------
    CalendarServer/trunk/conf/caldavd-stdconfig.plist
    CalendarServer/trunk/twistedcaldav/stdconfig.py
    CalendarServer/trunk/txdav/common/datastore/sql.py
    CalendarServer/trunk/txdav/common/datastore/sql_schema/current-oracle-dialect.sql
    CalendarServer/trunk/txdav/common/datastore/sql_schema/current.sql
    CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/oracle-dialect/upgrade_from_60_to_61.sql
    CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/postgres-dialect/upgrade_from_60_to_61.sql
    CalendarServer/trunk/txdav/common/datastore/work/inbox_cleanup.py
    CalendarServer/trunk/txdav/common/datastore/work/test/test_inbox_cleanup.py

Modified: CalendarServer/trunk/conf/caldavd-stdconfig.plist
===================================================================
--- CalendarServer/trunk/conf/caldavd-stdconfig.plist	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/conf/caldavd-stdconfig.plist	2016-05-21 13:30:05 UTC (rev 15629)
@@ -1069,14 +1069,27 @@
 		<key>ItemLifetimeDays</key>
 		<real>14.0</real>
 
-		<!-- Number of days to keep an inbox item past the time when its referenced
-		     event ends -->
-		<key>ItemLifeBeyondEventEndDays</key>
-		<real>14.0</real>
-
 		<!-- Number of days between inbox cleanups -->
 		<key>CleanupPeriodDays</key>
 		<real>2.0</real>
+
+		<!-- Number of seconds before CleanupOneInboxWork starts after
+		     InboxCleanupWork -->
+		<key>StartDelaySeconds</key>
+		<integer>300</integer>
+
+		<!-- Number of seconds between each CleanupOneInboxWork (fractional) -->
+		<key>StaggerSeconds</key>
+		<real>0.5</real>
+
+		<!-- Number of items above which inbox removals will be deferred to a work
+		     item -->
+		<key>InboxRemoveWorkThreshold</key>
+		<integer>5</integer>
+
+		<!-- Number of seconds between each InboxRemoveWork -->
+		<key>RemovalStaggerSeconds</key>
+		<real>0.5</real>
 	</dict>
 
 	<!-- CardDAV Features -->

Modified: CalendarServer/trunk/twistedcaldav/stdconfig.py
===================================================================
--- CalendarServer/trunk/twistedcaldav/stdconfig.py	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/twistedcaldav/stdconfig.py	2016-05-21 13:30:05 UTC (rev 15629)
@@ -645,9 +645,12 @@
 
     "InboxCleanup": {
         "Enabled": True,
-        "ItemLifetimeDays" : 14.0,             # Number of days before deleting a new inbox item
-        "ItemLifeBeyondEventEndDays" : 14.0, # Number of days to keep an inbox item past the time when its referenced event ends
-        "CleanupPeriodDays" : 2.0,             # Number of days between inbox cleanups
+        "ItemLifetimeDays" : 14.0,              # Number of days before deleting a new inbox item
+        "CleanupPeriodDays" : 2.0,              # Number of days between inbox cleanups
+        "StartDelaySeconds": 5 * 60,            # Number of seconds before CleanupOneInboxWork starts after InboxCleanupWork
+        "StaggerSeconds": 0.5,                  # Number of seconds between each CleanupOneInboxWork (fractional)
+        "InboxRemoveWorkThreshold": 5,          # Number of items above which inbox removals will be deferred to a work item
+        "RemovalStaggerSeconds": 0.5,           # Number of seconds between each InboxRemoveWork
     },
 
     # CardDAV Features

Modified: CalendarServer/trunk/txdav/common/datastore/sql.py
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql.py	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql.py	2016-05-21 13:30:05 UTC (rev 15629)
@@ -1472,97 +1472,6 @@
 
 
     @classproperty
-    def _orphanedInboxItemsInHomeIDQuery(cls):
-        """
-        DAL query to select inbox items that refer to nonexistent events in a
-        given home identified by the home resource ID.
-        """
-        co = schema.CALENDAR_OBJECT
-        cb = schema.CALENDAR_BIND
-        return Select(
-            [co.RESOURCE_NAME],
-            From=co.join(cb),
-            Where=(
-                cb.CALENDAR_HOME_RESOURCE_ID == Parameter("homeID")).And(
-                cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
-                cb.BIND_MODE == _BIND_MODE_OWN).And(
-                cb.CALENDAR_RESOURCE_NAME == 'inbox').And(
-                co.ICALENDAR_UID.NotIn(
-                    Select(
-                        [co.ICALENDAR_UID],
-                        From=co.join(cb),
-                        Where=(
-                            cb.CALENDAR_HOME_RESOURCE_ID == Parameter("homeID")).And(
-                            cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
-                            cb.BIND_MODE == _BIND_MODE_OWN).And(
-                            cb.CALENDAR_RESOURCE_NAME != 'inbox')
-                    )
-                )
-            ),
-        )
-
-
-    @inlineCallbacks
-    def orphanedInboxItemsInHomeID(self, homeID):
-        """
-        Find inbox item names that refer to nonexistent events in a given home.
-
-        Returns a deferred to a list of orphaned inbox item names
-        """
-        rows = yield self._orphanedInboxItemsInHomeIDQuery.on(self, homeID=homeID)
-        names = [row[0] for row in rows]
-        returnValue(names)
-
-
-    @classproperty
-    def _inboxItemsInHomeIDForEventsBeforeCutoffQuery(cls):
-        """
-        DAL query to select inbox items that refer to events in a before a
-        given date.
-        """
-        co = schema.CALENDAR_OBJECT
-        cb = schema.CALENDAR_BIND
-        tr = schema.TIME_RANGE
-        return Select(
-            [co.RESOURCE_NAME],
-            From=co.join(cb),
-            Where=(
-                cb.CALENDAR_HOME_RESOURCE_ID == Parameter("homeID")).And(
-                cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
-                cb.BIND_MODE == _BIND_MODE_OWN).And(
-                cb.CALENDAR_RESOURCE_NAME == 'inbox').And(
-                co.ICALENDAR_UID.In(
-                    Select(
-                        [co.ICALENDAR_UID],
-                        From=tr.join(co.join(cb)),
-                        Where=(
-                            cb.CALENDAR_HOME_RESOURCE_ID == Parameter("homeID")).And(
-                            cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
-                            cb.BIND_MODE == _BIND_MODE_OWN).And(
-                            cb.CALENDAR_RESOURCE_NAME != 'inbox').And(
-                            tr.CALENDAR_OBJECT_RESOURCE_ID == co.RESOURCE_ID).And(
-                            tr.END_DATE < Parameter("cutoff"))
-                    )
-                )
-            ),
-        )
-
-
-    @inlineCallbacks
-    def listInboxItemsInHomeForEventsBefore(self, homeID, cutoff):
-        """
-        return a list of inbox item names that refer to events before a given
-        date in a given home.
-
-        Returns a deferred to a list of orphaned inbox item names
-        """
-        rows = yield self._inboxItemsInHomeIDForEventsBeforeCutoffQuery.on(
-            self, homeID=homeID, cutoff=cutoff)
-        names = [row[0] for row in rows]
-        returnValue(names)
-
-
-    @classproperty
     def _inboxItemsInHomeIDCreatedBeforeCutoffQuery(cls):
         """
         DAL query to select inbox items created before a given date.
@@ -1584,7 +1493,7 @@
     @inlineCallbacks
     def listInboxItemsInHomeCreatedBefore(self, homeID, cutoff):
         """
-        return a list of inbox item names that creaed before a given date in a
+        return a list of inbox item names that created before a given date in a
         given home.
 
         Returns a deferred to a list of orphaned inbox item names

Modified: CalendarServer/trunk/txdav/common/datastore/sql_schema/current-oracle-dialect.sql
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql_schema/current-oracle-dialect.sql	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql_schema/current-oracle-dialect.sql	2016-05-21 13:30:05 UTC (rev 15629)
@@ -557,6 +557,14 @@
     "HOME_ID" integer not null unique references CALENDAR_HOME on delete cascade
 );
 
+create table INBOX_REMOVE_WORK (
+    "WORK_ID" integer primary key,
+    "JOB_ID" integer not null references JOB,
+    "HOME_ID" integer not null references CALENDAR_HOME on delete cascade,
+    "RESOURCE_NAME" nvarchar2(255), 
+    unique ("HOME_ID", "RESOURCE_NAME")
+);
+
 create table SCHEDULE_WORK (
     "WORK_ID" integer primary key,
     "JOB_ID" integer not null references JOB,
@@ -971,6 +979,10 @@
     "JOB_ID"
 );
 
+create index INBOX_REMOVE_WORK_JOB_4b627f1e on INBOX_REMOVE_WORK (
+    "JOB_ID"
+);
+
 create index SCHEDULE_WORK_JOB_ID_65e810ee on SCHEDULE_WORK (
     "JOB_ID"
 );

Modified: CalendarServer/trunk/txdav/common/datastore/sql_schema/current.sql
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql_schema/current.sql	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql_schema/current.sql	2016-05-21 13:30:05 UTC (rev 15629)
@@ -1053,6 +1053,18 @@
 create index CLEANUP_ONE_INBOX_WORK_JOB_ID on
   CLEANUP_ONE_INBOX_WORK(JOB_ID);
 
+create table INBOX_REMOVE_WORK (
+  WORK_ID                       integer      primary key default nextval('WORKITEM_SEQ'), -- implicit index
+  JOB_ID                        integer      references JOB not null,
+  HOME_ID                       integer      not null references CALENDAR_HOME on delete cascade,
+  RESOURCE_NAME                 varchar(255) not null,
+  
+  unique (HOME_ID, RESOURCE_NAME)    -- implicit index
+);
+
+create index INBOX_REMOVE_WORK_JOB_ID on
+  INBOX_REMOVE_WORK(JOB_ID);
+
 -------------------
 -- Schedule Work --
 -------------------

Modified: CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/oracle-dialect/upgrade_from_60_to_61.sql
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/oracle-dialect/upgrade_from_60_to_61.sql	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/oracle-dialect/upgrade_from_60_to_61.sql	2016-05-21 13:30:05 UTC (rev 15629)
@@ -31,6 +31,18 @@
     "REVISION"
 );
 
+-- New table
+create table INBOX_REMOVE_WORK (
+    "WORK_ID" integer primary key,
+    "JOB_ID" integer not null references JOB,
+    "HOME_ID" integer not null references CALENDAR_HOME on delete cascade,
+    "RESOURCE_NAME" nvarchar2(255), 
+    unique ("HOME_ID", "RESOURCE_NAME")
+);
 
+create index INBOX_REMOVE_WORK_JOB_4b627f1e on INBOX_REMOVE_WORK (
+    "JOB_ID"
+);
+
 -- update the version
 update CALENDARSERVER set VALUE = '61' where NAME = 'VERSION';

Modified: CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/postgres-dialect/upgrade_from_60_to_61.sql
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/postgres-dialect/upgrade_from_60_to_61.sql	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/postgres-dialect/upgrade_from_60_to_61.sql	2016-05-21 13:30:05 UTC (rev 15629)
@@ -28,5 +28,18 @@
 create index NOTIFICATION_OBJECT_REVISIONS_REVISION
   on NOTIFICATION_OBJECT_REVISIONS(REVISION);
 
+-- New table
+create table INBOX_REMOVE_WORK (
+  WORK_ID                       integer      primary key default nextval('WORKITEM_SEQ'), -- implicit index
+  JOB_ID                        integer      references JOB not null,
+  HOME_ID                       integer      not null references CALENDAR_HOME on delete cascade,
+  RESOURCE_NAME                 varchar(255) not null,
+  
+  unique (HOME_ID, RESOURCE_NAME)    -- implicit index
+);
+
+create index INBOX_REMOVE_WORK_JOB_ID on
+  INBOX_REMOVE_WORK(JOB_ID);
+
 -- update the version
 update CALENDARSERVER set VALUE = '61' where NAME = 'VERSION';

Modified: CalendarServer/trunk/txdav/common/datastore/work/inbox_cleanup.py
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/work/inbox_cleanup.py	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/work/inbox_cleanup.py	2016-05-21 13:30:05 UTC (rev 15629)
@@ -77,8 +77,11 @@
                 Where=ch.STATUS == _HOME_STATUS_NORMAL,
             ).on(self.transaction)
 
+            # Add an initial delay to the start of the first work item, then add an offset between each item
+            seconds = config.InboxCleanup.StartDelaySeconds
             for homeRow in homeRows:
-                yield CleanupOneInboxWork.reschedule(self.transaction, seconds=0, homeID=homeRow[0])
+                yield CleanupOneInboxWork.reschedule(self.transaction, seconds=seconds, homeID=homeRow[0])
+                seconds += config.InboxCleanup.StaggerSeconds
 
 
 
@@ -91,51 +94,46 @@
 
         # No need to delete other work items.  They are unique
 
-        # get orphan names
-        orphanNames = set((
-            yield self.transaction.orphanedInboxItemsInHomeID(self.homeID)
-        ))
-        if orphanNames:
-            home = yield self.transaction.calendarHomeWithResourceID(self.homeID)
-            log.info(
-                "Inbox cleanup work in home: {homeUID}, deleting orphaned items: {orphanNames}",
-                homeUID=home.uid(), orphanNames=orphanNames,
-            )
-
         # get old item names
         if float(config.InboxCleanup.ItemLifetimeDays) >= 0: # use -1 to disable; 0 is test case
             cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=float(config.InboxCleanup.ItemLifetimeDays))
             oldItemNames = set((
                 yield self.transaction.listInboxItemsInHomeCreatedBefore(self.homeID, cutoff)
             ))
-            newDeleters = oldItemNames - orphanNames
-            if newDeleters:
+            if oldItemNames:
                 home = yield self.transaction.calendarHomeWithResourceID(self.homeID)
                 log.info(
-                    "Inbox cleanup work in home: {homeUID}, deleting old items: {newDeleters}",
-                    homeUID=home.uid(), newDeleters=newDeleters,
+                    "Inbox cleanup work in home: {homeUID}, deleting old items: {oldItemNames}",
+                    homeUID=home.uid(), newDeleters=oldItemNames,
                 )
-        else:
-            oldItemNames = set()
 
-        # get item name for old events
-        if float(config.InboxCleanup.ItemLifeBeyondEventEndDays) >= 0: # use -1 to disable; 0 is test case
-            cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=float(config.InboxCleanup.ItemLifeBeyondEventEndDays))
-            itemNamesForOldEvents = set((
-                yield self.transaction.listInboxItemsInHomeForEventsBefore(self.homeID, cutoff)
-            ))
-            newDeleters = itemNamesForOldEvents - oldItemNames - orphanNames
-            if newDeleters:
-                home = yield self.transaction.calendarHomeWithResourceID(self.homeID)
-                log.info(
-                    "Inbox cleanup work in home: {homeUID}, deleting items for old events: {newDeleters}",
-                    homeUID=home.uid(), newDeleters=newDeleters,
-                )
-        else:
-            itemNamesForOldEvents = set()
+                # If the number to delete is below our threshold then delete right away,
+                # otherwise queue up more work items to delete these
+                if len(oldItemNames) < config.InboxCleanup.InboxRemoveWorkThreshold:
+                    inbox = yield home.childWithName("inbox")
+                    for item in (yield inbox.objectResourcesWithNames(oldItemNames)):
+                        yield item.remove()
+                else:
+                    seconds = config.InboxCleanup.RemovalStaggerSeconds
+                    for item in oldItemNames:
+                        yield InboxRemoveWork.reschedule(self.transaction, seconds=seconds, homeID=self.homeID, resourceName=item)
+                        seconds += config.InboxCleanup.RemovalStaggerSeconds
 
-        itemNamesToDelete = orphanNames | itemNamesForOldEvents | oldItemNames
-        if itemNamesToDelete:
+
+
+class InboxRemoveWork(WorkItem, fromTable(schema.INBOX_REMOVE_WORK)):
+
+    group = property(lambda self: (self.table.HOME_ID == self.homeID).And(self.table.RESOURCE_NAME == self.resourceName))
+
+    @inlineCallbacks
+    def doWork(self):
+
+        # Some of the resources may no longer exist by the time this work item runs
+        # so simply ignore that and let the work complete without doing anything
+        home = yield self.transaction.calendarHomeWithResourceID(self.homeID)
+        if home is not None:
             inbox = yield home.childWithName("inbox")
-            for item in (yield inbox.objectResourcesWithNames(itemNamesToDelete)):
-                yield item.remove()
+            if inbox is not None:
+                item = yield inbox.objectResourceWithName(self.resourceName)
+                if item is not None:
+                    yield item.remove()

Modified: CalendarServer/trunk/txdav/common/datastore/work/test/test_inbox_cleanup.py
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/work/test/test_inbox_cleanup.py	2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/work/test/test_inbox_cleanup.py	2016-05-21 13:30:05 UTC (rev 15629)
@@ -155,36 +155,10 @@
 
 
     @inlineCallbacks
-    def test_orphans(self):
-        """
-        Verify that orphaned Inbox items are removed
-        """
-        self.patch(config.InboxCleanup, "ItemLifetimeDays", -1)
-        self.patch(config.InboxCleanup, "ItemLifeBeyondEventEndDays", -1)
-
-        # create orphans by deleting events
-        cal = yield self.calendarUnderTest(home="user01", name="calendar")
-        for item in (yield cal.objectResourcesWithNames(["cal1.ics", "cal3.ics"])):
-            yield item.purge()
-
-        # do cleanup
-        yield self.transactionUnderTest().enqueue(CleanupOneInboxWork, homeID=cal.ownerHome()._resourceID, notBefore=datetime.datetime.utcnow())
-        yield self.commit()
-        yield JobItem.waitEmpty(self.storeUnderTest().newTransaction, reactor, 60)
-
-        # check that orphans are deleted
-        inbox = yield self.calendarUnderTest(home="user01", name="inbox")
-        items = yield inbox.objectResources()
-        names = [item.name() for item in items]
-        self.assertEqual(set(names), set(["cal2.ics"]))
-
-
-    @inlineCallbacks
     def test_old(self):
         """
         Verify that old inbox items are removed
         """
-        self.patch(config.InboxCleanup, "ItemLifeBeyondEventEndDays", -1)
 
         # Predate some inbox items
         inbox = yield self.calendarUnderTest(home="user01", name="inbox")
@@ -211,21 +185,28 @@
 
 
     @inlineCallbacks
-    def test_referenceOldEvent(self):
+    def test_old_queued(self):
         """
-        Verify that inbox items references old events are removed
+        Verify that old inbox items are removed
         """
-        # events are already too old, so make one event end now
-        calendar = yield self.calendarUnderTest(home="user01", name="calendar")
-        cal3Event = yield calendar.objectResourceWithName("cal3.ics")
 
-        tr = schema.TIME_RANGE
+        # Patch to force remove work items
+        self.patch(config.InboxCleanup, "InboxRemoveWorkThreshold", 0)
+
+        # Predate some inbox items
+        inbox = yield self.calendarUnderTest(home="user01", name="inbox")
+        oldDate = datetime.datetime.utcnow() - datetime.timedelta(days=float(config.InboxCleanup.ItemLifetimeDays), seconds=10)
+
+        itemsToPredate = ["cal2.ics", "cal3.ics"]
+        co = schema.CALENDAR_OBJECT
         yield Update(
-            {tr.END_DATE: datetime.datetime.utcnow()},
-            Where=tr.CALENDAR_OBJECT_RESOURCE_ID == cal3Event._resourceID
-        ).on(self.transactionUnderTest())
+            {co.CREATED: oldDate},
+            Where=co.RESOURCE_NAME.In(Parameter("itemsToPredate", len(itemsToPredate))).And(
+                co.CALENDAR_RESOURCE_ID == inbox._resourceID)
+        ).on(self.transactionUnderTest(), itemsToPredate=itemsToPredate)
+
         # do cleanup
-        yield self.transactionUnderTest().enqueue(CleanupOneInboxWork, homeID=calendar.ownerHome()._resourceID, notBefore=datetime.datetime.utcnow())
+        yield self.transactionUnderTest().enqueue(CleanupOneInboxWork, homeID=inbox.ownerHome()._resourceID, notBefore=datetime.datetime.utcnow())
         yield self.commit()
         yield JobItem.waitEmpty(self.storeUnderTest().newTransaction, reactor, 60)
 
@@ -233,4 +214,4 @@
         inbox = yield self.calendarUnderTest(home="user01", name="inbox")
         items = yield inbox.objectResources()
         names = [item.name() for item in items]
-        self.assertEqual(set(names), set(["cal3.ics"]))
+        self.assertEqual(set(names), set(["cal1.ics"]))
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20160521/8a939bc4/attachment-0001.html>


More information about the calendarserver-changes mailing list