[CalendarServer-changes] [15629] CalendarServer/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Sat May 21 06:30:05 PDT 2016
Revision: 15629
http://trac.calendarserver.org//changeset/15629
Author: cdaboo at apple.com
Date: 2016-05-21 06:30:05 -0700 (Sat, 21 May 2016)
Log Message:
-----------
Optimize the inbox cleanup process by staggering the work and reducing the types of queries done to detect what needs to be cleaned.
Modified Paths:
--------------
CalendarServer/trunk/conf/caldavd-stdconfig.plist
CalendarServer/trunk/twistedcaldav/stdconfig.py
CalendarServer/trunk/txdav/common/datastore/sql.py
CalendarServer/trunk/txdav/common/datastore/sql_schema/current-oracle-dialect.sql
CalendarServer/trunk/txdav/common/datastore/sql_schema/current.sql
CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/oracle-dialect/upgrade_from_60_to_61.sql
CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/postgres-dialect/upgrade_from_60_to_61.sql
CalendarServer/trunk/txdav/common/datastore/work/inbox_cleanup.py
CalendarServer/trunk/txdav/common/datastore/work/test/test_inbox_cleanup.py
Modified: CalendarServer/trunk/conf/caldavd-stdconfig.plist
===================================================================
--- CalendarServer/trunk/conf/caldavd-stdconfig.plist 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/conf/caldavd-stdconfig.plist 2016-05-21 13:30:05 UTC (rev 15629)
@@ -1069,14 +1069,27 @@
<key>ItemLifetimeDays</key>
<real>14.0</real>
- <!-- Number of days to keep an inbox item past the time when its referenced
- event ends -->
- <key>ItemLifeBeyondEventEndDays</key>
- <real>14.0</real>
-
<!-- Number of days between inbox cleanups -->
<key>CleanupPeriodDays</key>
<real>2.0</real>
+
+ <!-- Number of seconds before CleanupOneInboxWork starts after
+ InboxCleanupWork -->
+ <key>StartDelaySeconds</key>
+ <integer>300</integer>
+
+ <!-- Number of seconds between each CleanupOneInboxWork (fractional) -->
+ <key>StaggerSeconds</key>
+ <real>0.5</real>
+
+ <!-- Number of items above which inbox removals will be deferred to a work
+ item -->
+ <key>InboxRemoveWorkThreshold</key>
+ <integer>5</integer>
+
+ <!-- Number of seconds between each InboxRemoveWork -->
+ <key>RemovalStaggerSeconds</key>
+ <real>0.5</real>
</dict>
<!-- CardDAV Features -->
Modified: CalendarServer/trunk/twistedcaldav/stdconfig.py
===================================================================
--- CalendarServer/trunk/twistedcaldav/stdconfig.py 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/twistedcaldav/stdconfig.py 2016-05-21 13:30:05 UTC (rev 15629)
@@ -645,9 +645,12 @@
"InboxCleanup": {
"Enabled": True,
- "ItemLifetimeDays" : 14.0, # Number of days before deleting a new inbox item
- "ItemLifeBeyondEventEndDays" : 14.0, # Number of days to keep an inbox item past the time when its referenced event ends
- "CleanupPeriodDays" : 2.0, # Number of days between inbox cleanups
+ "ItemLifetimeDays" : 14.0, # Number of days before deleting a new inbox item
+ "CleanupPeriodDays" : 2.0, # Number of days between inbox cleanups
+ "StartDelaySeconds": 5 * 60, # Number of seconds before CleanupOneInboxWork starts after InboxCleanupWork
+ "StaggerSeconds": 0.5, # Number of seconds between each CleanupOneInboxWork (fractional)
+ "InboxRemoveWorkThreshold": 5, # Number of items above which inbox removals will be deferred to a work item
+ "RemovalStaggerSeconds": 0.5, # Number of seconds between each InboxRemoveWork
},
# CardDAV Features
Modified: CalendarServer/trunk/txdav/common/datastore/sql.py
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql.py 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql.py 2016-05-21 13:30:05 UTC (rev 15629)
@@ -1472,97 +1472,6 @@
@classproperty
- def _orphanedInboxItemsInHomeIDQuery(cls):
- """
- DAL query to select inbox items that refer to nonexistent events in a
- given home identified by the home resource ID.
- """
- co = schema.CALENDAR_OBJECT
- cb = schema.CALENDAR_BIND
- return Select(
- [co.RESOURCE_NAME],
- From=co.join(cb),
- Where=(
- cb.CALENDAR_HOME_RESOURCE_ID == Parameter("homeID")).And(
- cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
- cb.BIND_MODE == _BIND_MODE_OWN).And(
- cb.CALENDAR_RESOURCE_NAME == 'inbox').And(
- co.ICALENDAR_UID.NotIn(
- Select(
- [co.ICALENDAR_UID],
- From=co.join(cb),
- Where=(
- cb.CALENDAR_HOME_RESOURCE_ID == Parameter("homeID")).And(
- cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
- cb.BIND_MODE == _BIND_MODE_OWN).And(
- cb.CALENDAR_RESOURCE_NAME != 'inbox')
- )
- )
- ),
- )
-
-
- @inlineCallbacks
- def orphanedInboxItemsInHomeID(self, homeID):
- """
- Find inbox item names that refer to nonexistent events in a given home.
-
- Returns a deferred to a list of orphaned inbox item names
- """
- rows = yield self._orphanedInboxItemsInHomeIDQuery.on(self, homeID=homeID)
- names = [row[0] for row in rows]
- returnValue(names)
-
-
- @classproperty
- def _inboxItemsInHomeIDForEventsBeforeCutoffQuery(cls):
- """
- DAL query to select inbox items that refer to events in a before a
- given date.
- """
- co = schema.CALENDAR_OBJECT
- cb = schema.CALENDAR_BIND
- tr = schema.TIME_RANGE
- return Select(
- [co.RESOURCE_NAME],
- From=co.join(cb),
- Where=(
- cb.CALENDAR_HOME_RESOURCE_ID == Parameter("homeID")).And(
- cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
- cb.BIND_MODE == _BIND_MODE_OWN).And(
- cb.CALENDAR_RESOURCE_NAME == 'inbox').And(
- co.ICALENDAR_UID.In(
- Select(
- [co.ICALENDAR_UID],
- From=tr.join(co.join(cb)),
- Where=(
- cb.CALENDAR_HOME_RESOURCE_ID == Parameter("homeID")).And(
- cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
- cb.BIND_MODE == _BIND_MODE_OWN).And(
- cb.CALENDAR_RESOURCE_NAME != 'inbox').And(
- tr.CALENDAR_OBJECT_RESOURCE_ID == co.RESOURCE_ID).And(
- tr.END_DATE < Parameter("cutoff"))
- )
- )
- ),
- )
-
-
- @inlineCallbacks
- def listInboxItemsInHomeForEventsBefore(self, homeID, cutoff):
- """
- return a list of inbox item names that refer to events before a given
- date in a given home.
-
- Returns a deferred to a list of orphaned inbox item names
- """
- rows = yield self._inboxItemsInHomeIDForEventsBeforeCutoffQuery.on(
- self, homeID=homeID, cutoff=cutoff)
- names = [row[0] for row in rows]
- returnValue(names)
-
-
- @classproperty
def _inboxItemsInHomeIDCreatedBeforeCutoffQuery(cls):
"""
DAL query to select inbox items created before a given date.
@@ -1584,7 +1493,7 @@
@inlineCallbacks
def listInboxItemsInHomeCreatedBefore(self, homeID, cutoff):
"""
- return a list of inbox item names that creaed before a given date in a
+ return a list of inbox item names that created before a given date in a
given home.
Returns a deferred to a list of orphaned inbox item names
Modified: CalendarServer/trunk/txdav/common/datastore/sql_schema/current-oracle-dialect.sql
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql_schema/current-oracle-dialect.sql 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql_schema/current-oracle-dialect.sql 2016-05-21 13:30:05 UTC (rev 15629)
@@ -557,6 +557,14 @@
"HOME_ID" integer not null unique references CALENDAR_HOME on delete cascade
);
+create table INBOX_REMOVE_WORK (
+ "WORK_ID" integer primary key,
+ "JOB_ID" integer not null references JOB,
+ "HOME_ID" integer not null references CALENDAR_HOME on delete cascade,
+ "RESOURCE_NAME" nvarchar2(255),
+ unique ("HOME_ID", "RESOURCE_NAME")
+);
+
create table SCHEDULE_WORK (
"WORK_ID" integer primary key,
"JOB_ID" integer not null references JOB,
@@ -971,6 +979,10 @@
"JOB_ID"
);
+create index INBOX_REMOVE_WORK_JOB_4b627f1e on INBOX_REMOVE_WORK (
+ "JOB_ID"
+);
+
create index SCHEDULE_WORK_JOB_ID_65e810ee on SCHEDULE_WORK (
"JOB_ID"
);
Modified: CalendarServer/trunk/txdav/common/datastore/sql_schema/current.sql
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql_schema/current.sql 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql_schema/current.sql 2016-05-21 13:30:05 UTC (rev 15629)
@@ -1053,6 +1053,18 @@
create index CLEANUP_ONE_INBOX_WORK_JOB_ID on
CLEANUP_ONE_INBOX_WORK(JOB_ID);
+create table INBOX_REMOVE_WORK (
+ WORK_ID integer primary key default nextval('WORKITEM_SEQ'), -- implicit index
+ JOB_ID integer references JOB not null,
+ HOME_ID integer not null references CALENDAR_HOME on delete cascade,
+ RESOURCE_NAME varchar(255) not null,
+
+ unique (HOME_ID, RESOURCE_NAME) -- implicit index
+);
+
+create index INBOX_REMOVE_WORK_JOB_ID on
+ INBOX_REMOVE_WORK(JOB_ID);
+
-------------------
-- Schedule Work --
-------------------
Modified: CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/oracle-dialect/upgrade_from_60_to_61.sql
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/oracle-dialect/upgrade_from_60_to_61.sql 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/oracle-dialect/upgrade_from_60_to_61.sql 2016-05-21 13:30:05 UTC (rev 15629)
@@ -31,6 +31,18 @@
"REVISION"
);
+-- New table
+create table INBOX_REMOVE_WORK (
+ "WORK_ID" integer primary key,
+ "JOB_ID" integer not null references JOB,
+ "HOME_ID" integer not null references CALENDAR_HOME on delete cascade,
+ "RESOURCE_NAME" nvarchar2(255),
+ unique ("HOME_ID", "RESOURCE_NAME")
+);
+create index INBOX_REMOVE_WORK_JOB_4b627f1e on INBOX_REMOVE_WORK (
+ "JOB_ID"
+);
+
-- update the version
update CALENDARSERVER set VALUE = '61' where NAME = 'VERSION';
Modified: CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/postgres-dialect/upgrade_from_60_to_61.sql
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/postgres-dialect/upgrade_from_60_to_61.sql 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/sql_schema/upgrades/postgres-dialect/upgrade_from_60_to_61.sql 2016-05-21 13:30:05 UTC (rev 15629)
@@ -28,5 +28,18 @@
create index NOTIFICATION_OBJECT_REVISIONS_REVISION
on NOTIFICATION_OBJECT_REVISIONS(REVISION);
+-- New table
+create table INBOX_REMOVE_WORK (
+ WORK_ID integer primary key default nextval('WORKITEM_SEQ'), -- implicit index
+ JOB_ID integer references JOB not null,
+ HOME_ID integer not null references CALENDAR_HOME on delete cascade,
+ RESOURCE_NAME varchar(255) not null,
+
+ unique (HOME_ID, RESOURCE_NAME) -- implicit index
+);
+
+create index INBOX_REMOVE_WORK_JOB_ID on
+ INBOX_REMOVE_WORK(JOB_ID);
+
-- update the version
update CALENDARSERVER set VALUE = '61' where NAME = 'VERSION';
Modified: CalendarServer/trunk/txdav/common/datastore/work/inbox_cleanup.py
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/work/inbox_cleanup.py 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/work/inbox_cleanup.py 2016-05-21 13:30:05 UTC (rev 15629)
@@ -77,8 +77,11 @@
Where=ch.STATUS == _HOME_STATUS_NORMAL,
).on(self.transaction)
+ # Add an initial delay to the start of the first work item, then add an offset between each item
+ seconds = config.InboxCleanup.StartDelaySeconds
for homeRow in homeRows:
- yield CleanupOneInboxWork.reschedule(self.transaction, seconds=0, homeID=homeRow[0])
+ yield CleanupOneInboxWork.reschedule(self.transaction, seconds=seconds, homeID=homeRow[0])
+ seconds += config.InboxCleanup.StaggerSeconds
@@ -91,51 +94,46 @@
# No need to delete other work items. They are unique
- # get orphan names
- orphanNames = set((
- yield self.transaction.orphanedInboxItemsInHomeID(self.homeID)
- ))
- if orphanNames:
- home = yield self.transaction.calendarHomeWithResourceID(self.homeID)
- log.info(
- "Inbox cleanup work in home: {homeUID}, deleting orphaned items: {orphanNames}",
- homeUID=home.uid(), orphanNames=orphanNames,
- )
-
# get old item names
if float(config.InboxCleanup.ItemLifetimeDays) >= 0: # use -1 to disable; 0 is test case
cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=float(config.InboxCleanup.ItemLifetimeDays))
oldItemNames = set((
yield self.transaction.listInboxItemsInHomeCreatedBefore(self.homeID, cutoff)
))
- newDeleters = oldItemNames - orphanNames
- if newDeleters:
+ if oldItemNames:
home = yield self.transaction.calendarHomeWithResourceID(self.homeID)
log.info(
- "Inbox cleanup work in home: {homeUID}, deleting old items: {newDeleters}",
- homeUID=home.uid(), newDeleters=newDeleters,
+ "Inbox cleanup work in home: {homeUID}, deleting old items: {oldItemNames}",
+ homeUID=home.uid(), newDeleters=oldItemNames,
)
- else:
- oldItemNames = set()
- # get item name for old events
- if float(config.InboxCleanup.ItemLifeBeyondEventEndDays) >= 0: # use -1 to disable; 0 is test case
- cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=float(config.InboxCleanup.ItemLifeBeyondEventEndDays))
- itemNamesForOldEvents = set((
- yield self.transaction.listInboxItemsInHomeForEventsBefore(self.homeID, cutoff)
- ))
- newDeleters = itemNamesForOldEvents - oldItemNames - orphanNames
- if newDeleters:
- home = yield self.transaction.calendarHomeWithResourceID(self.homeID)
- log.info(
- "Inbox cleanup work in home: {homeUID}, deleting items for old events: {newDeleters}",
- homeUID=home.uid(), newDeleters=newDeleters,
- )
- else:
- itemNamesForOldEvents = set()
+ # If the number to delete is below our threshold then delete right away,
+ # otherwise queue up more work items to delete these
+ if len(oldItemNames) < config.InboxCleanup.InboxRemoveWorkThreshold:
+ inbox = yield home.childWithName("inbox")
+ for item in (yield inbox.objectResourcesWithNames(oldItemNames)):
+ yield item.remove()
+ else:
+ seconds = config.InboxCleanup.RemovalStaggerSeconds
+ for item in oldItemNames:
+ yield InboxRemoveWork.reschedule(self.transaction, seconds=seconds, homeID=self.homeID, resourceName=item)
+ seconds += config.InboxCleanup.RemovalStaggerSeconds
- itemNamesToDelete = orphanNames | itemNamesForOldEvents | oldItemNames
- if itemNamesToDelete:
+
+
+class InboxRemoveWork(WorkItem, fromTable(schema.INBOX_REMOVE_WORK)):
+
+ group = property(lambda self: (self.table.HOME_ID == self.homeID).And(self.table.RESOURCE_NAME == self.resourceName))
+
+ @inlineCallbacks
+ def doWork(self):
+
+ # Some of the resources may no longer exist by the time this work item runs
+ # so simply ignore that and let the work complete without doing anything
+ home = yield self.transaction.calendarHomeWithResourceID(self.homeID)
+ if home is not None:
inbox = yield home.childWithName("inbox")
- for item in (yield inbox.objectResourcesWithNames(itemNamesToDelete)):
- yield item.remove()
+ if inbox is not None:
+ item = yield inbox.objectResourceWithName(self.resourceName)
+ if item is not None:
+ yield item.remove()
Modified: CalendarServer/trunk/txdav/common/datastore/work/test/test_inbox_cleanup.py
===================================================================
--- CalendarServer/trunk/txdav/common/datastore/work/test/test_inbox_cleanup.py 2016-05-21 13:29:05 UTC (rev 15628)
+++ CalendarServer/trunk/txdav/common/datastore/work/test/test_inbox_cleanup.py 2016-05-21 13:30:05 UTC (rev 15629)
@@ -155,36 +155,10 @@
@inlineCallbacks
- def test_orphans(self):
- """
- Verify that orphaned Inbox items are removed
- """
- self.patch(config.InboxCleanup, "ItemLifetimeDays", -1)
- self.patch(config.InboxCleanup, "ItemLifeBeyondEventEndDays", -1)
-
- # create orphans by deleting events
- cal = yield self.calendarUnderTest(home="user01", name="calendar")
- for item in (yield cal.objectResourcesWithNames(["cal1.ics", "cal3.ics"])):
- yield item.purge()
-
- # do cleanup
- yield self.transactionUnderTest().enqueue(CleanupOneInboxWork, homeID=cal.ownerHome()._resourceID, notBefore=datetime.datetime.utcnow())
- yield self.commit()
- yield JobItem.waitEmpty(self.storeUnderTest().newTransaction, reactor, 60)
-
- # check that orphans are deleted
- inbox = yield self.calendarUnderTest(home="user01", name="inbox")
- items = yield inbox.objectResources()
- names = [item.name() for item in items]
- self.assertEqual(set(names), set(["cal2.ics"]))
-
-
- @inlineCallbacks
def test_old(self):
"""
Verify that old inbox items are removed
"""
- self.patch(config.InboxCleanup, "ItemLifeBeyondEventEndDays", -1)
# Predate some inbox items
inbox = yield self.calendarUnderTest(home="user01", name="inbox")
@@ -211,21 +185,28 @@
@inlineCallbacks
- def test_referenceOldEvent(self):
+ def test_old_queued(self):
"""
- Verify that inbox items references old events are removed
+ Verify that old inbox items are removed
"""
- # events are already too old, so make one event end now
- calendar = yield self.calendarUnderTest(home="user01", name="calendar")
- cal3Event = yield calendar.objectResourceWithName("cal3.ics")
- tr = schema.TIME_RANGE
+ # Patch to force remove work items
+ self.patch(config.InboxCleanup, "InboxRemoveWorkThreshold", 0)
+
+ # Predate some inbox items
+ inbox = yield self.calendarUnderTest(home="user01", name="inbox")
+ oldDate = datetime.datetime.utcnow() - datetime.timedelta(days=float(config.InboxCleanup.ItemLifetimeDays), seconds=10)
+
+ itemsToPredate = ["cal2.ics", "cal3.ics"]
+ co = schema.CALENDAR_OBJECT
yield Update(
- {tr.END_DATE: datetime.datetime.utcnow()},
- Where=tr.CALENDAR_OBJECT_RESOURCE_ID == cal3Event._resourceID
- ).on(self.transactionUnderTest())
+ {co.CREATED: oldDate},
+ Where=co.RESOURCE_NAME.In(Parameter("itemsToPredate", len(itemsToPredate))).And(
+ co.CALENDAR_RESOURCE_ID == inbox._resourceID)
+ ).on(self.transactionUnderTest(), itemsToPredate=itemsToPredate)
+
# do cleanup
- yield self.transactionUnderTest().enqueue(CleanupOneInboxWork, homeID=calendar.ownerHome()._resourceID, notBefore=datetime.datetime.utcnow())
+ yield self.transactionUnderTest().enqueue(CleanupOneInboxWork, homeID=inbox.ownerHome()._resourceID, notBefore=datetime.datetime.utcnow())
yield self.commit()
yield JobItem.waitEmpty(self.storeUnderTest().newTransaction, reactor, 60)
@@ -233,4 +214,4 @@
inbox = yield self.calendarUnderTest(home="user01", name="inbox")
items = yield inbox.objectResources()
names = [item.name() for item in items]
- self.assertEqual(set(names), set(["cal3.ics"]))
+ self.assertEqual(set(names), set(["cal1.ics"]))
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20160521/8a939bc4/attachment-0001.html>
More information about the calendarserver-changes
mailing list