[CalendarServer-changes] [8504] CalendarServer/trunk/calendarserver/tools/calverify.py
source_changes at macosforge.org
source_changes at macosforge.org
Tue Jan 10 12:01:59 PST 2012
Revision: 8504
http://trac.macosforge.org/projects/calendarserver/changeset/8504
Author: cdaboo at apple.com
Date: 2012-01-10 12:01:59 -0800 (Tue, 10 Jan 2012)
Log Message:
-----------
New version that has the option to validate data already in the store.
Modified Paths:
--------------
CalendarServer/trunk/calendarserver/tools/calverify.py
Modified: CalendarServer/trunk/calendarserver/tools/calverify.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/calverify.py 2012-01-10 19:55:28 UTC (rev 8503)
+++ CalendarServer/trunk/calendarserver/tools/calverify.py 2012-01-10 20:01:59 UTC (rev 8504)
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- test-case-name: calendarserver.tools.test.test_calverify -*-
##
-# Copyright (c) 2011 Apple Inc. All rights reserved.
+# Copyright (c) 2011-2012 Apple Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -44,6 +44,7 @@
from pycalendar import definitions
from pycalendar.calendar import PyCalendar
from pycalendar.datetime import PyCalendarDateTime
+from pycalendar.exceptions import PyCalendarError
from pycalendar.period import PyCalendarPeriod
from twext.enterprise.dal.syntax import Select, Parameter, Count
from twisted.application.service import Service
@@ -52,8 +53,10 @@
from twisted.python.text import wordWrap
from twisted.python.usage import Options
from twistedcaldav.dateops import pyCalendarTodatetime
+from twistedcaldav.ical import Component
from twistedcaldav.stdconfig import DEFAULT_CONFIG_FILE
from txdav.common.datastore.sql_tables import schema, _BIND_MODE_OWN
+from txdav.common.icommondatastore import InternalDataStoreError
import collections
import os
import sys
@@ -90,20 +93,24 @@
synopsis = description
optFlags = [
- ['fix', 'x', "Fix mismatches."],
- ['missing', 'v', "Show 'orphaned' homes."],
+ ['ical', 'i', "Calendar data check."],
+ ['fix', 'x', "Fix problems."],
+ ['missing', 'm', "Show 'orphaned' homes."],
['verbose', 'v', "Verbose logging."],
]
optParameters = [
['config', 'f', DEFAULT_CONFIG_FILE, "Specify caldavd.plist configuration path."],
['data', 'd', "./calverify-data", "Path where ancillary data is stored."],
+ ['uuid', 'u', "", "Only check this user."],
]
+
def __init__(self):
super(CalVerifyOptions, self).__init__()
self.outputName = '-'
+
def opt_output(self, filename):
"""
Specify output file path (default: '-', meaning stdout).
@@ -112,6 +119,7 @@
opt_o = opt_output
+
def openOutput(self):
"""
Open the appropriate output file based on the '--output' option.
@@ -151,13 +159,10 @@
"""
Do the export, stopping the reactor when done.
"""
- self.txn = self.store.newTransaction()
try:
if self.options["missing"]:
yield self.doOrphans()
- yield self.doScan(self.options["fix"])
- yield self.txn.commit()
- self.txn = None
+ yield self.doScan(self.options["ical"], self.options["fix"])
self.output.close()
except:
@@ -172,6 +177,7 @@
Report on home collections for which there are no directory records.
"""
print "\n---- Finding calendar homes with no directory record ----"
+ self.txn = self.store.newTransaction()
if self.options["verbose"]:
t = time.time()
@@ -193,6 +199,14 @@
if self.directoryService().recordWithGUID(uid[0]) is None:
contents = yield self.countHomeContents(uid)
missing.append((uid[0], contents,))
+
+ # To avoid holding locks on all the rows scanned, commit every 100 resources
+ if divmod(ctr, 100)[1] == 0:
+ yield self.txn.commit()
+ self.txn = self.store.newTransaction()
+
+ yield self.txn.commit()
+ self.txn = None
# Print table of results
table = tables.Table()
@@ -217,6 +231,7 @@
).on(self.txn))
returnValue(tuple(rows))
+
@inlineCallbacks
def countHomeContents(self, uid):
ch = schema.CALENDAR_HOME
@@ -233,8 +248,9 @@
).on(self.txn, **kwds))
returnValue(int(rows[0][0]) if rows else 0)
+
@inlineCallbacks
- def doScan(self, fix):
+ def doScan(self, ical, fix):
print "\n---- Scanning calendar data ----"
@@ -244,11 +260,23 @@
self.end.offsetYear(1)
self.fix = fix
+ self.txn = self.store.newTransaction()
+
if self.options["verbose"]:
t = time.time()
- rows = yield self.getAllResourceInfo(self.start)
+ if ical:
+ if self.options["uuid"]:
+ rows = yield self.getAllResourceInfoWithUUID(self.options["uuid"])
+ else:
+ rows = yield self.getAllResourceInfo()
+ else:
+ rows = yield self.getAllResourceInfoTimeRange(self.start)
+
+ yield self.txn.commit()
+ self.txn = None
+
if self.options["verbose"]:
- print "getAllResourceInfo time: %.1fs" % (time.time() - t,)
+ print "getAllResourceInfoTimeRange time: %.1fs" % (time.time() - t,)
print "Number of events to process: %s" % (len(rows,))
# Split into organizer events and attendee events
@@ -268,20 +296,61 @@
print "Number of organizer events to process: %s" % (len(self.organized),)
print "Number of attendee events to process: %s" % (len(self.attended,))
- yield self.verifyAllAttendeesForOrganizer()
- yield self.verifyAllOrganizersForAttendee()
+ if ical:
+ yield self.calendarDataCheck(rows)
+ else:
+ yield self.verifyAllAttendeesForOrganizer()
+ yield self.verifyAllOrganizersForAttendee()
yield succeed(None)
+
@inlineCallbacks
- def getAllResourceInfo(self, start):
+ def getAllResourceInfo(self):
co = schema.CALENDAR_OBJECT
cb = schema.CALENDAR_BIND
ch = schema.CALENDAR_HOME
+ kwds = {}
+ rows = (yield Select(
+ [ch.OWNER_UID, co.RESOURCE_ID, co.ICALENDAR_UID, co.MD5, co.ORGANIZER,],
+ From=ch.join(
+ cb, type="inner", on=(ch.RESOURCE_ID == cb.CALENDAR_HOME_RESOURCE_ID)).join(
+ co, type="inner", on=(cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
+ cb.BIND_MODE == _BIND_MODE_OWN).And(
+ cb.CALENDAR_RESOURCE_NAME != "inbox")),
+ GroupBy=(ch.OWNER_UID, co.RESOURCE_ID, co.ICALENDAR_UID, co.MD5, co.ORGANIZER,),
+ ).on(self.txn, **kwds))
+ returnValue(tuple(rows))
+
+
+ @inlineCallbacks
+ def getAllResourceInfoWithUUID(self, uuid):
+ co = schema.CALENDAR_OBJECT
+ cb = schema.CALENDAR_BIND
+ ch = schema.CALENDAR_HOME
+ kwds = {"uuid": uuid}
+ rows = (yield Select(
+ [ch.OWNER_UID, co.RESOURCE_ID, co.ICALENDAR_UID, co.MD5, co.ORGANIZER,],
+ From=ch.join(
+ cb, type="inner", on=(ch.RESOURCE_ID == cb.CALENDAR_HOME_RESOURCE_ID)).join(
+ co, type="inner", on=(cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
+ cb.BIND_MODE == _BIND_MODE_OWN).And(
+ cb.CALENDAR_RESOURCE_NAME != "inbox")),
+ Where=(ch.OWNER_UID == Parameter("uuid")),
+ GroupBy=(ch.OWNER_UID, co.RESOURCE_ID, co.ICALENDAR_UID, co.MD5, co.ORGANIZER,),
+ ).on(self.txn, **kwds))
+ returnValue(tuple(rows))
+
+
+ @inlineCallbacks
+ def getAllResourceInfoTimeRange(self, start):
+ co = schema.CALENDAR_OBJECT
+ cb = schema.CALENDAR_BIND
+ ch = schema.CALENDAR_HOME
tr = schema.TIME_RANGE
kwds = {
"Start" : pyCalendarTodatetime(start),
- "Max" : pyCalendarTodatetime(PyCalendarDateTime(1900, 1, 1))
+ "Max" : pyCalendarTodatetime(PyCalendarDateTime(1900, 1, 1, 0, 0, 0))
}
rows = (yield Select(
[ch.OWNER_UID, co.RESOURCE_ID, co.ICALENDAR_UID, co.MD5, co.ORGANIZER,],
@@ -297,7 +366,157 @@
).on(self.txn, **kwds))
returnValue(tuple(rows))
+
@inlineCallbacks
+ def getAllResourceInfoForResourceID(self, resid):
+ co = schema.CALENDAR_OBJECT
+ cb = schema.CALENDAR_BIND
+ ch = schema.CALENDAR_HOME
+ kwds = {"resid": resid}
+ rows = (yield Select(
+ [ch.RESOURCE_ID, cb.CALENDAR_RESOURCE_ID,],
+ From=ch.join(
+ cb, type="inner", on=(ch.RESOURCE_ID == cb.CALENDAR_HOME_RESOURCE_ID)).join(
+ co, type="inner", on=(cb.CALENDAR_RESOURCE_ID == co.CALENDAR_RESOURCE_ID).And(
+ cb.BIND_MODE == _BIND_MODE_OWN).And(
+ cb.CALENDAR_RESOURCE_NAME != "inbox")),
+ Where=(co.RESOURCE_ID == Parameter("resid")),
+ ).on(self.txn, **kwds))
+ returnValue(rows[0])
+
+
+ @inlineCallbacks
+ def calendarDataCheck(self, rows):
+ """
+ Check each calendar resource for valid iCalendar data.
+ """
+
+ print "\n---- Verifying each calendar object resource ----"
+ self.txn = self.store.newTransaction()
+
+ if self.options["verbose"]:
+ t = time.time()
+
+ results_bad = []
+ count = 0
+ total = len(rows)
+ badlen = 0
+ for owner, resid, uid, _ignore_md5, _ignore_organizer in rows:
+ result, message = yield self.validCalendarData(resid)
+ if not result:
+ results_bad.append((owner, uid, resid, message))
+ badlen += 1
+ count += 1
+ if self.options["verbose"]:
+ if count == 1:
+ print "Bad/Current/Total"
+ if divmod(count, 100)[1] == 0:
+ print "%s/%s/%s" % (badlen, count, total,)
+
+ # To avoid holding locks on all the rows scanned, commit every 100 resources
+ if divmod(count, 100)[1] == 0:
+ yield self.txn.commit()
+ self.txn = self.store.newTransaction()
+
+ yield self.txn.commit()
+ self.txn = None
+
+ # Print table of results
+ table = tables.Table()
+ table.addHeader(("Owner", "Event UID", "RID", "Problem",))
+ for item in results_bad:
+ owner, uid, resid, message = item
+ owner_record = self.directoryService().recordWithGUID(owner)
+ table.addRow((
+ "%s/%s (%s)" % (owner_record.recordType if owner_record else "-", owner_record.shortNames[0] if owner_record else "-", owner,),
+ uid,
+ resid,
+ message,
+ ))
+
+ self.output.write("\n")
+ self.output.write("Bad iCalendar data (total=%d):\n" % (len(results_bad),))
+ table.printTable(os=self.output)
+
+ if self.options["verbose"]:
+ diff_time = time.time() - t
+ print "Time: %.2f s Average: %.1f ms/resource" % (
+ diff_time,
+ (1000.0 * diff_time) / total,
+ )
+
+ errorPrefix = "Calendar data had unfixable problems:\n "
+
+ @inlineCallbacks
+ def validCalendarData(self, resid):
+ """
+ Check the calendar resource for valid iCalendar data.
+ """
+
+ caldata = yield self.getCalendar(resid)
+ if caldata is None:
+ returnValue((False, "Failed to parse"))
+
+ component = Component(None, pycalendar=caldata)
+ result = True
+ message = ""
+ try:
+ component.validCalendarData(doFix=False, validateRecurrences=True)
+ component.validCalendarForCalDAV(methodAllowed=False)
+ component.validOrganizerForScheduling(doFix=False)
+ except ValueError, e:
+ result = False
+ message = str(e)
+ if message.startswith(self.errorPrefix):
+ message = message[len(self.errorPrefix):]
+ lines = message.splitlines()
+ message = lines[0] + (" ++" if len(lines) > 1 else "")
+ if self.fix:
+ fixresult, fixmessage = yield self.fixCalendarData(resid)
+ if fixresult:
+ message = "Fixed: " + message
+ else:
+ message = fixmessage + message
+
+ returnValue((result, message,))
+
+
+ @inlineCallbacks
+ def fixCalendarData(self, resid):
+ """
+ Fix problems in calendar data using store APIs.
+ """
+
+ homeID, calendarID = yield self.getAllResourceInfoForResourceID(resid)
+ home = yield self.txn.calendarHomeWithResourceID(homeID)
+ calendar = yield home.childWithID(calendarID)
+ calendarObj = yield calendar.objectResourceWithID(resid)
+
+ try:
+ component = yield calendarObj.component()
+ except InternalDataStoreError:
+ returnValue((False, "Failed parse: "))
+
+ result = True
+ message = ""
+ try:
+ component.validCalendarData(doFix=True, validateRecurrences=True)
+ component.validCalendarForCalDAV(methodAllowed=False)
+ component.validOrganizerForScheduling(doFix=True)
+ except ValueError:
+ result = False
+ message = "Failed fix: "
+
+ if result:
+ # Write out fix, commit and get a new transaction
+ component = yield calendarObj.setComponent(component)
+ #yield self.txn.commit()
+ #self.txn = self.store.newTransaction()
+
+ returnValue((result, message,))
+
+
+ @inlineCallbacks
def verifyAllAttendeesForOrganizer(self):
"""
Make sure that for each organizer, each referenced attendee has a consistent view of the organizer's event.
@@ -306,6 +525,7 @@
"""
print "\n---- Verifying Organizer events against Attendee copies ----"
+ self.txn = self.store.newTransaction()
results_missing = []
results_mismatch = []
@@ -325,9 +545,16 @@
len(results_mismatch),
)
+ # To avoid holding locks on all the rows scanned, commit every 100 resources
+ if divmod(ctr, 100)[1] == 0:
+ yield self.txn.commit()
+ self.txn = self.store.newTransaction()
+
# Get the organizer's view of attendee states
organizer, resid, uid, _ignore_md5, _ignore_organizer = organizerEvent
calendar = yield self.getCalendar(resid)
+ if calendar is None:
+ continue
organizerViewOfAttendees = self.buildAttendeeStates(calendar, self.start, self.end)
try:
del organizerViewOfAttendees[organizer]
@@ -342,6 +569,8 @@
for attendeeEvent in self.attended_byuid.get(uid, ()):
owner, resid, uid, _ignore_md5, _ignore_organizer = attendeeEvent
calendar = yield self.getCalendar(resid)
+ if calendar is None:
+ continue
eachAttendeesOwnStatus[owner] = self.buildAttendeeStates(calendar, self.start, self.end, attendee_only=owner)
attendeeResIDs[(owner, uid)] = resid
@@ -387,6 +616,9 @@
# TODO: This is where we attempt a fix
#self.fixEvent(organizer, organizerAttendee, eventpath, attendeePaths.get(organizerAttendee, None))
pass
+
+ yield self.txn.commit()
+ self.txn = None
# Print table of results
table = tables.Table()
@@ -425,6 +657,7 @@
self.output.write("Events mismatched between Organizer's and Attendee's calendars (total=%d):\n" % (len(results_mismatch),))
table.printTable(os=self.output)
+
@inlineCallbacks
def verifyAllOrganizersForAttendee(self):
"""
@@ -432,6 +665,7 @@
"""
print "\n---- Verifying Attendee events against Organizer copies ----"
+ self.txn = self.store.newTransaction()
# Now try to match up each attendee event
missing = []
@@ -450,8 +684,15 @@
len(mismatched),
)
+ # To avoid holding locks on all the rows scanned, commit every 100 resources
+ if divmod(ctr, 100)[1] == 0:
+ yield self.txn.commit()
+ self.txn = self.store.newTransaction()
+
attendee, resid, uid, _ignore_md5, organizer = attendeeEvent
calendar = yield self.getCalendar(resid)
+ if calendar is None:
+ continue
eachAttendeesOwnStatus = self.buildAttendeeStates(calendar, self.start, self.end, attendee_only=attendee)
if attendee not in eachAttendeesOwnStatus:
continue
@@ -462,7 +703,7 @@
organizer = organizer[9:]
organizerRecord = self.directoryService().recordWithGUID(organizer)
- if not organizerRecord.thisServer():
+ if organizerRecord is None or not organizerRecord.thisServer():
continue
if uid not in self.organized_byuid:
@@ -490,6 +731,9 @@
# TODO: This is where we attempt a fix
pass
+ yield self.txn.commit()
+ self.txn = None
+
# Print table of results
table = tables.Table()
table.addHeader(("Organizer", "Attendee", "UID", "Attendee RID",))
@@ -547,8 +791,13 @@
co.RESOURCE_ID == Parameter("ResourceID")
),
).on(self.txn, **kwds))
- returnValue(PyCalendar.parseText(rows[0][0]) if rows else None)
+ try:
+ caldata = PyCalendar.parseText(rows[0][0]) if rows else None
+ except PyCalendarError:
+ caldata = None
+ returnValue(caldata)
+
def buildAttendeeStates(self, calendar, start, end, attendee_only=None):
# Expand events into instances in the start/end range
results = []
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20120110/90dcb70e/attachment-0001.html>
More information about the calendarserver-changes
mailing list