[CalendarServer-changes] [5963] CalendarServer/branches/users/washort/use-lxml

source_changes at macosforge.org source_changes at macosforge.org
Fri Jul 30 15:52:50 PDT 2010


Revision: 5963
          http://trac.macosforge.org/projects/calendarserver/changeset/5963
Author:   william_short at apple.com
Date:     2010-07-30 15:52:49 -0700 (Fri, 30 Jul 2010)
Log Message:
-----------
add support for lxml

Modified Paths:
--------------
    CalendarServer/branches/users/washort/use-lxml/support/build.sh
    CalendarServer/branches/users/washort/use-lxml/twext/web2/dav/element/parser.py
    CalendarServer/branches/users/washort/use-lxml/twistedcaldav/test/test_calendarquery.py

Property Changed:
----------------
    CalendarServer/branches/users/washort/use-lxml/


Property changes on: CalendarServer/branches/users/washort/use-lxml
___________________________________________________________________
Modified: svn:mergeinfo
   - /CalendarServer/branches/config-separation:4379-4443
/CalendarServer/branches/egg-info-351:4589-4625
/CalendarServer/branches/new-store:5594-5919
/CalendarServer/branches/users/cdaboo/cached-subscription-calendars-5692:5693-5702
/CalendarServer/branches/users/cdaboo/directory-cache-on-demand-3627:3628-3644
/CalendarServer/branches/users/cdaboo/more-sharing-5591:5592-5601
/CalendarServer/branches/users/cdaboo/partition-4464:4465-4957
/CalendarServer/branches/users/cdaboo/relative-config-paths-5070:5071-5105
/CalendarServer/branches/users/cdaboo/shared-calendars-5187:5188-5440
/CalendarServer/branches/users/glyph/contacts-server-merge:4971-5080
/CalendarServer/branches/users/glyph/sendfdport:5388-5424
/CalendarServer/branches/users/glyph/use-system-twisted:5084-5149
/CalendarServer/branches/users/sagen/locations-resources:5032-5051
/CalendarServer/branches/users/sagen/locations-resources-2:5052-5061
/CalendarServer/branches/users/sagen/resource-delegates-4038:4040-4067
/CalendarServer/branches/users/sagen/resource-delegates-4066:4068-4075
/CalendarServer/branches/users/sagen/resources-2:5084-5093
/CalendarServer/branches/users/wsanchez/transations:5515-5593
   + /CalendarServer/branches/new-store:5594-5919
/CalendarServer/branches/users/cdaboo/directory-cache-on-demand-3627:3628-3644
/CalendarServer/branches/egg-info-351:4589-4625
/CalendarServer/branches/users/sagen/resource-delegates-4038:4040-4067
/CalendarServer/branches/users/glyph/use-system-twisted:5084-5149
/CalendarServer/branches/users/sagen/resource-delegates-4066:4068-4075
/CalendarServer/branches/users/cdaboo/cached-subscription-calendars-5692:5693-5702
/CalendarServer/branches/users/glyph/contacts-server-merge:4971-5080
/CalendarServer/branches/users/cdaboo/shared-calendars-5187:5188-5440
/CalendarServer/branches/users/sagen/locations-resources-2:5052-5061
/CalendarServer/branches/users/sagen/locations-resources:5032-5051
/CalendarServer/branches/config-separation:4379-4443
/CalendarServer/branches/users/cdaboo/more-sharing-5591:5592-5601
/CalendarServer/branches/users/cdaboo/relative-config-paths-5070:5071-5105
/CalendarServer/branches/users/sagen/resources-2:5084-5093
/CalendarServer/branches/users/wsanchez/transations:5515-5593
/CalendarServer/branches/users/glyph/sendfdport:5388-5424
/CalendarServer/branches/users/cdaboo/partition-4464:4465-4957


Modified: CalendarServer/branches/users/washort/use-lxml/support/build.sh
===================================================================
--- CalendarServer/branches/users/washort/use-lxml/support/build.sh	2010-07-30 22:50:29 UTC (rev 5962)
+++ CalendarServer/branches/users/washort/use-lxml/support/build.sh	2010-07-30 22:52:49 UTC (rev 5963)
@@ -357,9 +357,10 @@
   local get_type="www";   # Protocol to use
   local  version="";      # Minimum version required
   local   f_hash="";      # Checksum
+  local build_args="";     # Args to pass to py_build
 
   OPTIND=1;
-  while getopts "ofier:v:m:" option; do
+  while getopts "ofier:v:m:b:" option; do
     case "${option}" in
       'o') optional="true"; ;;
       'f') override="true"; ;;
@@ -368,6 +369,7 @@
       'r') get_type="svn"; revision="${OPTARG}"; ;;
       'v')  version="-v ${OPTARG}"; ;;
       'm')   f_hash="-m ${OPTARG}"; ;;
+      'b') build_args="${OPTARG}"; ;;
     esac;
   done;
   shift $((${OPTIND} - 1));
@@ -397,9 +399,9 @@
         fi;
       fi;
     else
-      py_build "${name}" "${srcdir}" "${optional}";
+      py_build "${name}" "${srcdir}" "${optional}" $build_args;
     fi;
-    py_install "${name}" "${srcdir}";
+    py_install "${name}" "${srcdir}" $build_args;
 
     if "${inplace}"; then
       local add_pythonpath="${srcdir}";
@@ -511,6 +513,12 @@
     "PyXML" "xml.dom.ext" "${px}" \
     "http://internap.dl.sourceforge.net/sourceforge/pyxml/${px}.tar.gz";
 
+  local lxml="lxml-2.2.7";
+  py_dependency \
+    -b "--static-deps --libxml2-version=2.7.7 --libxslt-version=1.1.26" \
+    "lxml" "lxml" "${lxml}" \
+    "http://pypi.python.org/packages/source/l/lxml/lxml-2.2.7.tar.gz";
+
   local po="pyOpenSSL-0.10";
   py_dependency -v 0.9 \
     "PyOpenSSL" "OpenSSL" "${po}" \

Modified: CalendarServer/branches/users/washort/use-lxml/twext/web2/dav/element/parser.py
===================================================================
--- CalendarServer/branches/users/washort/use-lxml/twext/web2/dav/element/parser.py	2010-07-30 22:50:29 UTC (rev 5962)
+++ CalendarServer/branches/users/washort/use-lxml/twext/web2/dav/element/parser.py	2010-07-30 22:52:49 UTC (rev 5963)
@@ -1,5 +1,5 @@
 ##
-# Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+# Copyright (c) 2005, 2010 Apple Computer, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -41,6 +41,7 @@
 import xml.dom.minidom
 import xml.sax
 
+import lxml.etree
 from twext.web2.dav.element.base import WebDAVElement, WebDAVUnknownElement, PCDATAElement
 from twext.web2.dav.element.util import PrintXML
 
@@ -54,8 +55,7 @@
     """
     element_names = []
 
-    items = module.__all__ if hasattr(module, "__all__") else dir(module)
-    for element_class_name in items:
+    for element_class_name in dir(module):
         element_class = getattr(module, element_class_name)
 
         if type(element_class) is type and issubclass(element_class, WebDAVElement):
@@ -72,7 +72,6 @@
     Register the supplied XML elements with the parser.
     """
     qname = element_class.namespace, element_class.name
-    
     if qname in elements_by_tag_name:
         raise AssertionError(
             "Attempting to register qname %s multiple times: (%r, %r)"
@@ -92,7 +91,7 @@
 
 elements_by_tag_name = {}
 
-class WebDAVContentHandler (xml.sax.handler.ContentHandler):
+class PyXMLWebDAVContentHandler (xml.sax.handler.ContentHandler):
     def setDocumentLocator(self, locator): self.locator = locator
     locator = None
 
@@ -132,7 +131,6 @@
                 attributes_dict[attr_name.encode("utf-8")] = attributes.getValueByQName(attr_name)
 
         tag_namespace, tag_name = name
-
         if name in elements_by_tag_name:
             element_class = elements_by_tag_name[name]
         elif name in self.unknownElementClasses:
@@ -140,8 +138,8 @@
         else:
             def element_class(*args, **kwargs):
                 element = WebDAVUnknownElement(*args, **kwargs)
-                element.namespace = tag_namespace
-                element.name      = tag_name
+                element.namespace = tag_namespace.encode('utf-8') if tag_namespace else tag_namespace
+                element.name      = tag_name.encode('utf-8')
                 return element
             self.unknownElementClasses[name] = element_class
 
@@ -192,34 +190,161 @@
     def skippedEntity(self, name):
         raise AssertionError("skipped entities are not allowed")
 
-class WebDAVDocument (object):
-    """
-    WebDAV XML document.
-    """
-    def _parse(source_is_string):
-        def parse(source):
-            handler = WebDAVContentHandler()
-            parser  = xml.sax.make_parser()
+class pyxmlparser(object):
+    def parseElement(self, source):
+        raise NotImplementedError('PyXML parser does not have support for parsing from an arbitrary element')
 
-            parser.setContentHandler(handler)
-            parser.setFeature(xml.sax.handler.feature_namespaces, True)
+    def parseString(self, source):
+        return self.parse(StringIO.StringIO(source))
 
-            if source_is_string: source = StringIO.StringIO(source)
+    def parse(self, source):
+        handler = PyXMLWebDAVContentHandler()
+        parser  = xml.sax.make_parser()
 
-            try:
-                parser.parse(source)
-            except xml.sax.SAXParseException, e:
-                raise ValueError(e)
+        parser.setContentHandler(handler)
+        parser.setFeature(xml.sax.handler.feature_namespaces, True)
 
-            #handler.dom.root_element.validate()
+        try:
+            parser.parse(source)
+        except xml.sax.SAXParseException, e:
+            raise ValueError(e)
 
-            return handler.dom
+        return handler.dom
 
-        return parse
+    parseStream = parse
+
+
+class WebDAVContentHandler(object):
+    class StackData(object):
+        def __init__(self, name, klass, attributes, children):
+            self.name = name
+            self.klass = klass
+            self.attributes = attributes
+            self.children = children
+
+    def __init__(self):
+        self.handlers = {'start' : self.startElement,
+                         'end'   : self.endElement}
+
+        self.stack = [self.StackData(None, None, None, [])]
+
+        # Keep a cache of the subclasses we create for unknown XML
+        # elements, so that we don't create multiple classes for the
+        # same element; it's fairly typical for elements to appear
+        # multiple times in a document.
+        self.unknownElementClasses = {}
+
+    def handle(self, event, element):
+        handlerMethod = self.handlers.get(event, lambda *args: None)
+        handlerMethod(element)
+
+    def getQname(self, element):
+        tag_namespace = element.nsmap[element.prefix]
+        if tag_namespace:
+            tag_name = element.tag.split("{%s}" % tag_namespace)[1]
+        else:
+            tag_name = element.tag
+        return (tag_namespace, tag_name)
+
+    def endDocument(self):
+        top = self.stack[-1]
+
+        assert top.name is None
+        assert top.klass is None
+        assert top.attributes is None
+        assert len(top.children) is 1, "Must have exactly one root element, got %d" % len(top.children)
+
+        self.dom = WebDAVDocument(top.children[0])
+        del(self.unknownElementClasses)
+
+    def startElement(self, element):
+        name = self.getQname(element)
+        if name in elements_by_tag_name:
+            element_class = elements_by_tag_name[name]
+        elif name in self.unknownElementClasses:
+            element_class = self.unknownElementClasses[name]
+        else:
+            (tag_namespace, tag_name) = name
+
+            def element_class(*args, **kwargs):
+                element = WebDAVUnknownElement(*args, **kwargs)
+                element.namespace = tag_namespace.encode('utf-8') if tag_namespace else tag_namespace
+                element.name      = tag_name.encode('utf-8')
+                return element
+
+            self.unknownElementClasses[name] = element_class
+        attributes = {}
+        for k, v in element.items():
+            #Cheat a little.
+            k = k.replace('{http://www.w3.org/XML/1998/namespace}', 'xml:')
+            attributes[k] = v
+
+        stackData = self.StackData(name, element_class, attributes, [])
+        self.stack.append(stackData)
+
+    def endElement(self, element):
+        # Pop the current element from the stack...
+        top = self.stack[-1]
+        del(self.stack[-1])
+
+        name = self.getQname(element)
+        assert top.name == name, "Last item on stack is %s while closing %s" % (top.name, name)
+
+        if element.text:
+            text = element.text.strip()
+            if text:
+                top.children.append(PCDATAElement(text))
+
+        # ...then instantiate the element and add it to the parent's list of
+        # children.
+        try:
+            davElement = top.klass(*top.children, **top.attributes)
+        except ValueError, e:
+            e.args = ("%s at %s" % (e.args[0], element.sourceline)) + e.args[1:]
+            raise # Re-raises modified e, but preserves traceback
         
-    fromStream = staticmethod(_parse(False))
-    fromString = staticmethod(_parse(True ))
+        self.stack[-1].children.append(davElement)
+        element.clear()
 
+class lxmlparser(object):
+    def parseElement(self, source):
+        return self.parse(source, lxml.etree.iterwalk)
+    
+    def parseString(self, source):
+        return self.parse(StringIO.StringIO(source))
+
+    def parse(self, source, iterate=lxml.etree.iterparse):
+        handler = WebDAVContentHandler()
+        try: 
+            context = iterate(source, events=('start', 'end'))
+            [handler.handle(event, element) for (event, element) in context]
+            handler.endDocument()
+        except lxml.etree.Error as e:
+            raise ValueError(e)
+
+        return handler.dom
+
+    parseStream = parse
+
+# Hook to override if needed
+ParserClass = lxmlparser
+
+class WebDAVDocument (object):
+    """
+    WebDAV XML document.
+    """
+    @classmethod
+    def fromStream(cls, source):
+        return ParserClass().parseStream(source)
+
+    @classmethod
+    def fromString(cls, source):
+        return ParserClass().parseString(source)
+
+    @classmethod
+    def fromElement(cls, source):
+        return ParserClass().parseElement(source)
+
     def __init__(self, root_element):
         """
         root_element must be a WebDAVElement instance.

Modified: CalendarServer/branches/users/washort/use-lxml/twistedcaldav/test/test_calendarquery.py
===================================================================
--- CalendarServer/branches/users/washort/use-lxml/twistedcaldav/test/test_calendarquery.py	2010-07-30 22:50:29 UTC (rev 5962)
+++ CalendarServer/branches/users/washort/use-lxml/twistedcaldav/test/test_calendarquery.py	2010-07-30 22:52:49 UTC (rev 5963)
@@ -22,6 +22,7 @@
 from twext.web2.iweb import IResponse
 from twext.web2.stream import MemoryStream
 from twext.web2.dav import davxml
+import twext.web2.dav
 from twext.web2.dav.fileop import rmdir
 from twext.web2.dav.util import davXMLFromStream
 from twext.web2.test.test_server import SimpleRequest
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20100730/54a0d3e5/attachment-0001.html>


More information about the calendarserver-changes mailing list