[CalendarServer-changes] [11580] CalendarServer/trunk/twistedcaldav/directory

source_changes at macosforge.org source_changes at macosforge.org
Tue Aug 6 01:44:24 PDT 2013


Revision: 11580
          http://trac.calendarserver.org//changeset/11580
Author:   sagen at apple.com
Date:     2013-08-06 01:44:24 -0700 (Tue, 06 Aug 2013)
Log Message:
-----------
Optimize calendarserver-principal-search in both the OD and LDAP implementations.  In OD we now use nested expressions where possible, and in LDAP we manage the query result limits so we don't ask for more results than the client asked for.

Modified Paths:
--------------
    CalendarServer/trunk/twistedcaldav/directory/appleopendirectory.py
    CalendarServer/trunk/twistedcaldav/directory/ldapdirectory.py
    CalendarServer/trunk/twistedcaldav/directory/test/test_buildquery.py
    CalendarServer/trunk/twistedcaldav/directory/test/test_ldapdirectory.py

Modified: CalendarServer/trunk/twistedcaldav/directory/appleopendirectory.py
===================================================================
--- CalendarServer/trunk/twistedcaldav/directory/appleopendirectory.py	2013-08-05 17:55:30 UTC (rev 11579)
+++ CalendarServer/trunk/twistedcaldav/directory/appleopendirectory.py	2013-08-06 08:44:24 UTC (rev 11580)
@@ -121,7 +121,22 @@
             self.restrictToGUID = True
         self.restrictedTimestamp = 0
 
+        # Set up the /Local/Default node if it's in the search path so we can 
+        # send custom queries to it
+        self.localNode = None
+        try:
+            if self.node == "/Search":
+                result = self.odModule.getNodeAttributes(self.directory, "/Search",
+                    (dsattributes.kDS1AttrSearchPath,))
+                if "/Local/Default" in result[dsattributes.kDS1AttrSearchPath]:
+                    try:
+                        self.localNode = self.odModule.odInit("/Local/Default")
+                    except self.odModule.ODError, e:
+                        self.log.error("Failed to open /Local/Default): %s" % (e,))
+        except AttributeError:
+            pass
 
+
     @property
     def restrictedGUIDs(self):
         """
@@ -567,7 +582,7 @@
         def collectResults(results):
             self.log.debug("Got back %d records from OD" % (len(results),))
             for key, value in results:
-                self.log.debug("OD result: %s %s" % (key, value))
+                # self.log.debug("OD result: {key} {value}", key=key, value=value)
                 try:
                     recordNodeName = value.get(
                         dsattributes.kDSNAttrMetaNodeLocation)
@@ -664,10 +679,8 @@
             for compound in queries:
                 compound = compound.generate()
 
-                self.log.debug("Calling OD: Types %s, Query %s" %
-                    (recordTypes, compound))
-
                 try:
+                    startTime = time.time()
                     queryResults = lookupMethod(
                         directory,
                         compound,
@@ -675,6 +688,7 @@
                         recordTypes,
                         attrs,
                     )
+                    totalTime = time.time() - startTime
 
                     newSet = set()
                     for recordName, data in queryResults:
@@ -683,6 +697,8 @@
                             byGUID[guid] = (recordName, data)
                             newSet.add(guid)
 
+                    self.log.debug("Attendee OD query: Types %s, Query %s, %.2f sec, %d results" %
+                        (recordTypes, compound, totalTime, len(queryResults)))
                     sets.append(newSet)
 
                 except self.odModule.ODError, e:
@@ -697,7 +713,8 @@
                     results.append((data[dsattributes.kDSNAttrRecordName], data))
             return results
 
-        queries = buildQueriesFromTokens(tokens, self._ODFields)
+        localQueries = buildLocalQueriesFromTokens(tokens, self._ODFields)
+        nestedQuery = buildNestedQueryFromTokens(tokens, self._ODFields)
 
         # Starting with the record types corresponding to the context...
         recordTypes = self.recordTypesForSearchContext(context)
@@ -707,9 +724,13 @@
         recordTypes = [self._toODRecordTypes[r] for r in recordTypes]
 
         if recordTypes:
+            # Perform the complex/nested query.  If there was more than one
+            # token, this won't match anything in /Local, therefore we run
+            # the un-nested queries below and AND the results ourselves in
+            # multiQuery.
             results = multiQuery(
                 self.directory,
-                queries,
+                [nestedQuery],
                 recordTypes,
                 [
                     dsattributes.kDS1AttrGeneratedUID,
@@ -725,6 +746,30 @@
                     dsattributes.kDSNAttrNestedGroups,
                 ]
             )
+            if self.localNode is not None and len(tokens) > 1:
+                # /Local is in our search path and the complex query above
+                # would not have matched anything in /Local.  So now run
+                # the un-nested queries.
+                results.extend(
+                    multiQuery(
+                        self.localNode,
+                        localQueries,
+                        recordTypes,
+                        [
+                            dsattributes.kDS1AttrGeneratedUID,
+                            dsattributes.kDSNAttrRecordName,
+                            dsattributes.kDSNAttrAltSecurityIdentities,
+                            dsattributes.kDSNAttrRecordType,
+                            dsattributes.kDS1AttrDistinguishedName,
+                            dsattributes.kDS1AttrFirstName,
+                            dsattributes.kDS1AttrLastName,
+                            dsattributes.kDSNAttrEMailAddress,
+                            dsattributes.kDSNAttrMetaNodeLocation,
+                            dsattributes.kDSNAttrGroupMembers,
+                            dsattributes.kDSNAttrNestedGroups,
+                        ]
+                    )
+                )
             return succeed(collectResults(results))
         else:
             return succeed([])
@@ -743,7 +788,7 @@
         def collectResults(results):
             self.log.debug("Got back %d records from OD" % (len(results),))
             for key, value in results:
-                self.log.debug("OD result: %s %s" % (key, value))
+                # self.log.debug("OD result: {key} {value}", key=key, value=value)
                 try:
                     recordNodeName = value.get(
                         dsattributes.kDSNAttrMetaNodeLocation)
@@ -1298,7 +1343,7 @@
 
 
 
-def buildQueriesFromTokens(tokens, mapping):
+def buildLocalQueriesFromTokens(tokens, mapping):
     """
     OD /Local doesn't support nested complex queries, so create a list of
     complex queries that will be ANDed together in recordsMatchingTokens()
@@ -1330,7 +1375,38 @@
     return results
 
 
+def buildNestedQueryFromTokens(tokens, mapping):
+    """
+    Build a DS query expression such that all the tokens must appear in either
+    the fullName (anywhere) or emailAddresses (at the beginning).
+    
+    @param tokens: The tokens to search on
+    @type tokens: C{list} of C{str}
+    @param mapping: The mapping of DirectoryRecord attributes to OD attributes
+    @type mapping: C{dict}
+    @return: The nested expression object
+    @type: dsquery.expression
+    """
 
+    if len(tokens) == 0:
+        return None
+
+    fields = [
+        ("fullName", dsattributes.eDSContains),
+        ("emailAddresses", dsattributes.eDSStartsWith),
+    ]
+
+    outer = []
+    for token in tokens:
+        inner = []
+        for field, comparison in fields:
+            ODField = mapping[field]['odField']
+            query = dsquery.match(ODField, token, comparison)
+            inner.append(query)
+        outer.append(dsquery.expression(dsquery.expression.OR, inner))
+    return dsquery.expression(dsquery.expression.AND, outer)
+
+
 class OpenDirectoryRecord(CachingDirectoryRecord):
     """
     OpenDirectory implementation of L{IDirectoryRecord}.

Modified: CalendarServer/trunk/twistedcaldav/directory/ldapdirectory.py
===================================================================
--- CalendarServer/trunk/twistedcaldav/directory/ldapdirectory.py	2013-08-05 17:55:30 UTC (rev 11579)
+++ CalendarServer/trunk/twistedcaldav/directory/ldapdirectory.py	2013-08-06 08:44:24 UTC (rev 11580)
@@ -1071,8 +1071,10 @@
                         % (recordTypes, indexType, indexKey))
 
 
-    def recordsMatchingTokens(self, tokens, context=None):
+    def recordsMatchingTokens(self, tokens, context=None, limitResults=50, timeoutSeconds=30):
         """
+        # TODO: hook up limitResults to the client limit in the query
+
         @param tokens: The tokens to search on
         @type tokens: C{list} of C{str} (utf-8 bytes)
         @param context: An indication of what the end user is searching
@@ -1091,29 +1093,34 @@
         are considered.
         """
         self.log.debug("Peforming calendar user search for %s (%s)" % (tokens, context))
-
+        startTime = time.time()
         records = []
         recordTypes = self.recordTypesForSearchContext(context)
         recordTypes = [r for r in recordTypes if r in self.recordTypes()]
         guidAttr = self.rdnSchema["guidAttr"]
 
+        typeCounts = {}
         for recordType in recordTypes:
+            if limitResults == 0:
+                self.log.debug("LDAP search aggregate limit reached")
+                break
+            typeCounts[recordType] = 0
             base = self.typeDNs[recordType]
             scope = ldap.SCOPE_SUBTREE
-            filterstr = buildFilterFromTokens(self.rdnSchema[recordType]["mapping"],
-                tokens)
+            filterstr = buildFilterFromTokens(recordType, self.rdnSchema[recordType]["mapping"],
+                tokens, ("(%s=*)" % (guidAttr,)))
 
             if filterstr is not None:
                 # Query the LDAP server
-                self.log.debug("LDAP search %s %s %s" %
-                    (ldap.dn.dn2str(base), scope, filterstr))
+                self.log.debug("LDAP search %s %s (limit=%d)" %
+                    (ldap.dn.dn2str(base), filterstr, limitResults))
                 results = self.timedSearch(ldap.dn.dn2str(base), scope,
                     filterstr=filterstr, attrlist=self.attrlist,
-                    timeoutSeconds=self.requestTimeoutSeconds,
-                    resultLimit=self.requestResultsLimit)
-                self.log.debug("LDAP search returned %d results" % (len(results),))
+                    timeoutSeconds=timeoutSeconds,
+                    resultLimit=limitResults)
                 numMissingGuids = 0
                 numMissingRecordNames = 0
+                numNotEnabled = 0
                 for dn, attrs in results:
                     dn = normalizeDNstr(dn)
                     # Skip if group restriction is in place and guid is not
@@ -1129,9 +1136,12 @@
                         # not include in principal property search results
                         if (recordType != self.recordType_groups):
                             if not record.enabledForCalendaring:
+                                numNotEnabled += 1
                                 continue
 
                         records.append(record)
+                        typeCounts[recordType] += 1
+                        limitResults -= 1
 
                     except MissingGuidException:
                         numMissingGuids += 1
@@ -1139,15 +1149,12 @@
                     except MissingRecordNameException:
                         numMissingRecordNames += 1
 
-                if numMissingGuids:
-                    self.log.warn("%d %s records are missing %s" %
-                        (numMissingGuids, recordType, guidAttr))
+                self.log.debug("LDAP search returned %d results, %d usable" % (len(results), typeCounts[recordType]))
 
-                if numMissingRecordNames:
-                    self.log.warn("%d %s records are missing record name" %
-                        (numMissingRecordNames, recordType))
 
-        self.log.debug("Calendar user search matched %d records" % (len(records),))
+        typeCountsStr = ", ".join(["%s:%d" % (rt, ct) for (rt, ct) in typeCounts.iteritems()])
+        totalTime = time.time() - startTime
+        self.log.info("Calendar user search for %s matched %d records (%s) in %.2f seconds" % (tokens, len(records), typeCountsStr, totalTime))
         return succeed(records)
 
 
@@ -1422,12 +1429,13 @@
     return filterstr
 
 
-def buildFilterFromTokens(mapping, tokens):
+def buildFilterFromTokens(recordType, mapping, tokens, extras=()):
     """
     Create an LDAP filter string from a list of query tokens.  Each token is
     searched for in each LDAP attribute corresponding to "fullName" and
     "emailAddresses" (could be multiple LDAP fields for either).
 
+    @param recordType: The recordType to use to customize the filter
     @param mapping: A dict mapping internal directory attribute names to ldap names.
     @type mapping: C{dict}
     @param tokens: The list of tokens to search for
@@ -1460,6 +1468,8 @@
         return None
 
     tokenFragments = []
+    tokenFragments.extend(extras)
+
     for token in tokens:
         fragments = []
         for ldapField, template in ldapFields:
@@ -1478,6 +1488,7 @@
     return filterStr
 
 
+
 class LdapDirectoryRecord(CachingDirectoryRecord):
     """
     LDAP implementation of L{IDirectoryRecord}.

Modified: CalendarServer/trunk/twistedcaldav/directory/test/test_buildquery.py
===================================================================
--- CalendarServer/trunk/twistedcaldav/directory/test/test_buildquery.py	2013-08-05 17:55:30 UTC (rev 11579)
+++ CalendarServer/trunk/twistedcaldav/directory/test/test_buildquery.py	2013-08-06 08:44:24 UTC (rev 11580)
@@ -15,7 +15,8 @@
 ##
 
 from twistedcaldav.test.util import TestCase
-from twistedcaldav.directory.appleopendirectory import buildQueries, buildQueriesFromTokens, OpenDirectoryService
+from twistedcaldav.directory.appleopendirectory import (buildQueries,
+    buildLocalQueriesFromTokens, OpenDirectoryService, buildNestedQueryFromTokens)
 from calendarserver.platform.darwin.od import dsattributes
 
 class BuildQueryTests(TestCase):
@@ -104,17 +105,21 @@
             }
         )
 
-    def test_buildQueryFromTokens(self):
-        results = buildQueriesFromTokens([], OpenDirectoryService._ODFields)
+
+    def test_buildLocalQueryFromTokens(self):
+        """
+        Verify the generating of the simpler queries passed to /Local/Default
+        """
+        results = buildLocalQueriesFromTokens([], OpenDirectoryService._ODFields)
         self.assertEquals(results, None)
 
-        results = buildQueriesFromTokens(["foo"], OpenDirectoryService._ODFields)
+        results = buildLocalQueriesFromTokens(["foo"], OpenDirectoryService._ODFields)
         self.assertEquals(
             results[0].generate(),
             "(|(dsAttrTypeStandard:RealName=*foo*)(dsAttrTypeStandard:EMailAddress=foo*))"
         )
 
-        results = buildQueriesFromTokens(["foo", "bar"], OpenDirectoryService._ODFields)
+        results = buildLocalQueriesFromTokens(["foo", "bar"], OpenDirectoryService._ODFields)
         self.assertEquals(
             results[0].generate(),
             "(|(dsAttrTypeStandard:RealName=*foo*)(dsAttrTypeStandard:EMailAddress=foo*))"
@@ -123,3 +128,29 @@
             results[1].generate(),
             "(|(dsAttrTypeStandard:RealName=*bar*)(dsAttrTypeStandard:EMailAddress=bar*))"
         )
+
+
+    def test_buildNestedQueryFromTokens(self):
+        """
+        Verify the generating of the complex nested queries
+        """
+        query = buildNestedQueryFromTokens([], OpenDirectoryService._ODFields)
+        self.assertEquals(query, None)
+
+        query = buildNestedQueryFromTokens(["foo"], OpenDirectoryService._ODFields)
+        self.assertEquals(
+            query.generate(),
+            "(|(dsAttrTypeStandard:RealName=*foo*)(dsAttrTypeStandard:EMailAddress=foo*))"
+        )
+
+        query = buildNestedQueryFromTokens(["foo", "bar"], OpenDirectoryService._ODFields)
+        self.assertEquals(
+            query.generate(),
+            "(&(|(dsAttrTypeStandard:RealName=*foo*)(dsAttrTypeStandard:EMailAddress=foo*))(|(dsAttrTypeStandard:RealName=*bar*)(dsAttrTypeStandard:EMailAddress=bar*)))"
+        )
+
+        query = buildNestedQueryFromTokens(["foo", "bar", "baz"], OpenDirectoryService._ODFields)
+        self.assertEquals(
+            query.generate(),
+            "(&(|(dsAttrTypeStandard:RealName=*foo*)(dsAttrTypeStandard:EMailAddress=foo*))(|(dsAttrTypeStandard:RealName=*bar*)(dsAttrTypeStandard:EMailAddress=bar*))(|(dsAttrTypeStandard:RealName=*baz*)(dsAttrTypeStandard:EMailAddress=baz*)))"
+        )

Modified: CalendarServer/trunk/twistedcaldav/directory/test/test_ldapdirectory.py
===================================================================
--- CalendarServer/trunk/twistedcaldav/directory/test/test_ldapdirectory.py	2013-08-05 17:55:30 UTC (rev 11579)
+++ CalendarServer/trunk/twistedcaldav/directory/test/test_ldapdirectory.py	2013-08-06 08:44:24 UTC (rev 11580)
@@ -264,7 +264,7 @@
             ]
             for entry in entries:
                 self.assertEquals(
-                    buildFilterFromTokens(entry["mapping"], entry["tokens"]),
+                    buildFilterFromTokens(None, entry["mapping"], entry["tokens"]),
                     entry["expected"]
                 )
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20130806/2c631948/attachment-0001.html>


More information about the calendarserver-changes mailing list