[CalendarServer-changes] [12845] CalendarServer/trunk

source_changes at macosforge.org source_changes at macosforge.org
Fri Mar 7 12:48:43 PST 2014


Revision: 12845
          http://trac.calendarserver.org//changeset/12845
Author:   cdaboo at apple.com
Date:     2014-03-07 12:48:43 -0800 (Fri, 07 Mar 2014)
Log Message:
-----------
Fix master-child slot counting issues.

Modified Paths:
--------------
    CalendarServer/trunk/calendarserver/tap/caldav.py
    CalendarServer/trunk/calendarserver/tap/test/test_caldav.py
    CalendarServer/trunk/txweb2/metafd.py
    CalendarServer/trunk/txweb2/test/test_metafd.py

Modified: CalendarServer/trunk/calendarserver/tap/caldav.py
===================================================================
--- CalendarServer/trunk/calendarserver/tap/caldav.py	2014-03-07 20:48:27 UTC (rev 12844)
+++ CalendarServer/trunk/calendarserver/tap/caldav.py	2014-03-07 20:48:43 UTC (rev 12845)
@@ -536,7 +536,9 @@
     def startService(self):
         for slaveNumber in xrange(0, config.MultiProcess.ProcessCount):
             if config.UseMetaFD:
-                extraArgs = dict(metaSocket=self.dispatcher.addSocket())
+                extraArgs = dict(
+                    metaSocket=self.dispatcher.addSocket(slaveNumber)
+                )
             else:
                 extraArgs = dict(inheritFDs=self.inheritFDs,
                                  inheritSSLFDs=self.inheritSSLFDs)
@@ -2025,7 +2027,6 @@
     @ivar metaSocket: an AF_UNIX/SOCK_DGRAM socket (initialized from the
         dispatcher passed to C{__init__}) that is to be inherited by the
         subprocess and used to accept incoming connections.
-
     @type metaSocket: L{socket.socket}
 
     @ivar ampSQLDispenser: a factory for AF_UNIX/SOCK_STREAM sockets that are
@@ -2060,6 +2061,30 @@
         self.ampDBSocket = None
 
 
+    def starting(self):
+        """
+        Called when the process is being started (or restarted). Allows for various initialization
+        operations to be done. The child process will itself signal back to the master when it is ready
+        to accept sockets - until then the master socket is marked as "starting" which means it is not
+        active and won't be dispatched to.
+        """
+
+        # Always tell any metafd socket that we have started, so it can re-initialize state.
+        if self.metaSocket is not None:
+            self.metaSocket.start()
+
+
+    def stopped(self):
+        """
+        Called when the process has stopped (died). The socket is marked as "stopped" which means it is
+        not active and won't be dispatched to.
+        """
+
+        # Always tell any metafd socket that we have stopped, so it can be marked as inactive.
+        if self.metaSocket is not None:
+            self.metaSocket.stop()
+
+
     def getName(self):
         return "{}-{}".format(self.prefix, self.id)
 
@@ -2088,7 +2113,7 @@
         fds = {}
         extraFDs = []
         if self.metaSocket is not None:
-            extraFDs.append(self.metaSocket.fileno())
+            extraFDs.append(self.metaSocket.childSocket().fileno())
         if self.ampSQLDispenser is not None:
             self.ampDBSocket = self.ampSQLDispenser.dispense()
             extraFDs.append(self.ampDBSocket.fileno())
@@ -2150,7 +2175,7 @@
 
         if self.metaSocket is not None:
             args.extend([
-                "-o", "MetaFD={}".format(self.metaSocket.fileno())
+                "-o", "MetaFD={}".format(self.metaSocket.childSocket().fileno())
             ])
         if self.ampDBSocket is not None:
             args.extend([
@@ -2243,6 +2268,13 @@
             exists
         """
         class SimpleProcessObject(object):
+
+            def starting(self):
+                pass
+
+            def stopped(self):
+                pass
+
             def getName(self):
                 return name
 
@@ -2334,7 +2366,10 @@
     def processEnded(self, name):
         """
         When a child process has ended it calls me so I can fire the
-        appropriate deferred which was created in stopService
+        appropriate deferred which was created in stopService.
+
+        Also make sure to signal the dispatcher so that the socket is
+        marked as inactive.
         """
         # Cancel the scheduled _forceStopProcess function if the process
         # dies naturally
@@ -2343,6 +2378,8 @@
                 self.murder[name].cancel()
             del self.murder[name]
 
+        self.processes[name][0].stopped()
+
         del self.protocols[name]
 
         if self._reactor.seconds() - self.timeStarted[name] < self.threshold:
@@ -2429,6 +2466,8 @@
 
         childFDs.update(procObj.getFileDescriptors())
 
+        procObj.starting()
+
         args = procObj.getCommandLine()
 
         self._reactor.spawnProcess(

Modified: CalendarServer/trunk/calendarserver/tap/test/test_caldav.py
===================================================================
--- CalendarServer/trunk/calendarserver/tap/test/test_caldav.py	2014-03-07 20:48:27 UTC (rev 12844)
+++ CalendarServer/trunk/calendarserver/tap/test/test_caldav.py	2014-03-07 20:48:43 UTC (rev 12845)
@@ -954,6 +954,14 @@
         self.args = args
 
 
+    def starting(self):
+        pass
+
+
+    def stopped(self):
+        pass
+
+
     def getCommandLine(self):
         """
         Simple command line.
@@ -1204,12 +1212,35 @@
 
 
 
+class FakeSubsocket(object):
+
+    def __init__(self, fakefd):
+        self.fakefd = fakefd
+
+
+    def childSocket(self):
+        return self.fakefd
+
+
+    def start(self):
+        pass
+
+
+    def restarted(self):
+        pass
+
+
+    def stop(self):
+        pass
+
+
+
 class FakeDispatcher(object):
     n = 3
 
     def addSocket(self):
         self.n += 1
-        return FakeFD(self.n)
+        return FakeSubsocket(FakeFD(self.n))
 
 
 

Modified: CalendarServer/trunk/txweb2/metafd.py
===================================================================
--- CalendarServer/trunk/txweb2/metafd.py	2014-03-07 20:48:27 UTC (rev 12844)
+++ CalendarServer/trunk/txweb2/metafd.py	2014-03-07 20:48:43 UTC (rev 12845)
@@ -21,12 +21,11 @@
 """
 from __future__ import print_function
 
-from functools import total_ordering
-
 from zope.interface import implementer
 
 from twext.internet.sendfdport import (
-    InheritedPort, InheritedSocketDispatcher, InheritingProtocolFactory)
+    InheritedPort, InheritedSocketDispatcher, InheritingProtocolFactory,
+    IStatus)
 from twext.internet.tcp import MaxAcceptTCPServer
 from twext.python.log import Logger
 from txweb2.channel.http import HTTPFactory
@@ -164,17 +163,26 @@
 
 
 
-@total_ordering
+@implementer(IStatus)
 class WorkerStatus(FancyStrMixin, object):
     """
     The status of a worker process.
     """
 
-    showAttributes = ("acknowledged unacknowledged started abandoned unclosed"
+    showAttributes = ("acknowledged unacknowledged total started abandoned unclosed starting stopped"
                       .split())
 
-    def __init__(self, acknowledged=0, unacknowledged=0, started=0,
-                 abandoned=0, unclosed=0):
+    def __init__(
+        self,
+        acknowledged=0,
+        unacknowledged=0,
+        total=0,
+        started=0,
+        abandoned=0,
+        unclosed=0,
+        starting=1,
+        stopped=0
+    ):
         """
         Create a L{ConnectionStatus} with a number of sent connections and a
         number of un-acknowledged connections.
@@ -187,20 +195,32 @@
             the subprocess which have never received a status response (a
             "C{+}" status message).
 
+        @param total: The total number of acknowledged connections over
+            the lifetime of this socket.
+
+        @param started: The number of times this worker has been started.
+
         @param abandoned: The number of connections which have been sent to
             this worker, but were not acknowledged at the moment that the
-            worker restarted.
+            worker was stopped.
 
-        @param started: The number of times this worker has been started.
-
         @param unclosed: The number of sockets which have been sent to the
             subprocess but not yet closed.
+
+        @param starting: The process that owns this socket is starting. Do not
+            dispatch to it until we receive the started message.
+
+        @param stopped: The process that owns this socket has stopped. Do not
+            dispatch to it.
         """
         self.acknowledged = acknowledged
         self.unacknowledged = unacknowledged
+        self.total = total
         self.started = started
         self.abandoned = abandoned
         self.unclosed = unclosed
+        self.starting = starting
+        self.stopped = stopped
 
 
     def effective(self):
@@ -210,43 +230,67 @@
         return self.acknowledged + self.unacknowledged
 
 
-    def restarted(self):
+    def active(self):
         """
-        The L{WorkerStatus} derived from the current status of a process and
-        the fact that it just restarted.
+        Is the subprocess associated with this socket available to dispatch to?
+        i.e., this socket is neither stopped nor starting.
         """
-        return self.__class__(0, 0, self.started + 1, self.unacknowledged)
+        return self.starting == 0 and self.stopped == 0
 
 
-    def _tuplify(self):
-        return tuple(getattr(self, attr) for attr in self.showAttributes)
+    def start(self):
+        """
+        The child process for this L{WorkerStatus} is about to (re)start. Reset the status to indicate it
+        is starting - that should prevent any new connections being dispatched.
+        """
+        return self.reset(
+            starting=1,
+            stopped=0,
+        )
 
 
-    def __lt__(self, other):
-        if not isinstance(other, WorkerStatus):
-            return NotImplemented
-        return self.effective() < other.effective()
+    def restarted(self):
+        """
+        The child process for this L{WorkerStatus} has indicated it is now available to accept
+        connections, so reset the starting status so this socket will be available for dispatch.
+        """
+        return self.reset(
+            started=self.started + 1,
+            starting=0,
+        )
 
 
-    def __eq__(self, other):
-        if not isinstance(other, WorkerStatus):
-            return NotImplemented
-        return self._tuplify() == other._tuplify()
+    def stop(self):
+        """
+        The child process for this L{WorkerStatus} has stopped. Stop the socket and clear out
+        existing counters, but track abandoned connections.
+        """
+        return self.reset(
+            acknowledged=0,
+            unacknowledged=0,
+            abandoned=self.abandoned + self.unacknowledged,
+            starting=0,
+            stopped=1,
+        )
 
 
-    def __add__(self, other):
-        if not isinstance(other, WorkerStatus):
-            return NotImplemented
-        a = self._tuplify()
-        b = other._tuplify()
-        c = [a1 + b1 for (a1, b1) in zip(a, b)]
-        return self.__class__(*c)
+    def adjust(self, **kwargs):
+        """
+        Update the L{WorkerStatus} by adding the supplied values to the specified attributes.
+        """
+        for k, v in kwargs.items():
+            newval = getattr(self, k) + v
+            setattr(self, k, max(newval, 0))
+        return self
 
 
-    def __sub__(self, other):
-        if not isinstance(other, WorkerStatus):
-            return NotImplemented
-        return self + self.__class__(*[-x for x in other._tuplify()])
+    def reset(self, **kwargs):
+        """
+        Reset the L{WorkerStatus} by setting the supplied values in the specified attributes.
+        """
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+        return self
 
 
 
@@ -272,6 +316,7 @@
         self.dispatcher = InheritedSocketDispatcher(self)
         self.maxAccepts = maxAccepts
         self.maxRequests = maxRequests
+        self.overloaded = False
 
 
     def startService(self):
@@ -314,20 +359,20 @@
             # A connection has gone away in a subprocess; we should start
             # accepting connections again if we paused (see
             # newConnectionStatus)
-            return previousStatus - WorkerStatus(acknowledged=1)
+            return previousStatus.adjust(acknowledged=-1)
+
         elif message == '0':
-            # A new process just started accepting new connections.  It might
-            # still have some unacknowledged connections, but any connections
-            # that it acknowledged working on are now completed.  (We have no
-            # way of knowing whether the acknowledged connections were acted
-            # upon or dropped, so we have to treat that number with a healthy
-            # amount of skepticism.)
+            # A new process just started accepting new connections.
             return previousStatus.restarted()
+
         else:
             # '+' acknowledges that the subprocess has taken on the work.
-            return previousStatus + WorkerStatus(acknowledged=1,
-                                                 unacknowledged=-1,
-                                                 unclosed=1)
+            return previousStatus.adjust(
+                acknowledged=1,
+                unacknowledged=-1,
+                total=1,
+                unclosed=1,
+            )
 
 
     def closeCountFromStatus(self, status):
@@ -335,21 +380,22 @@
         Determine the number of sockets to close from the current status.
         """
         toClose = status.unclosed
-        return (toClose, status - WorkerStatus(unclosed=toClose))
+        return (toClose, status.adjust(unclosed=-toClose))
 
 
     def newConnectionStatus(self, previousStatus):
         """
-        Determine the effect of a new connection being sent on a subprocess
-        socket.
+        A connection was just sent to the process, but not yet acknowledged.
         """
-        return previousStatus + WorkerStatus(unacknowledged=1)
+        return previousStatus.adjust(unacknowledged=1)
 
 
     def statusesChanged(self, statuses):
         """
         The L{InheritedSocketDispatcher} is reporting that the list of
-        connection-statuses have changed.
+        connection-statuses have changed. Check to see if we are overloaded
+        or if there are no active processes left. If so, stop the protocol
+        factory from processing more requests until capacity is back.
 
         (The argument to this function is currently duplicated by the
         C{self.dispatcher.statuses} attribute, which is what
@@ -360,8 +406,10 @@
         self._outstandingRequests = current # preserve for or= field in log
         maximum = self.maxRequests
         overloaded = (current >= maximum)
+        available = len(filter(lambda x: x.active(), self.dispatcher.statuses))
+        self.overloaded = (overloaded or available == 0)
         for f in self.factories:
-            if overloaded:
+            if self.overloaded:
                 f.loadAboveMaximum()
             else:
                 f.loadNominal()

Modified: CalendarServer/trunk/txweb2/test/test_metafd.py
===================================================================
--- CalendarServer/trunk/txweb2/test/test_metafd.py	2014-03-07 20:48:27 UTC (rev 12844)
+++ CalendarServer/trunk/txweb2/test/test_metafd.py	2014-03-07 20:48:43 UTC (rev 12845)
@@ -71,15 +71,12 @@
     def startReading(self):
         "Do nothing."
 
-
     def stopReading(self):
         "Do nothing."
 
-
     def startWriting(self):
         "Do nothing."
 
-
     def stopWriting(self):
         "Do nothing."
 
@@ -93,19 +90,15 @@
     def startReading(self):
         "Do nothing."
 
-
     def stopReading(self):
         "Do nothing."
 
-
     def startWriting(self):
         "Do nothing."
 
-
     def stopWriting(self):
         "Do nothing."
 
-
     def __init__(self, *a, **kw):
         super(ServerTransportForTesting, self).__init__(*a, **kw)
         self.reactor = None
@@ -225,12 +218,28 @@
         L{WorkerStatus.__repr__} will show all the values associated with the
         status of the worker.
         """
-        self.assertEquals(repr(WorkerStatus(1, 2, 3, 4, 5)),
-                          "<WorkerStatus acknowledged=1 unacknowledged=2 "
-                          "started=3 abandoned=4 unclosed=5>")
+        self.assertEquals(repr(WorkerStatus(1, 2, 3, 4, 5, 6, 7, 8)),
+                          "<WorkerStatus acknowledged=1 unacknowledged=2 total=3 "
+                          "started=4 abandoned=5 unclosed=6 starting=7 stopped=8>")
 
 
+    def test_workerStatusNonNegative(self):
+        """
+        L{WorkerStatus.adjust} will clamp each adjusted value at zero so that
+        a counter can never go negative.
+        """
+        w = WorkerStatus()
+        w.adjust(
+            acknowledged=1,
+            unacknowledged=-1,
+            total=1,
+        )
+        self.assertEquals(w.acknowledged, 1)
+        self.assertEquals(w.unacknowledged, 0)
+        self.assertEquals(w.total, 1)
 
+
+
 class LimiterBuilder(object):
     """
     A L{LimiterBuilder} can build a L{ConnectionLimiter} and associated objects
@@ -251,7 +260,9 @@
         self.limiter.addPortService("TCP", 4321, "127.0.0.1", 5,
                                     self.serverServiceMakerMaker(self.service))
         for ignored in xrange(socketCount):
-            self.dispatcher.addSocket()
+            subskt = self.dispatcher.addSocket()
+            subskt.start()
+            subskt.restarted()
         # Has to be running in order to add stuff.
         self.limiter.startService()
         self.port = self.service.myPort
@@ -296,7 +307,7 @@
         @param count: Amount of load to add; default to the maximum that the
             limiter.
         """
-        for x in range(count or self.limiter.maxRequests):
+        for _ignore_x in range(count or self.limiter.maxRequests):
             self.dispatcher.sendFileDescriptor(None, "SSL")
             if acknowledged:
                 self.dispatcher.statusMessage(
@@ -305,6 +316,8 @@
 
 
     def processRestart(self):
+        self.dispatcher._subprocessSockets[0].stop()
+        self.dispatcher._subprocessSockets[0].start()
         self.dispatcher.statusMessage(
             self.dispatcher._subprocessSockets[0], "0"
         )
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20140307/780b064b/attachment-0001.html>


More information about the calendarserver-changes mailing list