[CalendarServer-changes] [6629] CalendarServer/trunk/contrib/performance

source_changes at macosforge.org source_changes at macosforge.org
Fri Nov 12 14:07:04 PST 2010


Revision: 6629
          http://trac.macosforge.org/projects/calendarserver/changeset/6629
Author:   exarkun at twistedmatrix.com
Date:     2010-11-12 14:07:01 -0800 (Fri, 12 Nov 2010)
Log Message:
-----------
Enhance benchmark tracing to capture reads and writes done by the postgres processes as well.

This involves running another dtrace process that looks for just these extra reads and writes and integrating its
results with the rest.  Also added is a bunch of logging calls to make tracking the behavior of the benchmark
tool easier.

Modified Paths:
--------------
    CalendarServer/trunk/contrib/performance/benchlib.py
    CalendarServer/trunk/contrib/performance/benchmark.py
    CalendarServer/trunk/contrib/performance/display-calendar-events.py

Added Paths:
-----------
    CalendarServer/trunk/contrib/performance/pgsql.d

Modified: CalendarServer/trunk/contrib/performance/benchlib.py
===================================================================
--- CalendarServer/trunk/contrib/performance/benchlib.py	2010-11-12 21:22:30 UTC (rev 6628)
+++ CalendarServer/trunk/contrib/performance/benchlib.py	2010-11-12 22:07:01 UTC (rev 6629)
@@ -2,7 +2,10 @@
 from time import time
 
 from twisted.internet.defer import DeferredSemaphore, inlineCallbacks, returnValue, gatherResults
+from twisted.internet.task import deferLater
 from twisted.web.http_headers import Headers
+from twisted.internet import reactor
+from twisted.python.log import msg
 
 from stats import Duration
 from httpclient import StringProducer, readBody
@@ -68,14 +71,32 @@
     data = {urlopen: []}
 
     def once():
+        msg('emitting request')
         before = time()
         d = agent.request(*paramgen())
         def cbResponse(response):
             d = readBody(response)
             def cbBody(ignored):
                 after = time()
+                msg('response received')
+
+                # Give things a moment to settle down.  This is a hack
+                # to try to collect the last of the dtrace output
+                # which may still be sitting in the write buffer of
+                # the dtrace process.  It would be nice if there were
+                # a more reliable way to know when we had it all, but
+                # no luck on that front so far.  The implementation of
+                # mark is supposed to take care of that, but the
+                # assumption it makes about ordering of events appears
+                # to be invalid.
+
+                # XXX Disabled until I get a chance to seriously
+                # measure what affect, if any, it has.
+                # d = deferLater(reactor, 0.5, dtrace.mark)
                 d = dtrace.mark()
+
                 def cbStats(stats):
+                    msg('stats collected')
                     for k, v in stats.iteritems():
                         data.setdefault(k, []).append(v)
                     data[urlopen].append(after - before)
@@ -86,13 +107,17 @@
         d.addCallback(cbResponse)
         return d
 
+    msg('starting dtrace')
     yield dtrace.start()
+    msg('dtrace started')
     l = []
     for i in range(samples):
         l.append(sem.run(once))
     yield gatherResults(l)
 
+    msg('stopping dtrace')
     leftOver = yield dtrace.stop()
+    msg('dtrace stopped')
     for (k, v) in leftOver.items():
         if v:
             print 'Extra', k, ':', v

Modified: CalendarServer/trunk/contrib/performance/benchmark.py
===================================================================
--- CalendarServer/trunk/contrib/performance/benchmark.py	2010-11-12 21:22:30 UTC (rev 6628)
+++ CalendarServer/trunk/contrib/performance/benchmark.py	2010-11-12 22:07:01 UTC (rev 6629)
@@ -14,6 +14,7 @@
 from twisted.internet.defer import (
     Deferred, inlineCallbacks, gatherResults)
 from twisted.internet import reactor
+from twisted.python.log import msg
 
 from stats import SQLDuration, Bytes
 
@@ -119,7 +120,7 @@
         try:
             which, when = rest.split(None, 1)
         except ValueError:
-            print 'Bad EXECUTE line: %r' % (rest,)
+            msg('Bad EXECUTE line: %r' % (rest,))
             return
 
         if which == 'SQL':
@@ -129,17 +130,17 @@
         when = int(when)
         if which == 'ENTRY':
             if self.start is not None:
-                print 'entry without return at', when, 'in', cmd
+                msg('entry without return at %s in %s' % (when, cmd))
             self.start = when
         elif which == 'RETURN':
             if self.start is None:
-                print 'return without entry at', when, 'in', cmd
+                msg('return without entry at %s in %s' % (when, cmd))
             elif self.sql is None:
-                print 'return without SQL at', when, 'in', cmd
+                msg('return without SQL at %s in %s' % (when, cmd))
             else:
                 diff = when - self.start
                 if diff < 0:
-                    print 'Completely bogus EXECUTE', self.start, when
+                    msg('Completely bogus EXECUTE %s %s' % (self.start, when))
                 else:
                     if cmd == 'EXECUTE':
                         accum = self.collector._execute
@@ -201,14 +202,40 @@
         ready = []
         self.finished = []
         self.dtraces = {}
+
+        # Trace each child process specifically.  Necessary because of
+        # the way SQL execution is measured, which requires the
+        # $target dtrace variable (which can only take on a single
+        # value).
         for p in self.pids:
-            started, stopped = self._startDTrace(p)
+            started, stopped = self._startDTrace(self._dScript, p)
             ready.append(started)
             self.finished.append(stopped)
+
+        # Also trace postgres i/o operations.  This involves no
+        # target, because the dtrace code just looks for processes
+        # named "postgres".
+        started, stopped = self._startDTrace("pgsql.d", None)
+        ready.append(started)
+        self.finished.append(stopped)
+
         return gatherResults(ready)
 
 
-    def _startDTrace(self, pid):
+    def _startDTrace(self, script, pid):
+        """
+        Launch a dtrace process.
+
+        @param script: A C{str} giving the path to the dtrace program
+            to run.
+
+        @param pid: A C{int} to target dtrace at a particular process,
+            or C{None} not to.
+
+        @return: A two-tuple of L{Deferred}s.  The first will fire
+            when the dtrace process is ready to go, the second will
+            fire when it exits.
+        """
         started = Deferred()
         stopped = Deferred()
         proto = IOMeasureConsumer(started, stopped, _DTraceParser(self))
@@ -220,20 +247,23 @@
             "-I", dirname(__file__),
             # suppress most implicitly generated output (which would mess up our parser)
             "-q",
+            # load this script
+            "-s", script]
+        if pid is not None:
             # make this pid the target
-            "-p", str(pid),
-            # load this script
-            "-s", self._dScript]
+            command.extend(["-p", str(pid)])
+
         process = reactor.spawnProcess(proto, command[0], command)
         def eintr(reason):
             reason.trap(DTraceBug)
-            print 'Dtrace startup failed (', reason.getErrorMessage().strip(), '), retrying.'
-            return self._startDTrace(pid)
+            msg('Dtrace startup failed (%s), retrying.' % (reason.getErrorMessage().strip(),))
+            return self._startDTrace(script, pid)
         def ready(passthrough):
             # Once the dtrace process is ready, save the state and
             # have the stopped Deferred deal with the results.  We
             # don't want to do either of these for failed dtrace
             # processes.
+            msg("dtrace tracking pid=%s" % (pid,))
             self.dtraces[pid] = (process, proto)
             stopped.addCallback(self._cleanup, pid)
             return passthrough
@@ -286,7 +316,7 @@
     for (name, measure) in benchmarks:
         statistics[name] = {}
         for p in parameters:
-            print 'Parameter at', p
+            print '%s, parameter=%s' % (name, p)
             dtrace = DTraceCollector("io_measure.d", pids)
             data = yield measure(host, port, dtrace, p, samples)
             statistics[name][p] = data
@@ -329,7 +359,7 @@
 
 
 def main():
-    from twisted.python.log import err
+    from twisted.python.log import startLogging, err
 
     options = BenchmarkOptions()
     try:
@@ -342,6 +372,8 @@
         from twisted.python.failure import startDebugMode
         startDebugMode()
 
+    startLogging(file('benchmark.log', 'a'), False)
+
     d = benchmark(
         options['host'], options['port'],
         options['log-directory'], options['label'],

Modified: CalendarServer/trunk/contrib/performance/display-calendar-events.py
===================================================================
--- CalendarServer/trunk/contrib/performance/display-calendar-events.py	2010-11-12 21:22:30 UTC (rev 6628)
+++ CalendarServer/trunk/contrib/performance/display-calendar-events.py	2010-11-12 22:07:01 UTC (rev 6629)
@@ -3,6 +3,9 @@
 
 store = CalCalendarStore.defaultCalendarStore()
 calendars = store.calendars()
+print calendars
+raise SystemExit
+
 predicate = CalCalendarStore.eventPredicateWithStartDate_endDate_calendars_(
     NSDate.date(), NSDate.distantFuture(),
     [calendars[2]])

Added: CalendarServer/trunk/contrib/performance/pgsql.d
===================================================================
--- CalendarServer/trunk/contrib/performance/pgsql.d	                        (rev 0)
+++ CalendarServer/trunk/contrib/performance/pgsql.d	2010-11-12 22:07:01 UTC (rev 6629)
@@ -0,0 +1,34 @@
+#define READ(fname) \
+syscall::fname:return, syscall::fname ## _nocancel:return \
+/execname == "postgres"/ \
+{ \
+	printf("READ %d\n\1", arg0); \
+}
+
+READ(read)
+READ(readv)
+READ(pread)
+
+#define WRITE(fname) \
+syscall::fname:entry, syscall::fname ## _nocancel:entry \
+/execname == "postgres"/ \
+{ \
+	printf("WRITE %d\n\1", arg2); \
+}
+
+WRITE(write)
+WRITE(writev)
+WRITE(pwrite)
+
+dtrace:::BEGIN
+{
+	/* Let the watcher know things are alright.
+	 */
+	printf("READY\n");
+}
+
+syscall::execve:entry
+/copyinstr(arg0) == "CalendarServer dtrace benchmarking signal"/
+{
+	printf("MARK x\n\1");
+}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20101112/94efbd53/attachment-0001.html>


More information about the calendarserver-changes mailing list