[CalendarServer-changes] [6202] CalendarServer/trunk/contrib/performance

source_changes at macosforge.org source_changes at macosforge.org
Mon Aug 30 07:52:00 PDT 2010


Revision: 6202
          http://trac.macosforge.org/projects/calendarserver/changeset/6202
Author:   exarkun at twistedmatrix.com
Date:     2010-08-30 07:51:59 -0700 (Mon, 30 Aug 2010)
Log Message:
-----------
Run the dtrace process under sudo, which it needs in order to run.  Add stats methods that produce per-sample aggregate SQL statistics (such as the total time spent executing SQL).  Make the sampler script more robust, and have it upload SQL data as well.

Modified Paths:
--------------
    CalendarServer/trunk/contrib/performance/benchlib.py
    CalendarServer/trunk/contrib/performance/benchmark.py
    CalendarServer/trunk/contrib/performance/run.sh
    CalendarServer/trunk/contrib/performance/sample.sh
    CalendarServer/trunk/contrib/performance/sql_measure.d
    CalendarServer/trunk/contrib/performance/stats.py
    CalendarServer/trunk/contrib/performance/upload.py

Modified: CalendarServer/trunk/contrib/performance/benchlib.py
===================================================================
--- CalendarServer/trunk/contrib/performance/benchlib.py	2010-08-30 02:28:22 UTC (rev 6201)
+++ CalendarServer/trunk/contrib/performance/benchlib.py	2010-08-30 14:51:59 UTC (rev 6202)
@@ -16,6 +16,7 @@
         self.root = root
         self.principal = principal
 
+
     def deleteResource(self, url):
         return self.agent.request('DELETE', 'http://%s%s' % (self.netloc, url))
 
@@ -60,17 +61,20 @@
 
 @inlineCallbacks
 def sample(dtrace, samples, agent, paramgen):
-    data = []
+    urlopen = Duration('urlopen time')
+    data = {urlopen: []}
     yield dtrace.start()
     for i in range(samples):
         before = time()
         response = yield agent.request(*paramgen())
         yield readBody(response)
         after = time()
-        data.append(after - before)
-    stats = yield dtrace.stop()
-    stats[Duration('urlopen time')] = data
-    returnValue(stats)
+        stats = yield dtrace.mark()
+        for k, v in stats.iteritems():
+            data.setdefault(k, []).append(v)
+        data[urlopen].append(after - before)
+    print 'Extra stats:', (yield dtrace.stop())
+    returnValue(data)
 
 
 def select(statistics, benchmark, parameter, statistic):

Modified: CalendarServer/trunk/contrib/performance/benchmark.py
===================================================================
--- CalendarServer/trunk/contrib/performance/benchmark.py	2010-08-30 02:28:22 UTC (rev 6201)
+++ CalendarServer/trunk/contrib/performance/benchmark.py	2010-08-30 14:51:59 UTC (rev 6202)
@@ -67,9 +67,10 @@
         if self.started is None:
             self.done.callback(None)
         else:
-            self.started.errback(RuntimeError("Exited too soon"))
+            self.started.errback(RuntimeError("Exited too soon: %r" % (self._out,)))
 
 
+
 def instancePIDs(directory):
     pids = []
     for pidfile in directory.children():
@@ -189,8 +190,9 @@
         proto = IOMeasureConsumer(started, stopped, _DTraceParser(self))
         process = reactor.spawnProcess(
             proto,
-            "/usr/sbin/dtrace",
-            ["/usr/sbin/dtrace",
+            "/usr/bin/sudo",
+            ["/usr/bin/sudo",
+             "/usr/sbin/dtrace",
              # process preprocessor macros
              "-C",
              # search for include targets in the source directory containing this file

Modified: CalendarServer/trunk/contrib/performance/run.sh
===================================================================
--- CalendarServer/trunk/contrib/performance/run.sh	2010-08-30 02:28:22 UTC (rev 6201)
+++ CalendarServer/trunk/contrib/performance/run.sh	2010-08-30 14:51:59 UTC (rev 6202)
@@ -1,2 +1,2 @@
 #!/bin/bash
-PYTHONPATH=$PYTHONPATH:../../../vobject/:../../../../CalDAVClientLibrary/trunk/src:../../../Twisted/ "$@"
+PYTHONPATH=$PYTHONPATH:~/Projects/CalendarServer/Twisted "$@"

Modified: CalendarServer/trunk/contrib/performance/sample.sh
===================================================================
--- CalendarServer/trunk/contrib/performance/sample.sh	2010-08-30 02:28:22 UTC (rev 6201)
+++ CalendarServer/trunk/contrib/performance/sample.sh	2010-08-30 14:51:59 UTC (rev 6202)
@@ -6,13 +6,16 @@
 
 SOURCE=~/Projects/CalendarServer/trunk
 BENCHMARKS="vfreebusy event"
+STATISTICS=("urlopen time" execute)
 ADDURL=http://localhost:8000/result/add/
 export PYTHONPATH=$PYTHONPATH:$SOURCE/../Twisted
 
 REV=$1
-RESULTS=$2
+LOGS=$2
+RESULTS=$3
 
 pushd $SOURCE
+svn st --no-ignore | grep '^[?I]' | cut -c9- | xargs rm -r
 svn up -r$REV .
 python setup.py build_ext -i
 popd
@@ -21,19 +24,24 @@
   ./setbackend $SOURCE/conf/caldavd-test.plist $backend > $SOURCE/conf/caldavd-dev.plist
   pushd $SOURCE
   ./run -k || true
-  sleep 5
+  while [ -e ./data/Logs/caldavd.pid ]; do
+    echo "Waiting for server to exit..."
+    sleep 1
+  done
   rm -rf data/
   ./run -d -n
+  sleep 5
   popd
-  sleep 5
-  ./benchmark --label r$REV-$backend $BENCHMARKS
+  ./benchmark --label r$REV-$backend --log-directory $LOGS $BENCHMARKS
   data=`echo -n r$REV-$backend*`
   for p in 1 9 81; do
     for b in $BENCHMARKS; do
-      ./upload \
-          --url $ADDURL --revision $REV \
-          --revision-date "`./svn-committime $SOURCE`" --environment nmosbuilder \
-          --backend $backend --statistic "$data,$b,$p,urlopen time"
+      for stat in "${STATISTICS[@]}"; do
+        ./upload \
+            --url $ADDURL --revision $REV \
+            --revision-date "`./svn-committime $SOURCE`" --environment nmosbuilder \
+            --backend $backend --statistic "$data,$b,$p,$stat"
+      done
     done
   done
 

Modified: CalendarServer/trunk/contrib/performance/sql_measure.d
===================================================================
--- CalendarServer/trunk/contrib/performance/sql_measure.d	2010-08-30 02:28:22 UTC (rev 6201)
+++ CalendarServer/trunk/contrib/performance/sql_measure.d	2010-08-30 14:51:59 UTC (rev 6202)
@@ -1,8 +1,8 @@
 
 /*
- * Make almost all query strings fit.
+ * Make almost all query strings fit.  Please don't have SQL longer than this. :(
  */
-#pragma D option strsize=8192
+#pragma D option strsize=32768
 
 dtrace:::BEGIN
 {

Modified: CalendarServer/trunk/contrib/performance/stats.py
===================================================================
--- CalendarServer/trunk/contrib/performance/stats.py	2010-08-30 02:28:22 UTC (rev 6201)
+++ CalendarServer/trunk/contrib/performance/stats.py	2010-08-30 14:51:59 UTC (rev 6202)
@@ -38,6 +38,24 @@
         self.name = name
 
 
+    def __eq__(self, other):
+        if isinstance(other, _Statistic):
+            return self.name == other.name
+        return NotImplemented
+
+
+    def __hash__(self):
+        return hash((self.__class__, self.name))
+
+
+    def squash(self, samples):
+        """
+        Normalize the sample data into float values (one per sample)
+        in seconds (I hope time is the only thing you measure).
+        """
+        return samples
+
+
     def summarize(self, data):
         print self.name, 'mean', mean(data)
         print self.name, 'median', median(data)
@@ -91,23 +109,35 @@
         return sqlparse.format(statement.to_unicode().encode('ascii'))
 
 
-    def summarize(self, data):
+    def squash(self, samples):
+        times = []
+        for data in samples:
+            times.append(
+                sum([interval for (sql, interval) in data]) / NANO)
+        return times
+
+
+    def summarize(self, samples):
+        times = []
         statements = {}
-        intervals = []
-        for (sql, interval) in data:
-            sql = self.normalize(sql)
-            intervals.append(interval)
-            statements[sql] = statements.get(sql, 0) + 1
+        for data in samples:
+            total = 0
+            for (sql, interval) in data:
+                sql = self.normalize(sql)
+                statements[sql] = statements.get(sql, 0) + 1
+                total += interval
+            times.append(total / NANO * 1000)
         for statement, count in statements.iteritems():
             print count, ':', statement
-        return _Statistic.summarize(self, intervals)
+        return _Statistic.summarize(self, times)
 
 
-    def statements(self, data):
+    def statements(self, samples):
         statements = {}
-        for (sql, interval) in data:
-            sql = self.normalize(sql)
-            statements.setdefault(sql, []).append(interval)
+        for data in samples:
+            for (sql, interval) in data:
+                sql = self.normalize(sql)
+                statements.setdefault(sql, []).append(interval)
         
         byTime = []
         for statement, times in statements.iteritems():

Modified: CalendarServer/trunk/contrib/performance/upload.py
===================================================================
--- CalendarServer/trunk/contrib/performance/upload.py	2010-08-30 02:28:22 UTC (rev 6201)
+++ CalendarServer/trunk/contrib/performance/upload.py	2010-08-30 14:51:59 UTC (rev 6202)
@@ -55,6 +55,7 @@
         'max': str(max_value),
         'min': str(min_value),
         }
+    print 'uploading', data
     agent = Agent(reactor)
     d = agent.request('POST', url, None, StringProducer(urlencode(data)))
     def check(response):
@@ -80,6 +81,7 @@
     fname, benchmark, param, statistic = options['statistic'].split(',')
     stat, samples = select(
         pickle.load(file(fname)), benchmark, param, statistic)
+    samples = stat.squash(samples)
 
     d = upload(
         reactor,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20100830/078b4c3a/attachment-0001.html>


More information about the calendarserver-changes mailing list