[CalendarServer-changes] [9725] CalendarServer/trunk/contrib/performance

Fri Aug 17 10:50:52 PDT 2012

Revision: 9725
          http://trac.macosforge.org/projects/calendarserver/changeset/9725
Author:   cdaboo at apple.com
Date:     2012-08-17 10:50:52 -0700 (Fri, 17 Aug 2012)
Log Message:
-----------
Expand options for controlling LogNormal distribution and include a Numbers spreadsheet to help visualize
what it will look like. Make sure calendar data read/writes call .close() on the fd.

Modified Paths:
--------------
    CalendarServer/trunk/contrib/performance/loadtest/config.dist.plist
    CalendarServer/trunk/contrib/performance/loadtest/config.plist
    CalendarServer/trunk/contrib/performance/loadtest/ical.py
    CalendarServer/trunk/contrib/performance/stats.py
    CalendarServer/trunk/contrib/performance/test_stats.py

Added Paths:
-----------
    CalendarServer/trunk/contrib/performance/LogNormalVisualization.numbers

Added: CalendarServer/trunk/contrib/performance/LogNormalVisualization.numbers
===================================================================
(Binary files differ)


Property changes on: CalendarServer/trunk/contrib/performance/LogNormalVisualization.numbers
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Modified: CalendarServer/trunk/contrib/performance/loadtest/config.dist.plist
===================================================================

--- CalendarServer/trunk/contrib/performance/loadtest/config.dist.plist	2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/loadtest/config.dist.plist	2012-08-17 17:50:52 UTC (rev 9725)
@@ -371,30 +371,11 @@
 							<!-- Define the distribution of how many attendees will be invited to an event.
 							
 								LogNormal is the best fit to observed data.
-							    
-							    Here is a formula for calculating mu and sigma based on average number
-							    of attendees A, with the peak at 1:
-							    
-							    mu = 2/3 * ln(A)
-							    sigma = sqrt(mu)
-							    
-							    Some useful values:
-							    
-							    Attendees		mu		sigma
-							    
-							     1.5			0.25	0.50
-							     2.0			0.46	0.68
-								 3.0			0.73	0.86
-							     4.0			0.92	0.96
-							     5.0			1.07	1.04
-							     6.0			1.19	1.09
-							     7.0			1.30	1.14
-							     8.0			1.39	1.18
-							     9.0			1.46	1.21
-							    10.0			1.54	1.24
-							    15.0			1.81	1.34
-							    20.0			2.00	1.41
 
+
+								For LogNormal "mode" is the peak, "mean" is the mean value.	For invites,
+								mode should typically be 1, and mean whatever matches the user behavior.
+								Our typical mean is 6. 							
 							     -->
 							<key>inviteeCountDistribution</key>
 							<dict>
@@ -402,12 +383,12 @@
 								<string>contrib.performance.stats.LogNormalDistribution</string>
 								<key>params</key>
 								<dict>
-									<!-- mean -->
-									<key>mu</key>
-									<real>1.19</real>
-									<!-- standard deviation -->
-									<key>sigma</key>
-									<real>1.09</real>
+									<!-- mode - peak-->
+									<key>mode</key>
+									<integer>1</integer>
+									<!-- mean - average-->
+									<key>median</key>
+									<integer>6</integer>
 									<!-- maximum -->
 									<key>maximum</key>
 									<real>100</real>
@@ -513,20 +494,24 @@
 							<key>enabled</key>
 							<true/>
 
-							<!-- Define how long to wait after seeing a new invitation before 
-								accepting it. -->
+							<!-- Define how long to wait after seeing a new invitation before
+								accepting it.
+
+								For LogNormal "mode" is the peak, "median" is the 50% cumulative value
+								(i.e., half of the users have accepted by that time).								
+							-->
 							<key>acceptDelayDistribution</key>
 							<dict>
 								<key>type</key>
-								<string>contrib.performance.stats.NormalDistribution</string>
+								<string>contrib.performance.stats.LogNormalDistribution</string>
 								<key>params</key>
 								<dict>
-									<!-- mean -->
-									<key>mu</key>
-									<integer>360</integer>
-									<!-- standard deviation -->
-									<key>sigma</key>
-									<integer>60</integer>
+									<!-- mode - peak-->
+									<key>mode</key>
+									<integer>300</integer>
+									<!-- median - 50% done-->
+									<key>median</key>
+									<integer>1800</integer>
 								</dict>
 							</dict>
 						</dict>

Modified: CalendarServer/trunk/contrib/performance/loadtest/config.plist
===================================================================
--- CalendarServer/trunk/contrib/performance/loadtest/config.plist	2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/loadtest/config.plist	2012-08-17 17:50:52 UTC (rev 9725)
@@ -365,30 +365,11 @@
 							<!-- Define the distribution of how many attendees will be invited to an event.
 							
 								LogNormal is the best fit to observed data.
-							    
-							    Here is a formula for calculating mu and sigma based on average number
-							    of attendees A, with the peak at 1:
-							    
-							    mu = 2/3 * ln(A)
-							    sigma = sqrt(mu)
-							    
-							    Some useful values:
-							    
-							    Attendees		mu		sigma
-							    
-							     1.5			0.25	0.50
-							     2.0			0.46	0.68
-								 3.0			0.73	0.86
-							     4.0			0.92	0.96
-							     5.0			1.07	1.04
-							     6.0			1.19	1.09
-							     7.0			1.30	1.14
-							     8.0			1.39	1.18
-							     9.0			1.46	1.21
-							    10.0			1.54	1.24
-							    15.0			1.81	1.34
-							    20.0			2.00	1.41
 
+
+								For LogNormal "mode" is the peak, "mean" is the mean value.	For invites,
+								mode should typically be 1, and mean whatever matches the user behavior.
+								Our typical mean is 6. 							
 							     -->
 							<key>inviteeCountDistribution</key>
 							<dict>
@@ -396,12 +377,12 @@
 								<string>contrib.performance.stats.LogNormalDistribution</string>
 								<key>params</key>
 								<dict>
-									<!-- mean -->
-									<key>mu</key>
-									<real>1.19</real>
-									<!-- standard deviation -->
-									<key>sigma</key>
-									<real>1.09</real>
+									<!-- mode - peak-->
+									<key>mode</key>
+									<integer>1</integer>
+									<!-- mean - average-->
+									<key>median</key>
+									<integer>6</integer>
 									<!-- maximum -->
 									<key>maximum</key>
 									<real>100</real>
@@ -508,19 +489,23 @@
 							<true/>
 
 							<!-- Define how long to wait after seeing a new invitation before
-								accepting it. -->
+								accepting it.
+
+								For LogNormal "mode" is the peak, "median" is the 50% cumulative value
+								(i.e., half of the users have accepted by that time).								
+							-->
 							<key>acceptDelayDistribution</key>
 							<dict>
 								<key>type</key>
-								<string>contrib.performance.stats.NormalDistribution</string>
+								<string>contrib.performance.stats.LogNormalDistribution</string>
 								<key>params</key>
 								<dict>
-									<!-- mean -->
-									<key>mu</key>
-									<integer>60</integer>
-									<!-- standard deviation -->
-									<key>sigma</key>
-									<integer>60</integer>
+									<!-- mode - peak-->
+									<key>mode</key>
+									<integer>300</integer>
+									<!-- median - 50% done-->
+									<key>median</key>
+									<integer>1800</integer>
 								</dict>
 							</dict>
 						</dict>

Modified: CalendarServer/trunk/contrib/performance/loadtest/ical.py
===================================================================
--- CalendarServer/trunk/contrib/performance/loadtest/ical.py	2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/loadtest/ical.py	2012-08-17 17:50:52 UTC (rev 9725)
@@ -159,7 +159,13 @@
         Data always read from disk - never cached in the object.
         """
         path = self.serializePath()
-        return Component.fromString(open(path).read()) if path and os.path.exists(path) else None
+        if path and os.path.exists(path):
+            f = open(path)
+            comp = Component.fromString(f.read())
+            f.close()
+            return comp
+        else:
+            return None
 
 
     @component.setter
@@ -172,7 +178,9 @@
             if component is None:
                 os.remove(path)
             else:
-                open(path, "w").write(str(component))
+                f = open(path, "w")
+                f.write(str(component))
+                f.close()
         self.uid = component.resourceUID() if component is not None else None
 
 

Modified: CalendarServer/trunk/contrib/performance/stats.py
===================================================================
--- CalendarServer/trunk/contrib/performance/stats.py	2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/stats.py	2012-08-17 17:50:52 UTC (rev 9725)
@@ -25,6 +25,7 @@
 from pycalendar.duration import PyCalendarDuration
 from pycalendar.timezone import PyCalendarTimezone
 from pycalendar.property import PyCalendarProperty
+from math import log, sqrt
 
 NANO = 1000000000.0
 
@@ -261,17 +262,40 @@
 
     compareAttributes = ['_mu', '_sigma', '_maximum']
 
-    def __init__(self, mu, sigma, maximum=None):
+    def __init__(self, mu=None, sigma=None, mean=None, mode=None, median=None, maximum=None):
+        
+        if mu is not None and sigma is not None:
+            scale = 1.0
+        elif not (mu is None and sigma is None):
+            raise ValueError("mu and sigma must both be defined or both not defined")
+        elif mode is None:
+            raise ValueError("When mu and sigma are not defined, mode must be defined")
+        elif median is not None:
+            scale = mode
+            median /= mode
+            mode = 1.0
+            mu = log(median)
+            sigma = sqrt(log(median) - log(mode))
+        elif mean is not None:
+            scale = mode
+            mean /= mode
+            mode = 1.0
+            mu = log(mean) + log(mode) / 2.0
+            sigma = sqrt(log(mean) - log(mode) / 2.0)
+        else:
+            raise ValueError("When using mode one of median or mean must be defined")
+               
         self._mu = mu
         self._sigma = sigma
+        self._scale = scale
         self._maximum = maximum
 
 
     def sample(self):
-        result = random.lognormvariate(self._mu, self._sigma)
+        result = self._scale * random.lognormvariate(self._mu, self._sigma)
         if self._maximum is not None and result > self._maximum:
             for _ignore in range(10):
-                result = random.lognormvariate(self._mu, self._sigma)
+                result = self._scale * random.lognormvariate(self._mu, self._sigma)
                 if result <= self._maximum:
                     break
             else:

Modified: CalendarServer/trunk/contrib/performance/test_stats.py
===================================================================
--- CalendarServer/trunk/contrib/performance/test_stats.py	2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/test_stats.py	2012-08-17 17:50:52 UTC (rev 9725)
@@ -49,14 +49,33 @@
 
 class DistributionTests(TestCase):
     def test_lognormal(self):
-        dist = LogNormalDistribution(1, 1)
+        dist = LogNormalDistribution(mu=1, sigma=1)
         for _ignore_i in range(100):
             value = dist.sample()
             self.assertIsInstance(value, float)
             self.assertTrue(value >= 0.0, "negative value %r" % (value,))
             self.assertTrue(value <= 1000, "implausibly high value %r" % (value,))
 
+        dist = LogNormalDistribution(mode=1, median=2)
+        for _ignore_i in range(100):
+            value = dist.sample()
+            self.assertIsInstance(value, float)
+            self.assertTrue(value >= 0.0, "negative value %r" % (value,))
+            self.assertTrue(value <= 1000, "implausibly high value %r" % (value,))
 
+        dist = LogNormalDistribution(mode=1, mean=2)
+        for _ignore_i in range(100):
+            value = dist.sample()
+            self.assertIsInstance(value, float)
+            self.assertTrue(value >= 0.0, "negative value %r" % (value,))
+            self.assertTrue(value <= 1000, "implausibly high value %r" % (value,))
+
+        self.assertRaises(ValueError, LogNormalDistribution, mu=1)
+        self.assertRaises(ValueError, LogNormalDistribution, sigma=1)
+        self.assertRaises(ValueError, LogNormalDistribution, mode=1)
+        self.assertRaises(ValueError, LogNormalDistribution, mean=1)
+        self.assertRaises(ValueError, LogNormalDistribution, median=1)
+        
     def test_uniformdiscrete(self):
         population = [1, 5, 6, 9]
         counts = dict.fromkeys(population, 0)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20120817/9a93a501/attachment-0001.html>