[CalendarServer-changes] [9725] CalendarServer/trunk/contrib/performance
source_changes at macosforge.org
source_changes at macosforge.org
Fri Aug 17 10:50:52 PDT 2012
Revision: 9725
http://trac.macosforge.org/projects/calendarserver/changeset/9725
Author: cdaboo at apple.com
Date: 2012-08-17 10:50:52 -0700 (Fri, 17 Aug 2012)
Log Message:
-----------
Expand options for controlling LogNormal distribution and include a Numbers spreadsheet to help visualize
what it will look like. Make sure calendar data read/writes call .close() on the fd.
Modified Paths:
--------------
CalendarServer/trunk/contrib/performance/loadtest/config.dist.plist
CalendarServer/trunk/contrib/performance/loadtest/config.plist
CalendarServer/trunk/contrib/performance/loadtest/ical.py
CalendarServer/trunk/contrib/performance/stats.py
CalendarServer/trunk/contrib/performance/test_stats.py
Added Paths:
-----------
CalendarServer/trunk/contrib/performance/LogNormalVisualization.numbers
Added: CalendarServer/trunk/contrib/performance/LogNormalVisualization.numbers
===================================================================
(Binary files differ)
Property changes on: CalendarServer/trunk/contrib/performance/LogNormalVisualization.numbers
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Modified: CalendarServer/trunk/contrib/performance/loadtest/config.dist.plist
===================================================================
--- CalendarServer/trunk/contrib/performance/loadtest/config.dist.plist 2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/loadtest/config.dist.plist 2012-08-17 17:50:52 UTC (rev 9725)
@@ -371,30 +371,11 @@
<!-- Define the distribution of how many attendees will be invited to an event.
LogNormal is the best fit to observed data.
-
- Here is a formula for calculating mu and sigma based on average number
- of attendees A, with the peak at 1:
-
- mu = 2/3 * ln(A)
- sigma = sqrt(mu)
-
- Some useful values:
-
- Attendees mu sigma
-
- 1.5 0.25 0.50
- 2.0 0.46 0.68
- 3.0 0.73 0.86
- 4.0 0.92 0.96
- 5.0 1.07 1.04
- 6.0 1.19 1.09
- 7.0 1.30 1.14
- 8.0 1.39 1.18
- 9.0 1.46 1.21
- 10.0 1.54 1.24
- 15.0 1.81 1.34
- 20.0 2.00 1.41
+
+ For LogNormal "mode" is the peak, "mean" is the mean value. For invites,
+ mode should typically be 1, and mean whatever matches the user behavior.
+ Our typical mean is 6.
-->
<key>inviteeCountDistribution</key>
<dict>
@@ -402,12 +383,12 @@
<string>contrib.performance.stats.LogNormalDistribution</string>
<key>params</key>
<dict>
- <!-- mean -->
- <key>mu</key>
- <real>1.19</real>
- <!-- standard deviation -->
- <key>sigma</key>
- <real>1.09</real>
+ <!-- mode - peak-->
+ <key>mode</key>
+ <integer>1</integer>
+ <!-- mean - average-->
+ <key>median</key>
+ <integer>6</integer>
<!-- maximum -->
<key>maximum</key>
<real>100</real>
@@ -513,20 +494,24 @@
<key>enabled</key>
<true/>
- <!-- Define how long to wait after seeing a new invitation before
- accepting it. -->
+ <!-- Define how long to wait after seeing a new invitation before
+ accepting it.
+
+ For LogNormal "mode" is the peak, "median" is the 50% cumulative value
+ (i.e., half of the users have accepted by that time).
+ -->
<key>acceptDelayDistribution</key>
<dict>
<key>type</key>
- <string>contrib.performance.stats.NormalDistribution</string>
+ <string>contrib.performance.stats.LogNormalDistribution</string>
<key>params</key>
<dict>
- <!-- mean -->
- <key>mu</key>
- <integer>360</integer>
- <!-- standard deviation -->
- <key>sigma</key>
- <integer>60</integer>
+ <!-- mode - peak-->
+ <key>mode</key>
+ <integer>300</integer>
+ <!-- median - 50% done-->
+ <key>median</key>
+ <integer>1800</integer>
</dict>
</dict>
</dict>
Modified: CalendarServer/trunk/contrib/performance/loadtest/config.plist
===================================================================
--- CalendarServer/trunk/contrib/performance/loadtest/config.plist 2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/loadtest/config.plist 2012-08-17 17:50:52 UTC (rev 9725)
@@ -365,30 +365,11 @@
<!-- Define the distribution of how many attendees will be invited to an event.
LogNormal is the best fit to observed data.
-
- Here is a formula for calculating mu and sigma based on average number
- of attendees A, with the peak at 1:
-
- mu = 2/3 * ln(A)
- sigma = sqrt(mu)
-
- Some useful values:
-
- Attendees mu sigma
-
- 1.5 0.25 0.50
- 2.0 0.46 0.68
- 3.0 0.73 0.86
- 4.0 0.92 0.96
- 5.0 1.07 1.04
- 6.0 1.19 1.09
- 7.0 1.30 1.14
- 8.0 1.39 1.18
- 9.0 1.46 1.21
- 10.0 1.54 1.24
- 15.0 1.81 1.34
- 20.0 2.00 1.41
+
+ For LogNormal "mode" is the peak, "mean" is the mean value. For invites,
+ mode should typically be 1, and mean whatever matches the user behavior.
+ Our typical mean is 6.
-->
<key>inviteeCountDistribution</key>
<dict>
@@ -396,12 +377,12 @@
<string>contrib.performance.stats.LogNormalDistribution</string>
<key>params</key>
<dict>
- <!-- mean -->
- <key>mu</key>
- <real>1.19</real>
- <!-- standard deviation -->
- <key>sigma</key>
- <real>1.09</real>
+ <!-- mode - peak-->
+ <key>mode</key>
+ <integer>1</integer>
+ <!-- mean - average-->
+ <key>median</key>
+ <integer>6</integer>
<!-- maximum -->
<key>maximum</key>
<real>100</real>
@@ -508,19 +489,23 @@
<true/>
<!-- Define how long to wait after seeing a new invitation before
- accepting it. -->
+ accepting it.
+
+ For LogNormal "mode" is the peak, "median" is the 50% cumulative value
+ (i.e., half of the users have accepted by that time).
+ -->
<key>acceptDelayDistribution</key>
<dict>
<key>type</key>
- <string>contrib.performance.stats.NormalDistribution</string>
+ <string>contrib.performance.stats.LogNormalDistribution</string>
<key>params</key>
<dict>
- <!-- mean -->
- <key>mu</key>
- <integer>60</integer>
- <!-- standard deviation -->
- <key>sigma</key>
- <integer>60</integer>
+ <!-- mode - peak-->
+ <key>mode</key>
+ <integer>300</integer>
+ <!-- median - 50% done-->
+ <key>median</key>
+ <integer>1800</integer>
</dict>
</dict>
</dict>
Modified: CalendarServer/trunk/contrib/performance/loadtest/ical.py
===================================================================
--- CalendarServer/trunk/contrib/performance/loadtest/ical.py 2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/loadtest/ical.py 2012-08-17 17:50:52 UTC (rev 9725)
@@ -159,7 +159,13 @@
Data always read from disk - never cached in the object.
"""
path = self.serializePath()
- return Component.fromString(open(path).read()) if path and os.path.exists(path) else None
+ if path and os.path.exists(path):
+ f = open(path)
+ comp = Component.fromString(f.read())
+ f.close()
+ return comp
+ else:
+ return None
@component.setter
@@ -172,7 +178,9 @@
if component is None:
os.remove(path)
else:
- open(path, "w").write(str(component))
+ f = open(path, "w")
+ f.write(str(component))
+ f.close()
self.uid = component.resourceUID() if component is not None else None
Modified: CalendarServer/trunk/contrib/performance/stats.py
===================================================================
--- CalendarServer/trunk/contrib/performance/stats.py 2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/stats.py 2012-08-17 17:50:52 UTC (rev 9725)
@@ -25,6 +25,7 @@
from pycalendar.duration import PyCalendarDuration
from pycalendar.timezone import PyCalendarTimezone
from pycalendar.property import PyCalendarProperty
+from math import log, sqrt
NANO = 1000000000.0
@@ -261,17 +262,40 @@
compareAttributes = ['_mu', '_sigma', '_maximum']
- def __init__(self, mu, sigma, maximum=None):
+ def __init__(self, mu=None, sigma=None, mean=None, mode=None, median=None, maximum=None):
+
+ if mu is not None and sigma is not None:
+ scale = 1.0
+ elif not (mu is None and sigma is None):
+ raise ValueError("mu and sigma must both be defined or both not defined")
+ elif mode is None:
+ raise ValueError("When mu and sigma are not defined, mode must be defined")
+ elif median is not None:
+ scale = mode
+ median /= mode
+ mode = 1.0
+ mu = log(median)
+ sigma = sqrt(log(median) - log(mode))
+ elif mean is not None:
+ scale = mode
+ mean /= mode
+ mode = 1.0
+ mu = log(mean) + log(mode) / 2.0
+ sigma = sqrt(log(mean) - log(mode) / 2.0)
+ else:
+ raise ValueError("When using mode one of median or mean must be defined")
+
self._mu = mu
self._sigma = sigma
+ self._scale = scale
self._maximum = maximum
def sample(self):
- result = random.lognormvariate(self._mu, self._sigma)
+ result = self._scale * random.lognormvariate(self._mu, self._sigma)
if self._maximum is not None and result > self._maximum:
for _ignore in range(10):
- result = random.lognormvariate(self._mu, self._sigma)
+ result = self._scale * random.lognormvariate(self._mu, self._sigma)
if result <= self._maximum:
break
else:
Modified: CalendarServer/trunk/contrib/performance/test_stats.py
===================================================================
--- CalendarServer/trunk/contrib/performance/test_stats.py 2012-08-17 17:31:36 UTC (rev 9724)
+++ CalendarServer/trunk/contrib/performance/test_stats.py 2012-08-17 17:50:52 UTC (rev 9725)
@@ -49,14 +49,33 @@
class DistributionTests(TestCase):
def test_lognormal(self):
- dist = LogNormalDistribution(1, 1)
+ dist = LogNormalDistribution(mu=1, sigma=1)
for _ignore_i in range(100):
value = dist.sample()
self.assertIsInstance(value, float)
self.assertTrue(value >= 0.0, "negative value %r" % (value,))
self.assertTrue(value <= 1000, "implausibly high value %r" % (value,))
+ dist = LogNormalDistribution(mode=1, median=2)
+ for _ignore_i in range(100):
+ value = dist.sample()
+ self.assertIsInstance(value, float)
+ self.assertTrue(value >= 0.0, "negative value %r" % (value,))
+ self.assertTrue(value <= 1000, "implausibly high value %r" % (value,))
+ dist = LogNormalDistribution(mode=1, mean=2)
+ for _ignore_i in range(100):
+ value = dist.sample()
+ self.assertIsInstance(value, float)
+ self.assertTrue(value >= 0.0, "negative value %r" % (value,))
+ self.assertTrue(value <= 1000, "implausibly high value %r" % (value,))
+
+ self.assertRaises(ValueError, LogNormalDistribution, mu=1)
+ self.assertRaises(ValueError, LogNormalDistribution, sigma=1)
+ self.assertRaises(ValueError, LogNormalDistribution, mode=1)
+ self.assertRaises(ValueError, LogNormalDistribution, mean=1)
+ self.assertRaises(ValueError, LogNormalDistribution, median=1)
+
def test_uniformdiscrete(self):
population = [1, 5, 6, 9]
counts = dict.fromkeys(population, 0)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20120817/9a93a501/attachment-0001.html>
More information about the calendarserver-changes
mailing list