[CalendarServer-changes] [9735] CalendarServer/trunk
source_changes at macosforge.org
source_changes at macosforge.org
Tue Aug 21 12:02:40 PDT 2012
Revision: 9735
http://trac.macosforge.org/projects/calendarserver/changeset/9735
Author: cdaboo at apple.com
Date: 2012-08-21 12:02:38 -0700 (Tue, 21 Aug 2012)
Log Message:
-----------
Remove old AMP logging and stats socket behavior and replace with a single AMP logging mechanism that can send arbitrary messages (logs, statistics etc) to the master, and then master can aggregate those and provide summaries via the stats socket. Right now we implement a single type="access-log" message which is a dict of all the normal access log items. The master then prints that to the actual log file and aggregates the results on a minute-by-minute basis and reports the summary along with system details (cpu, memory, service uptime) via the stats socket.
Modified Paths:
--------------
CalendarServer/trunk/calendarserver/accesslog.py
CalendarServer/trunk/calendarserver/tap/caldav.py
CalendarServer/trunk/calendarserver/tap/test/test_caldav.py
CalendarServer/trunk/twistedcaldav/stdconfig.py
Added Paths:
-----------
CalendarServer/trunk/contrib/tools/readStats.py
Modified: CalendarServer/trunk/calendarserver/accesslog.py
===================================================================
--- CalendarServer/trunk/calendarserver/accesslog.py 2012-08-21 01:03:58 UTC (rev 9734)
+++ CalendarServer/trunk/calendarserver/accesslog.py 2012-08-21 19:02:38 UTC (rev 9735)
@@ -25,11 +25,17 @@
"AMPLoggingFactory",
]
+import collections
import datetime
+import json
import os
+try:
+ import psutil
+except ImportError:
+ psutil = None
import time
-from twisted.internet import protocol
+from twisted.internet import protocol, task
from twisted.protocols import amp
from twext.web2 import iweb
from txdav.xml import element as davxml
@@ -60,11 +66,10 @@
def emit(self, eventDict):
+ format = None
+ formatArgs = None
if eventDict.get("interface") is iweb.IRequest:
- if config.GlobalStatsLoggingFrequency is not 0:
- self.logGlobalHit()
-
request = eventDict["request"]
response = eventDict["response"]
loginfo = eventDict["loginfo"]
@@ -120,13 +125,27 @@
' "%(referer)s" "%(userAgent)s"'
)
+ formatArgs = {
+ "host" : request.remoteAddr.host,
+ "uid" : uid,
+ "date" : self.logDateString(response.headers.getHeader("date", 0)),
+ "method" : method,
+ "uri" : request.uri.replace('"', "%22"),
+ "protocolVersion" : ".".join(str(x) for x in request.clientproto),
+ "statusCode" : response.code,
+ "bytesSent" : loginfo.bytesSent,
+ "referer" : request.headers.getHeader("referer", "-"),
+ "userAgent" : request.headers.getHeader("user-agent", "-"),
+ }
+
+ # Add extended items to format and formatArgs
if config.EnableExtendedAccessLog:
- formats = [
- format,
- # Performance monitoring extensions
- 'i=%(serverInstance)s or=%(outstandingRequests)s',
- ]
-
+ format += ' i=%(serverInstance)s'
+ formatArgs["serverInstance"] = config.LogID if config.LogID else "0"
+
+ format += ' or=%(outstandingRequests)s'
+ formatArgs["outstandingRequests"] = request.chanRequest.channel.factory.outstandingRequests
+
# Tags for time stamps collected along the way - the first one in the list is the initial
# time for request creation - we use that to track the entire request/response time
nowtime = time.time()
@@ -134,13 +153,16 @@
basetime = request.timeStamps[0][1]
request.timeStamps[0] = ("t", time.time(),)
for tag, timestamp in request.timeStamps:
- formats.append("%s=%.1f" % (tag, (timestamp - basetime) * 1000))
+ format += " %s=%%(%s).1f" % (tag, tag,)
+ formatArgs[tag] = (timestamp - basetime) * 1000
if tag != "t":
basetime = timestamp
if len(request.timeStamps) > 1:
- formats.append("%s=%.1f" % ("t-log", (nowtime - basetime) * 1000))
+ format += " t-log=%(t-log).1f"
+ formatArgs["t-log"] = (timestamp - basetime) * 1000
else:
- formats.append("%s=%.1f" % ("t", (nowtime - request.timeStamps[0][1]) * 1000))
+ format += " t=%(t).1f"
+ formatArgs["t"] = (nowtime - request.timeStamps[0][1]) * 1000
if hasattr(request, "extendedLogItems"):
for k, v in request.extendedLogItems.iteritems():
@@ -148,11 +170,13 @@
v = str(v).replace('"', "%22")
if " " in v:
v = '"%s"' % (v,)
- formats.append("%s=%s" % (k, v))
+ format += " %s=%%(%s)s" % (k, k,)
+ formatArgs[k] = v
# Add the name of the XML error element for debugging purposes
if hasattr(response, "error"):
- formats.append("err=%s" % (response.error.qname()[1],))
+ format += " err=%(err)s"
+ formatArgs["err"] = response.error.qname()[1]
fwdHeaders = request.headers.getRawHeaders("x-forwarded-for", "")
if fwdHeaders:
@@ -160,28 +184,42 @@
# trying to overwhelm the logs
forwardedFor = ",".join([hdr[:50] for hdr in fwdHeaders])
forwardedFor = forwardedFor.replace(" ", "")
- formats.append("fwd=%(fwd)s")
- else:
- forwardedFor = ""
+ format += " fwd=%(fwd)s"
+ formatArgs["fwd"] = forwardedFor
- format = " ".join(formats)
+ elif "overloaded" in eventDict:
+ overloaded = eventDict.get("overloaded")
+ format = (
+ '%(host)s - %(uid)s [%(date)s]'
+ ' "%(method)s"'
+ ' %(statusCode)s %(bytesSent)d'
+ ' "%(referer)s" "%(userAgent)s"'
+ )
+
formatArgs = {
- "host" : request.remoteAddr.host,
- "uid" : uid,
- "date" : self.logDateString(response.headers.getHeader("date", 0)),
- "method" : method,
- "uri" : request.uri.replace('"', "%22"),
- "protocolVersion" : ".".join(str(x) for x in request.clientproto),
- "statusCode" : response.code,
- "bytesSent" : loginfo.bytesSent,
- "referer" : request.headers.getHeader("referer", "-"),
- "userAgent" : request.headers.getHeader("user-agent", "-"),
- "serverInstance" : config.LogID,
- "outstandingRequests" : request.chanRequest.channel.factory.outstandingRequests,
- "fwd" : forwardedFor,
+ "host" : overloaded.transport.hostname,
+ "uid" : "-",
+ "date" : self.logDateString(time.time()),
+ "method" : "???",
+ "uri" : "",
+ "protocolVersion" : "",
+ "statusCode" : 503,
+ "bytesSent" : 0,
+ "referer" : "-",
+ "userAgent" : "-",
}
+ if config.EnableExtendedAccessLog:
+ format += ' p=%(serverPort)s'
+ formatArgs["serverPort"] = overloaded.transport.server.port
+
+ format += ' or=%(outstandingRequests)s'
+ formatArgs["outstandingRequests"] = overloaded.outstandingRequests
+
+
+ # Write anything we got to the log and stats
+ if format is not None:
# sanitize output to mitigate log injection
for k,v in formatArgs.items():
if not isinstance(v, basestring):
@@ -190,38 +228,26 @@
v = v.replace("\n", "\\n")
v = v.replace("\"", "\\\"")
formatArgs[k] = v
+
+ formatArgs["type"] = "access-log"
+ formatArgs["log-format"] = format
+ self.logStats(formatArgs)
- self.logMessage(format % formatArgs)
-
- elif "overloaded" in eventDict:
- overloaded = eventDict.get("overloaded")
- format_str = '%s - - [%s] "???" 503 0 "-" "-" [0.0 ms]'
- format_data = (
- overloaded.transport.hostname,
- self.logDateString(time.time()),
- )
- if config.EnableExtendedAccessLog:
- format_str += " [%s %s]"
- format_data += (
- overloaded.transport.server.port,
- overloaded.outstandingRequests,
- )
- self.logMessage(format_str % format_data)
-
class RotatingFileAccessLoggingObserver(CommonAccessLoggingObserverExtensions):
"""
Class to do "apache" style access logging to a rotating log file. The log
file is rotated after midnight each day.
+
+ This class also currently handles the collection of system and log statistics.
"""
def __init__(self, logpath):
- self.logpath = logpath
- self.globalHitCount = 0
- self.globalHitHistory = []
- for _ignore in range(0, config.GlobalStatsLoggingFrequency + 1):
- self.globalHitHistory.append({"time":int(time.time()), "hits":0})
+ self.logpath = logpath
- def logMessage(self, message, allowrotate=True):
+ self.systemStats = None
+ self.statsByMinute = []
+
+ def accessLog(self, message, allowrotate=True):
"""
Log a message to the file and possibly rotate if date has changed.
@@ -235,21 +261,6 @@
self.rotate()
self.f.write(message + "\n")
- def rotateGlobalHitHistoryStats(self):
- """
- Roll the global hit history array: push the current stats as
- the last element; pop the first (oldest) element and reschedule the task.
- """
-
- self.globalHitHistory.append({"time":int(time.time()), "hits":self.globalHitCount})
- del self.globalHitHistory[0]
- log.debug("rotateGlobalHitHistoryStats: %s" % (self.globalHitHistory,))
- if config.GlobalStatsLoggingFrequency is not 0:
- self.reactor.callLater(
- config.GlobalStatsLoggingPeriod * 60 / config.GlobalStatsLoggingFrequency,
- self.rotateGlobalHitHistoryStats
- )
-
def start(self):
"""
Start logging. Open the log file and log an "open" message.
@@ -257,21 +268,19 @@
super(RotatingFileAccessLoggingObserver, self).start()
self._open()
- self.logMessage("Log opened - server start: [%s]." % (datetime.datetime.now().ctime(),))
-
- # Need a reactor for the callLater() support for rotateGlobalHitHistoryStats()
- from twisted.internet import reactor
- self.reactor = reactor
- self.rotateGlobalHitHistoryStats()
+ self.accessLog("Log opened - server start: [%s]." % (datetime.datetime.now().ctime(),))
def stop(self):
"""
Stop logging. Close the log file and log an "open" message.
"""
- self.logMessage("Log closed - server stop: [%s]." % (datetime.datetime.now().ctime(),), False)
+ self.accessLog("Log closed - server stop: [%s]." % (datetime.datetime.now().ctime(),), False)
super(RotatingFileAccessLoggingObserver, self).stop()
self._close()
+
+ if self.systemStats is not None:
+ self.systemStats.stop()
def _open(self):
"""
@@ -340,46 +349,256 @@
if os.path.exists(newpath):
log.msg("Cannot rotate log file to %s because it already exists." % (newpath,))
return
- self.logMessage("Log closed - rotating: [%s]." % (datetime.datetime.now().ctime(),), False)
+ self.accessLog("Log closed - rotating: [%s]." % (datetime.datetime.now().ctime(),), False)
log.msg("Rotating log file to: %s" % (newpath,), system="Logging")
self.f.close()
os.rename(self.logpath, newpath)
self._open()
- self.logMessage("Log opened - rotated: [%s]." % (datetime.datetime.now().ctime(),), False)
+ self.accessLog("Log opened - rotated: [%s]." % (datetime.datetime.now().ctime(),), False)
- def logGlobalHit(self):
+ def logStats(self, stats):
"""
- Increment the service-global hit counter
+ Update stats
"""
- self.globalHitCount += 1
+ if self.systemStats is None:
+ self.systemStats = SystemMonitor()
- def getGlobalHits(self):
+ # Currently only storing stats for access log type
+ if "type" not in stats or stats["type"] != "access-log":
+ return
+
+ currentStats = self.ensureSequentialStats()
+ self.updateStats(currentStats, stats)
+
+ if stats["type"] == "access-log":
+ self.accessLog(stats["log-format"] % stats)
+
+ def getStats(self):
"""
- Return the global hit stats
- """
+ Return the stats
+ """
+
+ if self.systemStats is None:
+ self.systemStats = SystemMonitor()
- stats = '<?xml version="1.0" encoding="UTF-8"?><plist version="1.0">'
- stats += "<dict><key>totalHits</key><integer>%d</integer>"
- stats += "<key>recentHits</key><dict>"
- stats += "<key>count</key><integer>%d</integer>"
- stats += "<key>since</key><integer>%d</integer>"
- stats += "<key>period</key><integer>%d</integer>"
- stats += "<key>frequency</key><integer>%d</integer>"
- stats += "</dict></dict></plist>"
- return stats % (
- self.globalHitCount,
- self.globalHitCount - self.globalHitHistory[0]["hits"],
- self.globalHitHistory[0]["time"],
- config.GlobalStatsLoggingPeriod,
- config.GlobalStatsLoggingFrequency
- )
+ # The current stats
+ currentStats = self.ensureSequentialStats()
-class LogMessage(amp.Command):
+ # Get previous minute details
+ index = min(2, len(self.statsByMinute))
+ if index > 0:
+ previousMinute = self.statsByMinute[-index][1]
+ else:
+ previousMinute = self.initStats()
+
+ # Do five minute aggregate
+ fiveMinutes = self.initStats()
+ index = min(6, len(self.statsByMinute))
+ for i in range(-index, -1):
+ stat = self.statsByMinute[i][1]
+ self.mergeStats(fiveMinutes, stat)
+
+ # Do one hour aggregate
+ oneHour = self.initStats()
+ index = min(61, len(self.statsByMinute))
+ for i in range(-index, -1):
+ stat = self.statsByMinute[i][1]
+ self.mergeStats(oneHour, stat)
+
+ printStats = {
+ "System":self.systemStats.items,
+ "Current":currentStats,
+ "1 Minute":previousMinute,
+ "5 Minutes":fiveMinutes,
+ "1 Hour":oneHour,
+ }
+ return json.dumps(printStats)
+
+ def ensureSequentialStats(self):
+ """
+ Make sure the list of timed stats is contiguous wrt time.
+ """
+ dtindex = int(time.time() / 60.0) * 60
+
+ if len(self.statsByMinute) > 0:
+ if self.statsByMinute[-1][0] != dtindex:
+ oldindex = self.statsByMinute[-1][0]
+ while oldindex != dtindex:
+ oldindex += 60
+ self.statsByMinute.append((oldindex, self.initStats(),))
+ else:
+ self.statsByMinute.append((dtindex, self.initStats(),))
+ return self.statsByMinute[-1][1]
+
+ def initStats(self):
+
+ def initTimeHistogram():
+ return {
+ "<10ms": 0,
+ "10ms<->100ms" : 0,
+ "100ms<->1s" : 0,
+ "1s<->10s" : 0,
+ "10s<->30s" : 0,
+ "30s<->60s" : 0,
+ ">60s" : 0,
+ "Over 1s" : 0,
+ "Over 10s" : 0,
+ }
+
+ return {
+ "requests" : 0,
+ "method" : collections.defaultdict(int),
+ "uid" : collections.defaultdict(int),
+ "500" : 0,
+ "t" : 0.0,
+ "t-resp-wr": 0.0,
+ "slots" : 0,
+ "T" : initTimeHistogram(),
+ "T-RESP-WR": initTimeHistogram(),
+ "T-MAX" : 0.0,
+ "cpu" : self.systemStats.items["cpu use"],
+ }
+
+ def updateStats(self, current, stats):
+ # Gather specific information and aggregate into our persistent stats
+ if current["requests"] == 0:
+ current["cpu"] = 0.0
+ current["requests"] += 1
+ current["method"][stats["method"]] += 1
+ current["uid"][stats["uid"]] += 1
+ if stats["statusCode"] >= 500:
+ current["500"] += 1
+ current["t"] += stats.get("t", 0.0)
+ current["t-resp-wr"] += stats.get("t-resp-wr", 0.0)
+ current["slots"] += stats.get("outstandingRequests", 0)
+ current["cpu"] += self.systemStats.items["cpu use"]
+
+ def histogramUpdate(t, key):
+ if t >= 60000.0:
+ current[key][">60s"] += 1
+ elif t >= 30000.0:
+ current[key]["30s<->60s"] += 1
+ elif t >= 10000.0:
+ current[key]["10s<->30s"] += 1
+ elif t >= 1000.0:
+ current[key]["1s<->10s"] += 1
+ elif t >= 100.0:
+ current[key]["100ms<->1s"] += 1
+ elif t >= 10.0:
+ current[key]["10ms<->100ms"] += 1
+ else:
+ current[key]["<10ms"] += 1
+ if t >= 1000.0:
+ current[key]["Over 1s"] += 1
+ elif t >= 10000.0:
+ current[key]["Over 10s"] += 1
+
+ t = stats.get("t", None)
+ if t is not None:
+ histogramUpdate(t, "T")
+ current["T-MAX"] = max(current["T-MAX"], t)
+ t = stats.get("t-resp-wr", None)
+ if t is not None:
+ histogramUpdate(t, "T-RESP-WR")
+
+ def mergeStats(self, current, stats):
+ # Gather specific information and aggregate into our persistent stats
+ if current["requests"] == 0:
+ current["cpu"] = 0.0
+ current["requests"] += stats["requests"]
+ for method in stats["method"].keys():
+ current["method"][method] += stats["method"][method]
+ for uid in stats["uid"].keys():
+ current["uid"][uid] += stats["uid"][uid]
+ current["500"] += stats["500"]
+ current["t"] += stats["t"]
+ current["t-resp-wr"] += stats["t-resp-wr"]
+ current["slots"] += stats["slots"]
+ current["cpu"] += stats["cpu"]
+
+ def histogramUpdate(t, key):
+ if t >= 60000.0:
+ current[key][">60s"] += 1
+ elif t >= 30000.0:
+ current[key]["30s<->60s"] += 1
+ elif t >= 10000.0:
+ current[key]["10s<->30s"] += 1
+ elif t >= 1000.0:
+ current[key]["1s<->10s"] += 1
+ elif t >= 100.0:
+ current[key]["100ms<->1s"] += 1
+ elif t >= 10.0:
+ current[key]["10ms<->100ms"] += 1
+ else:
+ current[key]["<10ms"] += 1
+ if t >= 1000.0:
+ current[key]["Over 1s"] += 1
+ elif t >= 10000.0:
+ current[key]["Over 10s"] += 1
+
+ for bin in stats["T"].keys():
+ current["T"][bin] += stats["T"][bin]
+ current["T-MAX"] = max(current["T-MAX"], stats["T-MAX"])
+ for bin in stats["T-RESP-WR"].keys():
+ current["T-RESP-WR"][bin] += stats["T-RESP-WR"][bin]
+
+
+
+class SystemMonitor(object):
+ """
+ Keeps track of system usage information. This installs a reactor task to
+ run about once per second and track system use.
+ """
+
+ CPUStats = collections.namedtuple("CPUStats", ("total", "idle",))
+
+ def __init__(self):
+ self.items = {
+ "cpu count" : psutil.NUM_CPUS if psutil is not None else -1,
+ "cpu use" : 0.0,
+ "memory used" : 0,
+ "memory percent": 0.0,
+ "start time" : time.time(),
+ }
+
+ if psutil is not None:
+ times = psutil.cpu_times()
+ self.previous_cpu = SystemMonitor.CPUStats(sum(times), times.idle,)
+ else:
+ self.previous_cpu = SystemMonitor.CPUStats(0, 0)
+
+ self.task = task.LoopingCall(self.update)
+ self.task.start(1.0)
+
+ def stop(self):
+ """
+ Just stop the task
+ """
+ self.task.stop()
+
+ def update(self):
+
+ # CPU usage based on diff'ing CPU times
+ if psutil is not None:
+ times = psutil.cpu_times()
+ cpu_now = SystemMonitor.CPUStats(sum(times), times.idle,)
+ try:
+ self.items["cpu use"] = 100.0 * (1.0 - (cpu_now.idle - self.previous_cpu.idle) / (cpu_now.total - self.previous_cpu.total))
+ except ZeroDivisionError:
+ self.items["cpu use"] = 0.0
+ self.previous_cpu = cpu_now
+
+ # Memory usage
+ if psutil is not None:
+ mem = psutil.virtual_memory()
+ self.items["memory used"] = mem.used
+ self.items["memory percent"] = mem.percent
+
+
+class LogStats(amp.Command):
arguments = [("message", amp.String())]
-class LogGlobalHit(amp.Command):
- arguments = []
class AMPCommonAccessLoggingObserver(CommonAccessLoggingObserverExtensions):
def __init__(self):
@@ -390,7 +609,7 @@
def flushBuffer(self):
if self._buffer:
for msg in self._buffer:
- self.logMessage(msg)
+ self.logStats(msg)
def addClient(self, connectedClient):
@@ -401,31 +620,19 @@
self.flushBuffer()
- def logMessage(self, message):
- """
- Log a message to the remote AMP Protocol
- """
+ def logStats(self, message):
+ """
+ Log server stats via the remote AMP Protocol
+ """
+
if self.protocol is not None:
- # XXX: Yeah we're not waiting for anything to happen here.
- # but we will log an error.
+ message=json.dumps(message)
if isinstance(message, unicode):
message = message.encode("utf-8")
- d = self.protocol.callRemote(LogMessage, message=message)
- d.addErrback(log.err)
- else:
- self._buffer.append(message)
-
-
- def logGlobalHit(self):
- """
- Log a server hit via the remote AMP Protocol
- """
-
- if self.protocol is not None:
- d = self.protocol.callRemote(LogGlobalHit)
+ d = self.protocol.callRemote(LogStats, message=message)
d.addErrback(log.err)
else:
- log.msg("logGlobalHit() only works with an AMP Protocol")
+ self._buffer.append(message)
@@ -439,17 +646,12 @@
super(AMPLoggingProtocol, self).__init__()
- def logMessage(self, message):
- self.observer.logMessage(message)
- return {}
-
- LogMessage.responder(logMessage)
-
- def logGlobalHit(self):
- self.observer.logGlobalHit()
+ def logStats(self, message):
+ stats = json.loads(message)
+ self.observer.logStats(stats)
return {}
- LogGlobalHit.responder(logGlobalHit)
+ LogStats.responder(logStats)
Modified: CalendarServer/trunk/calendarserver/tap/caldav.py
===================================================================
--- CalendarServer/trunk/calendarserver/tap/caldav.py 2012-08-21 01:03:58 UTC (rev 9734)
+++ CalendarServer/trunk/calendarserver/tap/caldav.py 2012-08-21 19:02:38 UTC (rev 9735)
@@ -203,7 +203,7 @@
class CalDAVStatisticsProtocol (Protocol):
def connectionMade(self):
- stats = self.factory.logger.observer.getGlobalHits()
+ stats = self.factory.logger.observer.getStats()
self.transport.write("%s\r\n" % (stats,))
self.transport.loseConnection()
@@ -1254,7 +1254,7 @@
# the current stats on the server.
stats = CalDAVStatisticsServer(logger)
statsService = GroupOwnedUNIXServer(
- gid, config.GlobalStatsSocket, stats, mode=0440
+ gid, config.GlobalStatsSocket, stats, mode=0660
)
statsService.setName("stats")
statsService.setServiceParent(s)
Modified: CalendarServer/trunk/calendarserver/tap/test/test_caldav.py
===================================================================
--- CalendarServer/trunk/calendarserver/tap/test/test_caldav.py 2012-08-21 01:03:58 UTC (rev 9734)
+++ CalendarServer/trunk/calendarserver/tap/test/test_caldav.py 2012-08-21 19:02:38 UTC (rev 9735)
@@ -473,9 +473,9 @@
for serviceName in ["stats"]:
socketService = svc.getServiceNamed(serviceName)
self.assertIsInstance(socketService, GroupOwnedUNIXServer)
- m = socketService.kwargs.get("mode", 0444)
+ m = socketService.kwargs.get("mode", 0666)
self.assertEquals(
- m, int("440", 8),
+ m, int("660", 8),
"Wrong mode on %s: %s" % (serviceName, oct(m))
)
self.assertEquals(socketService.gid, alternateGroup)
Added: CalendarServer/trunk/contrib/tools/readStats.py
===================================================================
--- CalendarServer/trunk/contrib/tools/readStats.py (rev 0)
+++ CalendarServer/trunk/contrib/tools/readStats.py 2012-08-21 19:02:38 UTC (rev 9735)
@@ -0,0 +1,201 @@
+##
+# Copyright (c) 2012 Apple Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##
+
+from StringIO import StringIO
+import datetime
+import json
+import socket
+import tables
+import time
+import sys
+import getopt
+
+"""
+This tool reads data from the server's statistics socket and prints a summary.
+"""
+
+def safeDivision(value, total, factor=1):
+ return value * factor / total if total else 0
+
+def readSock(sockname):
+ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ s.connect(sockname)
+ data = ""
+ while True:
+ d = s.recv(1024)
+ if d:
+ data += d
+ else:
+ break
+ s.close()
+ return data
+
+def printStats(data):
+
+ stats = json.loads(data)
+ print "- " * 40
+ print datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
+ print "Service Uptime: %s" % (datetime.timedelta(seconds=(int(time.time() - stats["System"]["start time"]))),)
+ if stats["System"]["cpu count"] > 0:
+ print "Current CPU: %.1f%% (%d CPUs)" % (
+ stats["System"]["cpu use"],
+ stats["System"]["cpu count"],
+ )
+ print "Current Memory Used: %d bytes (%.1f GB) (%.1f%% of total)" % (
+ stats["System"]["memory used"],
+ stats["System"]["memory used"] / (1024.0 * 1024 * 1024),
+ stats["System"]["memory percent"],
+ )
+ else:
+ print "Current CPU: Unavailable"
+ print "Current Memory Used: Unavailable"
+ print
+ printRequestSummary(stats)
+ printHistogramSummary(stats["5 Minutes"])
+
+def printFailedStats(message):
+
+ print "- " * 40
+ print datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
+ print message
+ print
+
+def printRequestSummary(stats):
+ table = tables.Table()
+ table.addHeader(
+ ("Period", "Requests", "Av. Requests", "Av. Response", "Av. Response", "Max. Response", "Slot", "CPU", "500's"),
+ )
+ table.addHeader(
+ ( "", "", "per second", "(ms)", "no write(ms)", "(ms)", "Average", "Average", ""),
+ )
+ table.setDefaultColumnFormats(
+ (
+ tables.Table.ColumnFormat("%s", tables.Table.ColumnFormat.LEFT_JUSTIFY),
+ tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%.1f", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%.1f", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%.1f", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%.1f", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%.2f", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%.1f%%", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ )
+ )
+
+ for key, seconds in (("Current", 60,), ("1 Minute", 60,), ("5 Minutes", 5*60,), ("1 Hour", 60*60,),):
+
+ stat = stats[key]
+ table.addRow((
+ key,
+ stat["requests"],
+ safeDivision(float(stat["requests"]), seconds),
+ safeDivision(stat["t"], stat["requests"]),
+ safeDivision(stat["t"] - stat["t-resp-wr"], stat["requests"]),
+ stat["T-MAX"],
+ safeDivision(float(stat["slots"]), stat["requests"]),
+ safeDivision(stat["cpu"], stat["requests"]),
+ stat["500"],
+ ))
+
+ os = StringIO()
+ table.printTable(os=os)
+ print os.getvalue()
+
+def printHistogramSummary(stat):
+
+ print "5 minute average response histogram"
+ table = tables.Table()
+ table.addHeader(
+ ("", "<10ms", "10ms<->100ms", "100ms<->1s", "1s<->10s", "10s<->30s", "30s<->60s", ">60s", "Over 1s", "Over 10s"),
+ )
+ table.setDefaultColumnFormats(
+ (
+ tables.Table.ColumnFormat("%s", tables.Table.ColumnFormat.CENTER_JUSTIFY),
+ tables.Table.ColumnFormat("%d (%.1f%%)", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%d (%.1f%%)", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%d (%.1f%%)", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%d (%.1f%%)", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%d (%.1f%%)", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%d (%.1f%%)", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%d (%.1f%%)", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%.1f%%", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ tables.Table.ColumnFormat("%.1f%%", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
+ )
+ )
+ for i in ("T", "T-RESP-WR",):
+ table.addRow((
+ "Overall Response" if i == "T" else "Response without Write",
+ (stat[i]["<10ms"], safeDivision(stat[i]["<10ms"], stat["requests"], 100.0)),
+ (stat[i]["10ms<->100ms"], safeDivision(stat[i]["10ms<->100ms"], stat["requests"], 100.0)),
+ (stat[i]["100ms<->1s"], safeDivision(stat[i]["100ms<->1s"], stat["requests"], 100.0)),
+ (stat[i]["1s<->10s"], safeDivision(stat[i]["1s<->10s"], stat["requests"], 100.0)),
+ (stat[i]["10s<->30s"], safeDivision(stat[i]["10s<->30s"], stat["requests"], 100.0)),
+ (stat[i]["30s<->60s"], safeDivision(stat[i]["30s<->60s"], stat["requests"], 100.0)),
+ (stat[i][">60s"], safeDivision(stat[i][">60s"], stat["requests"], 100.0)),
+ safeDivision(stat[i]["Over 1s"], stat["requests"], 100.0),
+ safeDivision(stat[i]["Over 10s"], stat["requests"], 100.0),
+ ))
+ os = StringIO()
+ table.printTable(os=os)
+ print os.getvalue()
+
+
+def usage(error_msg=None):
+ if error_msg:
+ print error_msg
+
+ print """Usage: readStats [options]
+Options:
+ -h Print this help and exit
+ -s Name of local socket to read from
+ -t Delay in seconds between each sample [10 seconds]
+
+Description:
+ This utility will print a summary of statistics read from a
+ server continuously with the specified delay.
+
+"""
+
+ if error_msg:
+ raise ValueError(error_msg)
+ else:
+ sys.exit(0)
+
+if __name__ == '__main__':
+
+ delay = 10
+ sockname = "data/Logs/state/caldavd-stats.sock"
+
+ options, args = getopt.getopt(sys.argv[1:], "hs:t:", [])
+
+ for option, value in options:
+ if option == "-h":
+ usage()
+ elif option == "-s":
+ sockname = value
+ elif option == "-t":
+ delay = int(value)
+
+ while True:
+ try:
+ printStats(readSock(sockname))
+ except socket.error:
+ printFailedStats("Unable to read statistics from server socket: %s" % (sockname,))
+ except KeyError, e:
+ printFailedStats("Unable to find key '%s' in statistics from server socket" % (e,))
+ sys.exit(1)
+
+ time.sleep(delay)
Modified: CalendarServer/trunk/twistedcaldav/stdconfig.py
===================================================================
--- CalendarServer/trunk/twistedcaldav/stdconfig.py 2012-08-21 01:03:58 UTC (rev 9734)
+++ CalendarServer/trunk/twistedcaldav/stdconfig.py 2012-08-21 19:02:38 UTC (rev 9735)
@@ -447,8 +447,6 @@
"AccountingLogRoot" : "accounting",
"GlobalStatsSocket" : "caldavd-stats.sock",
- "GlobalStatsLoggingPeriod" : 60,
- "GlobalStatsLoggingFrequency" : 12,
"LogDatabase" : {
"LabelsInSQL" : False,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20120821/9c7007e2/attachment-0001.html>
More information about the calendarserver-changes
mailing list