[CalendarServer-changes] [15693] CalendarServer/trunk

source_changes at macosforge.org source_changes at macosforge.org
Thu Jun 23 09:35:07 PDT 2016


Revision: 15693
          http://trac.calendarserver.org//changeset/15693
Author:   cdaboo at apple.com
Date:     2016-06-23 09:35:07 -0700 (Thu, 23 Jun 2016)
Log Message:
-----------
Allow sets of plots to be specified with dashtime command line arg. Add other command line args to dashtime. Update help and documentation for dashboard tools.

Modified Paths:
--------------
    CalendarServer/trunk/calendarserver/tools/dashcollect.py
    CalendarServer/trunk/calendarserver/tools/dashtime.py
    CalendarServer/trunk/calendarserver/tools/dashview.py

Added Paths:
-----------
    CalendarServer/trunk/doc/Admin/Dashboard.md

Modified: CalendarServer/trunk/calendarserver/tools/dashcollect.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/dashcollect.py	2016-06-23 14:03:48 UTC (rev 15692)
+++ CalendarServer/trunk/calendarserver/tools/dashcollect.py	2016-06-23 16:35:07 UTC (rev 15693)
@@ -44,11 +44,12 @@
 }
 """
 
+from argparse import HelpFormatter, SUPPRESS, OPTIONAL, ZERO_OR_MORE, \
+    ArgumentParser
 from collections import OrderedDict
 from datetime import datetime, date
 from threading import Thread
 import SocketServer
-import argparse
 import errno
 import json
 import os
@@ -65,6 +66,27 @@
 
 
 
+class MyHelpFormatter(HelpFormatter):
+    """
+    Help message formatter which adds default values to argument help and
+    retains formatting of all help text.
+    """
+
+    def _fill_text(self, text, width, indent):
+        return ''.join([indent + line for line in text.splitlines(True)])
+
+
+    def _get_help_string(self, action):
+        help = action.help
+        if '%(default)' not in action.help:
+            if action.default is not SUPPRESS:
+                defaulting_nargs = [OPTIONAL, ZERO_OR_MORE]
+                if action.option_strings or action.nargs in defaulting_nargs:
+                    help += ' (default: %(default)s)'
+        return help
+
+
+
 def main():
     try:
         # to produce a docstring target
@@ -72,14 +94,16 @@
     except NameError:
         # unlikely but possible...
         thisFile = sys.argv[0]
-    parser = argparse.ArgumentParser(
+    parser = ArgumentParser(
+        formatter_class=MyHelpFormatter,
         description="Dashboard service for CalendarServer.",
-        epilog="To view the docstring, run: pydoc {}".format(thisFile))
-    parser.add_argument("-f", help="Server config file (see docstring for details)")
-    parser.add_argument("-l", help="Log file directory")
-    parser.add_argument("-n", action="store_true", help="New log file")
-    parser.add_argument("-s", default="localhost:8200", help="Run the dash_thread service on the specified host:port")
-    parser.add_argument("-t", action="store_true", help="Rotate log files every hour [default: once per day]")
+        epilog="To view the docstring, run: pydoc {}".format(thisFile),
+    )
+    parser.add_argument("-f", default=SUPPRESS, required=True, help="Server config file (see docstring for details)")
+    parser.add_argument("-l", default=SUPPRESS, required=True, help="Log file directory")
+    parser.add_argument("-n", action="store_true", help="Create a new log file when starting, existing log file is deleted")
+    parser.add_argument("-s", default="localhost:8200", help="Make JSON data available on the specified host:port")
+    parser.add_argument("-t", action="store_true", help="Rotate log files every hour, otherwise once per day")
     parser.add_argument("-z", action="store_true", help="zlib compress json records in log files")
     parser.add_argument("-v", action="store_true", help="Verbose")
     args = parser.parse_args()

Modified: CalendarServer/trunk/calendarserver/tools/dashtime.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/dashtime.py	2016-06-23 14:03:48 UTC (rev 15692)
+++ CalendarServer/trunk/calendarserver/tools/dashtime.py	2016-06-23 16:35:07 UTC (rev 15693)
@@ -18,10 +18,11 @@
 Tool that extracts time series data from a dashcollect log.
 """
 
+from argparse import SUPPRESS, OPTIONAL, ZERO_OR_MORE, HelpFormatter, \
+    ArgumentParser
 from bz2 import BZ2File
 from collections import OrderedDict, defaultdict
 from zlib import decompress
-import argparse
 import json
 import matplotlib.pyplot as plt
 import operator
@@ -40,6 +41,27 @@
 
 
 
+class MyHelpFormatter(HelpFormatter):
+    """
+    Help message formatter which adds default values to argument help and
+    retains formatting of all help text.
+    """
+
+    def _fill_text(self, text, width, indent):
+        return ''.join([indent + line for line in text.splitlines(True)])
+
+
+    def _get_help_string(self, action):
+        help = action.help
+        if '%(default)' not in action.help:
+            if action.default is not SUPPRESS:
+                defaulting_nargs = [OPTIONAL, ZERO_OR_MORE]
+                if action.option_strings or action.nargs in defaulting_nargs:
+                    help += ' (default: %(default)s)'
+        return help
+
+
+
 class DataType(object):
     """
     Base class for object that can process the different types of data in a
@@ -306,7 +328,9 @@
         result = 0
         for onehost in hosts:
             completed = sum(map(operator.itemgetter(2), stats[onehost]["job_assignments"]["workers"]))
-            result += completed - JobsCompletedDataType.lastCompleted[onehost] if JobsCompletedDataType.lastCompleted[onehost] else 0
+            delta = completed - JobsCompletedDataType.lastCompleted[onehost] if JobsCompletedDataType.lastCompleted[onehost] else 0
+            if delta >= 0:
+                result += delta
             JobsCompletedDataType.lastCompleted[onehost] = completed
         return result
 
@@ -399,60 +423,63 @@
 
 
 
-def main():
-    parser = argparse.ArgumentParser(
-        description="Dashboard time series processor.",
-        epilog="cpu - CPU use\nreqs - requests per second\nrespt - average response time",
-    )
-    parser.add_argument("-l", help="Log file to process")
-    parser.add_argument("-p", help="Name of pod to analyze")
-    parser.add_argument("-s", help="Name of server to analyze")
-    parser.add_argument("-v", action="store_true", help="Verbose")
-    args = parser.parse_args()
-    if args.v:
-        global verbose
-        verbose = True
+class Calculator(object):
 
-    # Get the log file
-    try:
-        if args.l.endswith(".bz2"):
-            logfile = BZ2File(os.path.expanduser(args.l))
-        else:
-            logfile = open(os.path.expanduser(args.l))
-    except:
-        print("Failed to open logfile {}".format(args.l))
+    def __init__(self, args):
+        if args.v:
+            global verbose
+            verbose = True
 
-    # Start/end lines in log file to process
-    line_start = 0
-    line_count = 10000
+        # Get the log file
+        self.logname = args.l
+        try:
+            if args.l.endswith(".bz2"):
+                self.logfile = BZ2File(os.path.expanduser(args.l))
+            else:
+                self.logfile = open(os.path.expanduser(args.l))
+        except:
+            print("Failed to open logfile {}".format(args.l))
 
-    # Plot arrays that will be generated
-    x = []
-    y = OrderedDict()
-    titles = {}
-    ymaxes = {}
+        self.pod = getattr(args, "p", None)
+        self.single_server = getattr(args, "s", None)
 
-    def singleHost(valuekeys):
+        self.save = args.save
+        self.noshow = args.noshow
+
+        self.mode = args.mode
+
+        # Start/end lines in log file to process
+        self.line_start = args.start
+        self.line_count = args.count
+
+        # Plot arrays that will be generated
+        self.x = []
+        self.y = OrderedDict()
+        self.titles = {}
+        self.ymaxes = {}
+
+
+    def singleHost(self, valuekeys):
         """
         Generate data for a single host only.
 
         @param valuekeys: L{DataType} keys to process
         @type valuekeys: L{list} or L{str}
         """
-        _plotHosts(valuekeys, (args.s,))
+        self._plotHosts(valuekeys, (self.single_server,))
 
 
-    def combinedHosts(valuekeys):
+    def combinedHosts(self, valuekeys):
         """
         Generate data for all hosts.
 
         @param valuekeys: L{DataType} keys to process
         @type valuekeys: L{list} or L{str}
         """
-        _plotHosts(valuekeys, None)
+        self._plotHosts(valuekeys, None)
 
 
-    def _plotHosts(valuekeys, hosts):
+    def _plotHosts(self, valuekeys, hosts):
         """
         Generate data for the specified list of hosts.
 
@@ -463,13 +490,13 @@
         """
 
         # For each log file line, process the data for each required measurement
-        with logfile:
-            line = logfile.readline()
+        with self.logfile:
+            line = self.logfile.readline()
             ctr = 0
             while line:
-                if ctr < line_start:
+                if ctr < self.line_start:
                     ctr += 1
-                    line = logfile.readline()
+                    line = self.logfile.readline()
                     continue
 
                 if line[0] == "\x1e":
@@ -478,38 +505,40 @@
                     line = decompress(line.decode("base64"))
                 jline = json.loads(line)
 
-                x.append(ctr)
+                self.x.append(ctr)
                 ctr += 1
 
                 # Initialize the plot arrays when we know how many hosts there are
-                if len(y) == 0:
+                if len(self.y) == 0:
+                    if self.pod is None:
+                        self.pod = sorted(jline["pods"].keys())[0]
                     if hosts is None:
-                        hosts = sorted(jline["pods"][args.p].keys())
+                        hosts = sorted(jline["pods"][self.pod].keys())
                     for measurement in valuekeys:
-                        y[measurement] = []
-                        titles[measurement] = DataType.getTitle(measurement)
-                        ymaxes[measurement] = DataType.getMaxY(measurement, len(hosts))
+                        self.y[measurement] = []
+                        self.titles[measurement] = DataType.getTitle(measurement)
+                        self.ymaxes[measurement] = DataType.getMaxY(measurement, len(hosts))
 
 
                 for measurement in valuekeys:
-                    stats = jline["pods"][args.p]
+                    stats = jline["pods"][self.pod]
                     try:
-                        y[measurement].append(DataType.process(measurement, stats, hosts))
+                        self.y[measurement].append(DataType.process(measurement, stats, hosts))
                     except KeyError:
-                        y[measurement].append(None)
+                        self.y[measurement].append(None)
 
-                line = logfile.readline()
-                if ctr > line_start + line_count:
+                line = self.logfile.readline()
+                if self.line_count != -1 and ctr > self.line_start + self.line_count:
                     break
 
         # Offset data that is averaged over the previous minute
         for measurement in valuekeys:
             if DataType.skip(measurement):
-                y[measurement] = y[measurement][60:]
-                y[measurement].extend([None] * 60)
+                self.y[measurement] = self.y[measurement][60:]
+                self.y[measurement].extend([None] * 60)
 
 
-    def perHost(perhostkeys, combinedkeys):
+    def perHost(self, perhostkeys, combinedkeys):
         """
         Generate a set of per-host plots, together with a set of plots for all-
         host data.
@@ -521,13 +550,13 @@
         """
 
         # For each log file line, process the data for each required measurement
-        with logfile:
-            line = logfile.readline()
+        with self.logfile:
+            line = self.logfile.readline()
             ctr = 0
             while line:
-                if ctr < line_start:
+                if ctr < self.line_start:
                     ctr += 1
-                    line = logfile.readline()
+                    line = self.logfile.readline()
                     continue
 
                 if line[0] == "\x1e":
@@ -536,38 +565,40 @@
                     line = decompress(line.decode("base64"))
                 jline = json.loads(line)
 
-                x.append(ctr)
+                self.x.append(ctr)
                 ctr += 1
 
                 # Initialize the plot arrays when we know how many hosts there are
-                if len(y) == 0:
-                    hosts = sorted(jline["pods"][args.p].keys())
+                if len(self.y) == 0:
+                    if self.pod is None:
+                        self.pod = sorted(jline["pods"].keys())[0]
+                    hosts = sorted(jline["pods"][self.pod].keys())
 
                     for host in hosts:
                         for measurement in perhostkeys:
                             ykey = "{}={}".format(measurement, host)
-                            y[ykey] = []
-                            titles[ykey] = DataType.getTitle(measurement)
-                            ymaxes[ykey] = DataType.getMaxY(measurement, 1)
+                            self.y[ykey] = []
+                            self.titles[ykey] = DataType.getTitle(measurement)
+                            self.ymaxes[ykey] = DataType.getMaxY(measurement, 1)
 
                     for measurement in combinedkeys:
-                        y[measurement] = []
-                        titles[measurement] = DataType.getTitle(measurement)
-                        ymaxes[measurement] = DataType.getMaxY(measurement, len(hosts))
+                        self.y[measurement] = []
+                        self.titles[measurement] = DataType.getTitle(measurement)
+                        self.ymaxes[measurement] = DataType.getMaxY(measurement, len(hosts))
 
                 # Get actual measurement data
                 for host in hosts:
                     for measurement in perhostkeys:
                         ykey = "{}={}".format(measurement, host)
-                        stats = jline["pods"][args.p]
-                        y[ykey].append(DataType.process(measurement, stats, (host,)))
+                        stats = jline["pods"][self.pod]
+                        self.y[ykey].append(DataType.process(measurement, stats, (host,)))
 
                 for measurement in combinedkeys:
-                    stats = jline["pods"][args.p]
-                    y[measurement].append(DataType.process(measurement, stats, hosts))
+                    stats = jline["pods"][self.pod]
+                    self.y[measurement].append(DataType.process(measurement, stats, hosts))
 
-                line = logfile.readline()
-                if ctr > line_start + line_count:
+                line = self.logfile.readline()
+                if self.line_count != -1 and ctr > self.line_start + self.line_count:
                     break
 
         # Offset data that is averaged over the previous minute. Also determine
@@ -577,89 +608,176 @@
         for host in hosts:
             for measurement in perhostkeys:
                 ykey = "{}={}".format(measurement, host)
-                overall_ymax[measurement] = max(overall_ymax[measurement], max(y[ykey]))
+                overall_ymax[measurement] = max(overall_ymax[measurement], max(self.y[ykey]))
                 if DataType.skip(measurement):
-                    y[ykey] = y[ykey][60:]
-                    y[ykey].extend([None] * 60)
+                    self.y[ykey] = self.y[ykey][60:]
+                    self.y[ykey].extend([None] * 60)
         for host in hosts:
             for measurement in perhostkeys:
                 ykey = "{}={}".format(measurement, host)
-                ymaxes[ykey] = overall_ymax[measurement]
+                self.ymaxes[ykey] = overall_ymax[measurement]
 
         for measurement in combinedkeys:
             if DataType.skip(measurement):
-                y[measurement] = y[measurement][60:]
-                y[measurement].extend([None] * 60)
+                self.y[measurement] = self.y[measurement][60:]
+                self.y[measurement].extend([None] * 60)
 
 
-    # Data for a single host, with jobs queued detail for all hosts
-#    singleHost((
-#        CPUDataType.key,
-#        RequestsDataType.key,
-#        ResponseDataType.key,
-#        JobsCompletedDataType.key,
-#        JobQueueDataType.key + "-SCHEDULE",
-#        JobQueueDataType.key + "-PUSH",
-#        JobQueueDataType.key,
-#    ))
+    def run(self, mode, *args):
+        getattr(self, mode)(*args)
 
-    # Data aggregated for all hosts - job detail
-#    combinedHosts((
-#        CPUDataType.key,
-#        RequestsDataType.key,
-#        ResponseDataType.key,
-#        JobsCompletedDataType.key,
-#        JobQueueDataType.key + "-SCHEDULE",
-#        JobQueueDataType.key + "-PUSH",
-#        JobQueueDataType.key,
-#    ))
 
-    # Generic aggregated data for all hosts
-    combinedHosts((
-        CPUDataType.key,
-        RequestsDataType.key,
-        ResponseDataType.key,
-        JobsCompletedDataType.key,
-        JobQueueDataType.key,
-    ))
+    def plot(self):
+        # Generate a single stacked plot of the data
+        plotmax = len(self.y.keys())
+        plt.figure(figsize=(18.5, min(5 + len(self.y.keys()), 18)))
+        for plotnum, measurement in enumerate(self.y.keys()):
+            plt.subplot(len(self.y), 1, plotnum + 1)
+            plotSeries(self.titles[measurement], self.x, self.y[measurement], 0, self.ymaxes[measurement], plotnum == plotmax - 1)
+        if self.save:
+            plt.savefig(".".join((os.path.expanduser(self.logname), self.mode, "png",)), orientation="landscape", format="png")
+        if not self.noshow:
+            plt.show()
 
 
-    # Data aggregated for all hosts - method detail
-#    combinedHosts((
-#        CPUDataType.key,
-#        RequestsDataType.key,
-#        ResponseDataType.key,
-#        MethodCountDataType.key + "-PUT ics",
-#        MethodCountDataType.key + "-REPORT cal-home-sync",
-#        MethodCountDataType.key + "-PROPFIND Calendar Home",
-#        MethodCountDataType.key + "-REPORT cal-sync",
-#        MethodCountDataType.key + "-PROPFIND Calendar",
-#    ))
 
-    # Per-host CPU, and total CPU
-#    perHost((
-#        RequestsDataType.key,
-#    ), (
-#        CPUDataType.key,
-#    ))
+def main():
 
-    # Per-host job completion, and total CPU, total jobs queued
-#    perHost((
-#        JobsCompletedDataType.key,
-#    ), (
-#        CPUDataType.key,
-#        JobQueueDataType.key,
-#    ))
+    selectMode = {
+        "basic":
+            # Generic aggregated data for all hosts
+            (
+                "combinedHosts",
+                (
+                    CPUDataType.key,
+                    RequestsDataType.key,
+                    ResponseDataType.key,
+                    JobsCompletedDataType.key,
+                    JobQueueDataType.key,
+                )
+            ),
+        "basicjob":
+            # Data aggregated for all hosts - job detail
+            (
+                "combinedHosts",
+                (
+                    CPUDataType.key,
+                    RequestsDataType.key,
+                    ResponseDataType.key,
+                    JobsCompletedDataType.key,
+                    JobQueueDataType.key + "-SCHEDULE",
+                    JobQueueDataType.key + "-PUSH",
+                    JobQueueDataType.key,
+                ),
+            ),
+        "basicschedule":
+            # Data aggregated for all hosts - schedule work detail
+            (
+                "combinedHosts",
+                (
+                    CPUDataType.key,
+                    JobsCompletedDataType.key,
+                    JobQueueDataType.key + "-SCHEDULE_ORGANIZER_WORK",
+                    JobQueueDataType.key + "-SCHEDULE_ORGANIZER_SEND_WORK",
+                    JobQueueDataType.key + "-SCHEDULE_REPLY_WORK",
+                    JobQueueDataType.key + "-SCHEDULE_AUTO_REPLY_WORK",
+                    JobQueueDataType.key + "-SCHEDULE_REFRESH_WORK",
+                    JobQueueDataType.key + "-PUSH",
+                    JobQueueDataType.key,
+                ),
+            ),
+        "basicmethod":
+            # Data aggregated for all hosts - method detail
+            (
+                "combinedHosts",
+                (
+                    CPUDataType.key,
+                    RequestsDataType.key,
+                    ResponseDataType.key,
+                    MethodCountDataType.key + "-PUT ics",
+                    MethodCountDataType.key + "-REPORT cal-home-sync",
+                    MethodCountDataType.key + "-PROPFIND Calendar Home",
+                    MethodCountDataType.key + "-REPORT cal-sync",
+                    MethodCountDataType.key + "-PROPFIND Calendar",
+                ),
+            ),
 
-    # Generate a single stacked plot of the data
-    plotmax = len(y.keys())
-    for plotnum, measurement in enumerate(y.keys()):
-        plt.subplot(len(y), 1, plotnum + 1)
-        plotSeries(titles[measurement], x, y[measurement], 0, ymaxes[measurement], plotnum == plotmax - 1)
-    plt.show()
+        "hostrequests":
+            # Per-host requests, and total requests & CPU
+            (
+                "perHost",
+                (RequestsDataType.key,),
+                (
+                    RequestsDataType.key,
+                    CPUDataType.key,
+                ),
+            ),
+        "hostcpu":
+            # Per-host CPU, and total CPU
+            (
+                "perHost",
+                (CPUDataType.key,),
+                (
+                    RequestsDataType.key,
+                    CPUDataType.key,
+                ),
+            ),
+        "hostcompleted":
+            # Per-host job completion, and total CPU, total jobs queued
+            (
+                "perHost",
+                (JobsCompletedDataType.key,),
+                (
+                    CPUDataType.key,
+                    JobQueueDataType.key,
+                ),
+            ),
+    }
 
+    parser = ArgumentParser(
+        formatter_class=MyHelpFormatter,
+        description="Dashboard time series processor.",
+        epilog="""Available modes:
 
+basic - stacked plots of total CPU, total request count, total average response
+    time, completed jobs, and job queue size.
 
+basicjob - as per basic but with queued SCHEDULE_*_WORK and queued
+    PUSH_NOTIFICATION_WORK plots.
+
+basicschedule - stacked plots of total CPU, completed jobs, each queued
+    SCHEDULE_*_WORK, queued PUSH_NOTIFICATION_WORK, and overall job queue size.
+
+basicmethod - stacked plots of total CPU, total request count, total average
+    response time, PUT-ics, REPORT cal-home-sync, PROPFIND Calendar Home, REPORT
+    cal-sync, and PROPFIND Calendar.
+
+hostrequests - stacked plots of per-host request counts, total request count,
+    and total CPU.
+
+hostcpu - stacked plots of per-host CPU, total request count, and total CPU.
+
+hostcompleted - stacked plots of per-host completed jobs, total CPU, and job
+    queue size.
+""",
+    )
+    parser.add_argument("-l", default=SUPPRESS, required=True, help="Log file to process")
+    parser.add_argument("-p", default=SUPPRESS, help="Name of pod to analyze")
+    parser.add_argument("-s", default=SUPPRESS, help="Name of server to analyze")
+    parser.add_argument("--save", action="store_true", help="Save plot PNG image")
+    parser.add_argument("--noshow", action="store_true", help="Don't show the plot on screen")
+    parser.add_argument("--start", type=int, default=0, help="Log line to start from")
+    parser.add_argument("--count", type=int, default=-1, help="Number of log lines to process from start")
+    parser.add_argument("--mode", default="basic", choices=sorted(selectMode.keys()), help="Type of plot to produce")
+    parser.add_argument("-v", action="store_true", help="Verbose")
+    args = parser.parse_args()
+
+    calculator = Calculator(args)
+    calculator.run(*selectMode[args.mode])
+    calculator.plot()
+
+
+
 def plotSeries(title, x, y, ymin=None, ymax=None, last_subplot=True):
     """
     Plot the chosen dataset key for each scanned data file.
@@ -684,9 +802,9 @@
         plt.ylim(ymin=ymin)
     if ymax is not None:
         plt.ylim(ymax=ymax)
-    plt.minorticks_on()
+    plt.xlim(min(x), max(x))
+    plt.xticks(range(min(x), max(x) + 1, 60))
     plt.grid(True, "major", "x", alpha=0.5, linewidth=0.5)
-    plt.grid(True, "minor", "x", alpha=0.5, linewidth=0.5)
 
 if __name__ == "__main__":
     main()

Modified: CalendarServer/trunk/calendarserver/tools/dashview.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/dashview.py	2016-06-23 14:03:48 UTC (rev 15692)
+++ CalendarServer/trunk/calendarserver/tools/dashview.py	2016-06-23 16:35:07 UTC (rev 15693)
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 ##
+from argparse import HelpFormatter, SUPPRESS, OPTIONAL, ZERO_OR_MORE, \
+    ArgumentParser
 
 """
 A curses (or plain text) based dashboard for viewing various aspects of the
@@ -22,7 +24,6 @@
 
 from collections import OrderedDict
 from operator import itemgetter
-import argparse
 import collections
 import curses.panel
 import errno
@@ -41,8 +42,32 @@
 
 
 
+class MyHelpFormatter(HelpFormatter):
+    """
+    Help message formatter which adds default values to argument help and
+    retains formatting of all help text.
+    """
+
+    def _fill_text(self, text, width, indent):
+        return ''.join([indent + line for line in text.splitlines(True)])
+
+
+    def _get_help_string(self, action):
+        help = action.help
+        if '%(default)' not in action.help:
+            if action.default is not SUPPRESS:
+                defaulting_nargs = [OPTIONAL, ZERO_OR_MORE]
+                if action.option_strings or action.nargs in defaulting_nargs:
+                    help += ' (default: %(default)s)'
+        return help
+
+
+
 def main():
-    parser = argparse.ArgumentParser(description="Dashboard collector viewer service for CalendarServer.")
+    parser = ArgumentParser(
+        formatter_class=MyHelpFormatter,
+        description="Dashboard collector viewer service for CalendarServer.",
+    )
     parser.add_argument("-s", default="localhost:8200", help="Dashboard collector service host:port")
     args = parser.parse_args()
 
@@ -509,17 +534,6 @@
 
     @staticmethod
     def aggregator_jobs(serversdata):
-#        results = OrderedDict()
-#        for server_data in serversdata:
-#            for job_name, job_details in server_data.items():
-#                if job_name not in results:
-#                    results[job_name] = OrderedDict()
-#                for detail_name, detail_value in job_details.items():
-#                    if detail_name in results[job_name]:
-#                        results[job_name][detail_name] += detail_value
-#                    else:
-#                        results[job_name][detail_name] = detail_value
-#        return results
         return serversdata[0]
 
 

Added: CalendarServer/trunk/doc/Admin/Dashboard.md
===================================================================
--- CalendarServer/trunk/doc/Admin/Dashboard.md	                        (rev 0)
+++ CalendarServer/trunk/doc/Admin/Dashboard.md	2016-06-23 16:35:07 UTC (rev 15693)
@@ -0,0 +1,208 @@
+# CalendarServer Dashboard Service
+
+## Overview
+
+The CalendarServer dashboard service is a way to visualize internal CalendarServer performance data, including HTTP, system, directory, and job queue statistics. At a high level it works as follows:
+
+1. CalendarServer collects statistics internally and makes those available via a "stats socket" that can be read from to get the data.
+2. A `dashcollect` tool periodically reads data from one or more CalendarServer hosts or pods, stores that data in a log file, and makes the most recent data available to be read via a TCP socket.
+3. A `dashview` tool can be run in a terminal window to show the statistics in multiple tables, using the `curses` terminal library.
+4. A `dashtime` tool can be run to process the log file generated by `dashcollect` and display various plots of data over time.
+
+## Detail
+
+### Stats socket
+
+The CalendarServer "stats socket" needs to be enabled in the caldavd.plist in order for the dashboard service to be active. To do that, make sure the following plist key is present:
+
+    <key>Stats</key>
+    <dict>
+      <key>EnableTCPStatsSocket</key>
+      <true/>
+    </dict>
+
+The default port for the "stats socket" is 8100, and can be changed by adding a `TCPStatsPort` item to the above plist key:
+
+    <key>Stats</key>
+    <dict>
+      <key>EnableTCPStatsSocket</key>
+      <true/>
+      <key>TCPStatsPort</key>
+      <integer>8100</integer>
+    </dict>
+
+CalendarServer can also provide a unix socket to read stats from, but that is only useful when the `dashcollect` tool is always run locally.
+
+Internally CalendarServer collects the following data:
+
+1. HTTP request data is collected via the access.log entries generated by each HTTP request. Request data is collected during each wall-clock minute, then averaged over periods of 1 minute, 5 minutes, and 1 hour. In addition, a snapshot of the HTTP request handling state of each child process is generated each time the "stats socket" is read from.
+2. System statistics (CPU use, memory use, etc) are collected once per second.
+3. Job queue statistics are collected each time the "stats socket" is read from. These include a snapshot of the overall state of the job queue table, as well as per-host data on how many jobs have been completed and their average execution time on each child process.
+4. Directory statistics are collected as each directory request is executed.
+
+### `dashcollect` tool
+
+The `dashcollect` tool is a command line tool that periodically reads from one or more CalendarServer "stats sockets", logs the resulting JSON data to a log file, and makes the most recent data available to be read over a TCP socket. The purpose of this tool is to have a single reader of the "stats sockets" of a CalendarServer service, rather than multiple tools reading from the service and creating additional load that could impact client-facing performance. The `dashcollect` data can then be read by as many tools as needed without affecting the service. For example, if ten people want to watch CalendarServer performance over time, only one process reads the "stats socket" on CalendarServer, while ten processes read from the `dashcollect` socket.
+
+The log file produced by `dashcollect` is in the form of a [JSON text sequence](https://tools.ietf.org/html/rfc7464). In addition, each JSON record can be compressed with zlib and encoded as base64 text (compression greatly reduces the size of the log file and is recommended). The JSON data read from the `dashcollect` TCP socket is standard JSON text (all text is utf-8 encoded).
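+
+For reference, a minimal Python sketch of reading such a log (it mirrors the decoding steps used by `dashtime.py`; the function name is illustrative, and the test used to detect a compressed record is an assumption rather than part of the tools):
+
+    import json
+    from bz2 import BZ2File
+    from zlib import decompress
+
+    def read_dashcollect_log(path):
+        # Yield one decoded JSON record per line of a dashcollect log file.
+        logfile = BZ2File(path) if path.endswith(".bz2") else open(path)
+        with logfile:
+            for line in logfile:
+                if not line.strip():
+                    continue
+                if line[0] == "\x1e":
+                    # Strip the JSON text sequence record separator
+                    line = line[1:]
+                if not line.startswith("{"):
+                    # Assume a zlib-compressed, base64-encoded record (-z option)
+                    line = decompress(line.decode("base64"))
+                yield json.loads(line)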
+
+#### Help
+
+	dashcollect.py --help
+	usage: dashcollect.py [-h] -f F -l L [-n] [-s S] [-t] [-z] [-v]
+	
+	Dashboard service for CalendarServer.
+	
+	optional arguments:
+	  -h, --help  show this help message and exit
+	  -f F        Server config file (see docstring for details)
+	  -l L        Log file directory
+	  -n          Create a new log file when starting, existing log file is
+	              deleted (default: False)
+	  -s S        Make JSON data available on the specified host:port (default:
+	              localhost:8200)
+	  -t          Rotate log files every hour, otherwise once per day (default:
+	              False)
+	  -z          zlib compress json records in log files (default: False)
+	  -v          Verbose (default: False)
+	
+	To view the docstring, run: pydoc calendarserver/tools/dashcollect.py
+
+* The `-f` option must be present and point to a config file (see below).
+* The `-l` option must be present and point to an existing directory where the log files will be written. Log file names have the prefix `dashboard` followed by a timestamp and the file extension `.log`. Log files are rotated once per day, or once per hour when the `-t` option is used.
+* For CalendarServer services generating lots of data, the `-t` option is recommended to keep each log file to a reasonable size. Without this option there will be one log file per day (with the file name containing the date). With this option there will be one log file per hour (with the file name containing the date and hour).
+* When generating lots of data, it is recommended that the `-z` option be used to compress the JSON text sequences in the log files.
+
+#### Config file
+
+The config file (specified via `-f`) defines the set of CalendarServer pods and hosts to read stats data from. The file contains JSON data, for example:
+
+    {
+        "title": "My CalDAV service",
+        "pods": {
+            "podA": {
+                "description": "Main pod",
+                "servers": [
+                    "podAhost1.example.com:8100",
+                    "podAhost2.example.com:8100"
+                ]
+            },
+            "podB": {
+                "description": "Development pod",
+                "servers": [
+                    "podBhost1.example.com:8100",
+                    "podBhost2.example.com:8100"
+                ]
+            }
+        }
+    }
+    
+* The `title` member is a descriptive title for the service.
+* The `pods` object contains one item for each CalendarServer pod being monitored. The names used for the object keys will appear in the logs.
+* The `description` member is a description for each pod.
+* The `servers` member is an array of `host:port` values, one for each host in the pod, with the port set to the TCP stats socket port used by that host.
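+
+As a quick sanity check, the config file can be loaded and walked with a few lines of Python (the file name used here is just an example):
+
+    import json
+
+    with open("dashcollect.json") as f:
+        config = json.load(f)
+
+    print("Monitoring: {}".format(config["title"]))
+    for pod_name, pod in sorted(config["pods"].items()):
+        print("{} ({}): {} hosts".format(
+            pod_name, pod["description"], len(pod["servers"])))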
+
+### `dashview` tool
+
+The `dashview` tool is a command line tool that periodically reads from a `dashcollect` socket and displays the data in a curses-based terminal, using a different table view for each class of data. The user can control which tables are visible at any time. The tool can show the data for any host in a multi-pod/multi-host CalendarServer service, and can also show the data aggregated over all hosts in a pod. It typically requires a large terminal window, and the terminal will need good curses support. This tool replaces the older `dashboard` tool, which read stats directly from the CalendarServer hosts and is now deprecated because multiple concurrent users reading those sockets caused service performance issues.
+
+#### Help
+
+	usage: dashview.py [-h] [-s S]
+	
+	Dashboard collector viewer service for CalendarServer.
+	
+	optional arguments:
+	  -h, --help  show this help message and exit
+	  -s S        Dashboard collector service host:port (default: localhost:8200)
+
+* The `-s` option specifies the `dashcollect` service host and port where JSON data can be read from.
+
+#### Panels
+
+The visibility of each panel is controlled via a "hotkey". In addition, there are hotkeys that control the visibility of groups of panels:
+
+* `h` toggle display of the _Help_ (hotkeys) panel
+* `s` toggle display of the _System Status_ panel
+* `r` toggle display of the _HTTP Requests_ panel
+* `c` toggle display of the _HTTP Slots_ panel
+* `m` toggle display of the _HTTP Methods_ panel
+* `w` toggle display of the _Job Assignments_ panel
+* `j` toggle display of the _Job Activity_ panel
+* `d` toggle display of the _Directory Service_ panel
+* `H` display all of the HTTP panels only
+* `J` display all of the Jobs panels only
+* `D` display all of the Directory panels only
+* `a` display all panels
+* `n` display no panels
+* ` ` (space) toggle pause
+* `t` toggle Update Speed between 0.1 secs and 1 sec.
+* `x` toggle Aggregate Mode
+* `q` Quit
+
+Arrow keys can be used to select which pod or host data to view:
+
+* `up` & `down` - move between the list of pods
+* `left` & `right` - move between the list of hosts for the current pod
+
+### `dashtime` tool
+
+The `dashtime` tool produces plots from `dashcollect` log data to show how server statistics change over time. Note that the Python `matplotlib` module is required (run `pip install matplotlib` if you get an import error when starting the tool). The tool generates a set of vertically stacked plots, with the statistics aggregated over all hosts in a pod, restricted to a single host, or shown as a combination of per-host plots plus the aggregated values. Specific plot "modes" are hard-coded to produce common sets of plots. By default the plots are displayed on screen, from where they can be saved manually; there is also an option to automatically save a PNG image.
+
+#### Help
+
+	dashtime.py --help
+	usage: dashtime.py [-h] -l L [-p P] [-s S] [--save] [--noshow] [--start START]
+	                   [--count COUNT]
+	                   [--mode {basic,basicjob,basicmethod,basicschedule,hostcompleted,hostcpu,hostrequests}]
+	                   [-v]
+	
+	Dashboard time series processor.
+	
+	optional arguments:
+	  -h, --help            show this help message and exit
+	  -l L                  Log file to process
+	  -p P                  Name of pod to analyze
+	  -s S                  Name of server to analyze
+	  --save                Save plot PNG image (default: False)
+	  --noshow              Don't show the plot on screen (default: False)
+	  --start START         Log line to start from (default: 0)
+	  --count COUNT         Number of log lines to process from start (default:
+	                        -1)
+	  --mode {basic,basicjob,basicmethod,basicschedule,hostcompleted,hostcpu,hostrequests}
+	                        Type of plot to produce (default: basic)
+	  -v                    Verbose (default: False)
+	
+	Available modes:
+	
+	basic - stacked plots of total CPU, total request count, total average response
+	    time, completed jobs, and job queue size.
+	
+	basicjob - as per basic but with queued SCHEDULE_*_WORK and queued
+	    PUSH_NOTIFICATION_WORK plots.
+	
+	basicschedule - stacked plots of total CPU, completed jobs, each queued
+	    SCHEDULE_*_WORK, queued PUSH_NOTIFICATION_WORK, and overall job queue size.
+	
+	basicmethod - stacked plots of total CPU, total request count, total average
+	    response time, PUT-ics, REPORT cal-home-sync, PROPFIND Calendar Home, REPORT
+	    cal-sync, and PROPFIND Calendar.
+	
+	hostrequests - stacked plots of per-host request counts, total request count,
+	    and total CPU.
+	
+	hostcpu - stacked plots of per-host CPU, total request count, and total CPU.
+	
+	hostcompleted - stacked plots of per-host completed jobs, total CPU, and job
+	    queue size.
+
+* The `-l` option must be present and point to a `dashcollect` log file.
+* The `-p` option defines the pod to view data for (if not present the first pod - alphabetically sorted - is used).
+* The `-s` option defines a specific server to view data for (there are currently no modes that use this).
+* The `--save` option, when present, will cause a PNG image of the plots to be saved to disk. The image file has the same name as the log file, but with the mode name and a `.png` suffix appended, and it will be created in the same directory as the log file.
+* The `--noshow` option, when present, suppresses display of the plots on screen.
+* The `--start` option specifies which line in the `dashcollect` log file to start reading from (default is the first line).
+* The `--count` option specifies the maximum number of lines to read from the start (default is all lines after the start).
+* The `--mode` option determines the type of data produced in the plots. Each mode is described in the help text above.
+
+Note that the time scale on the plots is typically one second, as that is the polling period used by `dashcollect`. The HTTP-related data comes from one-minute averages, so it will look "blocky" compared to the once-per-second values of the other stats. The one-minute average data is shifted back (earlier) by 60 seconds to better match it to the time over which the data was actually collected.
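+
+A sketch of that shift, which is essentially what `dashtime` does to each minute-averaged series (the helper name is illustrative):
+
+    def shift_minute_average(series):
+        # Drop the first 60 one-second samples and pad the end with None, so
+        # each one-minute average lines up with the minute it was averaged over.
+        return series[60:] + [None] * 60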