[CalendarServer-changes] [15678] CalendarServer/trunk/calendarserver/tools/dashtime.py

source_changes at macosforge.org
Thu Jun 16 13:34:40 PDT 2016


Revision: 15678
          http://trac.calendarserver.org//changeset/15678
Author:   cdaboo at apple.com
Date:     2016-06-16 13:34:40 -0700 (Thu, 16 Jun 2016)
Log Message:
-----------
Add tool to do time-based plots of various dashboard stats.
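
A hypothetical invocation (flags as defined by the argparse setup in the new
file; the log path and pod name below are placeholders):

    ./dashtime.py -l ~/dashcollect.log.bz2 -p podA -v

The optional -s flag names a single server within the pod, used by the
per-host variants sketched in main().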

Added Paths:
-----------
    CalendarServer/trunk/calendarserver/tools/dashtime.py

Added: CalendarServer/trunk/calendarserver/tools/dashtime.py
===================================================================
--- CalendarServer/trunk/calendarserver/tools/dashtime.py	                        (rev 0)
+++ CalendarServer/trunk/calendarserver/tools/dashtime.py	2016-06-16 20:34:40 UTC (rev 15678)
@@ -0,0 +1,551 @@
+#!/usr/bin/env python
+##
+# Copyright (c) 2015-2016 Apple Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##
+"""
+Tool that extracts time series data from a dashcollect log.
+"""
+
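+# Input format, as inferred from the parsing code below: each line of the log
+# is one JSON object (optionally prefixed with an ASCII record-separator
+# character, 0x1e), keyed by pod name and then by host name, roughly:
+#
+#   {"pods": {"podA": {"host1.example.com": {"stats_system": {...},
+#                                            "stats": {...}, ...}}}}
+#
+# Plain and bz2-compressed log files are both accepted.
+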
+from bz2 import BZ2File
+from collections import OrderedDict, defaultdict
+import argparse
+import json
+import matplotlib.pyplot as plt
+import operator
+import os
+
+
+verbose = False
+
+
+def _verbose(log):
+    if verbose:
+        print(log)
+
+
+
+def safeDivision(value, total, factor=1):
+    return value * factor / total if total else 0
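+# For example, safeDivision(300, 60) returns 5, and safeDivision(300, 0)
+# returns 0: the zero-total guard keeps idle samples from raising
+# ZeroDivisionError. Note that under Python 2 integer arguments divide
+# integrally, so callers that need fractional results should pass a float
+# factor.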
+
+
+
+class DataType(object):
+    """
+    Base class for object that can process the different types of data in a
+    dashcollect log.
+    """
+
+    allTypes = OrderedDict()
+    key = ""
+
+    # Indicates whether this type of data is a one-minute average, which means
+    # each value lags the corresponding "real-time" value by 60 seconds. When
+    # this flag is set, the first 60 data items are dropped and the series is
+    # padded at the end, so that averaged and instantaneous plots line up.
+    skip60 = False
+
+
+    @staticmethod
+    def getTitle(measurement):
+        if "-" in measurement:
+            measurement, item = measurement.split("-", 1)
+        else:
+            item = ""
+        return DataType.allTypes[measurement].title(item)
+
+
+    @staticmethod
+    def skip(measurement):
+        if "-" in measurement:
+            measurement = measurement.split("-", 1)[0]
+        return DataType.allTypes[measurement].skip60
+
+
+    @staticmethod
+    def process(measurement, stats, host):
+        if "-" in measurement:
+            measurement, item = measurement.split("-", 1)
+        else:
+            item = ""
+        return DataType.allTypes[measurement].calculate(stats, item, host)
+
+
+    @staticmethod
+    def title(item):
+        raise NotImplementedError
+
+
+    @staticmethod
+    def maxY(stats, item, host):
+        raise NotImplementedError
+
+
+    @staticmethod
+    def calculate(stats, item, hosts):
+        """
+        Calculate this measurement's value for one log sample. If hosts is
+        L{None} then data from all hosts in the pod will be aggregated.
+
+        @param stats: per-pod L{dict} mapping each host name to that host's data
+        @type stats: L{dict}
+        @param item: additional L{dict} key for the data of interest
+        @type item: L{str}
+        @param hosts: names of the hosts to process, or L{None} for all
+        @type hosts: L{list}
+        """
+        raise NotImplementedError
+
+
+
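+# Per-host data layout consumed by the DataType subclasses below, as
+# reconstructed from their accessors (the real dashcollect schema may contain
+# additional keys):
+#
+#   stats[host]["stats_system"]["cpu use"]       instantaneous CPU use %
+#   stats[host]["stats"]["1m"]["requests"]       requests in the last minute
+#   stats[host]["stats"]["1m"]["t"]              total response time (ms) over the last minute
+#   stats[host]["stats"]["1m"]["method"]         per-method request counts
+#   stats[host]["stats"]["1m"]["method-t"]       per-method total response times
+#   stats[host]["job_assignments"]["workers"]    worker status tuples; index 2 is completed jobs
+#   stats[host]["jobs"][jobtype]["queued"]       queued count per job type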
+class CPUDataType(DataType):
+    """
+    CPU use.
+    """
+
+    key = "cpu"
+
+    @staticmethod
+    def title(item):
+        return "CPU Use %"
+
+
+    @staticmethod
+    def maxY(stats, item, host):
+        return 100 * len(stats) if host is None else 100
+
+
+    @staticmethod
+    def calculate(stats, item, hosts):
+        if hosts is None:
+            hosts = stats.keys()
+        return sum([stats[onehost]["stats_system"]["cpu use"] for onehost in hosts])
+
+
+
+class RequestsDataType(DataType):
+    """
+    Number of requests.
+    """
+
+    key = "reqs"
+    skip60 = True
+
+    @staticmethod
+    def title(item):
+        return "Requests/sec"
+
+
+    @staticmethod
+    def maxY(stats, item, host):
+        return None
+
+
+    @staticmethod
+    def calculate(stats, item, hosts):
+        if hosts is None:
+            hosts = stats.keys()
+        return sum([stats[onehost]["stats"]["1m"]["requests"] for onehost in hosts]) / 60.0
+
+
+
+class ResponseDataType(DataType):
+    """
+    Average response time.
+    """
+
+    key = "respt"
+    skip60 = True
+
+    @staticmethod
+    def title(item):
+        return "Av. Response Time (ms)"
+
+
+    @staticmethod
+    def maxY(stats, item, host):
+        return None
+
+
+    @staticmethod
+    def calculate(stats, item, hosts):
+        if hosts is None:
+            hosts = stats.keys()
+        tsum = sum([stats[onehost]["stats"]["1m"]["t"] for onehost in hosts])
+        rsum = sum([stats[onehost]["stats"]["1m"]["requests"] for onehost in hosts])
+        return safeDivision(tsum, rsum)
+
+
+
+class JobsCompletedDataType(DataType):
+    """
+    Job completion count from job assignments.
+    """
+
+    key = "jcomp"
+
+    lastCompleted = defaultdict(int)
+
+    @staticmethod
+    def title(item):
+        return "Completed"
+
+
+    @staticmethod
+    def maxY(stats, item, host):
+        return None
+
+
+    @staticmethod
+    def calculate(stats, item, hosts):
+        if hosts is None:
+            hosts = stats.keys()
+        result = 0
+        for onehost in hosts:
+            # Each worker reports a cumulative completed count (third item of
+            # its status tuple); plot the per-sample delta, treating the first
+            # sample seen for a host as zero.
+            completed = sum(map(operator.itemgetter(2), stats[onehost]["job_assignments"]["workers"]))
+            result += (completed - JobsCompletedDataType.lastCompleted[onehost]) if JobsCompletedDataType.lastCompleted[onehost] else 0
+            JobsCompletedDataType.lastCompleted[onehost] = completed
+        return result
+
+
+
+class MethodCountDataType(DataType):
+    """
+    Count of specified methods. L{item} should be set to the full name of the
+    "decorated" method seen in dashview.
+    """
+
+    key = "methodc"
+    skip60 = True
+
+    @staticmethod
+    def title(item):
+        return item
+
+
+    @staticmethod
+    def maxY(stats, item, host):
+        return None
+
+
+    @staticmethod
+    def calculate(stats, item, hosts):
+        if hosts is None:
+            hosts = stats.keys()
+        return sum([stats[onehost]["stats"]["1m"]["method"].get(item, 0) for onehost in hosts])
+
+
+
+class MethodResponseDataType(DataType):
+    """
+    Average response time of specified methods. L{item} should be set to the
+    full name of the "decorated" method seen in dashview.
+    """
+
+    key = "methodr"
+    skip60 = True
+
+    @staticmethod
+    def title(item):
+        return item
+
+
+    @staticmethod
+    def maxY(stats, item, host):
+        return None
+
+
+    @staticmethod
+    def calculate(stats, item, hosts):
+        if hosts is None:
+            hosts = stats.keys()
+        tsum = sum([stats[onehost]["stats"]["1m"]["method-t"].get(item, 0) for onehost in hosts])
+        rsum = sum([stats[onehost]["stats"]["1m"]["method"].get(item, 0) for onehost in hosts])
+        return safeDivision(tsum, rsum)
+
+
+
+class JobQueueDataType(DataType):
+    """
+    Count of queued job items. L{item} should be set to the full name or prefix
+    of job types to process. Or if set to L{None}, all jobs are counted.
+    """
+
+    key = "jqueue"
+
+    @staticmethod
+    def title(item):
+        # Abbreviate the job type to its initials, e.g. item
+        # "SCHEDULE_REPLY_WORK" yields the title "JQ S_R_W".
+        return ("JQ " + "_".join(map(operator.itemgetter(0), item.split("_")))) if item else "Jobs Queued"
+
+
+    @staticmethod
+    def maxY(stats, item, host):
+        return None
+
+
+    @staticmethod
+    def calculate(stats, item, hosts):
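+        # The job queue lives in the shared database, so each host in the pod
+        # presumably reports the same queue depths; read them from the first
+        # host only (the hosts argument is not used here).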
+        onehost = sorted(stats.keys())[0]
+
+        if item:
+            return sum(v["queued"] for k, v in stats[onehost]["jobs"].items() if k.startswith(item))
+        else:
+            return sum(v["queued"] for v in stats[onehost]["jobs"].values())
+
+
+# Register the known L{DataType}s
+for dtype in DataType.__subclasses__():
+    DataType.allTypes[dtype.key] = dtype
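+
+# After registration, DataType.allTypes["cpu"] is CPUDataType, and a compound
+# measurement key such as "methodc-PUT ics" resolves via DataType.process()
+# to MethodCountDataType with item "PUT ics".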
+
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Dashboard time series processor.",
+        epilog="cpu - CPU use\nreqs - requests per second\nrespt - average response time\n"
+               "jcomp - completed jobs\nmethodc-ITEM - count of method ITEM\n"
+               "methodr-ITEM - av. response time of method ITEM\n"
+               "jqueue[-PREFIX] - queued jobs, optionally limited to job types starting with PREFIX",
+    )
+    parser.add_argument("-l", help="Log file to process")
+    parser.add_argument("-p", help="Name of pod to analyze")
+    parser.add_argument("-s", help="Name of server to analyze")
+    parser.add_argument("-v", action="store_true", help="Verbose")
+    args = parser.parse_args()
+    if args.v:
+        global verbose
+        verbose = True
+
+    # Get the log file; bail out rather than fall through with logfile unbound
+    try:
+        if args.l.endswith(".bz2"):
+            logfile = BZ2File(os.path.expanduser(args.l))
+        else:
+            logfile = open(os.path.expanduser(args.l))
+    except Exception:
+        print("Failed to open logfile {}".format(args.l))
+        raise SystemExit(1)
+
+    # Start/end lines in log file to process
+    line_start = 0
+    line_count = 10000
+
+    # Plot arrays that will be generated
+    x = []
+    y = OrderedDict()
+    titles = {}
+    ymaxes = {}
+
+    def singleHost(valuekeys):
+        """
+        Generate data for a single host only.
+
+        @param valuekeys: L{DataType} keys to process
+        @type valuekeys: L{list} of L{str}
+        """
+        _plotHosts(valuekeys, (args.s,))
+
+
+    def combinedHosts(valuekeys):
+        """
+        Generate data for all hosts.
+
+        @param valuekeys: L{DataType} keys to process
+        @type valuekeys: L{list} of L{str}
+        """
+        _plotHosts(valuekeys, None)
+
+
+    def _plotHosts(valuekeys, hosts):
+        """
+        Generate data for the specified list of hosts.
+
+        @param valuekeys: L{DataType} keys to process
+        @type valuekeys: L{list} of L{str}
+        @param hosts: hosts to process, or L{None} for all hosts
+        @type hosts: L{list} of L{str}
+        """
+
+        # Initialize the plot arrays
+        for measurement in valuekeys:
+            y[measurement] = []
+            titles[measurement] = DataType.getTitle(measurement)
+            ymaxes[measurement] = None
+
+        # For each log file line, process the data for each required measurement
+        with logfile:
+            line = logfile.readline()
+            ctr = 0
+            while line:
+                if ctr < line_start:
+                    ctr += 1
+                    line = logfile.readline()
+                    continue
+
+                if line[0] == "\x1e":
+                    line = line[1:]
+                jline = json.loads(line)
+
+                x.append(ctr)
+                ctr += 1
+
+                for measurement in valuekeys:
+                    stats = jline["pods"][args.p]
+                    y[measurement].append(DataType.process(measurement, stats, hosts))
+
+                line = logfile.readline()
+                if ctr > line_start + line_count:
+                    break
+
+        # Offset data that is averaged over the previous minute
+        for measurement in valuekeys:
+            if DataType.skip(measurement):
+                y[measurement] = y[measurement][60:]
+                y[measurement].extend([None] * 60)
+
+
+    def perHost(perhostkeys, combinedkeys):
+        """
+        Generate a set of per-host plots, together with a set of plots for
+        all-host data.
+
+        @param perhostkeys: L{DataType} keys for per-host data to process
+        @type perhostkeys: L{list} of L{str}
+        @param combinedkeys: L{DataType} keys for all-host data to process
+        @type combinedkeys: L{list} of L{str}
+        """
+
+        # For each log file line, process the data for each required measurement
+        with logfile:
+            line = logfile.readline()
+            ctr = 0
+            while line:
+                if ctr < line_start:
+                    ctr += 1
+                    line = logfile.readline()
+                    continue
+
+                if line[0] == "\x1e":
+                    line = line[1:]
+                jline = json.loads(line)
+
+                x.append(ctr)
+                ctr += 1
+
+                # Initialize the plot arrays when we know how many hosts there are
+                if len(y) == 0:
+                    hosts = sorted(jline["pods"][args.p].keys())
+
+                    for host in hosts:
+                        for measurement in perhostkeys:
+                            ykey = "{}={}".format(measurement, host)
+                            y[ykey] = []
+                            titles[ykey] = DataType.getTitle(measurement)
+                            ymaxes[ykey] = None
+
+                    for measurement in combinedkeys:
+                        y[measurement] = []
+                        titles[measurement] = DataType.getTitle(measurement)
+                        ymaxes[measurement] = None
+
+                # Get actual measurement data
+                for host in hosts:
+                    for measurement in perhostkeys:
+                        ykey = "{}={}".format(measurement, host)
+                        stats = jline["pods"][args.p]
+                        y[ykey].append(DataType.process(measurement, stats, (host,)))
+
+                for measurement in combinedkeys:
+                    stats = jline["pods"][args.p]
+                    y[measurement].append(DataType.process(measurement, stats, hosts))
+
+                line = logfile.readline()
+                if ctr > line_start + line_count:
+                    break
+
+        # Offset data that is averaged over the previous minute
+        for host in hosts:
+            for measurement in perhostkeys:
+                if DataType.skip(measurement):
+                    ykey = "{}={}".format(measurement, host)
+                    y[ykey] = y[ykey][60:]
+                    y[ykey].extend([None] * 60)
+        for measurement in combinedkeys:
+            if DataType.skip(measurement):
+                y[measurement] = y[measurement][60:]
+                y[measurement].extend([None] * 60)
+
+
+#    singleHost((
+#        CPUDataType.key,
+#        RequestsDataType.key,
+#        ResponseDataType.key,
+#        JobsCompletedDataType.key,
+#        JobQueueDataType.key + "-SCHEDULE",
+#        JobQueueDataType.key + "-PUSH",
+#        JobQueueDataType.key,
+#    ))
+#    combinedHosts((
+#        CPUDataType.key,
+#        RequestsDataType.key,
+#        ResponseDataType.key,
+#        JobsCompletedDataType.key,
+#        JobQueueDataType.key + "-SCHEDULE",
+#        JobQueueDataType.key + "-PUSH",
+#        JobQueueDataType.key,
+#    ))
+    combinedHosts((
+        CPUDataType.key,
+        RequestsDataType.key,
+        ResponseDataType.key,
+        MethodCountDataType.key + "-PUT ics",
+        MethodCountDataType.key + "-REPORT cal-home-sync",
+        MethodCountDataType.key + "-PROPFIND Calendar Home",
+        MethodCountDataType.key + "-REPORT cal-sync",
+        MethodCountDataType.key + "-PROPFIND Calendar",
+    ))
+#    perHost((
+#        RequestsDataType.key,
+#    ), (
+#        CPUDataType.key,
+#        JobQueueDataType.key,
+#    ))
+
+    # Generate a single stacked plot of the data
+    for plotnum, measurement in enumerate(y.keys()):
+        plt.subplot(len(y), 1, plotnum + 1)
+        plotSeries(titles[measurement], x, y[measurement], 0, ymaxes[measurement])
+    plt.show()
+
+
+
+def plotSeries(title, x, y, ymin=None, ymax=None):
+    """
+    Plot the chosen dataset key for each scanned data file.
+
+    @param key: data set key to use
+    @type key: L{str}
+    @param ymin: minimum value for y-axis or L{None} for default
+    @type ymin: L{int} or L{float}
+    @param ymax: maximum value for y-axis or L{None} for default
+    @type ymax: L{int} or L{float}
+    """
+
+    plt.plot(x, y)
+
+    plt.xlabel("Time")
+    plt.ylabel(title)
+    if ymin is not None:
+        plt.ylim(ymin=ymin)
+    if ymax is not None:
+        plt.ylim(ymax=ymax)
+    plt.minorticks_on()
+    plt.grid(True, "major", "x", alpha=0.5, linewidth=0.5)
+    plt.grid(True, "minor", "x", alpha=0.5, linewidth=0.5)
+
+if __name__ == "__main__":
+    main()


Property changes on: CalendarServer/trunk/calendarserver/tools/dashtime.py
___________________________________________________________________
Added: svn:executable
   + *