[CalendarServer-changes] [6387] CalendarServer/trunk/contrib/tools/anonymous_log.py

source_changes at macosforge.org source_changes at macosforge.org
Wed Sep 29 11:40:58 PDT 2010


Revision: 6387
          http://trac.macosforge.org/projects/calendarserver/changeset/6387
Author:   cdaboo at apple.com
Date:     2010-09-29 11:40:57 -0700 (Wed, 29 Sep 2010)
Log Message:
-----------
Tool to anonymize an access log.

Added Paths:
-----------
    CalendarServer/trunk/contrib/tools/anonymous_log.py

Added: CalendarServer/trunk/contrib/tools/anonymous_log.py
===================================================================
--- CalendarServer/trunk/contrib/tools/anonymous_log.py	                        (rev 0)
+++ CalendarServer/trunk/contrib/tools/anonymous_log.py	2010-09-29 18:40:57 UTC (rev 6387)
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+##
+# Copyright (c) 2010 Apple Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##
+
+from gzip import GzipFile
+import getopt
+import os
+import sys
+import traceback
+
+class CalendarServerLogAnalyzer(object):
+    
+    def __init__(self):
+        
+        self.userCtr = 1
+        self.users = {}
+
+        self.guidCtr = 1
+        self.guids = {}
+
+        self.resourceCtr = 1
+        self.resources = {}
+
+    def anonymizeLogFile(self, logFilePath):
+        
+        fpath = os.path.expanduser(logFilePath)
+        if fpath.endswith(".gz"):
+            f = GzipFile(fpath)
+        else:
+            f = open(fpath)
+            
+        try:
+            for line in f:
+                
+                if not line.startswith("Log"):
+                    line = self.anonymizeLine(line)
+                print line,
+        
+        except Exception, e:
+            print "Exception: %s for %s" % (e, line,)
+            raise
+
+    def anonymizeLine(self, line):
+
+        
+        startPos = line.find("- ")
+        endPos = line.find(" [")
+        userid = line[startPos+2:endPos]
+        
+        if userid != "-":
+            if userid not in self.users:
+                self.users[userid] = "user%05d" % (self.userCtr,)
+                self.userCtr += 1
+            line = line[:startPos+2] + self.users[userid] + line[endPos:]
+            endPos = line.find(" [")
+        
+        startPos = endPos + 1
+    
+        startPos = line.find(']', startPos + 21) + 3
+        endPos = line.find(' ', startPos)
+        if line[startPos] != '?':
+            
+            startPos = endPos + 1
+            endPos = line.find(" HTTP/", startPos)
+            uri = line[startPos:endPos]
+            
+            splits = uri.split("/")
+            if len(splits) >= 4:
+                if splits[1] in ("calendars", "principals"):
+
+                    if splits[3] not in self.guids:
+                        self.guids[splits[3]] = "guid%05d" % (self.guidCtr,)
+                        self.guidCtr += 1
+                    splits[3] = self.guids[splits[3]]
+
+                    if len(splits) > 4:
+                        if splits[4] not in ("", "calendar", "inbox", "outbox", "dropbox"):
+                            if splits[4] not in self.resources:
+                                self.resources[splits[4]] = "resource%d" % (self.resourceCtr,)
+                                self.resourceCtr += 1
+                            splits[4] = self.resources[splits[4]]
+                        
+                    if len(splits) > 5:
+                        for x in range(5, len(splits)):
+                            if splits[x]:
+                                if splits[x] not in self.resources:
+                                    self.resources[splits[x]] = "resource%d%s" % (self.resourceCtr, os.path.splitext(splits[x])[1])
+                                    self.resourceCtr += 1
+                                splits[x] = self.resources[splits[x]]
+                                
+                        
+                    line = line[:startPos] + "/".join(splits) + line[endPos:]
+    
+        return line
+
+def usage(error_msg=None):
+    if error_msg:
+        print error_msg
+
+    print """Usage: anonymous_log [options] [FILE]
+Options:
+    -h            Print this help and exit
+
+Arguments:
+    FILE      File names for the access logs to anonymize
+
+Description:
+    This utility will anonymize the content of an access log.
+
+"""
+
+    if error_msg:
+        raise ValueError(error_msg)
+    else:
+        sys.exit(0)
+
+if __name__ == "__main__":
+
+    try:
+
+        options, args = getopt.getopt(sys.argv[1:], "h", [])
+
+        for option, value in options:
+            if option == "-h":
+                usage()
+            else:
+                usage("Unrecognized option: %s" % (option,))
+
+        # Process arguments
+        if len(args) == 0:
+            args = ("/var/log/caldavd/access.log",)
+
+        pwd = os.getcwd()
+
+        analyzers = []
+        for arg in args:
+            arg = os.path.expanduser(arg)
+            if not arg.startswith("/"):
+                arg = os.path.join(pwd, arg)
+            if arg.endswith("/"):
+                arg = arg[:-1]
+            if not os.path.exists(arg):
+                print "Path does not exist: '%s'. Ignoring." % (arg,)
+                continue
+
+            CalendarServerLogAnalyzer().anonymizeLogFile(arg)
+
+    except Exception, e:
+        sys.exit(str(e))
+        print traceback.print_exc()


Property changes on: CalendarServer/trunk/contrib/tools/anonymous_log.py
___________________________________________________________________
Added: svn:executable
   + *
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/calendarserver-changes/attachments/20100929/11065b88/attachment.html>


More information about the calendarserver-changes mailing list