From eb599134445201fb4a5afe5ebc1573c890a06a3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sven=20M=C3=A4der?= <maeder@phys.ethz.ch>
Date: Tue, 17 Apr 2018 12:02:30 +0200
Subject: [PATCH] Add modified version for access logging

---
 bin/accesslogcat.py   | 201 ++++++++++++++++++++++++
 bin/accesslogparse.py | 350 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 551 insertions(+)
 create mode 100755 bin/accesslogcat.py
 create mode 100755 bin/accesslogparse.py

diff --git a/bin/accesslogcat.py b/bin/accesslogcat.py
new file mode 100755
index 0000000..c004692
--- /dev/null
+++ b/bin/accesslogcat.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+#import os
+import sys
+import time
+import subprocess
+import select
+import pprint
+import json
+import argparse
+import lib_path
+import lib
+import dphysldap
+import tabulate
+
+#SLEEP_INTERVAL = 1.0
+update_every = 0.001
+log_path = '/var/log/ldap/'
+log_file_deltalog = log_path + 'delta.log'
+indent = 16
+
+FMT = 'plain'
+tabulate.PRESERVE_WHITESPACE = True
+
+REQ_ATTRS = frozenset({
+    'reqAuthzID',
+    'reqEntryUUID',
+    'entryCSN'
+})
+
+NODES = {
+    0: 'phd-aa1',
+    1: 'phd-aa2',
+    2: 'phd-aa3'
+}
+
+
+def sleep(start_time):
+    """After hard work, take a nap for the rest of the second"""
+    current_time = time.perf_counter()
+    elapsed_time = current_time - start_time
+    sleep_time = update_every - elapsed_time
+
+    if sleep_time > 0:
+        time.sleep(sleep_time)
+
+
+def parse(line):
+    """Parse line and print in pretty format"""
+    req_json = line.split(' >>> ', maxsplit=1)[1]
+    req = json.loads(req_json)
+
+    sid =  int(req['attributes']['entryCSN'][0].split('#')[2])
+
+    print()
+    print('{0:<{indent}}{1}'.format('req:', req['dn'], indent=indent))
+    print('{0:<{indent}}{1}'.format('node:', NODES[sid], indent=indent))
+    print('{0:<{indent}}{1}'.format('type:', req['attributes']['reqType'][0], indent=indent))
+    if 'reqDN' in req['attributes']:
+        print('{0:<{indent}}{1}'.format('entry:', req['attributes']['reqDN'][0], indent=indent))
+
+    if 'reqMod' in req['attributes']:
+        table = list()
+
+        for req_mod in req['attributes']['reqMod']:
+            row = [ ' ' * (indent - 2) ]
+            row.extend(req_mod.split(' ', maxsplit=1))
+            table.append(row)
+
+        print('modfications:')
+        print(tabulate.tabulate(table, tablefmt=FMT))
+
+        del req['attributes']['reqMod']
+
+    entry = dphysldap.Entry(req['attributes'])
+    table = list()
+    
+    for key, value in entry.items():
+        row = [ ' ' * (indent - 2), ''.join([key, ':']), str(value) ]
+        table.append(row)
+        #if key in REQ_ATTRS:
+        #    row = [ ' ' * (indent - 2), ''.join([key, ':']), str(value) ]
+        #    table.append(row)
+
+    print('req attrs:')
+    print(tabulate.tabulate(table, tablefmt=FMT))
+
+    print()
+    print('------------------------------------------------------------------------------------------')
+
+
+def get_input_method(arg):
+    """Determine the input method"""
+    if arg['follow']:
+        if arg['file']:
+            tail_file(arg['file'])
+        else:
+            tail_file(log_file_deltalog)
+    else:
+        if arg['file']:
+            read_file(arg['file'])
+        else:
+            read_stdin()
+
+
+def tail_file(log_file):
+    """Open the file in tail mode"""
+    fin = subprocess.Popen(['tail', '-F', log_file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    poll = select.poll()
+    poll.register(fin.stdout)
+
+    try:
+        while True:
+            start_time = time.perf_counter()
+
+            if poll.poll(1):
+                line = fin.stdout.readline().decode('utf-8')
+
+                parse(line)
+
+            sleep(start_time)
+
+    except KeyboardInterrupt:
+        sys.stdout.flush()
+        pass
+
+
+#def tail_file2(log_file):
+#    """Open the file in tail mode"""
+#    with open(log_file, 'r') as fin:
+#        fin.seek(0, os.SEEK_END)
+#
+#        while True:
+#            where = fin.tell()
+#            line = fin.readline()
+#
+#            if not line:
+#                time.sleep(SLEEP_INTERVAL)
+#                fin.seek(where)
+#            else:
+#                parse(line)
+
+
+def read_file(log_file):
+    """Read the whole file"""
+    with open(log_file, 'r') as fin:
+        while True:
+            line = fin.readline()
+
+            if not line:
+                break
+
+            parse(line)
+
+
+def read_stdin():
+    """Read from stdin"""
+    fin = sys.stdin
+
+    try:
+        while True:
+            line = fin.readline()
+
+            if not line:
+                break
+
+            parse(line)
+
+    except KeyboardInterrupt:
+        sys.stdout.flush()
+        pass
+
+
+def main():
+    """Open file in non blocking mode and parse json"""
+    parser = argparse.ArgumentParser(add_help=False, description='Parse deltalog from file or stdin')
+    parser.add_argument(
+        '-f', '--follow',
+        dest='follow', action='store_const', const=True,
+        help='Output appended data as the file grows'
+    )
+    parser.add_argument(
+        'file',
+        nargs='?', type=str,
+        help='File to open'
+    )
+    parser.add_argument(
+        '-h', '--help',
+        action='help',
+        help='Show this help message and exit'
+    )
+    arg = vars(parser.parse_args())
+
+    get_input_method(arg)
+
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()
+    sys.exit(0)
diff --git a/bin/accesslogparse.py b/bin/accesslogparse.py
new file mode 100755
index 0000000..4c4ee59
--- /dev/null
+++ b/bin/accesslogparse.py
@@ -0,0 +1,350 @@
+#!/usr/bin/env python3
+
+import sys
+import fcntl
+import time
+import datetime
+import re
+import pprint
+import logging
+import json
+import mmap
+import base64
+import lib_path
+import lib
+import dphysldap
+
+# security warning: using DEBUG may log sensitive data
+# log levels: CRITICAL | ERROR | WARNING | INFO | DEBUG | NOTSET
+# default:    WARNING
+log_level = logging.DEBUG
+
+script_name = 'accesslogparse'
+deltalog_name = 'access'
+log_path = '/var/log/ldap/'
+update_every = 1
+lock_file = '/var/run/' + script_name + '.lock'
+log_datefmt = '%Y-%m-%d %H:%M:%S'
+log_file_log = log_path + script_name + '.log'
+log_file_deltalog = log_path + deltalog_name + '.log'
+search_base = 'cn=readlog'
+object_classes = '(objectClass=auditObject)'
+log = None
+deltalog = None
+
+REQ_WRITES = frozenset({
+    'add',
+    'delete',
+    'modify',
+    'modrdn'
+})
+
+re_skip = (r'^('
+    r'krbExtraData|'
+    r'krbLastSuccessfulAuth|'
+    r'krbLastFailedAuth|'
+    r'krbLoginFailedCount|'
+    r'heartbeatTimestamp|'
+    r'modifyTimestamp|'
+    r'modifiersName|'
+    r'lastUse.*?|'
+    r'(context|entry)CSN'
+    r'):')
+
+re_filter = (r'^('
+    r'heartbeatTimestamp|'
+    r'lastUse.*?|'
+    r'(context|entry)CSN'
+    r'):')
+
+re_redacted = (r'^('
+    r'krbPrincipalKey|'
+    r'krbMKey|'
+    r'krbExtraData|'
+    r'krbPwdHistory|'
+    r'preBlockPassword|'
+    r'userPassword'
+    r'):')
+
+re_attribute = r'^(?P<key>[^:]+?)(?P<delim>:[-+=#] ?)(?P<value>.*)'
+
+rgx_skip = re.compile(re_skip.encode('utf-8'))
+rgx_filter = re.compile(re_filter.encode('utf-8'))
+rgx_redacted = re.compile(re_redacted.encode('utf-8'))
+rgx_attribute = re.compile(re_attribute.encode('utf-8'))
+rgx_byte = None
+
+
+def lock(lock_file):
+    """Try to get an exclusive lock on the lockfile (only one instance allowed)"""
+    flock = open(lock_file, 'w')
+
+    try:
+        fcntl.lockf(flock, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        return flock
+    except (IOError, OSError):
+        print('error: another instance is running')
+        sys.exit(1)
+
+
+def unlock(lock):
+    """Release the lock"""
+    fcntl.flock(lock, fcntl.LOCK_UN)
+
+
+def init_log():
+    """Setup logging to two logfiles"""
+    global log, deltalog
+
+    log = logging.getLogger('log')
+    log.setLevel(log_level)
+    log_fh = logging.FileHandler(log_file_log)
+    log_fh.setLevel(log_level)
+    log_formatter = logging.Formatter(fmt='{asctime} {levelname}: {message}', style='{', datefmt=log_datefmt)
+    log_fh.setFormatter(log_formatter)
+    log.addHandler(log_fh)
+    
+    deltalog = logging.getLogger('accesslog')
+    deltalog.setLevel(logging.INFO)
+    deltalog_fh = logging.FileHandler(log_file_deltalog)
+    deltalog_fh.setLevel(logging.INFO)
+    deltalog_formatter = logging.Formatter(fmt='{asctime}: {message}', style='{', datefmt=log_datefmt)
+    deltalog_fh.setFormatter(deltalog_formatter)
+    deltalog.addHandler(deltalog_fh)
+
+
+def get_last_line(fname):
+    """Using mmap to return a copy of the last line of a file"""
+    with open(fname) as source:
+        mapping = mmap.mmap(source.fileno(), 0, prot=mmap.PROT_READ)
+    return mapping[mapping.rfind(b'\n', 0, -1)+1:]
+
+
+def is_skipped(entry):
+    """Check if at least on of the reqMod attributes is interesting"""
+    for attribute in entry['raw_attributes']['reqMod']:
+        try:
+            if not rgx_skip.search(attribute):
+                log.debug('interesting attribute: {}'.format(attribute))
+                return False
+        except:
+            log.exception('error: while checking if skipped: {}'.format(attribute))
+            return False
+
+    return True
+
+
+def split_attribute(attribute):
+    """Split up LDAP attribute"""
+    match = rgx_attribute.match(attribute)
+
+    key = match.group('key')
+    delim = match.group('delim')
+    value = match.group('value')
+
+    return key, delim, value
+
+
+def filtered_req_mod(req_mods):
+    """
+    Filter and decode reqMod attributes: 
+    - not of interest: remove
+    - sensitive: redact
+    - byte values: encode base64
+    """
+    filtered_req_mods = list()
+
+    for index, attribute in enumerate(req_mods):
+        try:
+            if rgx_filter and rgx_filter.search(attribute):
+                log.debug('filter attribute: {}'.format(attribute))
+                continue
+
+            elif rgx_redacted and rgx_redacted.search(attribute):
+                key, delim, value = split_attribute(attribute)
+                if value:
+                    value = b'[REDACTED]'
+                    attribute = delim.join([key, value])
+                    log.debug('redact: {}'.format(attribute))
+
+            elif rgx_byte and rgx_byte.search(attribute):
+                key, delim, value = split_attribute(attribute)
+                if value:
+                    value = base64.b64encode(value)
+                    attribute = delim.join([key, value])
+                    log.debug('bytes attribute: {}'.format(attribute))
+
+            filtered_req_mods.append(attribute.decode('utf-8'))
+
+        except:
+            log.exception('error: while filtering: {}'.format(attribute))
+
+    return filtered_req_mods
+
+
+def log_action(action, entry):
+    """Log an action on a req"""
+    req_type = entry['attributes']['reqType'][0]
+
+    if req_type in REQ_WRITES:
+        req_dn = entry['attributes']['reqDN'][0]
+
+        if not action == 'skip':
+            log.debug('entry:')
+            log.debug(pprint.pformat(entry, indent=1))
+            log.info('{}: {} {}'.format(action, req_type, req_dn))
+        else:
+            log.debug('{}: {} {}'.format(action, req_type, req_dn))
+    else:
+        req_session = entry['attributes']['reqSession'][0]
+
+        log.debug('entry:')
+        log.debug(pprint.pformat(entry, indent=1))
+        log.info('{}: {} {}'.format(action, req_type, req_session))
+
+
+def write(entry):
+    """Write a req to file"""
+    req_type = entry['attributes']['reqType'][0]
+
+    entry['attributes'] = dict(entry['attributes'])
+
+    if 'reqMod' in entry['attributes']:
+        entry['attributes']['reqMod'] = filtered_req_mod(entry['raw_attributes']['reqMod'])
+
+    del entry['raw_dn']
+    del entry['raw_attributes']
+
+    log_action('write', entry)
+
+    if req_type in REQ_WRITES:
+        req_dn = entry['attributes']['reqDN'][0]
+
+        try:
+            deltalog.info('{} {} >>> {}'.format(req_type, req_dn, json.dumps(entry)))
+        except:
+            log.exception('error: writing entry: {} >>> {}: {}'.format(entry['dn'], req_type, req_dn))
+    else:
+        req_session = entry['attributes']['reqSession'][0]
+
+        try:
+            deltalog.info('{} {} >>> {}'.format(req_type, req_session, json.dumps(entry)))
+        except:
+            log.exception('error: writing entry: {} >>> {}: {}'.format(entry['dn'], req_session, req_dn))
+
+
+def sleep(start_time):
+    """After hard work, take a nap for the rest of the second"""
+    current_time = time.perf_counter()
+    elapsed_time = current_time - start_time
+    sleep_time = update_every - elapsed_time
+
+    log.debug('runtime {0:.3f}s, sleeping {1:.3f}s'.format(elapsed_time, sleep_time))
+
+    if sleep_time > 0:
+        time.sleep(sleep_time)
+
+
+def connect(slapd):
+    """Connect to slapd or reconnect forever"""
+    try:
+        slapd.connect()
+    except:
+        log.exception('error: failed to connect! trying to reconnect...')
+        slapd.connect_retry()
+        log.error('connected to slapd')
+
+
+def get_byte_attributes():
+    """Get all byte type attributes"""
+    global rgx_byte
+
+    log.debug('connecting to slapd to retrieve schema...')
+    slapd = dphysldap.Slapd(get_info='SCHEMA')
+    connect(slapd)
+
+    byte_syntaxes = slapd.get_byte_syntaxes()
+    byte_attrs = slapd.get_byte_attributes(byte_syntaxes=byte_syntaxes)
+
+    for item in byte_syntaxes:
+        log.debug('byte syntax: {}'.format(item))
+
+    for item in byte_attrs:
+        log.debug('byte attr: {}'.format(item))
+
+    if byte_attrs:
+        re_byte = ''.join([r'^(', '|'.join(byte_attrs), r'):'])
+        rgx_byte = re.compile(re_byte.encode('utf-8'))
+
+    log.debug('disconnecting...')
+    slapd.connection.unbind()
+
+
+def get_req_start():
+    """Get the timestamp of the last logged entry or now"""
+    try:
+        log.debug('trying to read last req from {}'.format(log_file_deltalog))
+        last_line = get_last_line(log_file_deltalog).decode('utf-8')
+        req_json = last_line.split(' >>> ', maxsplit=1)[1]
+        req = json.loads(req_json)
+        req_start = req['attributes']['reqStart'][0]
+        log.info('continuing from last logged req: {}'.format(req_start))
+    except:
+        log.exception('error: trying to retrieve last req')
+        req_start = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S.%fZ')
+        log.error('using current timestamp: {}'.format(req_start))
+
+    return req_start
+
+
+def main():
+    """Connect to slapd socket, search accesslog, write interesting changes"""
+    flock = lock(lock_file)
+
+    init_log()
+
+    get_byte_attributes()
+    req_start = get_req_start()
+
+    log.debug('connecting to slapd...')
+    slapd = dphysldap.Slapd(get_info='NONE')
+    connect(slapd)
+
+    while True:
+        start_time = time.perf_counter()
+        
+        search_filter=''.join(['(&', object_classes, '(reqStart>=', req_start, ')',')'])
+
+        try:
+            response = slapd.search(search_base, search_filter)
+        except:
+            log.exception('error: reading socket! trying to reconnect...')
+            slapd.connect_retry()
+            log.error('socket reconnected.')
+            continue
+
+        if response:
+            for entry in slapd.response():
+                if entry['attributes']['reqStart'][0] == req_start:
+                    continue
+
+                req_start = entry['attributes']['reqStart'][0]
+                req_type = entry['attributes']['reqType'][0]
+
+                log.debug('processing: {}'.format(entry['dn']))
+
+                if req_type == 'modify':
+                    if is_skipped(entry):
+                        log_action('skip', entry)
+                        continue
+
+                write(entry)
+
+        sleep(start_time)
+
+    unlock(flock)
+
+
+if __name__ == '__main__':
+    main()
+    sys.exit(0)
-- 
GitLab