From 0b89f309878548957d47fa1d5b73e5cc9da00ef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20M=C3=A4der?= <maeder@phys.ethz.ch> Date: Wed, 11 Apr 2018 19:07:59 +0200 Subject: [PATCH] Improve filtering, exception handling and output --- bin/deltalogparse.py | 118 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 16 deletions(-) diff --git a/bin/deltalogparse.py b/bin/deltalogparse.py index d2468b8..297570e 100755 --- a/bin/deltalogparse.py +++ b/bin/deltalogparse.py @@ -8,15 +8,18 @@ import pprint import logging import json import mmap +import base64 import lib_path import lib import dphysldap update_every = 1 -log_level = logging.DEBUG +#log_level = logging.WARNING +log_level = logging.INFO +#log_level = logging.DEBUG log_datefmt = '%Y-%m-%d %H:%M:%S' log_path = '/var/log/ldap/' -log_file_log = log_path + 'deltalogparse.log' +log_file_log = log_path + 'logparse.log' log_file_deltalog = log_path + 'delta.log' search_base = 'cn=deltalog' object_classes = '(|(objectClass=auditModify)(objectClass=auditAdd)(objectClass=auditDelete)(objectClass=auditModRDN))' @@ -53,6 +56,14 @@ rgx_filter = re.compile( r'(context|entry)CSN' r'):') +rgx_redacted = re.compile( + r'^(' + r'krbPrincipalKey|' + r'preBlockPassword|' + r'userPassword' + r'):') + +rgx_attr_key = re.compile(r'^(?P<key>.*?)(?P<delimiter>:[=+-:]? ?)') def getlastline(fname): """Using mmap to return a copy of the last line of a file""" @@ -64,20 +75,93 @@ def getlastline(fname): def is_skipped(entry): """Check if at least on of the reqMod attributes is interesting""" for attribute in entry['attributes']['reqMod']: - if not rgx_skip.search(attribute): - log.info('interesting attribute: {}'.format(attribute)) + try: + if not rgx_skip.search(attribute): + log.info('interesting attribute: {}'.format(attribute)) + return False + except TypeError: + log.debug('caught exception: while checking skipped, assume interesting attribute: {}'.format(attribute)) return False return True def filtered(entry): - """Filter out attributes not of interest""" - if 'reqMod' in entry['attributes']: - for attribute in entry['attributes']['reqMod']: - if rgx_filter.search(attribute): - log.info('filter attribute: {}'.format(attribute)) - entry['attributes']['reqMod'].remove(attribute) + """ + Filter attributes: + - not of interest: remove + - sensitive: redact + - binary values: encode base64 + """ + if 'reqMod' in entry['attributes']: + for index, attribute in enumerate(entry['attributes']['reqMod']): + is_bytes = False + + if type(attribute) is bytes: + is_bytes = True + log.info('this is a bytes object, decoding it...') + log.debug('encoded: {}'.format(attribute)) + decoded = attribute.decode('utf-8', errors='ignore') + attribute = decoded + + try: + if rgx_filter.search(attribute): + log.info('filter attribute: {}'.format(attribute)) + entry['attributes']['reqMod'].remove(attribute) + + elif rgx_redacted.search(attribute): + match = rgx_attr_key.match(attribute) + + if match: + key = match.group('key') + delimiter = match.group('delimiter') + + if match.group('delimiter')[-1:] == ' ': + redacted = delimiter.join([key, '[REDACTED]']) + log.info('redact attribute: {}'.format(redacted)) + else: + redacted = ''.join([key, delimiter]) + log.info('redact attribute (nothing to redact): {}'.format(redacted)) + + entry['attributes']['reqMod'][index] = redacted + + else: + log.error('error: matching rgx_attr_key on redacted attribute') + unknown_redacted = 'unknownAttribute: [REDACTED]' + log.error('using: {}'.format(unknown_redacted)) + entry['attributes']['reqMod'][index] = unknown_redacted + + elif is_bytes: + match = rgx_attr_key.match(attribute) + + if match: + key_delimit = ''.join([match.group('key'), match.group('delimiter')]) + key_delimit_bytes = key_delimit.encode('utf-8') + log.debug('encoded key_delimit_bytes: {}'.format(key_delimit_bytes)) + value_bytes = entry['attributes']['reqMod'][index][len(key_delimit_bytes):] + + if value_bytes: + log.debug('value_bytes: {}'.format(value_bytes)) + value_base64 = base64.b64encode(value_bytes).decode('utf-8') + log.debug('value_base64: {}'.format(value_base64)) + half_encoded_base64 = ''.join([key_delimit, value_base64]) + log.info('bytes attribute: {}'.format(half_encoded_base64)) + entry['attributes']['reqMod'][index] = half_encoded_base64 + + else: + log.warning('warning: something strange has happened, this should not happen (maybe)') + encoded_base64 = base64.b64encode(entry['attributes']['reqMod'][index]).decode('utf-8') + log.error('bytes attribute, using base64 encoding: {}'.format(encoded_base64)) + entry['attributes']['reqMod'][index] = encoded_base64 + + else: + log.error('error: matching rgx_attr_key on bytes attribute') + encoded_base64 = base64.b64encode(entry['attributes']['reqMod'][index]).decode('utf-8') + log.error('bytes attribute, using full base64 encoding: {}'.format(encoded_base64)) + entry['attributes']['reqMod'][index] = encoded_base64 + + except TypeError: + log.exception('error: caught exception: while filtering attribute: {}'.format(attribute)) del entry['raw_dn'] del entry['raw_attributes'] @@ -103,18 +187,18 @@ def write(entry): entry['attributes'] = dict(entry['attributes']) # debug - #entry['raw_attributes'] = dict(entry['raw_attributes']) - #log.debug('original entry:') - #log.debug(pprint.pformat(entry, indent=1)) + entry['raw_attributes'] = dict(entry['raw_attributes']) + log.debug('original entry:') + log.debug(pprint.pformat(entry, indent=1)) filtered_entry = filtered(entry) log_action('write', filtered_entry) try: - deltalog.info('{}'.format(json.dumps(filtered_entry))) + deltalog.info('{} {} >>> {}'.format(req_type, req_dn, json.dumps(filtered_entry))) except: - log.exception('error: writing entry: {} with {}: {}'.format(entry['dn'], req_type, req_dn)) + log.exception('error: writing entry: {} >>> {}: {}'.format(entry['dn'], req_type, req_dn)) def sleep(start_time): @@ -143,8 +227,10 @@ def main(): log.info('trying to read last req from {}'.format(log_file_deltalog)) last_line = getlastline(log_file_deltalog).decode('utf-8') req_json = last_line.split(': ', maxsplit=1)[1] - log.debug('last line: {}'.format(req_json)) + req_json = last_line.split(' >>> ', maxsplit=1)[1] + #log.debug('last line: {}'.format(req_json.rstrip())) req = json.loads(req_json) + log.debug('last line pretty:') log.debug(pprint.pformat(req, indent=1)) req_start = req['attributes']['reqStart'][0] log.info('continuing from last logged req: {}'.format(req_start)) -- GitLab