diff --git a/bin/deltalogparse.py b/bin/deltalogparse.py index b3d059e5915f2623862b7c5fa79a72da7903db20..a0b3818e2969260fcd5b00aa65368e5a3b2d6542 100755 --- a/bin/deltalogparse.py +++ b/bin/deltalogparse.py @@ -17,7 +17,7 @@ import dphysldap # security warning: using DEBUG may log sensitive data # log levels: CRITICAL | ERROR | WARNING | INFO | DEBUG | NOTSET # default: WARNING -log_level = logging.INFO +log_level = logging.DEBUG script_name = 'deltalogparse' deltalog_name = 'delta' @@ -32,8 +32,7 @@ object_classes = '(|(objectClass=auditModify)(objectClass=auditAdd)(objectClass= log = None deltalog = None -rgx_skip = re.compile( - r'^(' +re_skip = (r'^(' r'heartbeatTimestamp|' r'modifyTimestamp|' r'modifiersName|' @@ -41,24 +40,32 @@ rgx_skip = re.compile( r'(context|entry)CSN' r'):') -rgx_filter = re.compile( - r'^(' +re_filter = (r'^(' r'heartbeatTimestamp|' r'lastUse.*?|' r'(context|entry)CSN' r'):') -rgx_redacted = re.compile( - r'^(' +re_redacted = (r'^(' r'krbPrincipalKey|' + r'krbMKey|' + r'krbExtraData|' + r'krbPwdHistory|' r'preBlockPassword|' r'userPassword' r'):') -rgx_attr_key = re.compile(r'^(?P<key>.*?)(?P<delimiter>:[-+=#] )') +re_attribute = r'^(?P<key>[^:]+?)(?P<delim>:[-+=#] ?(; )?)(?P<value>.*)' + +rgx_skip = re.compile(re_skip.encode('utf-8')) +rgx_filter = re.compile(re_filter.encode('utf-8')) +rgx_redacted = re.compile(re_redacted.encode('utf-8')) +rgx_attribute = re.compile(re_attribute.encode('utf-8')) +rgx_byte = None def lock(lock_file): + """Try to get an exclusive lock on the lockfile (only one instance allowed)""" flock = open(lock_file, 'w') try: @@ -70,11 +77,14 @@ def lock(lock_file): def unlock(lock): + """Release the lock""" fcntl.flock(lock, fcntl.LOCK_UN) def init_log(): + """Setup logging to two logfiles""" global log, deltalog + log = logging.getLogger('log') log.setLevel(log_level) log_fh = logging.FileHandler(log_file_log) @@ -92,7 +102,7 @@ def init_log(): deltalog.addHandler(deltalog_fh) -def getlastline(fname): +def get_last_line(fname): """Using mmap to return a copy of the last line of a file""" with open(fname) as source: mapping = mmap.mmap(source.fileno(), 0, prot=mmap.PROT_READ) @@ -101,105 +111,71 @@ def getlastline(fname): def is_skipped(entry): """Check if at least on of the reqMod attributes is interesting""" - for attribute in entry['attributes']['reqMod']: + for attribute in entry['raw_attributes']['reqMod']: try: if not rgx_skip.search(attribute): log.debug('interesting attribute: {}'.format(attribute)) return False except: - log.debug('caught exception: while checking skipped, assume interesting attribute: {}'.format(attribute)) + log.exception('caught exception: while checking skipped on attribute: {}'.format(attribute)) return False return True -def filtered(entry): +def split_attribute(attribute): + """Split up LDAP attribute""" + match = rgx_attribute.match(attribute) + + key = match.group('key') + delim = match.group('delim') + value = match.group('value') + + return key, delim, value + + +def filtered(req_mods): """ - Filter attributes: + Filter and decode reqMod attributes: - not of interest: remove - sensitive: redact - - binary values: encode base64 + - byte values: encode base64 """ - if 'reqMod' in entry['attributes']: - for index, attribute in enumerate(entry['attributes']['reqMod']): - is_bytes = False - - if type(attribute) is bytes: - is_bytes = True - log.debug('this is a bytes object, decoding it...') - log.debug('encoded: {}'.format(attribute)) - decoded = attribute.decode('utf-8', errors='ignore') - attribute = decoded - - try: - if rgx_filter.search(attribute): - log.debug('filter attribute: {}'.format(attribute)) - entry['attributes']['reqMod'].remove(attribute) - - elif rgx_redacted.search(attribute): - match = rgx_attr_key.match(attribute) - - if match: - key = match.group('key') - delimiter = match.group('delimiter') - - if match.group('delimiter')[-1:] == ' ': - redacted = delimiter.join([key, '[REDACTED]']) - log.info('redact: {}'.format(redacted)) - else: - redacted = ''.join([key, delimiter]) - log.debug('redact (nothing to redact): {}'.format(redacted)) - - entry['attributes']['reqMod'][index] = redacted - - else: - log.error('error: matching rgx_attr_key on redacted attribute') - unknown_redacted = 'unknownAttribute: [REDACTED]' - log.debug('using: {}'.format(unknown_redacted)) - entry['attributes']['reqMod'][index] = unknown_redacted - - elif is_bytes: - match = rgx_attr_key.match(attribute) - - if match: - key_delimit = ''.join([match.group('key'), match.group('delimiter')]) - key_delimit_bytes = key_delimit.encode('utf-8') - log.debug('encoded key_delimit_bytes: {}'.format(key_delimit_bytes)) - value_bytes = entry['attributes']['reqMod'][index][len(key_delimit_bytes):] - - if value_bytes: - log.debug('value_bytes: {}'.format(value_bytes)) - value_base64 = base64.b64encode(value_bytes).decode('utf-8') - log.debug('value_base64: {}'.format(value_base64)) - half_encoded_base64 = ''.join([key_delimit, value_base64]) - log.debug('bytes attribute: {}'.format(half_encoded_base64)) - entry['attributes']['reqMod'][index] = half_encoded_base64 - - else: - log.warning('warning: something strange has happened, this should not happen (maybe)') - encoded_base64 = base64.b64encode(entry['attributes']['reqMod'][index]).decode('utf-8') - log.debug('bytes attribute, using base64 encoding: {}'.format(encoded_base64)) - entry['attributes']['reqMod'][index] = encoded_base64 - - else: - log.error('error: matching rgx_attr_key on bytes attribute') - encoded_base64 = base64.b64encode(entry['attributes']['reqMod'][index]).decode('utf-8') - log.debug('bytes attribute, using full base64 encoding: {}'.format(encoded_base64)) - entry['attributes']['reqMod'][index] = encoded_base64 - - except: - log.exception('error: caught exception: while filtering attribute: {}'.format(attribute)) + filtered_req_mods = list() - del entry['raw_dn'] - del entry['raw_attributes'] + for index, attribute in enumerate(req_mods): + try: + if rgx_filter and rgx_filter.search(attribute): + log.debug('filter attribute: {}'.format(attribute)) + continue + + elif rgx_redacted and rgx_redacted.search(attribute): + key, delim, value = split_attribute(attribute) + if value: + value = b'[REDACTED]' + attribute = delim.join([key, value]) + log.info('redact: {}'.format(attribute)) - return entry + elif rgx_byte and rgx_byte.search(attribute): + key, delim, value = split_attribute(attribute) + if value: + value = base64.b64encode(value) + attribute = delim.join([key, value]) + log.debug('bytes attribute: {}'.format(attribute)) + + filtered_req_mods.append(attribute.decode('utf-8')) + + except: + log.exception('error: while filtering attribute: {}'.format(attribute)) + + return filtered_req_mods def log_action(action, entry): """Log an action on a req""" req_type = entry['attributes']['reqType'][0] req_dn = entry['attributes']['reqDN'][0] + if not action == 'skip': log.debug('entry:') log.debug(pprint.pformat(entry, indent=1)) @@ -215,17 +191,16 @@ def write(entry): entry['attributes'] = dict(entry['attributes']) - # debugging - #entry['raw_attributes'] = dict(entry['raw_attributes']) - #log.debug('original entry:') - #log.debug(pprint.pformat(entry, indent=1)) + if 'reqMod' in entry['attributes']: + entry['attributes']['reqMod'] = filtered(entry['raw_attributes']['reqMod']) - filtered_entry = filtered(entry) + del entry['raw_dn'] + del entry['raw_attributes'] - log_action('write', filtered_entry) + log_action('write', entry) try: - deltalog.info('{} {} >>> {}'.format(req_type, req_dn, json.dumps(filtered_entry))) + deltalog.info('{} {} >>> {}'.format(req_type, req_dn, json.dumps(entry))) except: log.exception('error: writing entry: {} >>> {}: {}'.format(entry['dn'], req_type, req_dn)) @@ -242,13 +217,8 @@ def sleep(start_time): time.sleep(sleep_time) -def main(): - """Connect to slapd socket, search accesslog, write interesting changes""" - flock = lock(lock_file) - - init_log() - - slapd = dphysldap.Slapd() +def connect(slapd): + """Connect to slapd or reconnect forever""" try: slapd.connect() except: @@ -256,22 +226,75 @@ def main(): if slapd.reconnect(): log.error('connected to slapd') + +def get_byte_attrs(): + """Read the schema from slapd and extract byte attributes""" + global rgx_byte + byte_attrs = list() + byte_syntaxes = set() + byte_syntaxes.add('1.3.6.1.4.1.1466.115.121.1.40') + + log.info('connecting to slapd to get the schema...') + slapd = dphysldap.Slapd(get_info='SCHEMA') + connect(slapd) + + log.info('searching schema for octet string attribute types...') + if slapd.server.schema: + + for key, value in slapd.server.schema.ldap_syntaxes.items(): + if value.extensions: + for extension in value.extensions: + if extension[0] == 'X-NOT-HUMAN-READABLE' and extension[1][0] == 'TRUE': + byte_syntaxes.add(key) + + for key, value in slapd.server.schema.attribute_types.items(): + if value.syntax in byte_syntaxes: + byte_attrs.append(key) + + for item in byte_syntaxes: + log.debug('byte syntax: {}'.format(item)) + + for item in byte_attrs: + log.debug('byte attr: {}'.format(item)) + + if byte_attrs: + re_byte = ''.join([r'^(', '|'.join(byte_attrs), r'):']) + rgx_byte = re.compile(re_byte.encode('utf-8')) + + log.info('disconnecting...') + slapd.connection.unbind() + + +def get_req_start(): + """Get the timestamp of the last logged entry""" try: - log.error('trying to read last req from {}'.format(log_file_deltalog)) - last_line = getlastline(log_file_deltalog).decode('utf-8') - #req_json = last_line.split(': ', maxsplit=1)[1] + log.info('trying to read last req from {}'.format(log_file_deltalog)) + last_line = get_last_line(log_file_deltalog).decode('utf-8') req_json = last_line.split(' >>> ', maxsplit=1)[1] - log.debug('last line: {}'.format(req_json.rstrip())) req = json.loads(req_json) - log.debug('last line pretty:') - log.debug(pprint.pformat(req, indent=1)) req_start = req['attributes']['reqStart'][0] - log.error('continuing from last logged req: {}'.format(req_start)) + log.info('continuing from last logged req: {}'.format(req_start)) except: log.exception('error: trying to retrieve last req') req_start = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S.%fZ') log.error('using current timestamp: {}'.format(req_start)) + return req_start + + +def main(): + """Connect to slapd socket, search accesslog, write interesting changes""" + flock = lock(lock_file) + + init_log() + + get_byte_attrs() + req_start = get_req_start() + + log.info('connecting to slapd...') + slapd = dphysldap.Slapd(get_info='NONE') + connect(slapd) + while True: start_time = time.perf_counter() @@ -292,7 +315,6 @@ def main(): req_start = entry['attributes']['reqStart'][0] req_type = entry['attributes']['reqType'][0] - req_dn = entry['attributes']['reqDN'][0] log.debug('processing: {}'.format(entry['dn'])) diff --git a/lib/isg/dphysldap.py b/lib/isg/dphysldap.py index 9c2c5485d29865f7a951e366c095c54469d5103c..713f327bd8735478e003aae16de39aa7f8ace058 100644 --- a/lib/isg/dphysldap.py +++ b/lib/isg/dphysldap.py @@ -13,7 +13,9 @@ SERVERS = ['phd-aa1.ethz.ch', 'phd-aa2.ethz.ch', 'phd-aa3.ethz.ch'] BASE = 'dc=phys,dc=ethz,dc=ch' CA_CERTS = '/etc/ssl/certs/ca-certificates.crt' SLAPD_SOCKET = 'ldapi:///var/run/slapd/ldapi' -SLAPD_INFO = [ldap3.ALL, ldap3.OFFLINE_SLAPD_2_4] +SLAPD_INFO = ldap3.ALL +#SLAPD_INFO = ldap3.OFFLINE_SLAPD_2_4 +#SLAPD_INFO = ldap3.NONE SLAPD_SEARCH_SCOPE = ldap3.SUBTREE SLAPD_SEARCH_ATTRS = [ldap3.ALL_ATTRIBUTES, ldap3.ALL_OPERATIONAL_ATTRIBUTES]