Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse arrays of objects in AWS WAF logs #459

Merged
merged 5 commits into from
Aug 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions aws/logs_monitoring/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from parsing import (
parse,
separate_security_hub_findings,
parse_aws_waf_logs,
)
from telemetry import (
DD_FORWARDER_TELEMETRY_NAMESPACE_PREFIX,
Expand Down Expand Up @@ -194,6 +195,10 @@ def transform(events):
events.remove(event)
events.extend(findings)

waf = parse_aws_waf_logs(event)
if waf != event:
events.remove(event)
events.append(waf)
return events


Expand Down
118 changes: 118 additions & 0 deletions aws/logs_monitoring/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,124 @@ def cwevent_handler(event, metadata):
yield data


def parse_aws_waf_logs(event):
    """Parse out complex arrays of objects in AWS WAF logs

    Attributes to convert:
        httpRequest.headers
        nonTerminatingMatchingRules
        rateBasedRuleList
        ruleGroupList

    This prevents having an unparsable array of objects in the final log.

    Args:
        event: A log event, either a dict or a JSON string encoding one.

    Returns:
        A deep copy of the event with the attributes above converted from
        arrays into objects keyed by rule id / rule name / header name, and
        with ``message`` replaced by its decoded dict form.

        The input is returned without any conversion when it is not a WAF
        event (its ``DD_SOURCE`` entry is not ``"waf"``), or when the event
        or its ``message`` cannot be decoded into a JSON object. Note that a
        JSON string that decodes to a non-WAF object is returned in its
        decoded (dict) form.
    """
    original_event = event
    if isinstance(event, str):
        try:
            event = json.loads(event)
        except json.JSONDecodeError:
            logger.debug("Argument provided for waf parser is not valid JSON")
            return event
    # Valid JSON is not necessarily an object ("123", "[]", "null", ...);
    # anything that is not a dict cannot be a WAF log, so hand it back as is.
    if not isinstance(event, dict):
        return original_event
    if event.get(DD_SOURCE) != "waf":
        return event

    # Work on a copy so the caller can compare the result with its input.
    event_copy = copy.deepcopy(event)

    message = event_copy.get("message", {})
    if isinstance(message, str):
        try:
            message = json.loads(message)
        except json.JSONDecodeError:
            logger.debug("Failed to decode waf message")
            return event
    if not isinstance(message, dict):
        # Nothing to convert in a scalar / array / null message.
        return event

    http_request = message.get("httpRequest")
    if isinstance(http_request, dict) and http_request.get("headers"):
        http_request["headers"] = convert_rule_to_nested_json(
            http_request["headers"]
        )

    rule_groups = message.get("ruleGroupList")
    if rule_groups and isinstance(rule_groups, list):
        message["ruleGroupList"] = _nest_waf_rule_groups(rule_groups)

    rate_based_rules = message.get("rateBasedRuleList")
    if rate_based_rules:
        message["rateBasedRuleList"] = convert_rule_to_nested_json(rate_based_rules)

    non_terminating_rules = message.get("nonTerminatingMatchingRules")
    if non_terminating_rules:
        message["nonTerminatingMatchingRules"] = convert_rule_to_nested_json(
            non_terminating_rules
        )

    event_copy["message"] = message
    return event_copy


def _nest_waf_rule_groups(rule_groups):
    """Convert a ruleGroupList array into an object keyed by rule group id.

    Groups without a (truthy) ``ruleGroupId`` are collected under ``None``.
    Only ``terminatingRule``, ``nonTerminatingMatchingRules`` and
    ``excludedRules`` are carried over from each group.
    """
    nested_groups = {}
    for rule_group in rule_groups:
        if not isinstance(rule_group, dict):
            continue
        group_id = rule_group.get("ruleGroupId") or None
        nested_group = nested_groups.setdefault(group_id, {})

        # The terminating rule is a single object, nested under its own id.
        terminating_rule = rule_group.get("terminatingRule")
        if terminating_rule:
            _merge_nested_rules(nested_group, "terminatingRule", terminating_rule)

        # These two are arrays of rules; each rule is nested under its own id.
        # An empty array still produces an (empty) object for the attribute.
        for attribute in ("nonTerminatingMatchingRules", "excludedRules"):
            rules = rule_group.get(attribute)
            if isinstance(rules, list):
                _merge_nested_rules(nested_group, attribute, rules)
    return nested_groups


def _merge_nested_rules(nested_group, attribute, rules):
    """Merge the nested form of ``rules`` into ``nested_group[attribute]``."""
    nested_group.setdefault(attribute, {}).update(convert_rule_to_nested_json(rules))


def convert_rule_to_nested_json(rule):
    """Turn a WAF rule (or an array of rules/headers) into a keyed object.

    Args:
        rule: Either a single rule dict, or a list of entries. List entries
            are keyed, in order of preference, by a truthy ``ruleId``, a
            truthy ``rateBasedRuleName``, or ``name`` (for ``name``/``value``
            pairs such as HTTP headers).

    Returns:
        A dict mapping each key to the entry with its identifying field
        removed (or to the bare ``value`` for ``name``/``value`` pairs).
        Entries with no recognised identifier are stored under ``None``;
        if there are several, the last one wins. A single (non-list) rule
        is keyed by its ``ruleId`` only, or by ``None`` if it has none.

    The input is never modified; new dicts are built for the values.
    """
    if not isinstance(rule, list):
        if isinstance(rule, dict) and rule.get("ruleId"):
            return {rule["ruleId"]: _without_field(rule, "ruleId")}
        return {None: rule}

    result_obj = {}
    for entry in rule:
        # The key is decided per entry: carrying it over from the previous
        # iteration would let an unidentified entry overwrite an earlier rule.
        key = None
        value = entry
        if isinstance(entry, dict):
            if entry.get("ruleId"):
                key = entry["ruleId"]
                value = _without_field(entry, "ruleId")
            elif entry.get("rateBasedRuleName"):
                key = entry["rateBasedRuleName"]
                value = _without_field(entry, "rateBasedRuleName")
            elif "name" in entry and "value" in entry:
                key = entry["name"]
                value = entry["value"]
        result_obj[key] = value
    return result_obj


def _without_field(entry, field):
    """Return a shallow copy of ``entry`` that omits ``field``."""
    return {name: value for name, value in entry.items() if name != field}


def separate_security_hub_findings(event):
"""Replace Security Hub event with series of events based on findings

Expand Down
Loading