Skip to content

Commit

Permalink
first semgrep sarif codemod for jinja autoescape
Browse files Browse the repository at this point in the history
  • Loading branch information
clavedeluna committed Jul 1, 2024
1 parent eb6ef50 commit e585cf7
Show file tree
Hide file tree
Showing 9 changed files with 180 additions and 27 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ all = [
core = "core_codemods:registry"
sonar = "core_codemods:sonar_registry"
defectdojo = "core_codemods:defectdojo_registry"
semgrep = "core_codemods:semgrep_registry"

[project.entry-points.sarif_detectors]
"semgrep" = "codemodder.semgrep:SemgrepSarifToolDetector"
Expand Down
24 changes: 0 additions & 24 deletions src/codemodder/codemods/semgrep.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import io
import os
import tempfile
from functools import cache
from pathlib import Path

import yaml

from codemodder.codemods.base_detector import BaseDetector
from codemodder.context import CodemodExecutionContext
from codemodder.result import ResultSet
from codemodder.semgrep import SemgrepResultSet
from codemodder.semgrep import run as semgrep_run


Expand Down Expand Up @@ -49,25 +47,3 @@ def apply(
yaml_files = self.get_yaml_files(codemod_id)
with context.timer.measure("semgrep"):
return semgrep_run(context, yaml_files, files_to_analyze)


class SemgrepSarifFileDetector(BaseDetector):
    """Detector that takes its findings from Semgrep SARIF files in the context."""

    def apply(
        self,
        codemod_id: str,
        context: CodemodExecutionContext,
        files_to_analyze: list[Path],
    ) -> ResultSet:
        """Return the findings of the SARIF files registered under ``"semgrep"``.

        ``codemod_id`` and ``files_to_analyze`` are part of the detector
        signature but are not used here.
        """
        # Neither argument influences the lookup; discard them explicitly.
        del codemod_id, files_to_analyze
        # The helper is cached, so it needs a hashable tuple instead of a list.
        sarif_files = tuple(context.tool_result_files_map.get("semgrep", ()))
        return process_semgrep_findings(sarif_files)


@cache
def process_semgrep_findings(semgrep_sarif_files: tuple[str, ...]) -> ResultSet:
    """Merge the findings of every given Semgrep SARIF file into one result set.

    The function is memoized with ``functools.cache``: the argument must be
    hashable (hence a tuple, not a list), and calling it again with an equal
    tuple returns the previously built object.

    Args:
        semgrep_sarif_files: SARIF files to load, in any number. A falsy value
            (such as an empty tuple) yields an empty result set.

    Returns:
        A ``SemgrepResultSet`` combining ``SemgrepResultSet.from_sarif`` of
        each file.
    """
    # NOTE(review): the cached result is shared between callers; presumably
    # nobody mutates it in place -- confirm before doing so.
    results = SemgrepResultSet()
    for file in semgrep_sarif_files or ():
        results |= SemgrepResultSet.from_sarif(file)
    return results
17 changes: 16 additions & 1 deletion src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,22 @@ class DocMetadata:
for name in SONAR_CODEMOD_NAMES
}

ALL_CODEMODS_METADATA = CORE_CODEMODS | DEFECTDOJO_CODEMODS | SONAR_CODEMODS
SEMGREP_CODEMOD_NAMES = [
"enable-jinja2-autoescape-semgrep",
]
SEMGREP_CODEMODS = {
name: DocMetadata(
importance=CORE_CODEMODS[
core_codemod_name := "-".join(name.split("-")[:-1])
].importance,
guidance_explained=CORE_CODEMODS[core_codemod_name].guidance_explained,
need_sarif="Yes (Semgrep)",
)
for name in SEMGREP_CODEMOD_NAMES
}
ALL_CODEMODS_METADATA = (
CORE_CODEMODS | DEFECTDOJO_CODEMODS | SONAR_CODEMODS | SEMGREP_CODEMODS
)


def generate_docs(codemod):
Expand Down
8 changes: 8 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from .secure_flask_cookie import SecureFlaskCookie
from .secure_flask_session_config import SecureFlaskSessionConfig
from .secure_random import SecureRandom
from .semgrep.semgrep_enable_jinja2_autoescape import SemgrepEnableJinja2Autoescape
from .sonar.sonar_break_or_continue_out_of_loop import SonarBreakOrContinueOutOfLoop
from .sonar.sonar_disable_graphql_introspection import SonarDisableGraphQLIntrospection
from .sonar.sonar_django_json_response_type import SonarDjangoJsonResponseType
Expand Down Expand Up @@ -192,3 +193,10 @@
DjangoSecureSetCookie,
],
)

# Collection of the codemods whose origin is "semgrep".
semgrep_registry = CodemodCollection(
    codemods=[SemgrepEnableJinja2Autoescape],
    origin="semgrep",
)
74 changes: 74 additions & 0 deletions src/core_codemods/semgrep/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from functools import cache
from pathlib import Path

from codemodder.codemods.base_codemod import Metadata, Reference, ToolMetadata, ToolRule
from codemodder.codemods.base_detector import BaseDetector
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.context import CodemodExecutionContext
from codemodder.result import ResultSet
from codemodder.semgrep import SemgrepResultSet
from core_codemods.api.core_codemod import CoreCodemod, SASTCodemod


class SemgrepCodemod(SASTCodemod):
    """SAST codemod whose origin is Semgrep."""

    @property
    def origin(self):
        """Name of the tool this codemod belongs to."""
        return "semgrep"

    @classmethod
    def from_core_codemod(
        cls,
        name: str,
        other: CoreCodemod,
        rule_id: str,
        rule_name: str,
        rule_url: str,
        transformer: BaseTransformerPipeline | None = None,
    ):
        """Build a Semgrep codemod that mirrors an existing core codemod.

        Summary, review guidance, description and references are taken from
        ``other``; the Semgrep rule is appended to the references and recorded
        as the tool rule.

        Args:
            name: Name of the new codemod.
            other: Core codemod whose metadata (and, by default, transformer)
                is reused.
            rule_id: Semgrep rule id; also the only requested rule.
            rule_name: Human-readable rule name.
            rule_url: URL of the rule's documentation.
            transformer: Optional replacement for ``other.transformer``.

        Returns:
            An instance of ``cls`` wired to ``SemgrepSarifFileDetector``.
        """
        # Instantiate ``cls`` rather than the hard-coded base class so that
        # subclasses calling this alternate constructor get their own type.
        return cls(
            metadata=Metadata(
                name=name,
                summary=other.summary,
                review_guidance=other._metadata.review_guidance,
                references=(
                    other.references + [Reference(url=rule_url, description=rule_name)]
                ),
                description=other.description,
                tool=ToolMetadata(
                    name="Semgrep",
                    rules=[
                        ToolRule(
                            id=rule_id,
                            name=rule_name,
                            url=rule_url,
                        )
                    ],
                ),
            ),
            transformer=transformer or other.transformer,
            detector=SemgrepSarifFileDetector(),
            requested_rules=[rule_id],
        )


class SemgrepSarifFileDetector(BaseDetector):
    """Detector that takes its findings from Semgrep SARIF files in the context."""

    def apply(
        self,
        codemod_id: str,
        context: CodemodExecutionContext,
        files_to_analyze: list[Path],
    ) -> ResultSet:
        """Return the findings of the SARIF files registered under ``"semgrep"``.

        ``codemod_id`` and ``files_to_analyze`` are part of the detector
        signature but are not used here.
        """
        # Neither argument influences the lookup; discard them explicitly.
        del codemod_id, files_to_analyze
        # The helper is cached, so it needs a hashable tuple instead of a list.
        sarif_files = tuple(context.tool_result_files_map.get("semgrep", ()))
        return process_semgrep_findings(sarif_files)


@cache
def process_semgrep_findings(semgrep_sarif_files: tuple[str, ...]) -> ResultSet:
    """Merge the findings of every given Semgrep SARIF file into one result set.

    The function is memoized with ``functools.cache``: the argument must be
    hashable (hence a tuple, not a list), and calling it again with an equal
    tuple returns the previously built object.

    Args:
        semgrep_sarif_files: SARIF files to load, in any number. A falsy value
            (such as an empty tuple) yields an empty result set.

    Returns:
        A ``SemgrepResultSet`` combining ``SemgrepResultSet.from_sarif`` of
        each file.
    """
    # NOTE(review): the cached result is shared between callers; presumably
    # nobody mutates it in place -- confirm before doing so.
    results = SemgrepResultSet()
    for file in semgrep_sarif_files or ():
        results |= SemgrepResultSet.from_sarif(file)
    return results
10 changes: 10 additions & 0 deletions src/core_codemods/semgrep/semgrep_enable_jinja2_autoescape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from core_codemods.enable_jinja2_autoescape import EnableJinja2Autoescape
from core_codemods.semgrep.api import SemgrepCodemod

# Semgrep counterpart of the core jinja2 autoescape codemod, bound to the
# Semgrep rule identified below.
SemgrepEnableJinja2Autoescape = SemgrepCodemod.from_core_codemod(
    other=EnableJinja2Autoescape,
    name="enable-jinja2-autoescape-semgrep",
    rule_name="direct-use-of-jinja2",
    rule_id="python.flask.security.xss.audit.direct-use-of-jinja2.direct-use-of-jinja2",
    rule_url="https://sg.run/RoKe",
)
67 changes: 67 additions & 0 deletions tests/codemods/semgrep/test_semgrep_enable_jinja2_autoescape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import json

from codemodder.codemods.test import BaseSASTCodemodTest
from core_codemods.semgrep.semgrep_enable_jinja2_autoescape import (
SemgrepEnableJinja2Autoescape,
)


class TestEnableJinja2Autoescape(BaseSASTCodemodTest):
    """Tests for the Semgrep variant of the jinja2 autoescape codemod."""

    codemod = SemgrepEnableJinja2Autoescape
    tool = "semgrep"

    def test_name(self):
        """The codemod carries its Semgrep-specific name."""
        assert self.codemod.name == "enable-jinja2-autoescape-semgrep"

    def test_import(self, tmpdir):
        """A finding on ``jinja2.Environment()`` results in ``autoescape=True``."""
        input_code = """
import jinja2
env = jinja2.Environment()
var = "hello"
"""
        expected_output = """
import jinja2
env = jinja2.Environment(autoescape=True)
var = "hello"
"""
        # The SARIF payload is assembled from the inside out; key order is kept
        # as-is because it determines the serialized JSON text.
        region = {
            "endColumn": 27,
            "endLine": 3,
            "snippet": {"text": "env = jinja2.Environment()"},
            "startColumn": 7,
            "startLine": 3,
        }
        location = {
            "physicalLocation": {
                "artifactLocation": {
                    "uri": "code.py",
                    "uriBaseId": "%SRCROOT%",
                },
                "region": region,
            }
        }
        finding = {
            "fingerprints": {"matchBasedId/v1": "123"},
            "locations": [location],
            "message": {
                "text": "Detected direct use of jinja2. If not done properly, this may bypass HTML escaping which opens up the application to cross-site scripting (XSS) vulnerabilities. Prefer using the Flask method 'render_template()' and templates with a '.html' extension in order to prevent XSS."
            },
            "properties": {},
            "ruleId": "python.flask.security.xss.audit.direct-use-of-jinja2.direct-use-of-jinja2",
        }
        sarif = {"runs": [{"results": [finding]}]}

        self.run_and_assert(
            tmpdir,
            input_code,
            expected_output,
            results=json.dumps(sarif),
        )
4 changes: 3 additions & 1 deletion tests/test_codemod_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ def test_load_codemod_docs_info(codemod: BaseCodemod):
"Merge After Cursory Review",
"Merge Without Review",
)
assert codemod.name in ALL_CODEMODS_METADATA
assert (
codemod.name in ALL_CODEMODS_METADATA
), f"{codemod.name} has not been added to generate_docs.py"
2 changes: 1 addition & 1 deletion tests/test_semgrep.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

import pytest

from codemodder.codemods.semgrep import SemgrepSarifFileDetector
from codemodder.context import CodemodExecutionContext
from codemodder.semgrep import SemgrepResultSet, SemgrepSarifToolDetector
from core_codemods.semgrep.api import SemgrepSarifFileDetector

# Path of the ``samples`` directory located next to this test module.
SAMPLE_DATA_PATH = Path(__file__).parent / "samples"

Expand Down

0 comments on commit e585cf7

Please sign in to comment.