Skip to content

Commit

Permalink
Implement lint for regex::Regex compilation inside a loop
Browse files Browse the repository at this point in the history
  • Loading branch information
GnomedDev committed Sep 20, 2024
1 parent 903293b commit 63c6dac
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5873,6 +5873,7 @@ Released 2018-09-13
[`ref_in_deref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_in_deref
[`ref_option_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option_ref
[`ref_patterns`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_patterns
[`regex_compile_in_loop`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_compile_in_loop
[`regex_macro`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_macro
[`renamed_function_params`]: https://rust-lang.github.io/rust-clippy/master/index.html#renamed_function_params
[`repeat_once`]: https://rust-lang.github.io/rust-clippy/master/index.html#repeat_once
Expand Down
1 change: 1 addition & 0 deletions clippy_lints/src/declared_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,7 @@ pub static LINTS: &[&crate::LintInfo] = &[
crate::ref_patterns::REF_PATTERNS_INFO,
crate::reference::DEREF_ADDROF_INFO,
crate::regex::INVALID_REGEX_INFO,
crate::regex::REGEX_COMPILE_IN_LOOP_INFO,
crate::regex::TRIVIAL_REGEX_INFO,
crate::repeat_vec_with_capacity::REPEAT_VEC_WITH_CAPACITY_INFO,
crate::reserve_after_initialization::RESERVE_AFTER_INITIALIZATION_INFO,
Expand Down
99 changes: 94 additions & 5 deletions clippy_lints/src/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use clippy_utils::source::SpanRangeExt;
use clippy_utils::{def_path_def_ids, path_def_id, paths};
use rustc_ast::ast::{LitKind, StrStyle};
use rustc_hir::def_id::DefIdMap;
use rustc_hir::intravisit::{self, Visitor};
use rustc_hir::{BorrowKind, Expr, ExprKind};
use rustc_lint::{LateContext, LateLintPass};
use rustc_session::impl_lint_pass;
Expand Down Expand Up @@ -55,6 +56,42 @@ declare_clippy_lint! {
"trivial regular expressions"
}

declare_clippy_lint! {
/// ### What it does
///
/// Checks for [regex](https://crates.io/crates/regex) compilation inside a loop with a literal.
///
/// ### Why is this bad?
///
/// Compiling a regex is a much more expensive operation than using one, and a compiled regex can be used multiple times.
///
/// ### Example
/// ```no_run
/// # let haystacks = [""];
/// for haystack in haystacks {
/// let regex = regex::Regex::new(MY_REGEX);
/// if regex.is_match(heystack) {
/// // Perform operation
/// }
/// }
/// ```
/// should be replaced with
/// ```no_run
/// # let haystacks = [""];
/// let regex = regex::Regex::new(MY_REGEX);
/// for haystack in haystacks {
/// if regex.is_match(heystack) {
/// // Perform operation
/// }
/// }
/// ```
///
#[clippy::version = "1.83.0"]
pub REGEX_COMPILE_IN_LOOP,
perf,
"regular expression compilation performed in a loop"
}

#[derive(Copy, Clone)]
enum RegexKind {
Unicode,
Expand All @@ -68,7 +105,7 @@ pub struct Regex {
definitions: DefIdMap<RegexKind>,
}

impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX]);
impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX, REGEX_COMPILE_IN_LOOP]);

impl<'tcx> LateLintPass<'tcx> for Regex {
fn check_crate(&mut self, cx: &LateContext<'tcx>) {
Expand All @@ -92,17 +129,69 @@ impl<'tcx> LateLintPass<'tcx> for Regex {
}

fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>) {
if let ExprKind::Call(fun, [arg]) = expr.kind
&& let Some(def_id) = path_def_id(cx, fun)
&& let Some(regex_kind) = self.definitions.get(&def_id)
{
if let Some((regex_kind, _, arg)) = extract_regex_call(&self.definitions, cx, expr) {
match regex_kind {
RegexKind::Unicode => check_regex(cx, arg, true),
RegexKind::UnicodeSet => check_set(cx, arg, true),
RegexKind::Bytes => check_regex(cx, arg, false),
RegexKind::BytesSet => check_set(cx, arg, false),
}
}

if let ExprKind::Loop(block, _, _, span) = expr.kind {
let mut visitor = RegexCompVisitor {
cx,
loop_span: span,
definitions: &self.definitions,
};

visitor.visit_block(block);
}
}
}

struct RegexCompVisitor<'pass, 'tcx> {
definitions: &'pass DefIdMap<RegexKind>,
cx: &'pass LateContext<'tcx>,
loop_span: Span,
}

impl<'pass, 'tcx> Visitor<'tcx> for RegexCompVisitor<'pass, 'tcx> {
type NestedFilter = intravisit::nested_filter::None;

fn visit_expr(&mut self, expr: &'tcx Expr<'tcx>) {
if let Some((_, fun, arg)) = extract_regex_call(self.definitions, self.cx, expr)
&& (matches!(arg.kind, ExprKind::Lit(_)) || const_str(self.cx, arg).is_some())
{
span_lint_and_help(
self.cx,
REGEX_COMPILE_IN_LOOP,
fun.span,
"compiling a regex in a loop",
Some(self.loop_span),
"move the regex construction outside this loop",
);
}

// Avoid recursing into loops, as the LateLintPass::visit_expr will do this already.
if !matches!(expr.kind, ExprKind::Loop(..)) {
intravisit::walk_expr(self, expr);
}
}
}

fn extract_regex_call<'tcx>(
definitions: &DefIdMap<RegexKind>,
cx: &LateContext<'tcx>,
expr: &'tcx Expr<'tcx>,
) -> Option<(RegexKind, &'tcx Expr<'tcx>, &'tcx Expr<'tcx>)> {
if let ExprKind::Call(fun, [arg]) = expr.kind
&& let Some(def_id) = path_def_id(cx, fun)
&& let Some(regex_kind) = definitions.get(&def_id)
{
Some((*regex_kind, fun, arg))
} else {
None
}
}

Expand Down
26 changes: 25 additions & 1 deletion tests/ui/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
clippy::needless_borrow,
clippy::needless_borrows_for_generic_args
)]
#![warn(clippy::invalid_regex, clippy::trivial_regex)]
#![warn(clippy::invalid_regex, clippy::trivial_regex, clippy::regex_compile_in_loop)]

extern crate regex;

Expand Down Expand Up @@ -118,7 +118,31 @@ fn trivial_regex() {
let _ = BRegex::new(r"\b{start}word\b{end}");
}

fn regex_compile_in_loop() {
loop {
let regex = Regex::new("a.b");
//~^ ERROR: compiling a regex in a loop
let regex = BRegex::new("a.b");
//~^ ERROR: compiling a regex in a loop

if true {
let regex = Regex::new("a.b");
//~^ ERROR: compiling a regex in a loop
}

for _ in 0..10 {
let nested_regex = Regex::new("a.b");
//~^ ERROR: compiling a regex in a loop
}
}

for i in 0..10 {
let dependant_regex = Regex::new(&format!("{i}"));
}
}

fn main() {
syntax_error();
trivial_regex();
regex_compile_in_loop();
}
52 changes: 51 additions & 1 deletion tests/ui/regex.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -195,5 +195,55 @@ LL | let binary_trivial_empty = BRegex::new("^$");
|
= help: consider using `str::is_empty`

error: aborting due to 24 previous errors
error: compiling a regex in a loop
--> tests/ui/regex.rs:123:21
|
LL | let regex = Regex::new("a.b");
| ^^^^^^^^^^
|
help: move the regex construction outside this loop
--> tests/ui/regex.rs:122:5
|
LL | loop {
| ^^^^
= note: `-D clippy::regex-compile-in-loop` implied by `-D warnings`
= help: to override `-D warnings` add `#[allow(clippy::regex_compile_in_loop)]`

error: compiling a regex in a loop
--> tests/ui/regex.rs:125:21
|
LL | let regex = BRegex::new("a.b");
| ^^^^^^^^^^^
|
help: move the regex construction outside this loop
--> tests/ui/regex.rs:122:5
|
LL | loop {
| ^^^^

error: compiling a regex in a loop
--> tests/ui/regex.rs:129:25
|
LL | let regex = Regex::new("a.b");
| ^^^^^^^^^^
|
help: move the regex construction outside this loop
--> tests/ui/regex.rs:122:5
|
LL | loop {
| ^^^^

error: compiling a regex in a loop
--> tests/ui/regex.rs:134:32
|
LL | let nested_regex = Regex::new("a.b");
| ^^^^^^^^^^
|
help: move the regex construction outside this loop
--> tests/ui/regex.rs:133:9
|
LL | for _ in 0..10 {
| ^^^^^^^^^^^^^^

error: aborting due to 28 previous errors

0 comments on commit 63c6dac

Please sign in to comment.