From 2e2a756d7dd8c4f3d4798670cdc0e36f4a75d2cd Mon Sep 17 00:00:00 2001 From: sharkdp Date: Sun, 15 Sep 2019 15:37:08 +0200 Subject: [PATCH] Implement glob-based searches closes #284 --- Cargo.lock | 1 + Cargo.toml | 1 + src/app.rs | 9 ++++++ src/internal/mod.rs | 10 ++++-- src/main.rs | 13 ++++++-- tests/tests.rs | 74 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 104 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9f14a4bff..031294a1e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -104,6 +104,7 @@ dependencies = [ "ctrlc 3.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "diff 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", "filetime 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "globset 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "ignore 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 11ec364fc..9a289020f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ regex-syntax = "0.6" ctrlc = "3.1" humantime = "1.1.1" lscolors = "0.5" +globset = "0.4" [dependencies.clap] version = "2.31.2" diff --git a/src/app.rs b/src/app.rs index 65678f8e3..2fbae23f1 100644 --- a/src/app.rs +++ b/src/app.rs @@ -67,6 +67,12 @@ pub fn build_app() -> App<'static, 'static> { .short("i") .overrides_with("case-sensitive"), ) + .arg( + arg("glob") + .long("glob") + .short("g") + .conflicts_with("fixed-strings"), + ) .arg( arg("fixed-strings") .long("fixed-strings") @@ -250,6 +256,9 @@ fn usage() -> HashMap<&'static str, Help> { , "Case-insensitive search (default: smart case)" , "Perform a case-insensitive search. By default, fd uses case-insensitive searches, \ unless the pattern contains an uppercase character (smart case)."); + doc!(h, "glob" + , "Glob-based search (default: regular expression)" + , "Perform a glob-based search instead of a regular expression search."); doc!(h, "fixed-strings" , "Treat the pattern as a literal string" , "Treat the pattern as a literal string instead of a regular expression."); diff --git a/src/internal/mod.rs b/src/internal/mod.rs index 1f09b44a7..ec7d1fad6 100644 --- a/src/internal/mod.rs +++ b/src/internal/mod.rs @@ -10,7 +10,7 @@ use std::borrow::Cow; use std::ffi::{OsStr, OsString}; use regex_syntax::hir::Hir; -use regex_syntax::Parser; +use regex_syntax::ParserBuilder; pub use self::file_types::FileTypes; @@ -47,7 +47,9 @@ pub fn osstr_to_bytes(input: &OsStr) -> Cow<[u8]> { /// Determine if a regex pattern contains a literal uppercase character. pub fn pattern_has_uppercase_char(pattern: &str) -> bool { - Parser::new() + let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build(); + + parser .parse(pattern) .map(|hir| hir_has_uppercase_char(&hir)) .unwrap_or(false) @@ -59,9 +61,13 @@ fn hir_has_uppercase_char(hir: &Hir) -> bool { match *hir.kind() { HirKind::Literal(Literal::Unicode(c)) => c.is_uppercase(), + HirKind::Literal(Literal::Byte(b)) => char::from(b).is_uppercase(), HirKind::Class(Class::Unicode(ref ranges)) => ranges .iter() .any(|r| r.start().is_uppercase() || r.end().is_uppercase()), + HirKind::Class(Class::Bytes(ref ranges)) => ranges + .iter() + .any(|r| char::from(r.start()).is_uppercase() || char::from(r.end()).is_uppercase()), HirKind::Group(Group { ref hir, .. }) | HirKind::Repetition(Repetition { ref hir, .. }) => { hir_has_uppercase_char(hir) } diff --git a/src/main.rs b/src/main.rs index 8b0132745..c4af1ef81 100644 --- a/src/main.rs +++ b/src/main.rs @@ -24,6 +24,7 @@ use std::sync::Arc; use std::time; use atty::Stream; +use globset::Glob; use lscolors::LsColors; use regex::bytes::{RegexBuilder, RegexSetBuilder}; @@ -96,8 +97,16 @@ fn main() { ); } - // Treat pattern as literal string if '--fixed-strings' is used - let pattern_regex = if matches.is_present("fixed-strings") { + let pattern_regex = if matches.is_present("glob") { + let glob = match Glob::new(pattern) { + Ok(glob) => glob, + Err(e) => { + print_error_and_exit!("{}", e); + } + }; + glob.regex().to_owned() + } else if matches.is_present("fixed-strings") { + // Treat pattern as literal string if '--fixed-strings' is used regex::escape(pattern) } else { String::from(pattern) diff --git a/tests/tests.rs b/tests/tests.rs index 86ca719fc..0503ccf6d 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -239,6 +239,80 @@ fn test_case_insensitive() { ); } +/// Glob-based searches (--glob) +#[test] +fn test_glob_searches() { + let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); + + te.assert_output( + &["--glob", "*.foo"], + "a.foo + one/b.foo + one/two/c.foo + one/two/three/d.foo", + ); + + te.assert_output( + &["--glob", "[a-c].foo"], + "a.foo + one/b.foo + one/two/c.foo", + ); + + te.assert_output( + &["--glob", "[a-c].foo*"], + "a.foo + one/b.foo + one/two/C.Foo2 + one/two/c.foo", + ); +} + +/// Glob-based searches (--glob) in combination with full path searches (--full-path) +#[test] +fn test_full_path_glob_searches() { + let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); + + te.assert_output( + &["--glob", "--full-path", "**/one/**/*.foo"], + "one/b.foo + one/two/c.foo + one/two/three/d.foo", + ); +} + +#[test] +fn test_smart_case_glob_searches() { + let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); + + te.assert_output( + &["--glob", "c.foo*"], + "one/two/C.Foo2 + one/two/c.foo", + ); + + te.assert_output(&["--glob", "C.Foo*"], "one/two/C.Foo2"); +} + +/// Glob-based searches (--glob) in combination with --case-sensitive +#[test] +fn test_case_sensitive_glob_searches() { + let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); + + te.assert_output(&["--glob", "--case-sensitive", "c.foo*"], "one/two/c.foo"); +} + +/// Glob-based searches (--glob) in combination with --extension +#[test] +fn test_glob_searches_with_extension() { + let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); + + te.assert_output( + &["--glob", "--extension", "foo2", "[a-z].*"], + "one/two/C.Foo2", + ); +} + /// Full path search (--full-path) #[test] fn test_full_path() {