Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

walk: Use unbounded channels #1414

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ normpath = "1.1.1"
crossbeam-channel = "0.5.8"
clap_complete = {version = "4.4.4", optional = true}
faccess = "0.2.4"
thread_local = "1.1.7"

[patch.crates-io]
ignore = { git = "https://github.com/tavianator/ripgrep", branch = "fd" }
Expand Down
31 changes: 16 additions & 15 deletions src/exec/job.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@ use std::sync::Mutex;
use crossbeam_channel::Receiver;

use crate::config::Config;
use crate::dir_entry::DirEntry;
use crate::error::print_error;
use crate::exit_codes::{merge_exitcodes, ExitCode};
use crate::walk::WorkerResult;
use crate::walk::{WorkerMsg, WorkerResult};

use super::CommandSet;

/// An event loop that listens for inputs from the `rx` receiver. Each received input will
/// generate a command with the supplied command template. The generated command will then
/// be executed, and this process will continue until the receiver's sender has closed.
pub fn job(
rx: Receiver<WorkerResult>,
rx: Receiver<WorkerMsg<'_>>,
cmd: &CommandSet,
out_perm: &Mutex<()>,
config: &Config,
Expand All @@ -26,7 +25,8 @@ pub fn job(
loop {
// Obtain the next result from the receiver, else if the channel
// has closed, exit from the loop
let dir_entry: DirEntry = match rx.recv() {
let result = rx.recv().map(WorkerMsg::take);
let dir_entry = match result {
Ok(WorkerResult::Entry(dir_entry)) => dir_entry,
Ok(WorkerResult::Error(err)) => {
if config.show_filesystem_errors {
Expand All @@ -49,18 +49,19 @@ pub fn job(
merge_exitcodes(results)
}

pub fn batch(rx: Receiver<WorkerResult>, cmd: &CommandSet, config: &Config) -> ExitCode {
let paths = rx
.into_iter()
.filter_map(|worker_result| match worker_result {
WorkerResult::Entry(dir_entry) => Some(dir_entry.into_stripped_path(config)),
WorkerResult::Error(err) => {
if config.show_filesystem_errors {
print_error(err.to_string());
pub fn batch(rx: Receiver<WorkerMsg<'_>>, cmd: &CommandSet, config: &Config) -> ExitCode {
let paths =
rx.into_iter()
.map(WorkerMsg::take)
.filter_map(|worker_result| match worker_result {
WorkerResult::Entry(dir_entry) => Some(dir_entry.into_stripped_path(config)),
WorkerResult::Error(err) => {
if config.show_filesystem_errors {
print_error(err.to_string());
}
None
}
None
}
});
});

cmd.execute_batch(paths, config.batch_size, config.path_separator.as_deref())
}
128 changes: 110 additions & 18 deletions src/walk.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
use std::borrow::Cow;
use std::cell::OnceCell;
use std::ffi::OsStr;
use std::io::{self, Write};
use std::mem;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Condvar, Mutex};
use std::thread;
use std::time::{Duration, Instant};

use anyhow::{anyhow, Result};
use crossbeam_channel::{bounded, Receiver, RecvTimeoutError, Sender};
use crossbeam_channel::{unbounded, Receiver, RecvTimeoutError, Sender};
use etcetera::BaseStrategy;
use ignore::overrides::{Override, OverrideBuilder};
use ignore::{self, WalkBuilder, WalkParallel, WalkState};
use regex::bytes::Regex;
use thread_local::ThreadLocal;

use crate::config::Config;
use crate::dir_entry::DirEntry;
Expand Down Expand Up @@ -43,6 +45,81 @@ pub enum WorkerResult {
Error(ignore::Error),
}

/// A WorkerResult that recycles itself.
pub struct WorkerMsg<'a> {
inner: Option<Box<WorkerResult>>,
pool: &'a ResultPool,
}

impl<'a> WorkerMsg<'a> {
/// Create a new message.
fn new(inner: Box<WorkerResult>, pool: &'a ResultPool) -> Self {
Self {
inner: Some(inner),
pool,
}
}

/// Extract the result from this message.
pub fn take(mut self) -> WorkerResult {
*self.inner.take().unwrap()
}
}

impl Drop for WorkerMsg<'_> {
fn drop(&mut self) {
self.pool.recycle();
}
}

/// A counting limiter for in-flight `WorkerMsg`s.
///
/// `get` consumes one unit of capacity per message (blocking while none is
/// left) and `recycle` gives one unit back. No allocation is reused: `get`
/// boxes a fresh `WorkerResult` on every call.
struct ResultPool {
    /// Number of messages that can still be handed out without blocking.
    cap: AtomicUsize,
    /// Paired with `cv`; it guards no data and exists only for waiting.
    mutex: Mutex<()>,
    /// Signalled by `recycle` when the capacity leaves zero.
    cv: Condvar,
}

impl ResultPool {
    /// Initial capacity of every pool.
    ///
    /// Capacity was chosen empirically to perform similarly to an unbounded channel
    const CAPACITY: usize = 0x4000;

    /// Create a pool with its full capacity available.
    fn new() -> Self {
        Self {
            cap: AtomicUsize::new(Self::CAPACITY),
            mutex: Mutex::new(()),
            cv: Condvar::new(),
        }
    }

    /// Try to decrement the capacity.
    ///
    /// Returns `true` if one unit was claimed, or `false` if the capacity was
    /// already zero (in which case it is left untouched).
    fn try_get(&self) -> bool {
        self.cap
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |cap| {
                cap.checked_sub(1)
            })
            .is_ok()
    }

    /// Claim one unit of capacity, blocking until one is available, and wrap
    /// `result` in a `WorkerMsg` bound to this pool.
    fn get(&self, result: WorkerResult) -> WorkerMsg<'_> {
        if !self.try_get() {
            // Slow path: sleep on the condition variable until `recycle`
            // returns a unit. `wait_while` re-checks the predicate before
            // every sleep, so a unit returned in the meantime is not missed.
            let guard = self.mutex.lock().unwrap();
            let guard = self.cv.wait_while(guard, |_| !self.try_get()).unwrap();
            drop(guard);
        }

        WorkerMsg::new(Box::new(result), self)
    }

    /// Give one unit of capacity back to the pool.
    fn recycle(&self) {
        let previous = self.cap.fetch_add(1, Ordering::Relaxed);
        if previous == 0 {
            // Only a pool that was empty can have a blocked `get`. Taking and
            // releasing the mutex first means a waiter that has checked the
            // predicate but is not yet asleep cannot miss the notification.
            // NOTE(review): `notify_one` on the 0 -> 1 transition only looks
            // like it assumes at most one thread blocks in `get` per pool;
            // confirm this against how the pools are shared.
            drop(self.mutex.lock().unwrap());
            self.cv.notify_one();
        }
    }
}

/// Maximum size of the output buffer before flushing results to the console
const MAX_BUFFER_LENGTH: usize = 1000;
/// Default duration until output buffering switches to streaming.
Expand All @@ -56,8 +133,8 @@ struct ReceiverBuffer<'a, W> {
quit_flag: &'a AtomicBool,
/// The ^C notifier.
interrupt_flag: &'a AtomicBool,
/// Receiver for worker results.
rx: Receiver<WorkerResult>,
/// Receiver for worker messages.
rx: Receiver<WorkerMsg<'a>>,
/// Standard output.
stdout: W,
/// The current buffer mode.
Expand All @@ -72,7 +149,7 @@ struct ReceiverBuffer<'a, W> {

impl<'a, W: Write> ReceiverBuffer<'a, W> {
/// Create a new receiver buffer.
fn new(state: &'a WorkerState, rx: Receiver<WorkerResult>, stdout: W) -> Self {
fn new(state: &'a WorkerState, rx: Receiver<WorkerMsg<'a>>, stdout: W) -> Self {
let config = &state.config;
let quit_flag = state.quit_flag.as_ref();
let interrupt_flag = state.interrupt_flag.as_ref();
Expand Down Expand Up @@ -104,7 +181,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> {

/// Receive the next worker result.
fn recv(&self) -> Result<WorkerResult, RecvTimeoutError> {
match self.mode {
let result = match self.mode {
ReceiverMode::Buffering => {
// Wait at most until we should switch to streaming
self.rx.recv_deadline(self.deadline)
Expand All @@ -113,7 +190,8 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> {
// Wait however long it takes for a result
Ok(self.rx.recv()?)
}
}
};
result.map(WorkerMsg::take)
}

/// Wait for a result or state change.
Expand Down Expand Up @@ -219,18 +297,22 @@ struct WorkerState {
quit_flag: Arc<AtomicBool>,
/// Flag specifically for quitting due to ^C
interrupt_flag: Arc<AtomicBool>,
/// WorkerResult pools.
pools: ThreadLocal<ResultPool>,
}

impl WorkerState {
/// Build the shared worker state: both flags start out `false` and the
/// per-thread pool storage starts out empty.
fn new(patterns: Vec<Regex>, config: Config) -> Self {
    Self {
        patterns,
        config,
        quit_flag: Arc::new(AtomicBool::new(false)),
        interrupt_flag: Arc::new(AtomicBool::new(false)),
        pools: ThreadLocal::new(),
    }
}

Expand Down Expand Up @@ -319,7 +401,7 @@ impl WorkerState {

/// Run the receiver work, either on this thread or a pool of background
/// threads (for --exec).
fn receive(&self, rx: Receiver<WorkerResult>) -> ExitCode {
fn receive<'a>(&'a self, rx: Receiver<WorkerMsg<'a>>) -> ExitCode {
let config = &self.config;

// This will be set to `Some` if the `--exec` argument was supplied.
Expand Down Expand Up @@ -354,19 +436,27 @@ impl WorkerState {
}
}

/// Return the calling thread's ResultPool, creating it on first use.
fn get_pool(&self) -> &ResultPool {
    let per_thread = &self.pools;
    per_thread.get_or(ResultPool::new)
}

/// Spawn the sender threads.
fn spawn_senders(&self, walker: WalkParallel, tx: Sender<WorkerResult>) {
fn spawn_senders<'a>(&'a self, walker: WalkParallel, tx: Sender<WorkerMsg<'a>>) {
walker.run(|| {
let patterns = &self.patterns;
let config = &self.config;
let quit_flag = self.quit_flag.as_ref();
let tx = tx.clone();
let pool = OnceCell::new();

Box::new(move |entry| {
if quit_flag.load(Ordering::Relaxed) {
return WalkState::Quit;
}

let pool = pool.get_or_init(|| self.get_pool());

let entry = match entry {
Ok(ref e) if e.depth() == 0 => {
// Skip the root directory entry.
Expand All @@ -387,20 +477,22 @@ impl WorkerState {
DirEntry::broken_symlink(path)
}
_ => {
return match tx.send(WorkerResult::Error(ignore::Error::WithPath {
let result = pool.get(WorkerResult::Error(ignore::Error::WithPath {
path,
err: inner_err,
})) {
}));
return match tx.send(result) {
Ok(_) => WalkState::Continue,
Err(_) => WalkState::Quit,
}
};
}
},
Err(err) => {
return match tx.send(WorkerResult::Error(err)) {
let result = pool.get(WorkerResult::Error(err));
return match tx.send(result) {
Ok(_) => WalkState::Continue,
Err(_) => WalkState::Quit,
}
};
}
};

Expand Down Expand Up @@ -509,7 +601,8 @@ impl WorkerState {
}
}

let send_result = tx.send(WorkerResult::Entry(entry));
let result = pool.get(WorkerResult::Entry(entry));
let send_result = tx.send(result);

if send_result.is_err() {
return WalkState::Quit;
Expand Down Expand Up @@ -545,8 +638,7 @@ impl WorkerState {
.unwrap();
}

// Channel capacity was chosen empirically to perform similarly to an unbounded channel
let (tx, rx) = bounded(0x4000 * config.threads);
let (tx, rx) = unbounded();

let exit_code = thread::scope(|scope| {
// Spawn the receiver thread(s)
Expand Down