1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
use std::{
io::{self, Read},
process,
};
/// An error that can occur while running a command and reading its output.
///
/// This error can be seamlessly converted to an `io::Error` via a `From`
/// implementation.
#[derive(Debug)]
pub struct CommandError {
kind: CommandErrorKind,
}
#[derive(Debug)]
enum CommandErrorKind {
Io(io::Error),
Stderr(Vec<u8>),
}
impl CommandError {
/// Create an error from an I/O error.
pub(crate) fn io(ioerr: io::Error) -> CommandError {
CommandError { kind: CommandErrorKind::Io(ioerr) }
}
/// Create an error from the contents of stderr (which may be empty).
pub(crate) fn stderr(bytes: Vec<u8>) -> CommandError {
CommandError { kind: CommandErrorKind::Stderr(bytes) }
}
/// Returns true if and only if this error has empty data from stderr.
pub(crate) fn is_empty(&self) -> bool {
match self.kind {
CommandErrorKind::Stderr(ref bytes) => bytes.is_empty(),
_ => false,
}
}
}
impl std::error::Error for CommandError {}
impl std::fmt::Display for CommandError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.kind {
CommandErrorKind::Io(ref e) => e.fmt(f),
CommandErrorKind::Stderr(ref bytes) => {
let msg = String::from_utf8_lossy(bytes);
if msg.trim().is_empty() {
write!(f, "<stderr is empty>")
} else {
let div = "-".repeat(79);
write!(
f,
"\n{div}\n{msg}\n{div}",
div = div,
msg = msg.trim()
)
}
}
}
}
}
impl From<io::Error> for CommandError {
fn from(ioerr: io::Error) -> CommandError {
CommandError { kind: CommandErrorKind::Io(ioerr) }
}
}
impl From<CommandError> for io::Error {
fn from(cmderr: CommandError) -> io::Error {
match cmderr.kind {
CommandErrorKind::Io(ioerr) => ioerr,
CommandErrorKind::Stderr(_) => {
io::Error::new(io::ErrorKind::Other, cmderr)
}
}
}
}
/// Configures and builds a streaming reader for process output.
#[derive(Clone, Debug, Default)]
pub struct CommandReaderBuilder {
async_stderr: bool,
}
impl CommandReaderBuilder {
/// Create a new builder with the default configuration.
pub fn new() -> CommandReaderBuilder {
CommandReaderBuilder::default()
}
/// Build a new streaming reader for the given command's output.
///
/// The caller should set everything that's required on the given command
/// before building a reader, such as its arguments, environment and
/// current working directory. Settings such as the stdout and stderr (but
/// not stdin) pipes will be overridden so that they can be controlled by
/// the reader.
///
/// If there was a problem spawning the given command, then its error is
/// returned.
pub fn build(
&self,
command: &mut process::Command,
) -> Result<CommandReader, CommandError> {
let mut child = command
.stdout(process::Stdio::piped())
.stderr(process::Stdio::piped())
.spawn()?;
let stderr = if self.async_stderr {
StderrReader::r#async(child.stderr.take().unwrap())
} else {
StderrReader::sync(child.stderr.take().unwrap())
};
Ok(CommandReader { child, stderr, eof: false })
}
/// When enabled, the reader will asynchronously read the contents of the
/// command's stderr output. When disabled, stderr is only read after the
/// stdout stream has been exhausted (or if the process quits with an error
/// code).
///
/// Note that when enabled, this may require launching an additional
/// thread in order to read stderr. This is done so that the process being
/// executed is never blocked from writing to stdout or stderr. If this is
/// disabled, then it is possible for the process to fill up the stderr
/// buffer and deadlock.
///
/// This is enabled by default.
pub fn async_stderr(&mut self, yes: bool) -> &mut CommandReaderBuilder {
self.async_stderr = yes;
self
}
}
/// A streaming reader for a command's output.
///
/// The purpose of this reader is to provide an easy way to execute processes
/// whose stdout is read in a streaming way while also making the processes'
/// stderr available when the process fails with an exit code. This makes it
/// possible to execute processes while surfacing the underlying failure mode
/// in the case of an error.
///
/// Moreover, by default, this reader will asynchronously read the processes'
/// stderr. This prevents subtle deadlocking bugs for noisy processes that
/// write a lot to stderr. Currently, the entire contents of stderr is read
/// on to the heap.
///
/// # Example
///
/// This example shows how to invoke `gzip` to decompress the contents of a
/// file. If the `gzip` command reports a failing exit status, then its stderr
/// is returned as an error.
///
/// ```no_run
/// use std::{io::Read, process::Command};
///
/// use grep_cli::CommandReader;
///
/// let mut cmd = Command::new("gzip");
/// cmd.arg("-d").arg("-c").arg("/usr/share/man/man1/ls.1.gz");
///
/// let mut rdr = CommandReader::new(&mut cmd)?;
/// let mut contents = vec![];
/// rdr.read_to_end(&mut contents)?;
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[derive(Debug)]
pub struct CommandReader {
child: process::Child,
stderr: StderrReader,
/// This is set to true once 'read' returns zero bytes. When this isn't
/// set and we close the reader, then we anticipate a pipe error when
/// reaping the child process and silence it.
eof: bool,
}
impl CommandReader {
/// Create a new streaming reader for the given command using the default
/// configuration.
///
/// The caller should set everything that's required on the given command
/// before building a reader, such as its arguments, environment and
/// current working directory. Settings such as the stdout and stderr (but
/// not stdin) pipes will be overridden so that they can be controlled by
/// the reader.
///
/// If there was a problem spawning the given command, then its error is
/// returned.
///
/// If the caller requires additional configuration for the reader
/// returned, then use [`CommandReaderBuilder`].
pub fn new(
cmd: &mut process::Command,
) -> Result<CommandReader, CommandError> {
CommandReaderBuilder::new().build(cmd)
}
/// Closes the CommandReader, freeing any resources used by its underlying
/// child process. If the child process exits with a nonzero exit code, the
/// returned Err value will include its stderr.
///
/// `close` is idempotent, meaning it can be safely called multiple times.
/// The first call closes the CommandReader and any subsequent calls do
/// nothing.
///
/// This method should be called after partially reading a file to prevent
/// resource leakage. However there is no need to call `close` explicitly
/// if your code always calls `read` to EOF, as `read` takes care of
/// calling `close` in this case.
///
/// `close` is also called in `drop` as a last line of defense against
/// resource leakage. Any error from the child process is then printed as a
/// warning to stderr. This can be avoided by explicitly calling `close`
/// before the CommandReader is dropped.
pub fn close(&mut self) -> io::Result<()> {
// Dropping stdout closes the underlying file descriptor, which should
// cause a well-behaved child process to exit. If child.stdout is None
// we assume that close() has already been called and do nothing.
let stdout = match self.child.stdout.take() {
None => return Ok(()),
Some(stdout) => stdout,
};
drop(stdout);
if self.child.wait()?.success() {
Ok(())
} else {
let err = self.stderr.read_to_end();
// In the specific case where we haven't consumed the full data
// from the child process, then closing stdout above results in
// a pipe signal being thrown in most cases. But I don't think
// there is any reliable and portable way of detecting it. Instead,
// if we know we haven't hit EOF (so we anticipate a broken pipe
// error) and if stderr otherwise doesn't have anything on it, then
// we assume total success.
if !self.eof && err.is_empty() {
return Ok(());
}
Err(io::Error::from(err))
}
}
}
impl Drop for CommandReader {
fn drop(&mut self) {
if let Err(error) = self.close() {
log::warn!("{}", error);
}
}
}
impl io::Read for CommandReader {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let stdout = match self.child.stdout {
None => return Ok(0),
Some(ref mut stdout) => stdout,
};
let nread = stdout.read(buf)?;
if nread == 0 {
self.eof = true;
self.close().map(|_| 0)
} else {
Ok(nread)
}
}
}
/// A reader that encapsulates the asynchronous or synchronous reading of
/// stderr.
#[derive(Debug)]
enum StderrReader {
Async(Option<std::thread::JoinHandle<CommandError>>),
Sync(process::ChildStderr),
}
impl StderrReader {
/// Create a reader for stderr that reads contents asynchronously.
fn r#async(mut stderr: process::ChildStderr) -> StderrReader {
let handle =
std::thread::spawn(move || stderr_to_command_error(&mut stderr));
StderrReader::Async(Some(handle))
}
/// Create a reader for stderr that reads contents synchronously.
fn sync(stderr: process::ChildStderr) -> StderrReader {
StderrReader::Sync(stderr)
}
/// Consumes all of stderr on to the heap and returns it as an error.
///
/// If there was a problem reading stderr itself, then this returns an I/O
/// command error.
fn read_to_end(&mut self) -> CommandError {
match *self {
StderrReader::Async(ref mut handle) => {
let handle = handle
.take()
.expect("read_to_end cannot be called more than once");
handle.join().expect("stderr reading thread does not panic")
}
StderrReader::Sync(ref mut stderr) => {
stderr_to_command_error(stderr)
}
}
}
}
fn stderr_to_command_error(stderr: &mut process::ChildStderr) -> CommandError {
let mut bytes = vec![];
match stderr.read_to_end(&mut bytes) {
Ok(_) => CommandError::stderr(bytes),
Err(err) => CommandError::io(err),
}
}