walkdir/
lib.rs

1/*!
2Crate `walkdir` provides an efficient and cross platform implementation
3of recursive directory traversal. Several options are exposed to control
4iteration, such as whether to follow symbolic links (default off), limit the
5maximum number of simultaneous open file descriptors and the ability to
6efficiently skip descending into directories.
7
8To use this crate, add `walkdir` as a dependency to your project's
9`Cargo.toml`:
10
11```toml
12[dependencies]
13walkdir = "2"
14```
15
16# From the top
17
18The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values
19yielded by the iterator. Finally, the [`Error`] type is a small wrapper around
20[`std::io::Error`] with additional information, such as if a loop was detected
21while following symbolic links (not enabled by default).
22
23[`WalkDir`]: struct.WalkDir.html
24[`DirEntry`]: struct.DirEntry.html
25[`Error`]: struct.Error.html
26[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
27
28# Example
29
30The following code recursively iterates over the directory given and prints
31the path for each entry:
32
33```no_run
34use walkdir::WalkDir;
35# use walkdir::Error;
36
37# fn try_main() -> Result<(), Error> {
38for entry in WalkDir::new("foo") {
39    println!("{}", entry?.path().display());
40}
41# Ok(())
42# }
43```
44
45Or, if you'd like to iterate over all entries and ignore any errors that
46may arise, use [`filter_map`]. (e.g., This code below will silently skip
47directories that the owner of the running process does not have permission to
48access.)
49
50```no_run
51use walkdir::WalkDir;
52
53for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
54    println!("{}", entry.path().display());
55}
56```
57
58[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map
59
60# Example: follow symbolic links
61
62The same code as above, except [`follow_links`] is enabled:
63
64```no_run
65use walkdir::WalkDir;
66# use walkdir::Error;
67
68# fn try_main() -> Result<(), Error> {
69for entry in WalkDir::new("foo").follow_links(true) {
70    println!("{}", entry?.path().display());
71}
72# Ok(())
73# }
74```
75
76[`follow_links`]: struct.WalkDir.html#method.follow_links
77
78# Example: skip hidden files and directories on unix
79
80This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files
81and directories efficiently (i.e. without recursing into hidden directories):
82
83```no_run
84use walkdir::{DirEntry, WalkDir};
85# use walkdir::Error;
86
87fn is_hidden(entry: &DirEntry) -> bool {
88    entry.file_name()
89         .to_str()
90         .map(|s| s.starts_with("."))
91         .unwrap_or(false)
92}
93
94# fn try_main() -> Result<(), Error> {
95let walker = WalkDir::new("foo").into_iter();
96for entry in walker.filter_entry(|e| !is_hidden(e)) {
97    println!("{}", entry?.path().display());
98}
99# Ok(())
100# }
101```
102
103[`filter_entry`]: struct.IntoIter.html#method.filter_entry
104*/
105
106#![deny(missing_docs)]
107#![allow(unknown_lints)]
108
109#[cfg(doctest)]
110doc_comment::doctest!("../README.md");
111
112use std::cmp::{min, Ordering};
113use std::fmt;
114use std::fs::{self, ReadDir};
115use std::io;
116use std::iter;
117use std::path::{Path, PathBuf};
118use std::result;
119use std::vec;
120
121use same_file::Handle;
122
123pub use crate::dent::DirEntry;
124#[cfg(unix)]
125pub use crate::dent::DirEntryExt;
126pub use crate::error::Error;
127
128mod dent;
129mod error;
130#[cfg(test)]
131mod tests;
132mod util;
133
134/// Like try, but for iterators that return [`Option<Result<_, _>>`].
135///
136/// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
137macro_rules! itry {
138    ($e:expr) => {
139        match $e {
140            Ok(v) => v,
141            Err(err) => return Some(Err(From::from(err))),
142        }
143    };
144}
145
146/// A result type for walkdir operations.
147///
148/// Note that this result type embeds the error type in this crate. This
149/// is only useful if you care about the additional information provided by
150/// the error (such as the path associated with the error or whether a loop
151/// was dectected). If you want things to Just Work, then you can use
152/// [`io::Result`] instead since the error type in this package will
153/// automatically convert to an [`io::Result`] when using the [`try!`] macro.
154///
155/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html
156/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html
157pub type Result<T> = ::std::result::Result<T, Error>;
158
159/// A builder to create an iterator for recursively walking a directory.
160///
161/// Results are returned in depth first fashion, with directories yielded
162/// before their contents. If [`contents_first`] is true, contents are yielded
163/// before their directories. The order is unspecified but if [`sort_by`] is
164/// given, directory entries are sorted according to this function. Directory
165/// entries `.` and `..` are always omitted.
166///
167/// If an error occurs at any point during iteration, then it is returned in
168/// place of its corresponding directory entry and iteration continues as
169/// normal. If an error occurs while opening a directory for reading, then it
170/// is not descended into (but the error is still yielded by the iterator).
171/// Iteration may be stopped at any time. When the iterator is destroyed, all
172/// resources associated with it are freed.
173///
174/// [`contents_first`]: struct.WalkDir.html#method.contents_first
175/// [`sort_by`]: struct.WalkDir.html#method.sort_by
176///
177/// # Usage
178///
179/// This type implements [`IntoIterator`] so that it may be used as the subject
180/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want
181/// to use iterator adapters such as [`filter_entry`].
182///
183/// Idiomatic use of this type should use method chaining to set desired
184/// options. For example, this only shows entries with a depth of `1`, `2` or
185/// `3` (relative to `foo`):
186///
187/// ```no_run
188/// use walkdir::WalkDir;
189/// # use walkdir::Error;
190///
191/// # fn try_main() -> Result<(), Error> {
192/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) {
193///     println!("{}", entry?.path().display());
194/// }
195/// # Ok(())
196/// # }
197/// ```
198///
199/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html
200/// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter
201/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry
202///
203/// Note that the iterator by default includes the top-most directory. Since
204/// this is the only directory yielded with depth `0`, it is easy to ignore it
205/// with the [`min_depth`] setting:
206///
207/// ```no_run
208/// use walkdir::WalkDir;
209/// # use walkdir::Error;
210///
211/// # fn try_main() -> Result<(), Error> {
212/// for entry in WalkDir::new("foo").min_depth(1) {
213///     println!("{}", entry?.path().display());
214/// }
215/// # Ok(())
216/// # }
217/// ```
218///
219/// [`min_depth`]: struct.WalkDir.html#method.min_depth
220///
221/// This will only return descendents of the `foo` directory and not `foo`
222/// itself.
223///
224/// # Loops
225///
226/// This iterator (like most/all recursive directory iterators) assumes that
227/// no loops can be made with *hard* links on your file system. In particular,
228/// this would require creating a hard link to a directory such that it creates
229/// a loop. On most platforms, this operation is illegal.
230///
231/// Note that when following symbolic/soft links, loops are detected and an
232/// error is reported.
233#[derive(Debug)]
234pub struct WalkDir {
235    opts: WalkDirOptions,
236    root: PathBuf,
237}
238
239struct WalkDirOptions {
240    follow_links: bool,
241    follow_root_links: bool,
242    max_open: usize,
243    min_depth: usize,
244    max_depth: usize,
245    sorter: Option<
246        Box<
247            dyn FnMut(&DirEntry, &DirEntry) -> Ordering
248                + Send
249                + Sync
250                + 'static,
251        >,
252    >,
253    contents_first: bool,
254    same_file_system: bool,
255}
256
257impl fmt::Debug for WalkDirOptions {
258    fn fmt(
259        &self,
260        f: &mut fmt::Formatter<'_>,
261    ) -> result::Result<(), fmt::Error> {
262        let sorter_str = if self.sorter.is_some() {
263            // FnMut isn't `Debug`
264            "Some(...)"
265        } else {
266            "None"
267        };
268        f.debug_struct("WalkDirOptions")
269            .field("follow_links", &self.follow_links)
270            .field("follow_root_link", &self.follow_root_links)
271            .field("max_open", &self.max_open)
272            .field("min_depth", &self.min_depth)
273            .field("max_depth", &self.max_depth)
274            .field("sorter", &sorter_str)
275            .field("contents_first", &self.contents_first)
276            .field("same_file_system", &self.same_file_system)
277            .finish()
278    }
279}
280
281impl WalkDir {
282    /// Create a builder for a recursive directory iterator starting at the
283    /// file path `root`. If `root` is a directory, then it is the first item
284    /// yielded by the iterator. If `root` is a file, then it is the first
285    /// and only item yielded by the iterator. If `root` is a symlink, then it
286    /// is always followed for the purposes of directory traversal. (A root
287    /// `DirEntry` still obeys its documentation with respect to symlinks and
288    /// the `follow_links` setting.)
289    pub fn new<P: AsRef<Path>>(root: P) -> Self {
290        WalkDir {
291            opts: WalkDirOptions {
292                follow_links: false,
293                follow_root_links: true,
294                max_open: 10,
295                min_depth: 0,
296                max_depth: ::std::usize::MAX,
297                sorter: None,
298                contents_first: false,
299                same_file_system: false,
300            },
301            root: root.as_ref().to_path_buf(),
302        }
303    }
304
305    /// Set the minimum depth of entries yielded by the iterator.
306    ///
307    /// The smallest depth is `0` and always corresponds to the path given
308    /// to the `new` function on this type. Its direct descendents have depth
309    /// `1`, and their descendents have depth `2`, and so on.
310    pub fn min_depth(mut self, depth: usize) -> Self {
311        self.opts.min_depth = depth;
312        if self.opts.min_depth > self.opts.max_depth {
313            self.opts.min_depth = self.opts.max_depth;
314        }
315        self
316    }
317
318    /// Set the maximum depth of entries yield by the iterator.
319    ///
320    /// The smallest depth is `0` and always corresponds to the path given
321    /// to the `new` function on this type. Its direct descendents have depth
322    /// `1`, and their descendents have depth `2`, and so on.
323    ///
324    /// Note that this will not simply filter the entries of the iterator, but
325    /// it will actually avoid descending into directories when the depth is
326    /// exceeded.
327    pub fn max_depth(mut self, depth: usize) -> Self {
328        self.opts.max_depth = depth;
329        if self.opts.max_depth < self.opts.min_depth {
330            self.opts.max_depth = self.opts.min_depth;
331        }
332        self
333    }
334
335    /// Follow symbolic links. By default, this is disabled.
336    ///
337    /// When `yes` is `true`, symbolic links are followed as if they were
338    /// normal directories and files. If a symbolic link is broken or is
339    /// involved in a loop, an error is yielded.
340    ///
341    /// When enabled, the yielded [`DirEntry`] values represent the target of
342    /// the link while the path corresponds to the link. See the [`DirEntry`]
343    /// type for more details.
344    ///
345    /// [`DirEntry`]: struct.DirEntry.html
346    pub fn follow_links(mut self, yes: bool) -> Self {
347        self.opts.follow_links = yes;
348        self
349    }
350
351    /// Follow symbolic links if these are the root of the traversal.
352    /// By default, this is enabled.
353    ///
354    /// When `yes` is `true`, symbolic links on root paths are followed
355    /// which is effective if the symbolic link points to a directory.
356    /// If a symbolic link is broken or is involved in a loop, an error is yielded
357    /// as the first entry of the traversal.
358    ///
359    /// When enabled, the yielded [`DirEntry`] values represent the target of
360    /// the link while the path corresponds to the link. See the [`DirEntry`]
361    /// type for more details, and all future entries will be contained within
362    /// the resolved directory behind the symbolic link of the root path.
363    ///
364    /// [`DirEntry`]: struct.DirEntry.html
365    pub fn follow_root_links(mut self, yes: bool) -> Self {
366        self.opts.follow_root_links = yes;
367        self
368    }
369
370    /// Set the maximum number of simultaneously open file descriptors used
371    /// by the iterator.
372    ///
373    /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set
374    /// to `1` automatically. If this is not set, then it defaults to some
375    /// reasonably low number.
376    ///
377    /// This setting has no impact on the results yielded by the iterator
378    /// (even when `n` is `1`). Instead, this setting represents a trade off
379    /// between scarce resources (file descriptors) and memory. Namely, when
380    /// the maximum number of file descriptors is reached and a new directory
381    /// needs to be opened to continue iteration, then a previous directory
382    /// handle is closed and has its unyielded entries stored in memory. In
383    /// practice, this is a satisfying trade off because it scales with respect
384    /// to the *depth* of your file tree. Therefore, low values (even `1`) are
385    /// acceptable.
386    ///
387    /// Note that this value does not impact the number of system calls made by
388    /// an exhausted iterator.
389    ///
390    /// # Platform behavior
391    ///
392    /// On Windows, if `follow_links` is enabled, then this limit is not
393    /// respected. In particular, the maximum number of file descriptors opened
394    /// is proportional to the depth of the directory tree traversed.
395    pub fn max_open(mut self, mut n: usize) -> Self {
396        if n == 0 {
397            n = 1;
398        }
399        self.opts.max_open = n;
400        self
401    }
402
403    /// Set a function for sorting directory entries with a comparator
404    /// function.
405    ///
406    /// If a compare function is set, the resulting iterator will return all
407    /// paths in sorted order. The compare function will be called to compare
408    /// entries from the same directory.
409    ///
410    /// ```rust,no_run
411    /// use std::cmp;
412    /// use std::ffi::OsString;
413    /// use walkdir::WalkDir;
414    ///
415    /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name()));
416    /// ```
417    pub fn sort_by<F>(mut self, cmp: F) -> Self
418    where
419        F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,
420    {
421        self.opts.sorter = Some(Box::new(cmp));
422        self
423    }
424
425    /// Set a function for sorting directory entries with a key extraction
426    /// function.
427    ///
428    /// If a compare function is set, the resulting iterator will return all
429    /// paths in sorted order. The compare function will be called to compare
430    /// entries from the same directory.
431    ///
432    /// ```rust,no_run
433    /// use std::cmp;
434    /// use std::ffi::OsString;
435    /// use walkdir::WalkDir;
436    ///
437    /// WalkDir::new("foo").sort_by_key(|a| a.file_name().to_owned());
438    /// ```
439    pub fn sort_by_key<K, F>(self, mut cmp: F) -> Self
440    where
441        F: FnMut(&DirEntry) -> K + Send + Sync + 'static,
442        K: Ord,
443    {
444        self.sort_by(move |a, b| cmp(a).cmp(&cmp(b)))
445    }
446
447    /// Sort directory entries by file name, to ensure a deterministic order.
448    ///
449    /// This is a convenience function for calling `Self::sort_by()`.
450    ///
451    /// ```rust,no_run
452    /// use walkdir::WalkDir;
453    ///
454    /// WalkDir::new("foo").sort_by_file_name();
455    /// ```
456    pub fn sort_by_file_name(self) -> Self {
457        self.sort_by(|a, b| a.file_name().cmp(b.file_name()))
458    }
459
460    /// Yield a directory's contents before the directory itself. By default,
461    /// this is disabled.
462    ///
463    /// When `yes` is `false` (as is the default), the directory is yielded
464    /// before its contents are read. This is useful when, e.g. you want to
465    /// skip processing of some directories.
466    ///
467    /// When `yes` is `true`, the iterator yields the contents of a directory
468    /// before yielding the directory itself. This is useful when, e.g. you
469    /// want to recursively delete a directory.
470    ///
471    /// # Example
472    ///
473    /// Assume the following directory tree:
474    ///
475    /// ```text
476    /// foo/
477    ///   abc/
478    ///     qrs
479    ///     tuv
480    ///   def/
481    /// ```
482    ///
483    /// With contents_first disabled (the default), the following code visits
484    /// the directory tree in depth-first order:
485    ///
486    /// ```no_run
487    /// use walkdir::WalkDir;
488    ///
489    /// for entry in WalkDir::new("foo") {
490    ///     let entry = entry.unwrap();
491    ///     println!("{}", entry.path().display());
492    /// }
493    ///
494    /// // foo
495    /// // foo/abc
496    /// // foo/abc/qrs
497    /// // foo/abc/tuv
498    /// // foo/def
499    /// ```
500    ///
501    /// With contents_first enabled:
502    ///
503    /// ```no_run
504    /// use walkdir::WalkDir;
505    ///
506    /// for entry in WalkDir::new("foo").contents_first(true) {
507    ///     let entry = entry.unwrap();
508    ///     println!("{}", entry.path().display());
509    /// }
510    ///
511    /// // foo/abc/qrs
512    /// // foo/abc/tuv
513    /// // foo/abc
514    /// // foo/def
515    /// // foo
516    /// ```
517    pub fn contents_first(mut self, yes: bool) -> Self {
518        self.opts.contents_first = yes;
519        self
520    }
521
522    /// Do not cross file system boundaries.
523    ///
524    /// When this option is enabled, directory traversal will not descend into
525    /// directories that are on a different file system from the root path.
526    ///
527    /// Currently, this option is only supported on Unix and Windows. If this
528    /// option is used on an unsupported platform, then directory traversal
529    /// will immediately return an error and will not yield any entries.
530    pub fn same_file_system(mut self, yes: bool) -> Self {
531        self.opts.same_file_system = yes;
532        self
533    }
534}
535
536impl IntoIterator for WalkDir {
537    type Item = Result<DirEntry>;
538    type IntoIter = IntoIter;
539
540    fn into_iter(self) -> IntoIter {
541        IntoIter {
542            opts: self.opts,
543            start: Some(self.root),
544            stack_list: vec![],
545            stack_path: vec![],
546            oldest_opened: 0,
547            depth: 0,
548            deferred_dirs: vec![],
549            root_device: None,
550        }
551    }
552}
553
554/// An iterator for recursively descending into a directory.
555///
556/// A value with this type must be constructed with the [`WalkDir`] type, which
557/// uses a builder pattern to set options such as min/max depth, max open file
558/// descriptors and whether the iterator should follow symbolic links. After
559/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain.
560///
561/// The order of elements yielded by this iterator is unspecified.
562///
563/// [`WalkDir`]: struct.WalkDir.html
564/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
565#[derive(Debug)]
566pub struct IntoIter {
567    /// Options specified in the builder. Depths, max fds, etc.
568    opts: WalkDirOptions,
569    /// The start path.
570    ///
571    /// This is only `Some(...)` at the beginning. After the first iteration,
572    /// this is always `None`.
573    start: Option<PathBuf>,
574    /// A stack of open (up to max fd) or closed handles to directories.
575    /// An open handle is a plain [`fs::ReadDir`] while a closed handle is
576    /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries.
577    ///
578    /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
579    stack_list: Vec<DirList>,
580    /// A stack of file paths.
581    ///
582    /// This is *only* used when [`follow_links`] is enabled. In all other
583    /// cases this stack is empty.
584    ///
585    /// [`follow_links`]: struct.WalkDir.html#method.follow_links
586    stack_path: Vec<Ancestor>,
587    /// An index into `stack_list` that points to the oldest open directory
588    /// handle. If the maximum fd limit is reached and a new directory needs to
589    /// be read, the handle at this index is closed before the new directory is
590    /// opened.
591    oldest_opened: usize,
592    /// The current depth of iteration (the length of the stack at the
593    /// beginning of each iteration).
594    depth: usize,
595    /// A list of DirEntries corresponding to directories, that are
596    /// yielded after their contents has been fully yielded. This is only
597    /// used when `contents_first` is enabled.
598    deferred_dirs: Vec<DirEntry>,
599    /// The device of the root file path when the first call to `next` was
600    /// made.
601    ///
602    /// If the `same_file_system` option isn't enabled, then this is always
603    /// `None`. Conversely, if it is enabled, this is always `Some(...)` after
604    /// handling the root path.
605    root_device: Option<u64>,
606}
607
608/// An ancestor is an item in the directory tree traversed by walkdir, and is
609/// used to check for loops in the tree when traversing symlinks.
610#[derive(Debug)]
611struct Ancestor {
612    /// The path of this ancestor.
613    path: PathBuf,
614    /// An open file to this ancesor. This is only used on Windows where
615    /// opening a file handle appears to be quite expensive, so we choose to
616    /// cache it. This comes at the cost of not respecting the file descriptor
617    /// limit set by the user.
618    #[cfg(windows)]
619    handle: Handle,
620}
621
622impl Ancestor {
623    /// Create a new ancestor from the given directory path.
624    #[cfg(windows)]
625    fn new(dent: &DirEntry) -> io::Result<Ancestor> {
626        let handle = Handle::from_path(dent.path())?;
627        Ok(Ancestor { path: dent.path().to_path_buf(), handle })
628    }
629
630    /// Create a new ancestor from the given directory path.
631    #[cfg(not(windows))]
632    fn new(dent: &DirEntry) -> io::Result<Ancestor> {
633        Ok(Ancestor { path: dent.path().to_path_buf() })
634    }
635
636    /// Returns true if and only if the given open file handle corresponds to
637    /// the same directory as this ancestor.
638    #[cfg(windows)]
639    fn is_same(&self, child: &Handle) -> io::Result<bool> {
640        Ok(child == &self.handle)
641    }
642
643    /// Returns true if and only if the given open file handle corresponds to
644    /// the same directory as this ancestor.
645    #[cfg(not(windows))]
646    fn is_same(&self, child: &Handle) -> io::Result<bool> {
647        Ok(child == &Handle::from_path(&self.path)?)
648    }
649}
650
651/// A sequence of unconsumed directory entries.
652///
653/// This represents the opened or closed state of a directory handle. When
654/// open, future entries are read by iterating over the raw `fs::ReadDir`.
655/// When closed, all future entries are read into memory. Iteration then
656/// proceeds over a [`Vec<fs::DirEntry>`].
657///
658/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
659/// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html
660#[derive(Debug)]
661enum DirList {
662    /// An opened handle.
663    ///
664    /// This includes the depth of the handle itself.
665    ///
666    /// If there was an error with the initial [`fs::read_dir`] call, then it
667    /// is stored here. (We use an [`Option<...>`] to make yielding the error
668    /// exactly once simpler.)
669    ///
670    /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
671    /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
672    Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> },
673    /// A closed handle.
674    ///
675    /// All remaining directory entries are read into memory.
676    Closed(vec::IntoIter<Result<DirEntry>>),
677}
678
679impl Iterator for IntoIter {
680    type Item = Result<DirEntry>;
681    /// Advances the iterator and returns the next value.
682    ///
683    /// # Errors
684    ///
685    /// If the iterator fails to retrieve the next value, this method returns
686    /// an error value. The error will be wrapped in an Option::Some.
687    fn next(&mut self) -> Option<Result<DirEntry>> {
688        if let Some(start) = self.start.take() {
689            if self.opts.same_file_system {
690                let result = util::device_num(&start)
691                    .map_err(|e| Error::from_path(0, start.clone(), e));
692                self.root_device = Some(itry!(result));
693            }
694            let dent = itry!(DirEntry::from_path(0, start, false));
695            if let Some(result) = self.handle_entry(dent) {
696                return Some(result);
697            }
698        }
699        while !self.stack_list.is_empty() {
700            self.depth = self.stack_list.len();
701            if let Some(dentry) = self.get_deferred_dir() {
702                return Some(Ok(dentry));
703            }
704            if self.depth > self.opts.max_depth {
705                // If we've exceeded the max depth, pop the current dir
706                // so that we don't descend.
707                self.pop();
708                continue;
709            }
710            // Unwrap is safe here because we've verified above that
711            // `self.stack_list` is not empty
712            let next = self
713                .stack_list
714                .last_mut()
715                .expect("BUG: stack should be non-empty")
716                .next();
717            match next {
718                None => self.pop(),
719                Some(Err(err)) => return Some(Err(err)),
720                Some(Ok(dent)) => {
721                    if let Some(result) = self.handle_entry(dent) {
722                        return Some(result);
723                    }
724                }
725            }
726        }
727        if self.opts.contents_first {
728            self.depth = self.stack_list.len();
729            if let Some(dentry) = self.get_deferred_dir() {
730                return Some(Ok(dentry));
731            }
732        }
733        None
734    }
735}
736
737impl IntoIter {
738    /// Skips the current directory.
739    ///
740    /// This causes the iterator to stop traversing the contents of the least
741    /// recently yielded directory. This means any remaining entries in that
742    /// directory will be skipped (including sub-directories).
743    ///
744    /// Note that the ergonomics of this method are questionable since it
745    /// borrows the iterator mutably. Namely, you must write out the looping
746    /// condition manually. For example, to skip hidden entries efficiently on
747    /// unix systems:
748    ///
749    /// ```no_run
750    /// use walkdir::{DirEntry, WalkDir};
751    ///
752    /// fn is_hidden(entry: &DirEntry) -> bool {
753    ///     entry.file_name()
754    ///          .to_str()
755    ///          .map(|s| s.starts_with("."))
756    ///          .unwrap_or(false)
757    /// }
758    ///
759    /// let mut it = WalkDir::new("foo").into_iter();
760    /// loop {
761    ///     let entry = match it.next() {
762    ///         None => break,
763    ///         Some(Err(err)) => panic!("ERROR: {}", err),
764    ///         Some(Ok(entry)) => entry,
765    ///     };
766    ///     if is_hidden(&entry) {
767    ///         if entry.file_type().is_dir() {
768    ///             it.skip_current_dir();
769    ///         }
770    ///         continue;
771    ///     }
772    ///     println!("{}", entry.path().display());
773    /// }
774    /// ```
775    ///
776    /// You may find it more convenient to use the [`filter_entry`] iterator
777    /// adapter. (See its documentation for the same example functionality as
778    /// above.)
779    ///
780    /// [`filter_entry`]: #method.filter_entry
781    pub fn skip_current_dir(&mut self) {
782        if !self.stack_list.is_empty() {
783            self.pop();
784        }
785    }
786
787    /// Yields only entries which satisfy the given predicate and skips
788    /// descending into directories that do not satisfy the given predicate.
789    ///
790    /// The predicate is applied to all entries. If the predicate is
791    /// true, iteration carries on as normal. If the predicate is false, the
792    /// entry is ignored and if it is a directory, it is not descended into.
793    ///
794    /// This is often more convenient to use than [`skip_current_dir`]. For
795    /// example, to skip hidden files and directories efficiently on unix
796    /// systems:
797    ///
798    /// ```no_run
799    /// use walkdir::{DirEntry, WalkDir};
800    /// # use walkdir::Error;
801    ///
802    /// fn is_hidden(entry: &DirEntry) -> bool {
803    ///     entry.file_name()
804    ///          .to_str()
805    ///          .map(|s| s.starts_with("."))
806    ///          .unwrap_or(false)
807    /// }
808    ///
809    /// # fn try_main() -> Result<(), Error> {
810    /// for entry in WalkDir::new("foo")
811    ///                      .into_iter()
812    ///                      .filter_entry(|e| !is_hidden(e)) {
813    ///     println!("{}", entry?.path().display());
814    /// }
815    /// # Ok(())
816    /// # }
817    /// ```
818    ///
819    /// Note that the iterator will still yield errors for reading entries that
820    /// may not satisfy the predicate.
821    ///
822    /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
823    /// passed to this predicate.
824    ///
825    /// Note that if the iterator has `contents_first` enabled, then this
826    /// method is no different than calling the standard `Iterator::filter`
827    /// method (because directory entries are yielded after they've been
828    /// descended into).
829    ///
830    /// [`skip_current_dir`]: #method.skip_current_dir
831    /// [`min_depth`]: struct.WalkDir.html#method.min_depth
832    /// [`max_depth`]: struct.WalkDir.html#method.max_depth
833    pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P>
834    where
835        P: FnMut(&DirEntry) -> bool,
836    {
837        FilterEntry { it: self, predicate }
838    }
839
840    fn handle_entry(
841        &mut self,
842        mut dent: DirEntry,
843    ) -> Option<Result<DirEntry>> {
844        if self.opts.follow_links && dent.file_type().is_symlink() {
845            dent = itry!(self.follow(dent));
846        }
847        let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir();
848        if is_normal_dir {
849            if self.opts.same_file_system && dent.depth() > 0 {
850                if itry!(self.is_same_file_system(&dent)) {
851                    itry!(self.push(&dent));
852                }
853            } else {
854                itry!(self.push(&dent));
855            }
856        } else if dent.depth() == 0
857            && dent.file_type().is_symlink()
858            && self.opts.follow_root_links
859        {
860            // As a special case, if we are processing a root entry, then we
861            // always follow it even if it's a symlink and follow_links is
862            // false. We are careful to not let this change the semantics of
863            // the DirEntry however. Namely, the DirEntry should still respect
864            // the follow_links setting. When it's disabled, it should report
865            // itself as a symlink. When it's enabled, it should always report
866            // itself as the target.
867            let md = itry!(fs::metadata(dent.path()).map_err(|err| {
868                Error::from_path(dent.depth(), dent.path().to_path_buf(), err)
869            }));
870            if md.file_type().is_dir() {
871                itry!(self.push(&dent));
872            }
873        }
874        if is_normal_dir && self.opts.contents_first {
875            self.deferred_dirs.push(dent);
876            None
877        } else if self.skippable() {
878            None
879        } else {
880            Some(Ok(dent))
881        }
882    }
883
884    fn get_deferred_dir(&mut self) -> Option<DirEntry> {
885        if self.opts.contents_first {
886            if self.depth < self.deferred_dirs.len() {
887                // Unwrap is safe here because we've guaranteed that
888                // `self.deferred_dirs.len()` can never be less than 1
889                let deferred: DirEntry = self
890                    .deferred_dirs
891                    .pop()
892                    .expect("BUG: deferred_dirs should be non-empty");
893                if !self.skippable() {
894                    return Some(deferred);
895                }
896            }
897        }
898        None
899    }
900
901    fn push(&mut self, dent: &DirEntry) -> Result<()> {
902        // Make room for another open file descriptor if we've hit the max.
903        let free =
904            self.stack_list.len().checked_sub(self.oldest_opened).unwrap();
905        if free == self.opts.max_open {
906            self.stack_list[self.oldest_opened].close();
907        }
908        // Open a handle to reading the directory's entries.
909        let rd = fs::read_dir(dent.path()).map_err(|err| {
910            Some(Error::from_path(self.depth, dent.path().to_path_buf(), err))
911        });
912        let mut list = DirList::Opened { depth: self.depth, it: rd };
913        if let Some(ref mut cmp) = self.opts.sorter {
914            let mut entries: Vec<_> = list.collect();
915            entries.sort_by(|a, b| match (a, b) {
916                (&Ok(ref a), &Ok(ref b)) => cmp(a, b),
917                (&Err(_), &Err(_)) => Ordering::Equal,
918                (&Ok(_), &Err(_)) => Ordering::Greater,
919                (&Err(_), &Ok(_)) => Ordering::Less,
920            });
921            list = DirList::Closed(entries.into_iter());
922        }
923        if self.opts.follow_links {
924            let ancestor = Ancestor::new(&dent)
925                .map_err(|err| Error::from_io(self.depth, err))?;
926            self.stack_path.push(ancestor);
927        }
928        // We push this after stack_path since creating the Ancestor can fail.
929        // If it fails, then we return the error and won't descend.
930        self.stack_list.push(list);
931        // If we had to close out a previous directory stream, then we need to
932        // increment our index the oldest still-open stream. We do this only
933        // after adding to our stack, in order to ensure that the oldest_opened
934        // index remains valid. The worst that can happen is that an already
935        // closed stream will be closed again, which is a no-op.
936        //
937        // We could move the close of the stream above into this if-body, but
938        // then we would have more than the maximum number of file descriptors
939        // open at a particular point in time.
940        if free == self.opts.max_open {
941            // Unwrap is safe here because self.oldest_opened is guaranteed to
942            // never be greater than `self.stack_list.len()`, which implies
943            // that the subtraction won't underflow and that adding 1 will
944            // never overflow.
945            self.oldest_opened = self.oldest_opened.checked_add(1).unwrap();
946        }
947        Ok(())
948    }
949
950    fn pop(&mut self) {
951        self.stack_list.pop().expect("BUG: cannot pop from empty stack");
952        if self.opts.follow_links {
953            self.stack_path.pop().expect("BUG: list/path stacks out of sync");
954        }
955        // If everything in the stack is already closed, then there is
956        // room for at least one more open descriptor and it will
957        // always be at the top of the stack.
958        self.oldest_opened = min(self.oldest_opened, self.stack_list.len());
959    }
960
961    fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> {
962        dent =
963            DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?;
964        // The only way a symlink can cause a loop is if it points
965        // to a directory. Otherwise, it always points to a leaf
966        // and we can omit any loop checks.
967        if dent.is_dir() {
968            self.check_loop(dent.path())?;
969        }
970        Ok(dent)
971    }
972
973    fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
974        let hchild = Handle::from_path(&child)
975            .map_err(|err| Error::from_io(self.depth, err))?;
976        for ancestor in self.stack_path.iter().rev() {
977            let is_same = ancestor
978                .is_same(&hchild)
979                .map_err(|err| Error::from_io(self.depth, err))?;
980            if is_same {
981                return Err(Error::from_loop(
982                    self.depth,
983                    &ancestor.path,
984                    child.as_ref(),
985                ));
986            }
987        }
988        Ok(())
989    }
990
991    fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> {
992        let dent_device = util::device_num(dent.path())
993            .map_err(|err| Error::from_entry(dent, err))?;
994        Ok(self
995            .root_device
996            .map(|d| d == dent_device)
997            .expect("BUG: called is_same_file_system without root device"))
998    }
999
1000    fn skippable(&self) -> bool {
1001        self.depth < self.opts.min_depth || self.depth > self.opts.max_depth
1002    }
1003}
1004
1005impl iter::FusedIterator for IntoIter {}
1006
1007impl DirList {
1008    fn close(&mut self) {
1009        if let DirList::Opened { .. } = *self {
1010            *self = DirList::Closed(self.collect::<Vec<_>>().into_iter());
1011        }
1012    }
1013}
1014
1015impl Iterator for DirList {
1016    type Item = Result<DirEntry>;
1017
1018    #[inline(always)]
1019    fn next(&mut self) -> Option<Result<DirEntry>> {
1020        match *self {
1021            DirList::Closed(ref mut it) => it.next(),
1022            DirList::Opened { depth, ref mut it } => match *it {
1023                Err(ref mut err) => err.take().map(Err),
1024                Ok(ref mut rd) => rd.next().map(|r| match r {
1025                    Ok(r) => DirEntry::from_entry(depth + 1, &r),
1026                    Err(err) => Err(Error::from_io(depth + 1, err)),
1027                }),
1028            },
1029        }
1030    }
1031}
1032
1033/// A recursive directory iterator that skips entries.
1034///
1035/// Values of this type are created by calling [`.filter_entry()`] on an
1036/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`.
1037///
1038/// Directories that fail the predicate `P` are skipped. Namely, they are
1039/// never yielded and never descended into.
1040///
1041/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options
1042/// are not passed through this filter.
1043///
1044/// If opening a handle to a directory resulted in an error, then it is yielded
1045/// and no corresponding call to the predicate is made.
1046///
1047/// Type parameter `I` refers to the underlying iterator and `P` refers to the
1048/// predicate, which is usually `FnMut(&DirEntry) -> bool`.
1049///
1050/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry
1051/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
1052/// [`min_depth`]: struct.WalkDir.html#method.min_depth
1053/// [`max_depth`]: struct.WalkDir.html#method.max_depth
1054#[derive(Debug)]
1055pub struct FilterEntry<I, P> {
1056    it: I,
1057    predicate: P,
1058}
1059
1060impl<P> Iterator for FilterEntry<IntoIter, P>
1061where
1062    P: FnMut(&DirEntry) -> bool,
1063{
1064    type Item = Result<DirEntry>;
1065
1066    /// Advances the iterator and returns the next value.
1067    ///
1068    /// # Errors
1069    ///
1070    /// If the iterator fails to retrieve the next value, this method returns
1071    /// an error value. The error will be wrapped in an `Option::Some`.
1072    fn next(&mut self) -> Option<Result<DirEntry>> {
1073        loop {
1074            let dent = match self.it.next() {
1075                None => return None,
1076                Some(result) => itry!(result),
1077            };
1078            if !(self.predicate)(&dent) {
1079                if dent.is_dir() {
1080                    self.it.skip_current_dir();
1081                }
1082                continue;
1083            }
1084            return Some(Ok(dent));
1085        }
1086    }
1087}
1088
1089impl<P> iter::FusedIterator for FilterEntry<IntoIter, P> where
1090    P: FnMut(&DirEntry) -> bool
1091{
1092}
1093
1094impl<P> FilterEntry<IntoIter, P>
1095where
1096    P: FnMut(&DirEntry) -> bool,
1097{
1098    /// Yields only entries which satisfy the given predicate and skips
1099    /// descending into directories that do not satisfy the given predicate.
1100    ///
1101    /// The predicate is applied to all entries. If the predicate is
1102    /// true, iteration carries on as normal. If the predicate is false, the
1103    /// entry is ignored and if it is a directory, it is not descended into.
1104    ///
1105    /// This is often more convenient to use than [`skip_current_dir`]. For
1106    /// example, to skip hidden files and directories efficiently on unix
1107    /// systems:
1108    ///
1109    /// ```no_run
1110    /// use walkdir::{DirEntry, WalkDir};
1111    /// # use walkdir::Error;
1112    ///
1113    /// fn is_hidden(entry: &DirEntry) -> bool {
1114    ///     entry.file_name()
1115    ///          .to_str()
1116    ///          .map(|s| s.starts_with("."))
1117    ///          .unwrap_or(false)
1118    /// }
1119    ///
1120    /// # fn try_main() -> Result<(), Error> {
1121    /// for entry in WalkDir::new("foo")
1122    ///                      .into_iter()
1123    ///                      .filter_entry(|e| !is_hidden(e)) {
1124    ///     println!("{}", entry?.path().display());
1125    /// }
1126    /// # Ok(())
1127    /// # }
1128    /// ```
1129    ///
1130    /// Note that the iterator will still yield errors for reading entries that
1131    /// may not satisfy the predicate.
1132    ///
1133    /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
1134    /// passed to this predicate.
1135    ///
1136    /// Note that if the iterator has `contents_first` enabled, then this
1137    /// method is no different than calling the standard `Iterator::filter`
1138    /// method (because directory entries are yielded after they've been
1139    /// descended into).
1140    ///
1141    /// [`skip_current_dir`]: #method.skip_current_dir
1142    /// [`min_depth`]: struct.WalkDir.html#method.min_depth
1143    /// [`max_depth`]: struct.WalkDir.html#method.max_depth
1144    pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> {
1145        FilterEntry { it: self, predicate }
1146    }
1147
1148    /// Skips the current directory.
1149    ///
1150    /// This causes the iterator to stop traversing the contents of the least
1151    /// recently yielded directory. This means any remaining entries in that
1152    /// directory will be skipped (including sub-directories).
1153    ///
1154    /// Note that the ergonomics of this method are questionable since it
1155    /// borrows the iterator mutably. Namely, you must write out the looping
1156    /// condition manually. For example, to skip hidden entries efficiently on
1157    /// unix systems:
1158    ///
1159    /// ```no_run
1160    /// use walkdir::{DirEntry, WalkDir};
1161    ///
1162    /// fn is_hidden(entry: &DirEntry) -> bool {
1163    ///     entry.file_name()
1164    ///          .to_str()
1165    ///          .map(|s| s.starts_with("."))
1166    ///          .unwrap_or(false)
1167    /// }
1168    ///
1169    /// let mut it = WalkDir::new("foo").into_iter();
1170    /// loop {
1171    ///     let entry = match it.next() {
1172    ///         None => break,
1173    ///         Some(Err(err)) => panic!("ERROR: {}", err),
1174    ///         Some(Ok(entry)) => entry,
1175    ///     };
1176    ///     if is_hidden(&entry) {
1177    ///         if entry.file_type().is_dir() {
1178    ///             it.skip_current_dir();
1179    ///         }
1180    ///         continue;
1181    ///     }
1182    ///     println!("{}", entry.path().display());
1183    /// }
1184    /// ```
1185    ///
1186    /// You may find it more convenient to use the [`filter_entry`] iterator
1187    /// adapter. (See its documentation for the same example functionality as
1188    /// above.)
1189    ///
1190    /// [`filter_entry`]: #method.filter_entry
1191    pub fn skip_current_dir(&mut self) {
1192        self.it.skip_current_dir();
1193    }
1194}