flate2/gz/
bufread.rs

1use std::cmp;
2use std::io;
3use std::io::prelude::*;
4use std::mem;
5
6use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
7use crate::crc::CrcReader;
8use crate::deflate;
9use crate::Compression;
10
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is advanced by the number of bytes copied, and that count is
/// returned. Callers must keep `*pos <= from.len()`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let start = *pos;
    // Bounded by both the free space in `into` and what is left of `from`.
    let count = cmp::min(into.len(), from.len() - start);
    let end = start + count;
    into[..count].copy_from_slice(&from[start..end]);
    *pos = end;
    count
}
17
/// A gzip streaming encoder
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a Vector or error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
///     let f = File::open("examples/hello_world.txt")?;
///     let b = BufReader::new(f);
///     let mut gz = GzEncoder::new(b, Compression::fast());
///     let mut buffer = Vec::new();
///     gz.read_to_end(&mut buffer)?;
///     Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    /// Deflate encoder layered over a CRC-tracking wrapper around the
    /// caller's reader.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    /// Pre-built header bytes; `read` hands these out before any compressed
    /// data.
    header: Vec<u8>,
    /// Cursor into `header` while the header is being emitted. Once `eof` is
    /// set it is reset to 0 and reused as the cursor into the 8-byte footer.
    pos: usize,
    /// Set once `inner` has reported end of stream; later reads serve the
    /// footer.
    eof: bool,
}
55
56pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
57    let crc = CrcReader::new(r);
58    GzEncoder {
59        inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
60        header,
61        pos: 0,
62        eof: false,
63    }
64}
65
66impl<R: BufRead> GzEncoder<R> {
67    /// Creates a new encoder which will use the given compression level.
68    ///
69    /// The encoder is not configured specially for the emitted header. For
70    /// header configuration, see the `GzBuilder` type.
71    ///
72    /// The data read from the stream `r` will be compressed and available
73    /// through the returned reader.
74    pub fn new(r: R, level: Compression) -> GzEncoder<R> {
75        GzBuilder::new().buf_read(r, level)
76    }
77
78    fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
79        if self.pos == 8 {
80            return Ok(0);
81        }
82        let crc = self.inner.get_ref().crc();
83        let calced_crc_bytes = crc.sum().to_le_bytes();
84        let arr = [
85            calced_crc_bytes[0],
86            calced_crc_bytes[1],
87            calced_crc_bytes[2],
88            calced_crc_bytes[3],
89            (crc.amount() >> 0) as u8,
90            (crc.amount() >> 8) as u8,
91            (crc.amount() >> 16) as u8,
92            (crc.amount() >> 24) as u8,
93        ];
94        Ok(copy(into, &arr, &mut self.pos))
95    }
96}
97
98impl<R> GzEncoder<R> {
99    /// Acquires a reference to the underlying reader.
100    pub fn get_ref(&self) -> &R {
101        self.inner.get_ref().get_ref()
102    }
103
104    /// Acquires a mutable reference to the underlying reader.
105    ///
106    /// Note that mutation of the reader may result in surprising results if
107    /// this encoder is continued to be used.
108    pub fn get_mut(&mut self) -> &mut R {
109        self.inner.get_mut().get_mut()
110    }
111
112    /// Returns the underlying stream, consuming this encoder
113    pub fn into_inner(self) -> R {
114        self.inner.into_inner().into_inner()
115    }
116}
117
/// Splits an 8-byte trailer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`, where `crc` comes from bytes 0..4 and `amt` from
/// bytes 4..8.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let word_at = |offset: usize| {
        u32::from_le_bytes([
            buf[offset],
            buf[offset + 1],
            buf[offset + 2],
            buf[offset + 3],
        ])
    };
    (word_at(0), word_at(4))
}
130
131impl<R: BufRead> Read for GzEncoder<R> {
132    fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
133        let mut amt = 0;
134        if self.eof {
135            return self.read_footer(into);
136        } else if self.pos < self.header.len() {
137            amt += copy(into, &self.header, &mut self.pos);
138            if amt == into.len() {
139                return Ok(amt);
140            }
141            let tmp = into;
142            into = &mut tmp[amt..];
143        }
144        match self.inner.read(into)? {
145            0 => {
146                self.eof = true;
147                self.pos = 0;
148                self.read_footer(into)
149            }
150            n => Ok(amt + n),
151        }
152    }
153}
154
155impl<R: BufRead + Write> Write for GzEncoder<R> {
156    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
157        self.get_mut().write(buf)
158    }
159
160    fn flush(&mut self) -> io::Result<()> {
161        self.get_mut().flush()
162    }
163}
164
/// A decoder for a single member of a [gzip file].
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
///
/// After reading a single member of the gzip data this reader will return
/// `Ok(0)` even if there are more bytes available in the underlying reader.
/// If you need the following bytes, call `into_inner()` after `Ok(0)` to
/// recover the underlying reader.
///
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
/// or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = GzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    /// Current phase of decoding (header, body, trailer, deferred error or
    /// end).
    state: GzState,
    /// Deflate decoder over the caller's reader, wrapped so that a CRC and
    /// byte count are tracked for the decompressed output.
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    /// When `true`, a validated trailer followed by more input restarts
    /// header parsing instead of ending the stream.
    multi: bool,
}
215
/// Phases the decoder's `read` moves through.
#[derive(Debug)]
enum GzState {
    /// The header is still being parsed, for example because the first
    /// attempt hit `WouldBlock`, or a further member is starting in
    /// multi-member mode.
    Header(GzHeaderParser),
    /// The header is parsed and the compressed body is being decoded.
    Body(GzHeader),
    /// The body is exhausted and the 8-byte trailer is being collected. The
    /// `usize` is how many trailer bytes have been read into the buffer so
    /// far.
    Finished(GzHeader, usize, [u8; 8]),
    /// The constructor hit a non-`WouldBlock` error; it is reported by the
    /// next `read`.
    Err(io::Error),
    /// Decoding is over; `read` returns `Ok(0)`. Holds the header if one was
    /// successfully parsed.
    End(Option<GzHeader>),
}
224
225impl<R: BufRead> GzDecoder<R> {
226    /// Creates a new decoder from the given reader, immediately parsing the
227    /// gzip header.
228    pub fn new(mut r: R) -> GzDecoder<R> {
229        let mut header_parser = GzHeaderParser::new();
230
231        let state = match header_parser.parse(&mut r) {
232            Ok(_) => GzState::Body(GzHeader::from(header_parser)),
233            Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
234                GzState::Header(header_parser)
235            }
236            Err(err) => GzState::Err(err),
237        };
238
239        GzDecoder {
240            state,
241            reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
242            multi: false,
243        }
244    }
245
246    fn multi(mut self, flag: bool) -> GzDecoder<R> {
247        self.multi = flag;
248        self
249    }
250}
251
252impl<R> GzDecoder<R> {
253    /// Returns the header associated with this stream, if it was valid
254    pub fn header(&self) -> Option<&GzHeader> {
255        match &self.state {
256            GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
257            GzState::End(header) => header.as_ref(),
258            _ => None,
259        }
260    }
261
262    /// Acquires a reference to the underlying reader.
263    pub fn get_ref(&self) -> &R {
264        self.reader.get_ref().get_ref()
265    }
266
267    /// Acquires a mutable reference to the underlying stream.
268    ///
269    /// Note that mutation of the stream may result in surprising results if
270    /// this decoder is continued to be used.
271    pub fn get_mut(&mut self) -> &mut R {
272        self.reader.get_mut().get_mut()
273    }
274
275    /// Consumes this decoder, returning the underlying reader.
276    pub fn into_inner(self) -> R {
277        self.reader.into_inner().into_inner()
278    }
279}
280
281impl<R: BufRead> Read for GzDecoder<R> {
282    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
283        loop {
284            match &mut self.state {
285                GzState::Header(parser) => {
286                    parser.parse(self.reader.get_mut().get_mut())?;
287                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
288                }
289                GzState::Body(header) => {
290                    if into.is_empty() {
291                        return Ok(0);
292                    }
293                    match self.reader.read(into)? {
294                        0 => {
295                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
296                        }
297                        n => {
298                            return Ok(n);
299                        }
300                    }
301                }
302                GzState::Finished(header, pos, buf) => {
303                    if *pos < buf.len() {
304                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
305                    } else {
306                        let (crc, amt) = finish(&buf);
307
308                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
309                            self.state = GzState::End(Some(mem::take(header)));
310                            return Err(corrupt());
311                        } else if self.multi {
312                            let is_eof = self
313                                .reader
314                                .get_mut()
315                                .get_mut()
316                                .fill_buf()
317                                .map(|buf| buf.is_empty())?;
318
319                            if is_eof {
320                                self.state = GzState::End(Some(mem::take(header)));
321                            } else {
322                                self.reader.reset();
323                                self.reader.get_mut().reset_data();
324                                self.state = GzState::Header(GzHeaderParser::new())
325                            }
326                        } else {
327                            self.state = GzState::End(Some(mem::take(header)));
328                        }
329                    }
330                }
331                GzState::Err(err) => {
332                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
333                    self.state = GzState::End(None);
334                    return result;
335                }
336                GzState::End(_) => return Ok(0),
337            }
338        }
339    }
340}
341
342impl<R: BufRead + Write> Write for GzDecoder<R> {
343    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
344        self.get_mut().write(buf)
345    }
346
347    fn flush(&mut self) -> io::Result<()> {
348        self.get_mut().flush()
349    }
350}
351
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
///
/// A gzip file consists of a series of *members* concatenated one after another.
/// `MultiGzDecoder` decodes all members from the data and only returns `Ok(0)` when the
/// underlying reader does. For a file, this reads to the end of the file.
///
/// To handle members separately, see [`GzDecoder`] or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = MultiGzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
// Thin wrapper around a `GzDecoder` that has its multi-member flag enabled.
#[derive(Debug)]
pub struct MultiGzDecoder<R>(GzDecoder<R>);
396
397impl<R: BufRead> MultiGzDecoder<R> {
398    /// Creates a new decoder from the given reader, immediately parsing the
399    /// (first) gzip header. If the gzip stream contains multiple members all will
400    /// be decoded.
401    pub fn new(r: R) -> MultiGzDecoder<R> {
402        MultiGzDecoder(GzDecoder::new(r).multi(true))
403    }
404}
405
406impl<R> MultiGzDecoder<R> {
407    /// Returns the current header associated with this stream, if it's valid
408    pub fn header(&self) -> Option<&GzHeader> {
409        self.0.header()
410    }
411
412    /// Acquires a reference to the underlying reader.
413    pub fn get_ref(&self) -> &R {
414        self.0.get_ref()
415    }
416
417    /// Acquires a mutable reference to the underlying stream.
418    ///
419    /// Note that mutation of the stream may result in surprising results if
420    /// this decoder is continued to be used.
421    pub fn get_mut(&mut self) -> &mut R {
422        self.0.get_mut()
423    }
424
425    /// Consumes this decoder, returning the underlying reader.
426    pub fn into_inner(self) -> R {
427        self.0.into_inner()
428    }
429}
430
431impl<R: BufRead> Read for MultiGzDecoder<R> {
432    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
433        self.0.read(into)
434    }
435}
436
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all` rather than `write`: a single `write` is allowed to
            // accept only part of the input.
            e.write_all(expected.as_bytes()).unwrap();
            let mut b = e.finish().unwrap();
            // One trailing byte that is not part of the gzip member.
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Probe with a non-empty buffer. Reading into an empty one returns 0
        // regardless of decoder state and so would not prove anything.
        let mut probe = [0u8; 16];
        assert_eq!(
            decoder.read(&mut probe).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}