flate2/gz/
write.rs

1use std::cmp;
2use std::io;
3use std::io::prelude::*;
4
5use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser};
6use crate::crc::{Crc, CrcWriter};
7use crate::zio;
8use crate::{Compress, Compression, Decompress, Status};
9
10/// A gzip streaming encoder
11///
12/// This structure exposes a [`Write`] interface that will emit compressed data
13/// to the underlying writer `W`.
14///
15/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
16///
17/// # Examples
18///
19/// ```
20/// use std::io::prelude::*;
21/// use flate2::Compression;
22/// use flate2::write::GzEncoder;
23///
24/// // Vec<u8> implements Write to print the compressed bytes of sample string
25/// # fn main() {
26///
27/// let mut e = GzEncoder::new(Vec::new(), Compression::default());
28/// e.write_all(b"Hello World").unwrap();
29/// println!("{:?}", e.finish().unwrap());
30/// # }
31/// ```
32#[derive(Debug)]
33pub struct GzEncoder<W: Write> {
34    inner: zio::Writer<W, Compress>,
35    crc: Crc,
36    crc_bytes_written: usize,
37    header: Vec<u8>,
38}
39
40pub fn gz_encoder<W: Write>(header: Vec<u8>, w: W, lvl: Compression) -> GzEncoder<W> {
41    GzEncoder {
42        inner: zio::Writer::new(w, Compress::new(lvl, false)),
43        crc: Crc::new(),
44        header,
45        crc_bytes_written: 0,
46    }
47}
48
49impl<W: Write> GzEncoder<W> {
50    /// Creates a new encoder which will use the given compression level.
51    ///
52    /// The encoder is not configured specially for the emitted header. For
53    /// header configuration, see the `GzBuilder` type.
54    ///
55    /// The data written to the returned encoder will be compressed and then
56    /// written to the stream `w`.
57    pub fn new(w: W, level: Compression) -> GzEncoder<W> {
58        GzBuilder::new().write(w, level)
59    }
60
61    /// Acquires a reference to the underlying writer.
62    pub fn get_ref(&self) -> &W {
63        self.inner.get_ref()
64    }
65
66    /// Acquires a mutable reference to the underlying writer.
67    ///
68    /// Note that mutation of the writer may result in surprising results if
69    /// this encoder is continued to be used.
70    pub fn get_mut(&mut self) -> &mut W {
71        self.inner.get_mut()
72    }
73
74    /// Attempt to finish this output stream, writing out final chunks of data.
75    ///
76    /// Note that this function can only be used once data has finished being
77    /// written to the output stream. After this function is called then further
78    /// calls to `write` may result in a panic.
79    ///
80    /// # Panics
81    ///
82    /// Attempts to write data to this stream may result in a panic after this
83    /// function is called.
84    ///
85    /// # Errors
86    ///
87    /// This function will perform I/O to complete this stream, and any I/O
88    /// errors which occur will be returned from this function.
89    pub fn try_finish(&mut self) -> io::Result<()> {
90        self.write_header()?;
91        self.inner.finish()?;
92
93        while self.crc_bytes_written < 8 {
94            let (sum, amt) = (self.crc.sum(), self.crc.amount());
95            let buf = [
96                (sum >> 0) as u8,
97                (sum >> 8) as u8,
98                (sum >> 16) as u8,
99                (sum >> 24) as u8,
100                (amt >> 0) as u8,
101                (amt >> 8) as u8,
102                (amt >> 16) as u8,
103                (amt >> 24) as u8,
104            ];
105            let inner = self.inner.get_mut();
106            let n = inner.write(&buf[self.crc_bytes_written..])?;
107            self.crc_bytes_written += n;
108        }
109        Ok(())
110    }
111
112    /// Finish encoding this stream, returning the underlying writer once the
113    /// encoding is done.
114    ///
115    /// Note that this function may not be suitable to call in a situation where
116    /// the underlying stream is an asynchronous I/O stream. To finish a stream
117    /// the `try_finish` (or `shutdown`) method should be used instead. To
118    /// re-acquire ownership of a stream it is safe to call this method after
119    /// `try_finish` or `shutdown` has returned `Ok`.
120    ///
121    /// # Errors
122    ///
123    /// This function will perform I/O to complete this stream, and any I/O
124    /// errors which occur will be returned from this function.
125    pub fn finish(mut self) -> io::Result<W> {
126        self.try_finish()?;
127        Ok(self.inner.take_inner())
128    }
129
130    fn write_header(&mut self) -> io::Result<()> {
131        while !self.header.is_empty() {
132            let n = self.inner.get_mut().write(&self.header)?;
133            self.header.drain(..n);
134        }
135        Ok(())
136    }
137}
138
139impl<W: Write> Write for GzEncoder<W> {
140    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
141        assert_eq!(self.crc_bytes_written, 0);
142        self.write_header()?;
143        let n = self.inner.write(buf)?;
144        self.crc.update(&buf[..n]);
145        Ok(n)
146    }
147
148    fn flush(&mut self) -> io::Result<()> {
149        assert_eq!(self.crc_bytes_written, 0);
150        self.write_header()?;
151        self.inner.flush()
152    }
153}
154
155impl<R: Read + Write> Read for GzEncoder<R> {
156    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
157        self.get_mut().read(buf)
158    }
159}
160
161impl<W: Write> Drop for GzEncoder<W> {
162    fn drop(&mut self) {
163        if self.inner.is_present() {
164            let _ = self.try_finish();
165        }
166    }
167}
168
169/// A decoder for a single member of a [gzip file].
170///
171/// This structure exposes a [`Write`] interface, receiving compressed data and
172/// writing uncompressed data to the underlying writer.
173///
174/// After decoding a single member of the gzip data this writer will return the number of bytes up to
175/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to
176/// handle any data following the gzip member.
177///
178/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
179/// or read more
180/// [in the introduction](../index.html#about-multi-member-gzip-files).
181///
182/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
183/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
184///
185/// # Examples
186///
187/// ```
188/// use std::io::prelude::*;
189/// use std::io;
190/// use flate2::Compression;
191/// use flate2::write::{GzEncoder, GzDecoder};
192///
193/// # fn main() {
194/// #    let mut e = GzEncoder::new(Vec::new(), Compression::default());
195/// #    e.write(b"Hello World").unwrap();
196/// #    let bytes = e.finish().unwrap();
197/// #    assert_eq!("Hello World", decode_writer(bytes).unwrap());
198/// # }
199/// // Uncompresses a gzip encoded vector of bytes and returns a string or error
200/// // Here Vec<u8> implements Write
201/// fn decode_writer(bytes: Vec<u8>) -> io::Result<String> {
202///    let mut writer = Vec::new();
203///    let mut decoder = GzDecoder::new(writer);
204///    decoder.write_all(&bytes[..])?;
205///    writer = decoder.finish()?;
206///    let return_string = String::from_utf8(writer).expect("String parsing error");
207///    Ok(return_string)
208/// }
209/// ```
210#[derive(Debug)]
211pub struct GzDecoder<W: Write> {
212    inner: zio::Writer<CrcWriter<W>, Decompress>,
213    crc_bytes: Vec<u8>,
214    header_parser: GzHeaderParser,
215}
216
217const CRC_BYTES_LEN: usize = 8;
218
219impl<W: Write> GzDecoder<W> {
220    /// Creates a new decoder which will write uncompressed data to the stream.
221    ///
222    /// When this encoder is dropped or unwrapped the final pieces of data will
223    /// be flushed.
224    pub fn new(w: W) -> GzDecoder<W> {
225        GzDecoder {
226            inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)),
227            crc_bytes: Vec::with_capacity(CRC_BYTES_LEN),
228            header_parser: GzHeaderParser::new(),
229        }
230    }
231
232    /// Returns the header associated with this stream.
233    pub fn header(&self) -> Option<&GzHeader> {
234        self.header_parser.header()
235    }
236
237    /// Acquires a reference to the underlying writer.
238    pub fn get_ref(&self) -> &W {
239        self.inner.get_ref().get_ref()
240    }
241
242    /// Acquires a mutable reference to the underlying writer.
243    ///
244    /// Note that mutating the output/input state of the stream may corrupt this
245    /// object, so care must be taken when using this method.
246    pub fn get_mut(&mut self) -> &mut W {
247        self.inner.get_mut().get_mut()
248    }
249
250    /// Attempt to finish this output stream, writing out final chunks of data.
251    ///
252    /// Note that this function can only be used once data has finished being
253    /// written to the output stream. After this function is called then further
254    /// calls to `write` may result in a panic.
255    ///
256    /// # Panics
257    ///
258    /// Attempts to write data to this stream may result in a panic after this
259    /// function is called.
260    ///
261    /// # Errors
262    ///
263    /// This function will perform I/O to finish the stream, returning any
264    /// errors which happen.
265    pub fn try_finish(&mut self) -> io::Result<()> {
266        self.finish_and_check_crc()?;
267        Ok(())
268    }
269
270    /// Consumes this decoder, flushing the output stream.
271    ///
272    /// This will flush the underlying data stream and then return the contained
273    /// writer if the flush succeeded.
274    ///
275    /// Note that this function may not be suitable to call in a situation where
276    /// the underlying stream is an asynchronous I/O stream. To finish a stream
277    /// the `try_finish` (or `shutdown`) method should be used instead. To
278    /// re-acquire ownership of a stream it is safe to call this method after
279    /// `try_finish` or `shutdown` has returned `Ok`.
280    ///
281    /// # Errors
282    ///
283    /// This function will perform I/O to complete this stream, and any I/O
284    /// errors which occur will be returned from this function.
285    pub fn finish(mut self) -> io::Result<W> {
286        self.finish_and_check_crc()?;
287        Ok(self.inner.take_inner().into_inner())
288    }
289
290    fn finish_and_check_crc(&mut self) -> io::Result<()> {
291        self.inner.finish()?;
292
293        if self.crc_bytes.len() != 8 {
294            return Err(corrupt());
295        }
296
297        let crc = ((self.crc_bytes[0] as u32) << 0)
298            | ((self.crc_bytes[1] as u32) << 8)
299            | ((self.crc_bytes[2] as u32) << 16)
300            | ((self.crc_bytes[3] as u32) << 24);
301        let amt = ((self.crc_bytes[4] as u32) << 0)
302            | ((self.crc_bytes[5] as u32) << 8)
303            | ((self.crc_bytes[6] as u32) << 16)
304            | ((self.crc_bytes[7] as u32) << 24);
305        if crc != self.inner.get_ref().crc().sum() {
306            return Err(corrupt());
307        }
308        if amt != self.inner.get_ref().crc().amount() {
309            return Err(corrupt());
310        }
311        Ok(())
312    }
313}
314
315impl<W: Write> Write for GzDecoder<W> {
316    fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
317        let buflen = buf.len();
318        if self.header().is_none() {
319            match self.header_parser.parse(&mut buf) {
320                Err(err) => {
321                    if err.kind() == io::ErrorKind::UnexpectedEof {
322                        // all data read but header still not complete
323                        Ok(buflen)
324                    } else {
325                        Err(err)
326                    }
327                }
328                Ok(_) => {
329                    debug_assert!(self.header().is_some());
330                    // buf now contains the unread part of the original buf
331                    let n = buflen - buf.len();
332                    Ok(n)
333                }
334            }
335        } else {
336            let (n, status) = self.inner.write_with_status(buf)?;
337
338            if status == Status::StreamEnd && n < buf.len() && self.crc_bytes.len() < 8 {
339                let remaining = buf.len() - n;
340                let crc_bytes = cmp::min(remaining, CRC_BYTES_LEN - self.crc_bytes.len());
341                self.crc_bytes.extend(&buf[n..n + crc_bytes]);
342                return Ok(n + crc_bytes);
343            }
344            Ok(n)
345        }
346    }
347
348    fn flush(&mut self) -> io::Result<()> {
349        self.inner.flush()
350    }
351}
352
353impl<W: Read + Write> Read for GzDecoder<W> {
354    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
355        self.inner.get_mut().get_mut().read(buf)
356    }
357}
358
359/// A gzip streaming decoder that decodes a [gzip file] with multiple members.
360///
361/// This structure exposes a [`Write`] interface that will consume compressed data and
362/// write uncompressed data to the underlying writer.
363///
364/// A gzip file consists of a series of *members* concatenated one after another.
365/// `MultiGzDecoder` decodes all members of a file and writes them to the
366/// underlying writer one after another.
367///
368/// To handle members separately, see [GzDecoder] or read more
369/// [in the introduction](../index.html#about-multi-member-gzip-files).
370///
371/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
372#[derive(Debug)]
373pub struct MultiGzDecoder<W: Write> {
374    inner: GzDecoder<W>,
375}
376
377impl<W: Write> MultiGzDecoder<W> {
378    /// Creates a new decoder which will write uncompressed data to the stream.
379    /// If the gzip stream contains multiple members all will be decoded.
380    pub fn new(w: W) -> MultiGzDecoder<W> {
381        MultiGzDecoder {
382            inner: GzDecoder::new(w),
383        }
384    }
385
386    /// Returns the header associated with the current member.
387    pub fn header(&self) -> Option<&GzHeader> {
388        self.inner.header()
389    }
390
391    /// Acquires a reference to the underlying writer.
392    pub fn get_ref(&self) -> &W {
393        self.inner.get_ref()
394    }
395
396    /// Acquires a mutable reference to the underlying writer.
397    ///
398    /// Note that mutating the output/input state of the stream may corrupt this
399    /// object, so care must be taken when using this method.
400    pub fn get_mut(&mut self) -> &mut W {
401        self.inner.get_mut()
402    }
403
404    /// Attempt to finish this output stream, writing out final chunks of data.
405    ///
406    /// Note that this function can only be used once data has finished being
407    /// written to the output stream. After this function is called then further
408    /// calls to `write` may result in a panic.
409    ///
410    /// # Panics
411    ///
412    /// Attempts to write data to this stream may result in a panic after this
413    /// function is called.
414    ///
415    /// # Errors
416    ///
417    /// This function will perform I/O to finish the stream, returning any
418    /// errors which happen.
419    pub fn try_finish(&mut self) -> io::Result<()> {
420        self.inner.try_finish()
421    }
422
423    /// Consumes this decoder, flushing the output stream.
424    ///
425    /// This will flush the underlying data stream and then return the contained
426    /// writer if the flush succeeded.
427    ///
428    /// Note that this function may not be suitable to call in a situation where
429    /// the underlying stream is an asynchronous I/O stream. To finish a stream
430    /// the `try_finish` (or `shutdown`) method should be used instead. To
431    /// re-acquire ownership of a stream it is safe to call this method after
432    /// `try_finish` or `shutdown` has returned `Ok`.
433    ///
434    /// # Errors
435    ///
436    /// This function will perform I/O to complete this stream, and any I/O
437    /// errors which occur will be returned from this function.
438    pub fn finish(self) -> io::Result<W> {
439        self.inner.finish()
440    }
441}
442
443impl<W: Write> Write for MultiGzDecoder<W> {
444    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
445        if buf.is_empty() {
446            Ok(0)
447        } else {
448            match self.inner.write(buf) {
449                Ok(0) => {
450                    // When the GzDecoder indicates that it has finished
451                    // create a new GzDecoder to handle additional data.
452                    self.inner.try_finish()?;
453                    let w = self.inner.inner.take_inner().into_inner();
454                    self.inner = GzDecoder::new(w);
455                    self.inner.write(buf)
456                }
457                res => res,
458            }
459        }
460    }
461
462    fn flush(&mut self) -> io::Result<()> {
463        self.inner.flush()
464    }
465}
466
#[cfg(test)]
mod tests {
    use super::*;

    const STR: &str = "Hello World Hello World Hello World Hello World Hello World \
                       Hello World Hello World Hello World Hello World Hello World \
                       Hello World Hello World Hello World Hello World Hello World \
                       Hello World Hello World Hello World Hello World Hello World \
                       Hello World Hello World Hello World Hello World Hello World";

    // Compresses `STR` into one gzip member using the write-side encoder.
    fn gzip_str() -> Vec<u8> {
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write(STR.as_ref()).unwrap();
        encoder.finish().unwrap()
    }

    // Compresses `STR` through the read-side encoder configured by `builder`.
    fn gzip_str_with(builder: GzBuilder) -> Vec<u8> {
        let mut encoder = builder.read(STR.as_bytes(), Compression::default());
        let mut bytes = Vec::new();
        encoder.read_to_end(&mut bytes).unwrap();
        bytes
    }

    // Finishes `decoder` and returns everything it decoded as a string.
    fn finish_to_string(decoder: GzDecoder<Vec<u8>>) -> String {
        let decoded = decoder.finish().unwrap();
        String::from_utf8(decoded).expect("String parsing error")
    }

    // Writes the first `split` bytes (which must be accepted in full) and
    // then the remainder, retrying once if the remainder was only partly
    // consumed.
    fn write_split(decoder: &mut GzDecoder<Vec<u8>>, bytes: &[u8], split: usize) {
        let (head, tail) = bytes.split_at(split);
        assert_eq!(decoder.write(head).unwrap(), split);
        let n = decoder.write(tail).unwrap();
        if n < tail.len() {
            decoder.write(&tail[n..]).unwrap();
        }
    }

    #[test]
    fn decode_writer_one_chunk() {
        let bytes = gzip_str();

        let mut decoder = GzDecoder::new(Vec::new());
        let n = decoder.write(&bytes[..]).unwrap();
        decoder.write(&bytes[n..]).unwrap();
        decoder.try_finish().unwrap();
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_partial_header() {
        let bytes = gzip_str();

        let mut decoder = GzDecoder::new(Vec::new());
        write_split(&mut decoder, &bytes, 5);
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_partial_header_filename() {
        let filename = "test.txt";
        let bytes = gzip_str_with(GzBuilder::new().filename(filename));

        let mut decoder = GzDecoder::new(Vec::new());
        write_split(&mut decoder, &bytes, 12);
        assert_eq!(
            decoder.header().unwrap().filename().unwrap(),
            filename.as_bytes()
        );
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_partial_header_comment() {
        let comment = "test comment";
        let bytes = gzip_str_with(GzBuilder::new().comment(comment));

        let mut decoder = GzDecoder::new(Vec::new());
        write_split(&mut decoder, &bytes, 12);
        assert_eq!(
            decoder.header().unwrap().comment().unwrap(),
            comment.as_bytes()
        );
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_exact_header() {
        let bytes = gzip_str();

        let mut decoder = GzDecoder::new(Vec::new());
        assert_eq!(decoder.write(&bytes[..10]).unwrap(), 10);
        decoder.write(&bytes[10..]).unwrap();
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_partial_crc() {
        let bytes = gzip_str();

        let mut decoder = GzDecoder::new(Vec::new());
        let cut = bytes.len() - 5;
        let n = decoder.write(&bytes[..cut]).unwrap();
        decoder.write(&bytes[n..]).unwrap();
        assert_eq!(finish_to_string(decoder), STR);
    }

    // Two or more gzip files concatenated form a multi-member gzip file. MultiGzDecoder will
    // concatenate the decoded contents of all members.
    #[test]
    fn decode_multi_writer() {
        let bytes = gzip_str().repeat(2);

        let mut decoder = MultiGzDecoder::new(Vec::new());
        let mut offset = 0;
        while offset < bytes.len() {
            let n = decoder.write(&bytes[offset..]).unwrap();
            assert!(n != 0);
            offset += n;
        }
        let decoded = decoder.finish().unwrap();
        let return_string = String::from_utf8(decoded).expect("String parsing error");
        let expected = STR.repeat(2);
        assert_eq!(return_string, expected);
    }

    // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let mut compressed = gzip_str();
        compressed.push(b'x');

        let mut decoder = GzDecoder::new(Vec::new());
        let mut consumed_bytes = 0;
        loop {
            match decoder.write(&compressed[consumed_bytes..]).unwrap() {
                0 => break,
                n => consumed_bytes += n,
            }
        }
        assert_eq!(finish_to_string(decoder), STR);
        assert_eq!(&compressed[consumed_bytes..], b"x");
    }
}