1use std::ffi::CString;
2use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3use std::time;
4
5use crate::bufreader::BufReader;
6use crate::{Compression, Crc};
7
/// FLG bit 1: a 16-bit header checksum follows the optional header fields.
pub static FHCRC: u8 = 0b0000_0010;
/// FLG bit 2: a length-prefixed "extra" field is present.
pub static FEXTRA: u8 = 0b0000_0100;
/// FLG bit 3: a NUL-terminated file name is present.
pub static FNAME: u8 = 0b0000_1000;
/// FLG bit 4: a NUL-terminated comment is present.
pub static FCOMMENT: u8 = 0b0001_0000;
/// FLG bits 5-7: reserved; the header parser in this file rejects a header
/// that has any of them set.
pub static FRESERVED: u8 = 0b1110_0000;
13
14pub mod bufread;
15pub mod read;
16pub mod write;
17
// Upper bound, in bytes, on a NUL-terminated header string accepted by
// `read_to_nul` (used for the file name and comment fields): 65535.
const MAX_HEADER_BUF: usize = u16::MAX as usize;
21
/// The decoded contents of a gzip header.
///
/// Holds the optional extra field, file name and comment as raw bytes,
/// together with the OS byte and the modification time.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    extra: Option<Vec<u8>>,
    filename: Option<Vec<u8>>,
    comment: Option<Vec<u8>>,
    operating_system: u8,
    mtime: u32,
}

impl GzHeader {
    /// Returns the file name bytes (without a trailing NUL), if one was stored.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// Returns the raw bytes of the extra field, if one was stored.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// Returns the comment bytes (without a trailing NUL), if one was stored.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// Returns the OS byte exactly as stored in the header.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// Returns the modification time as stored in the header.
    ///
    /// `mtime_as_datetime` interprets this value as seconds since the Unix
    /// epoch, with `0` meaning "no timestamp".
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// Returns the modification time as a `SystemTime`.
    ///
    /// A stored value of `0` yields `None`; any other value is treated as a
    /// number of whole seconds after `UNIX_EPOCH`.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        match self.mtime {
            0 => None,
            secs => Some(time::UNIX_EPOCH + time::Duration::from_secs(u64::from(secs))),
        }
    }
}
89
/// Position of `GzHeaderParser` within a gzip header.
///
/// Each variant carries whatever partial progress is needed to continue
/// reading that part of the header. Where present, the `Option<Box<Crc>>` is
/// the running checksum over the header bytes consumed so far; the parser
/// only creates it when the `FHCRC` flag is set.
#[derive(Debug, Default)]
pub enum GzHeaderState {
    /// Reading the 10-byte fixed header: (bytes read so far, buffer).
    Start(u8, [u8; 10]),
    /// Reading the 2-byte length of the extra field:
    /// (checksum, bytes read so far, buffer).
    Xlen(Option<Box<Crc>>, u8, [u8; 2]),
    /// Reading the extra field body: (checksum, bytes read so far).
    Extra(Option<Box<Crc>>, u16),
    /// Reading the NUL-terminated file name.
    Filename(Option<Box<Crc>>),
    /// Reading the NUL-terminated comment.
    Comment(Option<Box<Crc>>),
    /// Reading the 2-byte stored header checksum:
    /// (checksum, bytes read so far, buffer).
    Crc(Option<Box<Crc>>, u8, [u8; 2]),
    /// The whole header has been consumed. This is also the `Default` state.
    #[default]
    Complete,
}
101
102#[derive(Debug, Default)]
103pub struct GzHeaderParser {
104 state: GzHeaderState,
105 flags: u8,
106 header: GzHeader,
107}
108
109impl GzHeaderParser {
110 fn new() -> Self {
111 GzHeaderParser {
112 state: GzHeaderState::Start(0, [0; 10]),
113 flags: 0,
114 header: GzHeader::default(),
115 }
116 }
117
118 fn parse<R: BufRead>(&mut self, r: &mut R) -> Result<()> {
119 loop {
120 match &mut self.state {
121 GzHeaderState::Start(count, buffer) => {
122 while (*count as usize) < buffer.len() {
123 *count += read_into(r, &mut buffer[*count as usize..])? as u8;
124 }
125 if buffer[0] != 0x1f || buffer[1] != 0x8b {
127 return Err(bad_header());
128 }
129 if buffer[2] != 8 {
131 return Err(bad_header());
132 }
133 self.flags = buffer[3];
134 if self.flags & FRESERVED != 0 {
136 return Err(bad_header());
137 }
138 self.header.mtime = ((buffer[4] as u32) << 0)
139 | ((buffer[5] as u32) << 8)
140 | ((buffer[6] as u32) << 16)
141 | ((buffer[7] as u32) << 24);
142 let _xfl = buffer[8];
143 self.header.operating_system = buffer[9];
144 let crc = if self.flags & FHCRC != 0 {
145 let mut crc = Box::new(Crc::new());
146 crc.update(buffer);
147 Some(crc)
148 } else {
149 None
150 };
151 self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
152 }
153 GzHeaderState::Xlen(crc, count, buffer) => {
154 if self.flags & FEXTRA != 0 {
155 while (*count as usize) < buffer.len() {
156 *count += read_into(r, &mut buffer[*count as usize..])? as u8;
157 }
158 if let Some(crc) = crc {
159 crc.update(buffer);
160 }
161 let xlen = parse_le_u16(buffer);
162 self.header.extra = Some(vec![0; xlen as usize]);
163 self.state = GzHeaderState::Extra(crc.take(), 0);
164 } else {
165 self.state = GzHeaderState::Filename(crc.take());
166 }
167 }
168 GzHeaderState::Extra(crc, count) => {
169 debug_assert!(self.header.extra.is_some());
170 let extra = self.header.extra.as_mut().unwrap();
171 while (*count as usize) < extra.len() {
172 *count += read_into(r, &mut extra[*count as usize..])? as u16;
173 }
174 if let Some(crc) = crc {
175 crc.update(extra);
176 }
177 self.state = GzHeaderState::Filename(crc.take());
178 }
179 GzHeaderState::Filename(crc) => {
180 if self.flags & FNAME != 0 {
181 let filename = self.header.filename.get_or_insert_with(Vec::new);
182 read_to_nul(r, filename)?;
183 if let Some(crc) = crc {
184 crc.update(filename);
185 crc.update(b"\0");
186 }
187 }
188 self.state = GzHeaderState::Comment(crc.take());
189 }
190 GzHeaderState::Comment(crc) => {
191 if self.flags & FCOMMENT != 0 {
192 let comment = self.header.comment.get_or_insert_with(Vec::new);
193 read_to_nul(r, comment)?;
194 if let Some(crc) = crc {
195 crc.update(comment);
196 crc.update(b"\0");
197 }
198 }
199 self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
200 }
201 GzHeaderState::Crc(crc, count, buffer) => {
202 if let Some(crc) = crc {
203 debug_assert!(self.flags & FHCRC != 0);
204 while (*count as usize) < buffer.len() {
205 *count += read_into(r, &mut buffer[*count as usize..])? as u8;
206 }
207 let stored_crc = parse_le_u16(buffer);
208 let calced_crc = crc.sum() as u16;
209 if stored_crc != calced_crc {
210 return Err(corrupt());
211 }
212 }
213 self.state = GzHeaderState::Complete;
214 }
215 GzHeaderState::Complete => {
216 return Ok(());
217 }
218 }
219 }
220 }
221
222 fn header(&self) -> Option<&GzHeader> {
223 match self.state {
224 GzHeaderState::Complete => Some(&self.header),
225 _ => None,
226 }
227 }
228}
229
230impl From<GzHeaderParser> for GzHeader {
231 fn from(parser: GzHeaderParser) -> Self {
232 debug_assert!(matches!(parser.state, GzHeaderState::Complete));
233 parser.header
234 }
235}
236
// Performs a single `read` from `r` into `buffer` and returns the number of
// bytes read.
//
// Differs from a plain `read` in two ways:
// * a zero-length read (end of input) is reported as `UnexpectedEof`;
// * an `Interrupted` error is reported as `Ok(0)`, meaning "nothing read
//   yet, call again".
// Every other error is passed through unchanged. `buffer` must not be empty
// (checked in debug builds only).
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    let n = match r.read(buffer) {
        Ok(n) => n,
        Err(e) if e.kind() == ErrorKind::Interrupted => return Ok(0),
        Err(e) => return Err(e),
    };
    if n == 0 {
        Err(ErrorKind::UnexpectedEof.into())
    } else {
        Ok(n)
    }
}
249
250fn read_to_nul<R: BufRead>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
252 let mut bytes = r.bytes();
253 loop {
254 match bytes.next().transpose()? {
255 Some(0) => return Ok(()),
256 Some(_) if buffer.len() == MAX_HEADER_BUF => {
257 return Err(Error::new(
258 ErrorKind::InvalidInput,
259 "gzip header field too long",
260 ));
261 }
262 Some(byte) => {
263 buffer.push(byte);
264 }
265 None => {
266 return Err(ErrorKind::UnexpectedEof.into());
267 }
268 }
269 }
270}
271
// Decodes a little-endian `u16`: `buffer[0]` is the low byte and `buffer[1]`
// the high byte.
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    let [low, high] = *buffer;
    u16::from(low) | (u16::from(high) << 8)
}
275
// Builds the error returned when the fixed part of a gzip header is
// malformed (wrong magic bytes, wrong method or reserved flag bits set).
fn bad_header() -> Error {
    let message = "invalid gzip header";
    Error::new(ErrorKind::InvalidInput, message)
}
279
// Builds the error returned when a stored checksum does not match the one
// that was computed.
fn corrupt() -> Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    Error::new(ErrorKind::InvalidInput, message)
}
286
287#[derive(Debug, Default)]
314pub struct GzBuilder {
315 extra: Option<Vec<u8>>,
316 filename: Option<CString>,
317 comment: Option<CString>,
318 operating_system: Option<u8>,
319 mtime: u32,
320}
321
322impl GzBuilder {
323 pub fn new() -> GzBuilder {
325 Self::default()
326 }
327
328 pub fn mtime(mut self, mtime: u32) -> GzBuilder {
330 self.mtime = mtime;
331 self
332 }
333
334 pub fn operating_system(mut self, os: u8) -> GzBuilder {
336 self.operating_system = Some(os);
337 self
338 }
339
340 pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
342 self.extra = Some(extra.into());
343 self
344 }
345
346 pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
352 self.filename = Some(CString::new(filename.into()).unwrap());
353 self
354 }
355
356 pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
362 self.comment = Some(CString::new(comment.into()).unwrap());
363 self
364 }
365
366 pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
371 write::gz_encoder(self.into_header(lvl), w, lvl)
372 }
373
374 pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
379 read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
380 }
381
382 pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
387 where
388 R: BufRead,
389 {
390 bufread::gz_encoder(self.into_header(lvl), r, lvl)
391 }
392
393 fn into_header(self, lvl: Compression) -> Vec<u8> {
394 let GzBuilder {
395 extra,
396 filename,
397 comment,
398 operating_system,
399 mtime,
400 } = self;
401 let mut flg = 0;
402 let mut header = vec![0u8; 10];
403 if let Some(v) = extra {
404 flg |= FEXTRA;
405 header.push((v.len() >> 0) as u8);
406 header.push((v.len() >> 8) as u8);
407 header.extend(v);
408 }
409 if let Some(filename) = filename {
410 flg |= FNAME;
411 header.extend(filename.as_bytes_with_nul().iter().copied());
412 }
413 if let Some(comment) = comment {
414 flg |= FCOMMENT;
415 header.extend(comment.as_bytes_with_nul().iter().copied());
416 }
417 header[0] = 0x1f;
418 header[1] = 0x8b;
419 header[2] = 8;
420 header[3] = flg;
421 header[4] = (mtime >> 0) as u8;
422 header[5] = (mtime >> 8) as u8;
423 header[6] = (mtime >> 16) as u8;
424 header[7] = (mtime >> 24) as u8;
425 header[8] = if lvl.0 >= Compression::best().0 {
426 2
427 } else if lvl.0 <= Compression::fast().0 {
428 4
429 } else {
430 0
431 };
432
433 header[9] = operating_system.unwrap_or(255);
438 header
439 }
440}
441
#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder, GzHeaderParser};
    use crate::{Compression, GzHeader};
    use rand::{rng, Rng};

    /// A short message survives write-side encoding and read-side decoding.
    #[test]
    fn roundtrip() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    /// An encoder that never received data still yields a stream that
    /// decodes to the empty string.
    #[test]
    fn roundtrip_zero() {
        let encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "");
    }

    /// Many writes of random length decode to the concatenation of the
    /// written slices.
    #[test]
    fn roundtrip_big() {
        let mut expected = Vec::new();
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        let pool = crate::random_bytes().take(1024).collect::<Vec<_>>();
        for _ in 0..200 {
            let chunk = &pool[..rng().random_range(0..pool.len())];
            expected.extend_from_slice(chunk);
            encoder.write_all(chunk).unwrap();
        }
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, expected);
    }

    /// One mebibyte of random data survives a read-side encoder feeding a
    /// read-side decoder directly.
    #[test]
    fn roundtrip_big2() {
        let input = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let encoder = read::GzEncoder::new(&input[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, input);
    }

    /// Independent table-driven CRC-32 used to cross-check the header
    /// checksum handling of the parser.
    struct Rfc1952Crc {
        crc_table: [u32; 256],
    }

    impl Rfc1952Crc {
        /// Builds the 256-entry lookup table.
        fn new() -> Self {
            let mut crc_table = [0u32; 256];
            for (n, slot) in crc_table.iter_mut().enumerate() {
                let mut c = n as u32;
                for _ in 0..8 {
                    c = if c & 1 != 0 {
                        0xedb88320 ^ (c >> 1)
                    } else {
                        c >> 1
                    };
                }
                *slot = c;
            }
            Rfc1952Crc { crc_table }
        }

        /// Continues the checksum `crc` over `buf`; pass 0 to start afresh.
        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
            let folded = buf.iter().fold(crc ^ 0xffffffff, |c, b| {
                self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8)
            });
            folded ^ 0xffffffff
        }

        /// Checksum of `buf` on its own.
        fn crc(&self, buf: &[u8]) -> u32 {
            self.update_crc(0, buf)
        }
    }

    /// A header produced by `GzBuilder`, with the FHCRC flag toggled on and
    /// a matching 16-bit checksum appended, parses back to the same fields.
    #[test]
    fn roundtrip_header() {
        let mut bytes = GzBuilder::new()
            .mtime(1234)
            .operating_system(57)
            .filename("filename")
            .comment("comment")
            .into_header(Compression::fast());

        // The builder never sets FHCRC, so toggling the bit turns it on.
        bytes[3] ^= super::FHCRC;
        let reference = Rfc1952Crc::new();
        let crc32 = reference.crc(&bytes);
        let crc16 = crc32 as u16;
        bytes.extend_from_slice(&crc16.to_le_bytes());

        let mut parser = GzHeaderParser::new();
        parser.parse(&mut bytes.as_slice()).unwrap();
        let parsed = parser.header().unwrap();
        let expected = GzHeader {
            extra: None,
            filename: Some("filename".as_bytes().to_vec()),
            comment: Some("comment".as_bytes().to_vec()),
            operating_system: 57,
            mtime: 1234,
        };
        assert_eq!(parsed, &expected)
    }

    /// File name, comment and extra field set on the builder are visible on
    /// the decoder's header, and the payload still decodes.
    #[test]
    fn fields() {
        let payload = vec![0, 2, 4, 6];
        let encoder = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&payload[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        assert_eq!(decoder.header().unwrap().filename(), Some(&b"foo.rs"[..]));
        assert_eq!(decoder.header().unwrap().comment(), Some(&b"bar"[..]));
        assert_eq!(
            decoder.header().unwrap().extra(),
            Some(&b"\x00\x01\x02\x03"[..])
        );
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, vec![0, 2, 4, 6]);
    }

    /// Reading again after the end of the stream succeeds and adds nothing.
    #[test]
    fn keep_reading_after_end() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    /// Property test: any byte vector round-trips through the read-side
    /// encoder and decoder.
    #[test]
    fn qc_reader() {
        fn roundtrips(input: Vec<u8>) -> bool {
            let encoder = read::GzEncoder::new(&input[..], Compression::default());
            let mut decoder = read::GzDecoder::new(encoder);
            let mut decoded = Vec::new();
            decoder.read_to_end(&mut decoded).unwrap();
            input == decoded
        }

        ::quickcheck::quickcheck(roundtrips as fn(_) -> _);
    }

    /// Flushing a write-side encoder right after a write succeeds.
    #[test]
    fn flush_after_write() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(encoder, "Hello world").unwrap();
        encoder.flush().unwrap();
    }
}