1
2
3
4
5 package zip
6
7 import (
8 "bufio"
9 "encoding/binary"
10 "errors"
11 "hash"
12 "hash/crc32"
13 "io"
14 "io/fs"
15 "os"
16 "path"
17 "sort"
18 "strings"
19 "sync"
20 "time"
21 )
22
// Sentinel errors returned by this package.
var (
	// ErrFormat reports that the input is not a valid zip file, for example
	// when an expected record signature is missing.
	ErrFormat = errors.New("zip: not a valid zip file")
	// ErrAlgorithm is returned by File.Open when no decompressor is
	// available for the entry's compression method.
	ErrAlgorithm = errors.New("zip: unsupported compression algorithm")
	// ErrChecksum reports a CRC-32 mismatch detected while reading an entry.
	ErrChecksum = errors.New("zip: checksum error")
)
28
29
// A Reader serves content from a ZIP archive.
type Reader struct {
	r             io.ReaderAt             // underlying archive, set by init
	File          []*File                 // entries in central-directory order
	Comment       string                  // archive comment from the end-of-directory record
	decompressors map[uint16]Decompressor // per-Reader overrides, keyed by method ID

	// fileList is the sorted list of entries (including synthesized
	// directories) used by the Open method. It is built lazily, exactly
	// once, by initFileList under fileListOnce.
	fileListOnce sync.Once
	fileList     []fileListEntry
}
41
42
// A ReadCloser is a Reader that owns the underlying *os.File and must be
// closed when no longer needed. It is produced by OpenReader.
type ReadCloser struct {
	f *os.File // file opened by OpenReader; released by Close
	Reader
}
47
48
49
50
// A File is a single entry in a ZIP archive. Its metadata lives in the
// embedded FileHeader; its content is accessed through Open or OpenRaw.
type File struct {
	FileHeader
	zip          *Reader     // owning Reader; consulted for decompressors
	zipr         io.ReaderAt // archive data the entry is read from
	headerOffset int64       // offset of the local file header, taken from the central directory
	zip64        bool        // set when a zip64 extra field was seen in the directory header
	descErr      error       // NOTE(review): not referenced in the visible part of this file
}
59
60
61 func OpenReader(name string) (*ReadCloser, error) {
62 f, err := os.Open(name)
63 if err != nil {
64 return nil, err
65 }
66 fi, err := f.Stat()
67 if err != nil {
68 f.Close()
69 return nil, err
70 }
71 r := new(ReadCloser)
72 if err := r.init(f, fi.Size()); err != nil {
73 f.Close()
74 return nil, err
75 }
76 r.f = f
77 return r, nil
78 }
79
80
81
82 func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
83 if size < 0 {
84 return nil, errors.New("zip: size cannot be negative")
85 }
86 zr := new(Reader)
87 if err := zr.init(r, size); err != nil {
88 return nil, err
89 }
90 return zr, nil
91 }
92
93 func (z *Reader) init(r io.ReaderAt, size int64) error {
94 end, err := readDirectoryEnd(r, size)
95 if err != nil {
96 return err
97 }
98 z.r = r
99
100
101
102
103
104
105 if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
106 z.File = make([]*File, 0, end.directoryRecords)
107 }
108 z.Comment = end.comment
109 rs := io.NewSectionReader(r, 0, size)
110 if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
111 return err
112 }
113 buf := bufio.NewReader(rs)
114
115
116
117
118
119 for {
120 f := &File{zip: z, zipr: r}
121 err = readDirectoryHeader(f, buf)
122 if err == ErrFormat || err == io.ErrUnexpectedEOF {
123 break
124 }
125 if err != nil {
126 return err
127 }
128 z.File = append(z.File, f)
129 }
130 if uint16(len(z.File)) != uint16(end.directoryRecords) {
131
132
133 return err
134 }
135 return nil
136 }
137
138
139
140
141 func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) {
142 if z.decompressors == nil {
143 z.decompressors = make(map[uint16]Decompressor)
144 }
145 z.decompressors[method] = dcomp
146 }
147
148 func (z *Reader) decompressor(method uint16) Decompressor {
149 dcomp := z.decompressors[method]
150 if dcomp == nil {
151 dcomp = decompressor(method)
152 }
153 return dcomp
154 }
155
156
157 func (rc *ReadCloser) Close() error {
158 return rc.f.Close()
159 }
160
161
162
163
164
165
166 func (f *File) DataOffset() (offset int64, err error) {
167 bodyOffset, err := f.findBodyOffset()
168 if err != nil {
169 return
170 }
171 return f.headerOffset + bodyOffset, nil
172 }
173
174
175
176 func (f *File) Open() (io.ReadCloser, error) {
177 bodyOffset, err := f.findBodyOffset()
178 if err != nil {
179 return nil, err
180 }
181 size := int64(f.CompressedSize64)
182 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
183 dcomp := f.zip.decompressor(f.Method)
184 if dcomp == nil {
185 return nil, ErrAlgorithm
186 }
187 var rc io.ReadCloser = dcomp(r)
188 var desr io.Reader
189 if f.hasDataDescriptor() {
190 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
191 }
192 rc = &checksumReader{
193 rc: rc,
194 hash: crc32.NewIEEE(),
195 f: f,
196 desr: desr,
197 }
198 return rc, nil
199 }
200
201
202
203 func (f *File) OpenRaw() (io.Reader, error) {
204 bodyOffset, err := f.findBodyOffset()
205 if err != nil {
206 return nil, err
207 }
208 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64))
209 return r, nil
210 }
211
// checksumReader wraps a decompressing reader, hashing everything that
// passes through it so the CRC-32 and length can be checked at EOF.
type checksumReader struct {
	rc    io.ReadCloser // decompressed data stream
	hash  hash.Hash32   // running CRC-32 of the bytes read so far
	nread uint64        // number of bytes read so far
	f     *File         // entry being read; supplies expected size and CRC
	desr  io.Reader     // if non-nil, where to read the data descriptor
	err   error         // sticky error returned by subsequent Reads
}
220
221 func (r *checksumReader) Stat() (fs.FileInfo, error) {
222 return headerFileInfo{&r.f.FileHeader}, nil
223 }
224
225 func (r *checksumReader) Read(b []byte) (n int, err error) {
226 if r.err != nil {
227 return 0, r.err
228 }
229 n, err = r.rc.Read(b)
230 r.hash.Write(b[:n])
231 r.nread += uint64(n)
232 if err == nil {
233 return
234 }
235 if err == io.EOF {
236 if r.nread != r.f.UncompressedSize64 {
237 return 0, io.ErrUnexpectedEOF
238 }
239 if r.desr != nil {
240 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
241 if err1 == io.EOF {
242 err = io.ErrUnexpectedEOF
243 } else {
244 err = err1
245 }
246 } else if r.hash.Sum32() != r.f.CRC32 {
247 err = ErrChecksum
248 }
249 } else {
250
251
252
253 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
254 err = ErrChecksum
255 }
256 }
257 }
258 r.err = err
259 return
260 }
261
262 func (r *checksumReader) Close() error { return r.rc.Close() }
263
264
265
266 func (f *File) findBodyOffset() (int64, error) {
267 var buf [fileHeaderLen]byte
268 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
269 return 0, err
270 }
271 b := readBuf(buf[:])
272 if sig := b.uint32(); sig != fileHeaderSignature {
273 return 0, ErrFormat
274 }
275 b = b[22:]
276 filenameLen := int(b.uint16())
277 extraLen := int(b.uint16())
278 return int64(fileHeaderLen + filenameLen + extraLen), nil
279 }
280
281
282
283
// readDirectoryHeader reads one central directory header from r and
// populates f with it. It returns ErrFormat when the header signature is
// wrong, or when a 32-bit compressed size or header offset is saturated
// but no zip64 extra field supplies the real value. Read errors from r
// (including io.ErrUnexpectedEOF from io.ReadFull) are returned as is.
func readDirectoryHeader(f *File, r io.Reader) error {
	var buf [directoryHeaderLen]byte
	if _, err := io.ReadFull(r, buf[:]); err != nil {
		return err
	}
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != directoryHeaderSignature {
		return ErrFormat
	}
	// Fixed-size fields, in on-disk order.
	f.CreatorVersion = b.uint16()
	f.ReaderVersion = b.uint16()
	f.Flags = b.uint16()
	f.Method = b.uint16()
	f.ModifiedTime = b.uint16()
	f.ModifiedDate = b.uint16()
	f.CRC32 = b.uint32()
	f.CompressedSize = b.uint32()
	f.UncompressedSize = b.uint32()
	// Start the 64-bit sizes from the 32-bit values; a zip64 extra field
	// may replace them below.
	f.CompressedSize64 = uint64(f.CompressedSize)
	f.UncompressedSize64 = uint64(f.UncompressedSize)
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	commentLen := int(b.uint16())
	// Skip 4 bytes that are not used here (presumably the disk-number-start
	// and internal-attributes fields of the ZIP format; confirm against the
	// format specification).
	b = b[4:]
	f.ExternalAttrs = b.uint32()
	f.headerOffset = int64(b.uint32())
	// The variable-length name, extra and comment follow back to back.
	d := make([]byte, filenameLen+extraLen+commentLen)
	if _, err := io.ReadFull(r, d); err != nil {
		return err
	}
	f.Name = string(d[:filenameLen])
	f.Extra = d[filenameLen : filenameLen+extraLen]
	f.Comment = string(d[filenameLen+extraLen:])

	// Decide whether the name and comment are to be treated as UTF-8.
	// detectUTF8 is defined elsewhere; its results are used here as
	// "is valid" and "requires UTF-8" indicators.
	utf8Valid1, utf8Require1 := detectUTF8(f.Name)
	utf8Valid2, utf8Require2 := detectUTF8(f.Comment)
	switch {
	case !utf8Valid1 || !utf8Valid2:
		// At least one of the strings is not valid UTF-8.
		f.NonUTF8 = true
	case !utf8Require1 && !utf8Require2:
		// Neither string needs UTF-8 to be represented.
		f.NonUTF8 = false
	default:
		// Both are valid and at least one needs UTF-8: defer to bit
		// 0x800 of the header flags (presumably the ZIP UTF-8 flag;
		// confirm against the format specification).
		f.NonUTF8 = f.Flags&0x800 == 0
	}

	// A 32-bit field of all ones means the real value is expected in the
	// zip64 extra field.
	needUSize := f.UncompressedSize == ^uint32(0)
	needCSize := f.CompressedSize == ^uint32(0)
	needHeaderOffset := f.headerOffset == int64(^uint32(0))

	// Scan the extra fields for zip64 values and for a more precise
	// modification time. When several timestamp fields are present, the
	// last one parsed wins.
	var modified time.Time
parseExtras:
	for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
		fieldTag := extra.uint16()
		fieldSize := int(extra.uint16())
		if len(extra) < fieldSize {
			// Truncated field: stop parsing extras altogether.
			break
		}
		fieldBuf := extra.sub(fieldSize)

		switch fieldTag {
		case zip64ExtraID:
			f.zip64 = true

			// The 64-bit values appear in this fixed order, and each is
			// consumed only if the matching 32-bit field was saturated.
			if needUSize {
				needUSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.UncompressedSize64 = fieldBuf.uint64()
			}
			if needCSize {
				needCSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.CompressedSize64 = fieldBuf.uint64()
			}
			if needHeaderOffset {
				needHeaderOffset = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.headerOffset = int64(fieldBuf.uint64())
			}
		case ntfsExtraID:
			if len(fieldBuf) < 4 {
				continue parseExtras
			}
			fieldBuf.uint32() // leading 4 bytes are skipped
			for len(fieldBuf) >= 4 { // need at least tag and size
				attrTag := fieldBuf.uint16()
				attrSize := int(fieldBuf.uint16())
				if len(fieldBuf) < attrSize {
					continue parseExtras
				}
				attrBuf := fieldBuf.sub(attrSize)
				if attrTag != 1 || attrSize != 24 {
					continue // only attribute 1 with a 24-byte payload is used
				}

				// The first 8 bytes are a tick count at 1e7 ticks per
				// second, measured from 1601-01-01 UTC.
				const ticksPerSecond = 1e7
				ts := int64(attrBuf.uint64())
				secs := int64(ts / ticksPerSecond)
				nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
				epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
				modified = time.Unix(epoch.Unix()+secs, nsecs)
			}
		case unixExtraID, infoZipUnixExtraID:
			if len(fieldBuf) < 8 {
				continue parseExtras
			}
			fieldBuf.uint32() // first 32-bit value is skipped
			ts := int64(fieldBuf.uint32()) // second value: seconds since the Unix epoch
			modified = time.Unix(ts, 0)
		case extTimeExtraID:
			// Bit 0 of the leading flags byte must be set for the
			// following 32-bit timestamp to be used.
			if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
				continue parseExtras
			}
			ts := int64(fieldBuf.uint32()) // seconds since the Unix epoch
			modified = time.Unix(ts, 0)
		}
	}

	// Default to the MS-DOS date/time from the fixed header.
	msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
	f.Modified = msdosModified
	if !modified.IsZero() {
		f.Modified = modified.UTC()

		// If the MS-DOS fields are set as well, use the difference
		// between the two timestamps to pick the location the precise
		// time is presented in (timeZone is defined elsewhere).
		if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
			f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
		}
	}

	// A saturated uncompressed size without a zip64 value is tolerated
	// on purpose: needUSize is deliberately left out of the check below,
	// so the 32-bit value is kept as is.
	_ = needUSize

	if needCSize || needHeaderOffset {
		return ErrFormat
	}

	return nil
}
455
456 func readDataDescriptor(r io.Reader, f *File) error {
457 var buf [dataDescriptorLen]byte
458
459
460
461
462
463
464
465
466
467
468 if _, err := io.ReadFull(r, buf[:4]); err != nil {
469 return err
470 }
471 off := 0
472 maybeSig := readBuf(buf[:4])
473 if maybeSig.uint32() != dataDescriptorSignature {
474
475
476 off += 4
477 }
478 if _, err := io.ReadFull(r, buf[off:12]); err != nil {
479 return err
480 }
481 b := readBuf(buf[:12])
482 if b.uint32() != f.CRC32 {
483 return ErrChecksum
484 }
485
486
487
488
489
490
491
492 return nil
493 }
494
495 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
496
497 var buf []byte
498 var directoryEndOffset int64
499 for i, bLen := range []int64{1024, 65 * 1024} {
500 if bLen > size {
501 bLen = size
502 }
503 buf = make([]byte, int(bLen))
504 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
505 return nil, err
506 }
507 if p := findSignatureInBlock(buf); p >= 0 {
508 buf = buf[p:]
509 directoryEndOffset = size - bLen + int64(p)
510 break
511 }
512 if i == 1 || bLen == size {
513 return nil, ErrFormat
514 }
515 }
516
517
518 b := readBuf(buf[4:])
519 d := &directoryEnd{
520 diskNbr: uint32(b.uint16()),
521 dirDiskNbr: uint32(b.uint16()),
522 dirRecordsThisDisk: uint64(b.uint16()),
523 directoryRecords: uint64(b.uint16()),
524 directorySize: uint64(b.uint32()),
525 directoryOffset: uint64(b.uint32()),
526 commentLen: b.uint16(),
527 }
528 l := int(d.commentLen)
529 if l > len(b) {
530 return nil, errors.New("zip: invalid comment length")
531 }
532 d.comment = string(b[:l])
533
534
535 if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
536 p, err := findDirectory64End(r, directoryEndOffset)
537 if err == nil && p >= 0 {
538 err = readDirectory64End(r, p, d)
539 }
540 if err != nil {
541 return nil, err
542 }
543 }
544
545 if o := int64(d.directoryOffset); o < 0 || o >= size {
546 return nil, ErrFormat
547 }
548 return d, nil
549 }
550
551
552
553
554 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
555 locOffset := directoryEndOffset - directory64LocLen
556 if locOffset < 0 {
557 return -1, nil
558 }
559 buf := make([]byte, directory64LocLen)
560 if _, err := r.ReadAt(buf, locOffset); err != nil {
561 return -1, err
562 }
563 b := readBuf(buf)
564 if sig := b.uint32(); sig != directory64LocSignature {
565 return -1, nil
566 }
567 if b.uint32() != 0 {
568 return -1, nil
569 }
570 p := b.uint64()
571 if b.uint32() != 1 {
572 return -1, nil
573 }
574 return int64(p), nil
575 }
576
577
578
579 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
580 buf := make([]byte, directory64EndLen)
581 if _, err := r.ReadAt(buf, offset); err != nil {
582 return err
583 }
584
585 b := readBuf(buf)
586 if sig := b.uint32(); sig != directory64EndSignature {
587 return ErrFormat
588 }
589
590 b = b[12:]
591 d.diskNbr = b.uint32()
592 d.dirDiskNbr = b.uint32()
593 d.dirRecordsThisDisk = b.uint64()
594 d.directoryRecords = b.uint64()
595 d.directorySize = b.uint64()
596 d.directoryOffset = b.uint64()
597
598 return nil
599 }
600
601 func findSignatureInBlock(b []byte) int {
602 for i := len(b) - directoryEndLen; i >= 0; i-- {
603
604 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
605
606 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
607 if n+directoryEndLen+i <= len(b) {
608 return i
609 }
610 }
611 }
612 return -1
613 }
614
// readBuf is a byte slice with helpers that decode little-endian values
// from its front and advance past the bytes consumed. Every helper
// panics if the buffer is shorter than what it tries to read.
type readBuf []byte

// uint8 consumes and returns the first byte.
func (b *readBuf) uint8() uint8 {
	data := *b
	first := data[0]
	*b = data[1:]
	return first
}

// uint16 consumes two bytes and returns them as a little-endian uint16.
func (b *readBuf) uint16() uint16 {
	data := *b
	value := binary.LittleEndian.Uint16(data)
	*b = data[2:]
	return value
}

// uint32 consumes four bytes and returns them as a little-endian uint32.
func (b *readBuf) uint32() uint32 {
	data := *b
	value := binary.LittleEndian.Uint32(data)
	*b = data[4:]
	return value
}

// uint64 consumes eight bytes and returns them as a little-endian uint64.
func (b *readBuf) uint64() uint64 {
	data := *b
	value := binary.LittleEndian.Uint64(data)
	*b = data[8:]
	return value
}

// sub consumes the next n bytes and returns them as their own readBuf,
// which shares storage with the original.
func (b *readBuf) sub(n int) readBuf {
	data := *b
	head, rest := data[:n], data[n:]
	*b = rest
	return head
}
646
647
648
// A fileListEntry is one entry in the sorted list that backs
// Reader.Open. It describes either an archive member or a directory; the
// fs.FileInfo and fs.DirEntry methods declared on it report it as a
// directory.
type fileListEntry struct {
	name  string // cleaned slash-separated path (see toValidName)
	file  *File  // archive member; nil for directories synthesized by initFileList
	isDir bool   // whether the entry represents a directory
}
654
// fileInfoDirEntry is the union of fs.FileInfo and fs.DirEntry, so a
// single value can serve both as Stat output and as a ReadDir element.
type fileInfoDirEntry interface {
	fs.FileInfo
	fs.DirEntry
}
659
660 func (e *fileListEntry) stat() fileInfoDirEntry {
661 if !e.isDir {
662 return headerFileInfo{&e.file.FileHeader}
663 }
664 return e
665 }
666
667
668 func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem }
669 func (f *fileListEntry) Size() int64 { return 0 }
670 func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 }
671 func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir }
672 func (f *fileListEntry) IsDir() bool { return true }
673 func (f *fileListEntry) Sys() any { return nil }
674
675 func (f *fileListEntry) ModTime() time.Time {
676 if f.file == nil {
677 return time.Time{}
678 }
679 return f.file.FileHeader.Modified.UTC()
680 }
681
682 func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil }
683
684
// toValidName coerces name into a cleaned, slash-separated relative
// path: backslashes become slashes, the result is passed through
// path.Clean, and a leading "/" and any leading "../" elements are
// removed.
func toValidName(name string) string {
	cleaned := path.Clean(strings.ReplaceAll(name, `\`, `/`))
	cleaned = strings.TrimPrefix(cleaned, "/")
	for {
		trimmed := strings.TrimPrefix(cleaned, "../")
		if trimmed == cleaned {
			return cleaned
		}
		cleaned = trimmed
	}
}
696
697 func (r *Reader) initFileList() {
698 r.fileListOnce.Do(func() {
699 dirs := make(map[string]bool)
700 knownDirs := make(map[string]bool)
701 for _, file := range r.File {
702 isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
703 name := toValidName(file.Name)
704 if name == "" {
705 continue
706 }
707 for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
708 dirs[dir] = true
709 }
710 entry := fileListEntry{
711 name: name,
712 file: file,
713 isDir: isDir,
714 }
715 r.fileList = append(r.fileList, entry)
716 if isDir {
717 knownDirs[name] = true
718 }
719 }
720 for dir := range dirs {
721 if !knownDirs[dir] {
722 entry := fileListEntry{
723 name: dir,
724 file: nil,
725 isDir: true,
726 }
727 r.fileList = append(r.fileList, entry)
728 }
729 }
730
731 sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) })
732 })
733 }
734
735 func fileEntryLess(x, y string) bool {
736 xdir, xelem, _ := split(x)
737 ydir, yelem, _ := split(y)
738 return xdir < ydir || xdir == ydir && xelem < yelem
739 }
740
741
742
743
744
745 func (r *Reader) Open(name string) (fs.File, error) {
746 r.initFileList()
747
748 if !fs.ValidPath(name) {
749 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
750 }
751 e := r.openLookup(name)
752 if e == nil {
753 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
754 }
755 if e.isDir {
756 return &openDir{e, r.openReadDir(name), 0}, nil
757 }
758 rc, err := e.file.Open()
759 if err != nil {
760 return nil, err
761 }
762 return rc.(fs.File), nil
763 }
764
// split breaks name into its directory and final element. One trailing
// slash, if present, is removed first and reported through isDir. A name
// without any slash has directory ".".
func split(name string) (dir, elem string, isDir bool) {
	if n := len(name); n > 0 && name[n-1] == '/' {
		isDir = true
		name = name[:n-1]
	}
	for i := len(name) - 1; i >= 0; i-- {
		if name[i] == '/' {
			return name[:i], name[i+1:], isDir
		}
	}
	return ".", name, isDir
}
779
// dotFile is the entry openLookup returns for the root directory ".".
var dotFile = &fileListEntry{name: "./", isDir: true}
781
782 func (r *Reader) openLookup(name string) *fileListEntry {
783 if name == "." {
784 return dotFile
785 }
786
787 dir, elem, _ := split(name)
788 files := r.fileList
789 i := sort.Search(len(files), func(i int) bool {
790 idir, ielem, _ := split(files[i].name)
791 return idir > dir || idir == dir && ielem >= elem
792 })
793 if i < len(files) {
794 fname := files[i].name
795 if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name {
796 return &files[i]
797 }
798 }
799 return nil
800 }
801
802 func (r *Reader) openReadDir(dir string) []fileListEntry {
803 files := r.fileList
804 i := sort.Search(len(files), func(i int) bool {
805 idir, _, _ := split(files[i].name)
806 return idir >= dir
807 })
808 j := sort.Search(len(files), func(j int) bool {
809 jdir, _, _ := split(files[j].name)
810 return jdir > dir
811 })
812 return files[i:j]
813 }
814
// openDir is the fs.File returned by Reader.Open for a directory. It
// supports ReadDir and keeps a cursor so successive calls continue where
// the previous one stopped.
type openDir struct {
	e      *fileListEntry  // the directory itself
	files  []fileListEntry // the directory's entries, from openReadDir
	offset int             // number of entries already returned by ReadDir
}
820
821 func (d *openDir) Close() error { return nil }
822 func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat(), nil }
823
824 func (d *openDir) Read([]byte) (int, error) {
825 return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
826 }
827
828 func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
829 n := len(d.files) - d.offset
830 if count > 0 && n > count {
831 n = count
832 }
833 if n == 0 {
834 if count <= 0 {
835 return nil, nil
836 }
837 return nil, io.EOF
838 }
839 list := make([]fs.DirEntry, n)
840 for i := range list {
841 list[i] = d.files[d.offset+i].stat()
842 }
843 d.offset += n
844 return list, nil
845 }
846
View as plain text