3 * Guido Draheim <guidod@gmx.de>
4 * Tomi Ollila <too@iki.fi>
6 * Copyright (c) 1999,2000,2001,2002,2003 Guido Draheim
8 * use under the restrictions of the
9 * Lesser GNU General Public License
10 * or alternatively the restrictions
11 * of the Mozilla Public License 1.1
14 #include <zzip/lib.h> /* archive handling */
15 #include <zzip/file.h>
16 #include <zzip/format.h>
23 #ifdef ZZIP_HAVE_SYS_STAT_H
27 #include <zzip/__mmap.h>
28 #include <zzip/__debug.h>
30 #define __sizeof(X) ((zzip_ssize_t)(sizeof(X)))
32 /* per default, we use a little hack to correct bad z_rootseek parts */
33 #define ZZIP_CORRECT_ROOTSEEK 1
35 /* ------------------------- fetch helpers --------------------------------- */
/**
 * Make 32 bit value in host byteorder from little-endian mapped octet-data
 * (works also on machines which SIGBUS on misaligned data access (eg. 68000))
 *
 * The four octets at s[0]..s[3] are combined one byte at a time, so the
 * result does not depend on the alignment of `s` or on host endianness.
 * s[0] is the least significant octet, s[3] the most significant one.
 */
uint32_t __zzip_get32(unsigned char * s)
{
    return ((uint32_t)s[3] << 24) | ((uint32_t)s[2] << 16)
        |  ((uint32_t)s[1] << 8)  |  (uint32_t)s[0];
}
/**
 * This function does the same for a 16 bit value.
 *
 * s[0] is the low octet and s[1] the high octet; the value is assembled
 * bytewise, so misaligned input is fine.
 */
uint16_t __zzip_get16(unsigned char * s)
{
    return (uint16_t)(((uint16_t)s[1] << 8) | (uint16_t)s[0]);
}
55 /* --------------------------- internals -------------------------------- */
56 /* internal functions of zziplib, avoid at all cost, changes w/o warning.
57 * we do export them for debugging purpose and special external tools
58 * which know what they do and which can adapt from version to version
/* Forward declarations of the internal helpers defined further down in
 * this file: trailer lookup, central directory parsing, and the 4-byte
 * pointer alignment helper.
 * NOTE(review): the final parameter line of the first two prototypes is
 * not visible in this excerpt. */
61 int __zzip_find_disk_trailer( int fd, zzip_off_t filesize,
62 struct zzip_disk_trailer * trailer,
64 int __zzip_parse_root_directory( int fd,
65 struct zzip_disk_trailer * trailer,
66 struct zzip_dir_hdr ** hdr_return,
69 _zzip_inline char* __zzip_aligned4(char* p);
71 /* ------------------------ harden routines ------------------------------ */
75 * check for inconsistent values in trailer and prefer lower seek value
76 * - we fix values assuming the root directory was written at the end
77 * and it is just before the zip trailer. Therefore, ...
/* Harden variant of the rootseek fixup.
 * When the trailer's z_rootseek is larger than
 * (offset_of_trailer - z_rootsize), and offset_of_trailer itself is larger
 * than z_rootsize, z_rootseek is overwritten in place with
 * (offset_of_trailer - z_rootsize). Otherwise the trailer is left alone. */
79 _zzip_inline static void __fixup_rootseek(
80 zzip_off_t offset_of_trailer,
81 struct zzip_disk_trailer* trailer)
83 if ( (zzip_off_t) ZZIP_GET32(trailer->z_rootseek) >
84 offset_of_trailer - (zzip_off_t) ZZIP_GET32(trailer->z_rootsize) &&
85 offset_of_trailer > (zzip_off_t) ZZIP_GET32(trailer->z_rootsize))
87 register zzip_off_t offset;
88 offset = offset_of_trailer - ZZIP_GET32(trailer->z_rootsize);
/* store the corrected value back as four little-endian octets */
89 trailer->z_rootseek[0] = offset & 0xff;
90 trailer->z_rootseek[1] = offset >> 8 & 0xff;
91 trailer->z_rootseek[2] = offset >> 16 & 0xff;
92 trailer->z_rootseek[3] = offset >> 24 & 0xff;
93 HINT2("new rootseek=%li",
94 (long) ZZIP_GET32(trailer->z_rootseek));
/* in this variant the fix already happened above, so the later
 * correction hook expands to nothing */
97 #define __correct_rootseek(A,B,C)
99 #elif defined ZZIP_CORRECT_ROOTSEEK
/* store the seekvalue of the trailer into the "z_magic" field and with
 * a 64bit off_t we overwrite z_disk/z_finaldisk as well. If you change
 * anything in zziplib or dump the trailer structure then watch out that
 * these are still unused, so that this code may still (ab)use those.
 *
 * __fixup_rootseek(off, trailer)
 *     remembers `off` (the file offset of the trailer) in the first
 *     sizeof(zzip_off_t) bytes of *trailer.
 * __correct_rootseek(seek, size, trailer)
 *     lowers the lvalue `seek` to (remembered offset - size) when it is
 *     larger than that; otherwise leaves it untouched.
 *
 * Both are wrapped in do { } while (0) with parenthesized arguments so
 * that they act as one statement: usable in an unbraced if/else and safe
 * with expression arguments. Invoke them with a trailing semicolon. */
#define __fixup_rootseek(_offset_of_trailer, _trailer) \
    do { *(zzip_off_t*)(_trailer) = (_offset_of_trailer); } while (0)
#define __correct_rootseek( _u_rootseek, _u_rootsize, _trailer) \
    do { \
        if ((_u_rootseek) > *(zzip_off_t*)(_trailer) - (_u_rootsize)) \
            (_u_rootseek) = *(zzip_off_t*)(_trailer) - (_u_rootsize); \
    } while (0)
/* Fallback definitions: both rootseek hooks expand to nothing, so the
 * trailer values are used exactly as read. */
110 #define __fixup_rootseek(A,B)
111 #define __correct_rootseek(A,B,C)
/* Debug-only sanity checks on a freshly allocated directory header:
 * warns if struct zzip_dir_hdr is larger than struct zzip_root_dirent,
 * and notes when `hdr` is not 4-byte aligned. Only diagnostics are
 * emitted; nothing is modified. The last line of this block is the
 * alternative definition that makes the check expand to nothing.
 * NOTE(review): the alignment test casts the pointer to `unsigned`; only
 * the low two bits are inspected. */
116 _zzip_inline static void __debug_dir_hdr (struct zzip_dir_hdr* hdr)
118 if (sizeof(struct zzip_dir_hdr) > sizeof(struct zzip_root_dirent))
119 { WARN1("internal sizeof-mismatch may break wreakage"); }
120 /* the internal directory structure is never bigger than the
121 * external zip central directory space had been beforehand
122 * (as long as the following assertion holds...)
125 if (((unsigned)hdr)&3)
126 { NOTE1("this machine's malloc(3) returns sth. not u32-aligned"); }
127 /* we assume that if this machine's malloc has returned a non-aligned
128 * memory block, then it is actually safe to access misaligned data, and
129 * since it does only affect the first hdr it should not even bring about
130 * too much of that cpu's speed penalty
134 #define __debug_dir_hdr(X)
137 /* -------------------------- low-level interface -------------------------- */
/* Size of one read chunk used by the trailer search. The platform BUFSIZ
 * is taken when it is 1024, 512 or 256; the other definition below sets
 * 512. */
140 #if BUFSIZ == 1024 || BUFSIZ == 512 || BUFSIZ == 256
141 #define ZZIP_BUFSIZ BUFSIZ
146 #define ZZIP_BUFSIZ 512
147 /* #define ZZIP_BUFSIZ 64 */ /* for testing */
151 * This function is used by => zzip_file_open. It tries to find
152 * the zip's central directory info that is usually a few
153 * bytes off the end of the file.
/* Locate the zip disk trailer by scanning backwards from the end of the
 * file.
 *
 * fd, filesize  the archive and its total size
 * trailer       receives a copy of the trailer record on success
 *
 * Each round examines a window of the file, either mmap'ed (when
 * USE_MMAP and io->fd.sys are set) or read into `buf` via io->fd.seeks
 * and io->fd.read. The window is searched from its last byte towards its
 * first for the trailer magic. On a hit the record is copied to *trailer
 * and __fixup_rootseek is called with the file offset of the hit.
 *
 * Error codes visible here: ZZIP_DIR_TOO_SHORT, ZZIP_OUTOFMEM,
 * ZZIP_DIR_EDH_MISSING, ZZIP_DIR_SEEK, ZZIP_DIR_READ.
 * NOTE(review): the return type, the `io` parameter, the `cleanup` label
 * and the success return are not visible in this excerpt. */
156 __zzip_find_disk_trailer(int fd, zzip_off_t filesize,
157 struct zzip_disk_trailer * trailer,
/* `return(val)` is redefined locally so that every exit stores the code
 * in `e` and jumps to the cleanup label (two variants, one with a hint) */
161 #define return(val) { e=val; HINT2("%s", zzip_strerror(e)); goto cleanup; }
163 #define return(val) { e=val; goto cleanup; }
/* scratch space for the read() path: a local array or a malloc'ed block
 * NOTE(review): which one is active is decided by lines not shown */
168 auto char buffer[2*ZZIP_BUFSIZ];
171 char* buf = malloc(2*ZZIP_BUFSIZ);
173 zzip_off_t offset = 0;
174 zzip_ssize_t maplen = 0; /* mmap(),read(),getpagesize() use size_t !! */
180 if (filesize < __sizeof(struct zzip_disk_trailer))
181 { return(ZZIP_DIR_TOO_SHORT); }
184 { return(ZZIP_OUTOFMEM); }
186 offset = filesize; /* a.k.a. old offset */
187 while(1) /* outer loop */
189 register unsigned char* mapped;
191 if (offset <= 0) { return(ZZIP_DIR_EDH_MISSING); }
193 /* trailer cannot be farther away than 64K from fileend */
194 if (filesize-offset > 64*1024)
195 { return(ZZIP_DIR_EDH_MISSING); }
197 /* the new offset shall overlap with the area after the old offset! */
198 if (USE_MMAP && io->fd.sys)
200 zzip_off_t mapoff = offset;
202 zzip_ssize_t pagesize = _zzip_getpagesize (io->fd.sys);
203 if (pagesize < ZZIP_BUFSIZ) goto non_mmap; /* an error? */
204 if (mapoff == filesize && filesize > pagesize)
206 if (mapoff < pagesize) {
207 maplen = (zzip_ssize_t)mapoff + pagesize; mapoff = 0;
209 mapoff -= pagesize; maplen = 2*pagesize;
210 if ((zzip_ssize_t)mapoff & (pagesize-1)) { /*only 1. run */
211 pagesize -= (zzip_ssize_t)mapoff & (pagesize-1);
/* never map past the end of the file */
216 if (mapoff + maplen > filesize) maplen = filesize - mapoff;
219 fd_map = _zzip_mmap(io->fd.sys, fd, mapoff, (zzip_size_t)maplen);
220 if (fd_map == MAP_FAILED) goto non_mmap;
221 mapped = (unsigned char*) fd_map; offset = mapoff; /* success */
222 HINT3("mapped *%p len=%li", fd_map, (long) maplen);
/* seek/read fallback: same window arithmetic with ZZIP_BUFSIZ as the
 * page size */
225 fd_map = 0; /* have no mmap */
227 zzip_off_t pagesize = ZZIP_BUFSIZ;
228 if (offset == filesize && filesize > pagesize)
230 if (offset < pagesize) {
231 maplen = (zzip_ssize_t)offset + pagesize; offset = 0;
233 offset -= pagesize; maplen = 2*pagesize;
234 if ((zzip_ssize_t)offset & (pagesize-1)) { /*on 1st run*/
235 pagesize -= (zzip_ssize_t)offset & (pagesize-1);
240 if (offset + maplen > filesize) maplen = filesize - offset;
243 if (io->fd.seeks(fd, offset, SEEK_SET) < 0)
244 { return(ZZIP_DIR_SEEK); }
/* a short read is treated as an error */
245 if (io->fd.read(fd, buf, (zzip_size_t)maplen) < maplen)
246 { return(ZZIP_DIR_READ); }
247 mapped = (unsigned char*) buf; /* success */
248 HINT5("offs=$%lx len=%li filesize=%li pagesize=%i",
249 (long)offset, (long)maplen, (long)filesize, ZZIP_BUFSIZ);
252 {/* now, check for the trailer-magic, hopefully near the end of file */
253 register unsigned char* end = mapped + maplen;
254 register unsigned char* tail;
255 for (tail = end-1; (tail >= mapped); tail--)
257 if ((*tail == 'P') && /* quick pre-check for trailer magic */
258 end-tail >= __sizeof(*trailer)-2 &&
259 ZZIP_DISK_TRAILER_CHECKMAGIC(tail))
261 /* if the file-comment is not present, it happens
262 that the z_comment field often isn't either */
263 if (end-tail >= __sizeof(*trailer))
265 memcpy (trailer, tail, sizeof(*trailer));
/* record is two bytes short: copy what exists and zero z_comment */
267 memcpy (trailer, tail, sizeof(*trailer)-2);
268 trailer->z_comment[0] = 0;
269 trailer->z_comment[1] = 0;
/* offset + (tail - mapped) is the trailer's position in the file */
272 __fixup_rootseek (offset + tail-mapped, trailer);
278 if (USE_MMAP && fd_map)
280 HINT3("unmap *%p len=%li", fd_map, (long) maplen);
281 _zzip_munmap(io->fd.sys, fd_map, (zzip_size_t)maplen);
287 if (USE_MMAP && fd_map)
289 HINT3("unmap *%p len=%li", fd_map, (long) maplen);
290 _zzip_munmap(io->fd.sys, fd_map, (zzip_size_t)maplen);
300 * making pointer alignments to values that can be handled as structures
301 * is tricky. We assume here that an align(4) is sufficient even for
302 * 64 bit machines. Note that binary operations are not usually allowed
303 * to pointer types but we do need only the lower bits in this implementation,
304 * so we can just cast the value to a long value.
/* Advance `p` to the next 4-byte boundary: first by one byte if bit 0 of
 * the address is set, then by two bytes if bit 1 is set. The short alias
 * `aligned4` is defined for use later in this file.
 * NOTE(review): the pointer is cast to `long`; only its low two bits
 * are used. */
306 _zzip_inline char* __zzip_aligned4(char* p)
308 #define aligned4 __zzip_aligned4
309 p += ((long)p)&1; /* warnings about truncation of a "pointer" */
310 p += ((long)p)&2; /* to a "long int" may be safely ignored :) */
315 * This function is used by => zzip_file_open, it is usually called after
316 * => __zzip_find_disk_trailer. It will parse the zip's central directory
317 * information and create a zziplib private directory table in
/* Parse the zip central directory described by *trailer into a packed
 * list of struct zzip_dir_hdr records.
 *
 * fd          the archive
 * trailer     supplies z_entries, z_rootsize and z_rootseek
 * hdr_return  output for the header list
 *
 * A buffer of u_rootsize bytes is allocated as hdr0. The central
 * directory is then walked entry by entry, from an mmap'ed view when
 * USE_MMAP and io->fd.sys are set, otherwise through io->fd.seeks and
 * io->fd.read into the local `dirent`. Each entry's fields and name are
 * copied into the current header, and the header pointer advances to the
 * next 4-byte aligned position. The last d_reclen is set to 0 to end the
 * list.
 *
 * Returns ZZIP_CORRUPTED if the walk stopped with entries remaining and
 * 0 otherwise; ZZIP_DIR_SEEK and ZZIP_DIR_READ are returned directly
 * from the read path.
 *
 * NOTE(review), to confirm against the lines missing from this excerpt:
 *  - the result of malloc(u_rootsize) is used as hdr; no NULL check is
 *    visible here.
 *  - the direct returns of ZZIP_DIR_SEEK / ZZIP_DIR_READ happen after
 *    hdr0 was allocated; no release is visible on that path.
 *  - d_compr is assigned through a (uint8_t) cast, so the following
 *    `> 255` test looks like it can never be true.
 *  - the result of the io->fd.read() that fetches the name is ignored.
 *  - p_reclen starts as 0 and is only set inside the loop; the guard
 *    around the final `*p_reclen = 0` is not visible. */
321 __zzip_parse_root_directory(int fd,
322 struct zzip_disk_trailer * trailer,
323 struct zzip_dir_hdr ** hdr_return,
326 auto struct zzip_root_dirent dirent;
327 struct zzip_dir_hdr * hdr;
328 struct zzip_dir_hdr * hdr0;
329 uint16_t * p_reclen = 0;
331 long offset; /* offset from start of root directory */
334 uint16_t u_entries = ZZIP_GET16(trailer->z_entries);
335 uint32_t u_rootsize = ZZIP_GET32(trailer->z_rootsize);
336 uint32_t u_rootseek = ZZIP_GET32(trailer->z_rootseek);
337 __correct_rootseek (u_rootseek, u_rootsize, trailer);
339 hdr0 = (struct zzip_dir_hdr*) malloc(u_rootsize);
342 hdr = hdr0; __debug_dir_hdr (hdr);
344 if (USE_MMAP && io->fd.sys)
/* mmap offsets must be page aligned: fd_gap is the distance from the
 * page start to u_rootseek */
346 fd_gap = u_rootseek & (_zzip_getpagesize(io->fd.sys)-1) ;
347 HINT4(" mapseek=0x%x, maplen=%d, fd_gap=%d",
348 u_rootseek-fd_gap, u_rootsize+fd_gap, fd_gap);
349 fd_map = _zzip_mmap(io->fd.sys, fd,
350 u_rootseek-fd_gap, u_rootsize+fd_gap);
351 /* if mmap failed we will fallback to seek/read mode */
352 if (fd_map == MAP_FAILED) {
353 NOTE2("map failed: %s",strerror(errno));
356 HINT3("mapped *%p len=%i", fd_map, u_rootsize+fd_gap);
360 for (entries=u_entries, offset=0; entries > 0; entries--)
362 register struct zzip_root_dirent * d;
363 uint16_t u_extras, u_comment, u_namlen;
366 { d = (void*)(fd_map+fd_gap+offset); } /* fd_map+fd_gap==u_rootseek */
369 if (io->fd.seeks(fd, u_rootseek+offset, SEEK_SET) < 0)
370 return ZZIP_DIR_SEEK;
371 if (io->fd.read(fd, &dirent, sizeof(dirent)) < __sizeof(dirent))
372 return ZZIP_DIR_READ;
/* the fixed-size part of the entry must lie inside the directory */
376 if (offset+sizeof(*d) > u_rootsize)
377 { FAIL2("%i's entry stretches beyond root directory", entries); break;}
379 # if 0 && defined DEBUG
380 zzip_debug_xbuf ((unsigned char*) d, sizeof(*d) + 8);
383 u_extras = ZZIP_GET16(d->z_extras);
384 u_comment = ZZIP_GET16(d->z_comment);
385 u_namlen = ZZIP_GET16(d->z_namlen);
386 HINT5("offset=0x%lx, size %ld, dirent *%p, hdr %p\n",
387 offset+u_rootseek, (long)u_rootsize, d, hdr);
389 /* writes over the read buffer, Since the structure where data is
390 copied is smaller than the data in buffer this can be done.
391 It is important that the order of setting the fields is considered
392 when filling the structure, so that some data is not trashed in
393 first structure read.
394 at the end the whole copied list of structures is copied into
395 newly allocated buffer */
396 hdr->d_crc32 = ZZIP_GET32(d->z_crc32);
397 hdr->d_csize = ZZIP_GET32(d->z_csize);
398 hdr->d_usize = ZZIP_GET32(d->z_usize);
399 hdr->d_off = ZZIP_GET32(d->z_off);
400 hdr->d_compr = (uint8_t)ZZIP_GET16(d->z_compr);
401 if (hdr->d_compr > 255) hdr->d_compr = 255;
/* the name follows the fixed part and must also fit */
403 if (offset+sizeof(*d) + u_namlen > u_rootsize)
404 { FAIL2("%i's name stretches beyond root directory", entries); break;}
407 { memcpy(hdr->d_name, fd_map+fd_gap+offset+sizeof(*d), u_namlen); }
408 else { io->fd.read(fd, hdr->d_name, u_namlen); }
409 hdr->d_name[u_namlen] = '\0';
410 hdr->d_namlen = u_namlen;
412 /* update offset by the total length of this entry -> next entry */
413 offset += sizeof(*d) + u_namlen + u_extras + u_comment;
415 if (offset > (long)u_rootsize)
416 { FAIL2("%i's end beyond root directory", entries); entries--; break;}
418 HINT5("file %d { compr=%d crc32=$%x offset=%d",
419 entries, hdr->d_compr, hdr->d_crc32, hdr->d_off);
420 HINT5("csize=%d usize=%d namlen=%d extras=%d",
421 hdr->d_csize, hdr->d_usize, u_namlen, u_extras);
422 HINT5("comment=%d name='%s' %s <sizeof %d> } ",
423 u_comment, hdr->d_name, "",(int) sizeof(*d));
/* remember where this record's length lives, then step to the next
 * 4-byte aligned header slot behind the name and its terminator */
425 p_reclen = &hdr->d_reclen;
427 { register char* p = (char*) hdr;
428 register char* q = aligned4 (p + sizeof(*hdr) + u_namlen + 1);
429 *p_reclen = (uint16_t)(q - p);
430 hdr = (struct zzip_dir_hdr*) q;
434 if (USE_MMAP && fd_map)
436 HINT3("unmap *%p len=%i", fd_map, u_rootsize+fd_gap);
437 _zzip_munmap(io->fd.sys, fd_map, u_rootsize+fd_gap);
442 *p_reclen = 0; /* mark end of list */
446 } /* else zero (sane) entries */
/* a non-zero `entries` means the loop was left early */
447 return (entries ? ZZIP_CORRUPTED : 0);
450 /* ------------------------- high-level interface ------------------------- */
/* Return the built-in list of archive file extensions: ".zip" and ".ZIP",
 * plus the .pk3/.jar variants when ZZIP_USE_ZIPLIKES is defined. The list
 * is a function-local static array.
 * NOTE(review): the list terminator and the return statement are not
 * visible in this excerpt. */
456 static zzip_strings_t* zzip_get_default_ext(void)
458 static zzip_strings_t ext [] =
460 ".zip", ".ZIP", /* common extension */
461 # ifdef ZZIP_USE_ZIPLIKES
462 ".pk3", ".PK3", /* ID Software's Quake3 zipfiles */
463 ".jar", ".JAR", /* Java zipfiles */
472 * allocate a new ZZIP_DIR handle and do basic
473 * initializations before usage by => zzip_dir_fdopen
474 * => zzip_dir_open => zzip_file_open or through
476 * (ext==null flags uses { ".zip" , ".ZIP" } )
477 * (io ==null flags use of posix io defaults)
/* Allocate a zero-initialized ZZIP_DIR with calloc and set its extension
 * list and io plugin. A null `ext` selects zzip_get_default_ext(), a null
 * `io` selects zzip_get_default_io().
 * NOTE(review): the value returned on allocation failure and the final
 * return are on lines not shown here. */
480 zzip_dir_alloc_ext_io (zzip_strings_t* ext, const zzip_plugin_io_t io)
483 if ((dir = (ZZIP_DIR *)calloc(1, sizeof(*dir))) == NULL)
486 /* dir->fileext is currently unused - so what, still initialize it */
487 dir->fileext = ext ? ext : zzip_get_default_ext();
488 dir->io = io ? io : zzip_get_default_io ();
492 /** => zzip_dir_alloc_ext_io
493 * this function is obsolete - it was generally used for implementation
494 * and exported to let other code build on it. It is now advised to
495 * use => zzip_dir_alloc_ext_io now on explicitly, just set that second
496 * argument to zero to achieve the same functionality as the old style.
/* Compatibility wrapper: forwards to zzip_dir_alloc_ext_io with a zero io
 * argument, which selects the default io plugin there. */
499 zzip_dir_alloc (zzip_strings_t* fileext)
501 return zzip_dir_alloc_ext_io (fileext, 0);
505 * will free the zzip_dir handle unless there are still
506 * zzip_files attached (that may use its cache buffer).
507 * This is the inverse of => zzip_dir_alloc , and both
508 * are helper functions used implicitly in other zzipcalls
509 * e.g. => zzip_dir_close = zzip_close
511 * returns zero on success
512 * returns the refcount when files are attached.
/* Release the resources held by `dir`: closes dir->fd through the io
 * plugin when it is >= 0 and frees hdr0, cache.fp, cache.buf32k and
 * realname when they are set. Returns dir->refcount on the
 * "still open files attached" path.
 * NOTE(review): the condition guarding that early return and the lines
 * after the last free() are not visible in this excerpt. */
515 zzip_dir_free(ZZIP_DIR * dir)
518 return (dir->refcount); /* still open files attached */
520 if (dir->fd >= 0) dir->io->fd.close(dir->fd);
521 if (dir->hdr0) free(dir->hdr0);
522 if (dir->cache.fp) free(dir->cache.fp);
523 if (dir->cache.buf32k) free(dir->cache.buf32k);
524 if (dir->realname) free(dir->realname);
530 * It will also => free(2) the => ZZIP_DIR-handle given.
531 * the counterpart for => zzip_dir_open
532 * see also => zzip_dir_free
/* Explicit close of a directory handle: clears the 0x10000000 bit in
 * dir->refcount and then calls zzip_dir_free, whose result is returned. */
535 zzip_dir_close(ZZIP_DIR * dir)
537 dir->refcount &=~ 0x10000000; /* explicit dir close */
538 return zzip_dir_free(dir);
542 * used by the => zzip_dir_open and zzip_opendir(2) call. Opens the
543 * zip-archive as specified with the fd which points to an
544 * already opened file. This function then searches and parses
545 * the zip's central directory.
547 * NOTE: refcount is zero, so an _open/_close pair will also delete
/* Convenience wrapper: calls zzip_dir_fdopen_ext_io with zero for both
 * the extension list and the io plugin. */
551 zzip_dir_fdopen(int fd, zzip_error_t * errcode_p)
553 return zzip_dir_fdopen_ext_io(fd, errcode_p, 0, 0);
556 static zzip_error_t __zzip_dir_parse (ZZIP_DIR* dir); /* forward */
558 /** => zzip_dir_fdopen
559 * this function uses explicit ext and io instead of the internal
560 * defaults, setting these to zero is equivalent to => zzip_dir_fdopen
/* Build a ZZIP_DIR for an already opened file descriptor.
 *
 * Allocates the handle with zzip_dir_alloc_ext_io(ext, io), runs
 * __zzip_dir_parse on it, points dir->hdr at dir->hdr0 and sets the
 * 0x10000000 bit in refcount. The resulting code is written to
 * *errcode_p when errcode_p is non-null. On the error path the handle is
 * released with zzip_dir_free.
 * NOTE(review): the line storing `fd` into the handle and the return
 * statements are not visible in this excerpt. */
563 zzip_dir_fdopen_ext_io(int fd, zzip_error_t * errcode_p,
564 zzip_strings_t* ext, const zzip_plugin_io_t io)
569 if ((dir = zzip_dir_alloc_ext_io (ext, io)) == NULL)
570 { rv = ZZIP_OUTOFMEM; goto error; }
573 if ((rv = __zzip_dir_parse (dir)))
576 dir->hdr = dir->hdr0;
/* the same bit is cleared again by zzip_dir_close */
577 dir->refcount |= 0x10000000;
579 if (errcode_p) *errcode_p = rv;
/* error path: drop the partially built handle, then report the code */
582 if (dir) zzip_dir_free(dir);
583 if (errcode_p) *errcode_p = rv;
/* Read the central directory of dir->fd into dir->hdr0.
 * Steps visible here: query the file size through dir->io->fd.filesize
 * (a negative result yields ZZIP_DIR_STAT), locate the disk trailer with
 * __zzip_find_disk_trailer, then hand the trailer to
 * __zzip_parse_root_directory. A non-zero code from either helper is
 * kept in `rv`.
 * NOTE(review): the final arguments of both helper calls, the `error`
 * label and the return statement are on lines not shown here. */
588 __zzip_dir_parse (ZZIP_DIR* dir)
592 struct zzip_disk_trailer trailer;
593 /* if (! dir || dir->fd < 0)
594 * { rv = EINVAL; goto error; }
597 HINT2("------------------ fd=%i", (int) dir->fd);
598 if ((filesize = dir->io->fd.filesize(dir->fd)) < 0)
599 { rv = ZZIP_DIR_STAT; goto error; }
601 HINT2("------------------ filesize=%ld", (long) filesize);
602 if ((rv = __zzip_find_disk_trailer(dir->fd, filesize, &trailer,
606 HINT5("directory = { entries= %d/%d, size= %d, seek= %d } ",
607 ZZIP_GET16(trailer.z_entries), ZZIP_GET16(trailer.z_finalentries),
608 ZZIP_GET32(trailer.z_rootsize), ZZIP_GET32(trailer.z_rootseek));
610 if ( (rv = __zzip_parse_root_directory(dir->fd, &trailer, &dir->hdr0,
618 * will attach a .zip extension and try to open it
619 * with => open(2). This is a helper function for
620 * => zzip_dir_open, => zzip_opendir and => zzip_open.
623 __zzip_try_open(zzip_char_t* filename, int filemode,
624 zzip_strings_t* ext, zzip_plugin_io_t io)
626 auto char file[PATH_MAX];
628 zzip_size_t len = strlen (filename);
630 if (len+4 >= PATH_MAX) return -1;
631 memcpy(file, filename, len+1);
633 if (!io) io = zzip_get_default_io();
634 if (!ext) ext = zzip_get_default_ext();
636 for ( ; *ext ; ++ext)
638 strcpy (file+len, *ext);
639 fd = io->fd.open(file, filemode);
640 if (fd != -1) return fd;
646 * Opens the zip-archive (if available).
647 * the two ext_io arguments will default to use posix io and
648 * a set of default fileext that can at least add .zip ext itself.
/* Convenience wrapper: calls zzip_dir_open_ext_io with zero for both the
 * extension list and the io plugin. */
651 zzip_dir_open(zzip_char_t* filename, zzip_error_t* e)
653 return zzip_dir_open_ext_io (filename, e, 0, 0);
657 * this function uses explicit ext and io instead of the internal
658 * defaults. Setting these to zero is equivalent to => zzip_dir_open
/* Open a zip archive by name.
 *
 * Null `io` / `ext` are replaced by the defaults. The file is first
 * opened under exactly `filename` (read-only, binary); a second attempt
 * goes through __zzip_try_open, which appends the extensions from `ext`.
 * A descriptor from either attempt is handed to zzip_dir_fdopen_ext_io.
 * On the failure path *e is set to ZZIP_DIR_OPEN when `e` is non-null.
 * NOTE(review): the tests on `fd` between the attempts and the final
 * return are on lines not shown here. */
661 zzip_dir_open_ext_io(zzip_char_t* filename, zzip_error_t* e,
662 zzip_strings_t* ext, zzip_plugin_io_t io)
666 if (!io) io = zzip_get_default_io();
667 if (!ext) ext = zzip_get_default_ext();
669 fd = io->fd.open(filename, O_RDONLY|O_BINARY);
671 { return zzip_dir_fdopen_ext_io(fd, e, ext, io); }
674 fd = __zzip_try_open(filename, O_RDONLY|O_BINARY, ext, io);
676 { return zzip_dir_fdopen_ext_io(fd, e, ext, io); }
679 if (e) { *e = ZZIP_DIR_OPEN; }
686 * fills the dirent-argument with the values and
687 * increments the read-pointer of the dir-argument.
689 * returns 0 if there is no entry (anymore).
/* Copy the current directory entry into *d and advance the read pointer.
 *
 * Returns 0 immediately when `dir`, `dir->hdr` or `d` is null. Otherwise
 * d_compr, d_csize, st_size (from d_usize) and d_name are taken from the
 * current header; d_name points into the directory's own header record,
 * it is not copied. A d_reclen of zero marks the last record; otherwise
 * dir->hdr is moved forward by d_reclen bytes.
 * NOTE(review): what happens to dir->hdr on the last record, and the
 * value returned on success, are on lines not shown here. */
692 zzip_dir_read(ZZIP_DIR * dir, ZZIP_DIRENT * d )
694 if (! dir || ! dir->hdr || ! d) return 0;
696 d->d_compr = dir->hdr->d_compr;
697 d->d_csize = dir->hdr->d_csize;
698 d->st_size = dir->hdr->d_usize;
699 d->d_name = dir->hdr->d_name;
701 if (! dir->hdr->d_reclen)
704 { dir->hdr = (struct zzip_dir_hdr *)((char *)dir->hdr + dir->hdr->d_reclen); }
711 * c-file-style: "stroustrup"