3 * NOTE: this is part of libzzipfseeko (i.e. it is not libzzip).
6 * These routines are fully independent from the traditional zzip
7 * implementation. They assume a readonly seekable stdio handle
8 * representing a complete zip file. The functions show how to
9 * parse the structure, find files and return a decoded bytestream.
11 * These routines are a bit simple and really here for documenting
12 * the way to access a zip file. The complexity of zip access comes
13 * from staggered reading of bytes and reposition of a filepointer in
14 * a big archive with lots of files and long compressed datastreams.
15 * Plus variants of drop-in stdio replacements, obfuscation routines,
16 * auto fileextensions, drop-in dirent replacements, and so on...
18 * btw, we can _not_ use fgetpos/fsetpos since an fpos_t has no asserted
19 * relation to a linear seek value as specified in zip info headers. In
20 * general it is not a problem if your system has no fseeko/ftello pair
21 * since we can fallback to fseek/ftell which limits the zip disk size
22 * to 2GiB but the zip-storable seek values are 32bit limited anyway.
25 * Guido Draheim <guidod@gmx.de>
27 * Copyright (c) 2003,2004 Guido Draheim
28 * All rights reserved,
29 * use under the restrictions of the
30 * Lesser GNU General Public License
31 * or alternatively the restrictions
32 * of the Mozilla Public License 1.1
35 #define _LARGEFILE_SOURCE 1
36 #define _ZZIP_ENTRY_STRUCT 1
38 #include <zzip/fseeko.h>
40 #include <zzip/fetch.h>
41 #include <zzip/__mmap.h>
42 #include <zzip/__fnmatch.h>
47 #if defined ZZIP_HAVE_STRING_H
49 #elif defined ZZIP_HAVE_STRINGS_H
53 #if defined ZZIP_HAVE_STDINT_H
57 #if __STDC_VERSION__+0 > 199900L
65 #ifndef ZZIP_HAVE_FSEEKO
70 /* note that the struct zzip_entry inherits the zzip_disk_entry values
71 * and usually carries a copy of its values (in disk format!). To make the
72 * following code more readable, we use a shorthand notation for the
73 * upcast needed in C (not needed in C++) as "disk_(entry)".
/* disk_(entry) yields the pointer that the zzip_disk_entry accessor
 * functions in this file are called with. */
75 #ifdef __zzip_entry_extends_zzip_disk_entry
/* here the entry pointer itself is passed through unchanged */
76 #define disk_(_entry_) _entry_
/* alternative definition: the address of the entry's "head" member */
78 #define disk_(_entry_) (& (_entry_)->head)
81 /* we try to round all seeks to the pagesize - since we do not use
82 * the sys/mmap interface we have to guess a good value here: */
85 /* ====================================================================== */
87 /* helper functions */
89 /** => zzip_entry_data_offset
90 * This function reads the corresponding struct zzip_file_header from
91 * the zip disk of the given "entry". The returned off_t points to the
92 * end of the file_header where the current fseek pointer has stopped.
93 * This is used to immediately parse out any filename/extras block following
94 * the file_header. The return value is null on error.
/* Reads the struct zzip_file_header that the given entry refers to:
 * the offset comes from zzip_disk_entry_fileoffset(), entry->diskfile is
 * positioned there and exactly one header struct is fread into
 * file_header. Yields the offset just behind the header, or 0 when the
 * seek or the read fails. */
97 zzip_entry_fread_file_header(ZZIP_ENTRY * entry,
98 struct zzip_file_header *file_header)
/* both arguments are mandatory */
100 if (! entry || ! file_header)
102 ___ zzip_off_t offset = zzip_disk_entry_fileoffset(disk_(entry));
/* the header offset must fall within [0, disksize) */
103 if (0 > offset || offset >= entry->disksize)
106 if (fseeko(entry->diskfile, offset, SEEK_SET) == -1) return 0;
/* fread reports the number of complete items, so 0 means nothing was read */
107 return (fread(file_header, sizeof(*file_header), 1, entry->diskfile)
108 ? offset + sizeof(*file_header) : 0);
112 /** helper functions for (fseeko) zip access api
114 * This function returns the seekval offset of the data portion of the
115 * file referenced by the given zzip_entry. It requires an intermediate
116 * check of the file_header structure (i.e. it reads it from disk). After
117 * this call, the contained diskfile readposition is already set to the
118 * data_offset returned here. On error -1 is returned.
/* Computes where the data of the given entry starts: the file header is
 * read first, the size reported by zzip_file_header_sizeof_tails() is
 * added to the end-of-header offset, and entry->diskfile is positioned
 * at the result. */
121 zzip_entry_data_offset(ZZIP_ENTRY * entry)
123 struct zzip_file_header file_header;
/* offset now points just behind the fixed-size file header */
126 ___ zzip_off_t offset = zzip_entry_fread_file_header(entry, &file_header);
/* skip the variable-length part following the header
 * (presumably filename + extras - confirm against zzip/fetch.h) */
129 offset += zzip_file_header_sizeof_tails(&file_header);
/* leave the read position of the diskfile at the computed offset */
130 if (fseeko(entry->diskfile, offset, SEEK_SET) == -1)
136 /** => zzip_entry_data_offset
137 * This function is a big helper despite its little name: in a zip file the
138 * encoded filenames are usually NOT zero-terminated but for common usage
139 * with libc we need it that way. Secondly, the filename SHOULD be present
140 * in the zip central directory but if not then we fallback to the filename
141 * given in the file_header of each compressed data portion.
143 * returns: new string buffer, or null on error (errno = EINVAL|ENOMEM|EBADMSG)
/* Builds a malloc()ed copy of the entry's filename. The name is taken
 * from the entry's prescanned tail when the central directory records a
 * non-zero name length; otherwise the file header is read from disk and
 * the name bytes are fread from the diskfile. */
146 zzip_entry_strdup_name(ZZIP_ENTRY * entry)
/* preferred source: name length stored in the central directory entry */
155 if ((len = zzip_disk_entry_namlen(disk_(entry))))
/* one byte more than the name itself (presumably for the terminating NUL) */
157 char *name = malloc(len + 1);
/* the name bytes are copied from the start of the tail buffer */
160 memcpy(name, entry->tail, len);
/* fallback: take the name length from the file header on disk */
164 ___ auto struct zzip_file_header header;
165 if (zzip_entry_fread_file_header(entry, &header)
166 && (len = zzip_file_header_namlen(&header)))
168 char *name = malloc(len + 1);
170 return 0; /* ENOMEM */
/* the read position is just behind the header after the call above,
 * so the next len bytes of the diskfile are read as the name */
172 zzip_size_t n = fread(name, 1, len, entry->diskfile);
/* Loads the variable-length tail that follows a disk entry into
 * entry->tail, growing that buffer when it is too small. The read starts
 * at the current position of entry->diskfile. Returns an error code such
 * as EFBIG when the tail cannot be handled. */
189 prescan_entry(ZZIP_ENTRY * entry)
193 ___ zzip_off_t tailsize = zzip_disk_entry_sizeof_tails(disk_(entry));
/* grow the buffer only when tailsize plus one extra byte does not fit */
194 if (tailsize + 1 > entry->tailalloc)
/* realloc result goes to a temporary so entry->tail is only replaced
 * by the two assignments below */
196 char *newtail = realloc(entry->tail, tailsize + 1);
199 entry->tail = newtail;
200 entry->tailalloc = tailsize + 1;
/* NOTE(review): this range check comes after tailsize + 1 was already
 * passed to realloc above - consider whether it should run first */
202 # ifdef SIZE_MAX /* from stdint.h */
203 if (tailsize > (zzip_off_t)(SIZE_MAX)) { return EFBIG; }
205 ___ zzip_size_t readsize = fread(entry->tail, 1, tailsize, entry->diskfile);
206 /* name + comment + extras */
/* a short read means the tail is incomplete */
207 if ((zzip_off_t)readsize != tailsize) {
/* Resets the tail bookkeeping of an entry; the recorded allocation size
 * is set back to zero. */
215 prescan_clear(ZZIP_ENTRY * entry)
222 entry->tailalloc = 0;
225 /* ====================================================================== */
227 /** => zzip_entry_findfile
229 * This function is the first call of all the zip access functions here.
230 * It contains the code to find the first entry of the zip central directory.
231 * Here we require the stdio handle to represent a real zip file where the
232 * disk_trailer is _last_ in the file area, so that its position would be at
233 * a fixed offset from the end of the file area if not for the comment field
234 * allowed to be of variable length (which needs us to do a little search
235 * for the disk_trailer). However, in this simple implementation we disregard
236 * any disk_trailer info telling about multidisk archives, so we just return
237 * a pointer to the first entry in the zip central directory of that file.
239 * For an actual means, we are going to search backwards from the end
240 * of the mmaped block looking for the PK-magic signature of a
241 * disk_trailer. If we see one then we check the rootseek value to
242 * find the first disk_entry of the root central directory. If we find
243 * the correct PK-magic signature of a disk_entry over there then we
244 * assume we are done and we are going to return a pointer to that label.
246 * The return value is a pointer to the first zzip_disk_entry being checked
247 * to be within the bounds of the file area specified by the arguments. If
248 * no disk_trailer was found then null is returned, and likewise we only
249 * accept a disk_trailer with a seekvalue that points to a disk_entry and
250 * both parts have valid PK-magic parts. Beyond some sanity check we try to
251 * catch a common brokenness with zip archives that still allows us to find
252 * the start of the zip central directory.
/* Locates the zip disk trailer by scanning backwards from the end of
 * "disk" and loads the first central-directory entry it points to into a
 * freshly malloc()ed ZZIP_ENTRY. */
254 zzip__new__ ZZIP_ENTRY *
255 zzip_entry_findfirst(FILE * disk)
/* seek to the end so that ftello() yields the total size */
259 if (fseeko(disk, 0, SEEK_END) == -1)
261 ___ zzip_off_t disksize = ftello(disk);
/* a file shorter than one disk trailer cannot be a zip archive */
262 if (disksize < (zzip_off_t) sizeof(struct zzip_disk_trailer))
264 /* we read out chunks of 8 KiB in the hope to match disk granularity */
265 ___ zzip_off_t pagesize = PAGESIZE; /* getpagesize() */
266 ___ ZZIP_ENTRY *entry = malloc(sizeof(*entry));
/* scratch buffer holding the block of the file that is searched */
269 ___ unsigned char *buffer = malloc(pagesize);
/* NOTE(review): the ';' directly after the condition makes this 'if' an
 * empty statement, so the test guards nothing by itself - this looks
 * like a typo; confirm against the intended error handling */
273 if (pagesize / 2 <= (zzip_off_t) sizeof(struct zzip_disk_trailer));
275 /* at each step, we will fread a pagesize block which overlaps with the
276 * previous read by means of pagesize/2 step at the end of the while(1) */
/* start of the last block: disksize rounded down to a multiple of
 * pagesize (this masking assumes pagesize is a power of two) */
277 ___ zzip_off_t mapoffs = disksize & ~(pagesize - 1);
/* number of bytes between that block start and the end of the file */
278 ___ zzip_off_t mapsize = disksize - mapoffs;
/* a short last block is extended backwards by half a page, unless the
 * block already starts at offset 0 */
279 if (mapoffs && mapsize < pagesize / 2)
281 mapoffs -= pagesize / 2;
282 mapsize += pagesize / 2;
/* NOTE(review): hard-coded bound on the block size - presumably related
 * to PAGESIZE, confirm */
284 if (mapsize >= 3*8192)
/* load the current block [mapoffs, mapoffs + mapsize) into buffer */
288 if (fseeko(disk, mapoffs, SEEK_SET) == -1)
290 if (fread(buffer, 1, mapsize, disk) != (zzip_size_t)mapsize)
/* last position in the block where a complete disk trailer still fits */
292 ___ unsigned char *p =
293 buffer + mapsize - sizeof(struct zzip_disk_trailer);
/* walk backwards byte by byte looking for a trailer signature */
294 for (; p >= buffer; p--)
296 zzip_off_t root; /* (struct zzip_disk_entry*) */
297 if (zzip_disk_trailer_check_magic(p))
/* root: offset of the central directory as recorded in the trailer */
299 root = zzip_disk_trailer_rootseek((struct zzip_disk_trailer *)
301 if (root > disksize - (long) sizeof(struct zzip_disk_trailer))
303 /* first disk_entry is after the disk_trailer? can't be! */
304 struct zzip_disk_trailer *trailer =
305 (struct zzip_disk_trailer *) p;
306 zzip_off_t rootsize = zzip_disk_trailer_rootsize(trailer);
307 if (rootsize > mapoffs)
309 /* a common brokenness that can be fixed: we just assume the
310 * central directory was written directly before : */
311 root = mapoffs - rootsize;
313 } else if (zzip_disk64_trailer_check_magic(p))
315 struct zzip_disk64_trailer *trailer =
316 (struct zzip_disk64_trailer *) p;
/* tests whether zzip_off_t is narrower than 8 bytes */
317 if (sizeof(zzip_off_t) < 8)
319 root = zzip_disk64_trailer_rootseek(trailer);
/* NOTE(review): root is used as an absolute file offset in the fseeko
 * below, yet it is bounded by mapsize (the size of the current block)
 * here - confirm that this is the intended limit */
323 if (!(0 <= root && root < mapsize))
/* read the fixed-size disk entry found at root into the new entry */
325 if (fseeko(disk, root, SEEK_SET) == -1)
327 if (fread(disk_(entry), 1, sizeof(*disk_(entry)), disk)
328 != sizeof(*disk_(entry))) goto error;
329 if (zzip_disk_entry_check_magic(entry))
/* remember where the entry came from and the bounds of the archive */
332 entry->headseek = root;
333 entry->diskfile = disk;
334 entry->disksize = disksize;
/* load the entry's tail data; a non-zero result is an error code */
335 if (prescan_entry(entry))
/* no usable trailer in this block: step back by half a page and read
 * a full page next time, so consecutive blocks overlap */
343 if (mapsize < pagesize / 2)
345 mapoffs -= pagesize / 2; /* mapsize += pagesize/2; */
346 mapsize = pagesize; /* if (mapsize > pagesize) ... */
/* compares the distance searched back from the end against 64 KiB */
347 if (disksize - mapoffs > 64 * 1024)
363 /** => zzip_entry_findfile
365 * This function takes an existing "entry" in the central root directory
366 * (e.g. from zzip_entry_findfirst) and moves it to point to the next entry.
367 * On error it returns 0, otherwise the old entry. If no further match is
368 * found then null is returned and the entry is already free()d. If you want
369 * to stop searching for matches before that case then please call
370 * => zzip_entry_free on the cursor struct ZZIP_ENTRY.
/* Advances "entry" in place to the next record of the central
 * directory: the following disk entry is read into the same struct and
 * its tail data is prescanned again. */
372 zzip__new__ ZZIP_ENTRY *
373 zzip_entry_findnext(ZZIP_ENTRY * _zzip_restrict entry)
/* the current entry must carry a valid disk_entry signature */
377 if (! zzip_disk_entry_check_magic(entry))
/* the next record starts where the current one (tails included) ends */
379 ___ zzip_off_t seek =
380 entry->headseek + zzip_disk_entry_sizeto_end(disk_(entry));
/* a complete disk entry must still fit before the end of the file */
381 if (seek + (zzip_off_t) sizeof(*disk_(entry)) > entry->disksize)
384 if (fseeko(entry->diskfile, seek, SEEK_SET) == -1)
/* overwrite the fixed-size part of the entry with the next record */
386 if (fread(disk_(entry), 1, sizeof(*disk_(entry)), entry->diskfile)
387 != sizeof(*disk_(entry))) goto err;
388 entry->headseek = seek;
/* what was read has to be a disk entry again */
389 if (! zzip_disk_entry_check_magic(entry))
/* reload the tail data for the new record; non-zero signals failure */
391 if (prescan_entry(entry))
/* the entry is released here, so the caller must not use it afterwards */
395 zzip_entry_free(entry);
400 /** => zzip_entry_findfile
401 * this function releases the malloc()ed areas needed for zzip_entry, the
402 * pointer is invalid afterwards. This function has #define synonyms of
403 * zzip_entry_findlast(), zzip_entry_findlastfile(), zzip_entry_findlastmatch()
/* Releases an entry; its tail bookkeeping is reset through
 * prescan_clear() as part of that. */
406 zzip_entry_free(ZZIP_ENTRY * entry)
410 prescan_clear(entry);
415 /** search for files in the (fseeko) zip central directory
417 * This function is given a filename as an additional argument, to find the
418 * disk_entry matching a given filename. The compare-function is usually
419 * strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used.
420 * - use null as argument for "old"-entry when searching the first
421 * matching entry, otherwise the last returned value if you look for other
422 * entries with a special "compare" function (if null then a doubled search
423 * is rather useless with this variant of _findfile). If no further entry is
424 * found then null is returned and any "old"-entry has already been free()d.
/* Walks the central directory of "disk" and compares each entry's name
 * with "filename" using "compare"; a compare result of 0 counts as a
 * match. */
426 zzip__new__ ZZIP_ENTRY *
427 zzip_entry_findfile(FILE * disk, char *filename,
428 ZZIP_ENTRY * _zzip_restrict entry, zzip_strcmp_fn_t compare)
/* a filename and a disk handle are both required */
430 if (! filename || ! disk)
/* either start at the first entry of the directory ... */
436 entry = zzip_entry_findfirst(disk);
/* ... or continue with the record after the entry passed in */
438 entry = zzip_entry_findnext(entry);
/* plain strcmp is the fallback compare function */
441 compare = (zzip_strcmp_fn_t) (strcmp);
/* findnext hands back null at the end, which terminates the loop */
443 for (; entry; entry = zzip_entry_findnext(entry))
445 /* filenames within zip files are often not null-terminated! */
446 char *realname = zzip_entry_strdup_name(entry);
449 return 0; /* errno = ENOMEM|EBADMSG */
/* compare() returning 0 means the names are considered equal */
451 if (! compare(filename, realname))
465 /** => zzip_entry_findfile
467 * This function uses a compare-function with an additional argument
468 * and it is called just like fnmatch(3) (from POSIX.2 AD:1993), i.e.
469 * the argument filespec first and the ziplocal filename second with
470 * the integer-flags put in as third to the indirect call. If the
471 * platform has fnmatch available then null-compare will use that one
472 * and otherwise we fall back to mere strcmp, so if you need fnmatch
473 * searching then please provide an implementation somewhere else.
474 * - use null as argument for "after"-entry when searching the first
475 * matching entry, or the last disk_entry return-value to find the
476 * next entry matching the given filespec. If no further entry is
477 * found then null is returned and any "old"-entry has already been free()d.
/* Walks the central directory of "disk" and tests each entry's name
 * against "filespec" by calling compare(filespec, name, flags); a result
 * of 0 counts as a match. */
479 zzip__new__ ZZIP_ENTRY *
480 zzip_entry_findmatch(FILE * disk, char *filespec,
481 ZZIP_ENTRY * _zzip_restrict entry,
482 zzip_fnmatch_fn_t compare, int flags)
/* a filespec and a disk handle are both required */
484 if (! filespec || ! disk)
/* either start at the first entry of the directory ... */
490 entry = zzip_entry_findfirst(disk);
/* ... or continue with the record after the entry passed in */
492 entry = zzip_entry_findnext(entry);
/* _zzip_fnmatch is the fallback match function */
495 compare = (zzip_fnmatch_fn_t) _zzip_fnmatch;
/* findnext hands back null at the end, which terminates the loop */
497 for (; entry; entry = zzip_entry_findnext(entry))
499 /* filenames within zip files are often not null-terminated! */
500 char *realname = zzip_entry_strdup_name(entry);
503 return 0; /* ENOMEM|EBADMSG */
/* the pattern goes first, the archive member name second */
505 if (! compare(filespec, realname, flags))
519 /* ====================================================================== */
522 * typedef struct zzip_disk_file ZZIP_ENTRY_FILE;
/* Read state of one archive member as set up by zzip_entry_fopen and
 * consumed by zzip_entry_fread. */
524 struct zzip_entry_file /* : zzip_file_header */
526 struct zzip_file_header header; /* fopen detected header */
527 ZZIP_ENTRY *entry; /* fopen entry */
528 zzip_off_t data; /* for stored blocks */
/* starts as the uncompressed size from the header; zzip_entry_feof
 * reports eof once it is 0 */
529 zzip_size_t avail; /* memorized for checks on EOF */
/* 0 for stored or empty members, otherwise the compressed size */
530 zzip_size_t compressed; /* compressed flag and datasize */
/* advanced by the number of bytes read from the diskfile */
531 zzip_size_t dataoff; /* offset from data start */
532 z_stream zlib; /* for inflated blocks */
533 unsigned char buffer[PAGESIZE]; /* work buffer for inflate algorithm */
536 /** open a file within a zip disk for reading
538 * This function does take an "entry" argument and copies it (or just takes
539 * it over as owner) to a new ZZIP_ENTRY_FILE handle structure. That
540 * structure contains also a zlib buffer for decoding. This function does
541 * seek to the file_header of the given "entry" and validates it for the
542 * data buffer following it. We do also prefetch some data from the data
543 * buffer thereby trying to match the disk pagesize for faster access later.
544 * The => zzip_entry_fread will then read in chunks of pagesizes which is
545 * the size of the internal readahead buffer. If an error occurs then null
/* Creates a ZZIP_ENTRY_FILE read handle for "entry": the file header is
 * read and the data offset determined; for members that are neither
 * empty nor stored a raw-deflate zlib stream is set up and the first
 * part of the compressed data is prefetched into the work buffer. */
548 zzip__new__ ZZIP_ENTRY_FILE *
549 zzip_entry_fopen(ZZIP_ENTRY * entry, int takeover)
/* make a private copy of the entry including its own tail buffer */
555 ZZIP_ENTRY *found = malloc(sizeof(*entry));
558 memcpy(found, entry, sizeof(*entry)); /* prescan_copy */
/* the struct copy above still shares entry->tail, so give the copy a
 * buffer of the same allocated size */
559 found->tail = malloc(found->tailalloc);
561 { free (found); return 0; }
562 memcpy(found->tail, entry->tail, entry->tailalloc);
565 ___ ZZIP_ENTRY_FILE *file = malloc(sizeof(*file));
/* NOTE(review): the header is read through "entry" here, not through
 * the copy made above - confirm which pointer is meant at this point */
569 if (! zzip_entry_fread_file_header(entry, &file->header))
/* avail counts the uncompressed bytes that can still be delivered */
571 file->avail = zzip_file_header_usize(&file->header);
/* also leaves the diskfile positioned at the start of the data */
572 file->data = zzip_entry_data_offset(entry);
/* empty or stored members need no decoder: compressed == 0 marks that */
575 if (! file->avail || zzip_file_header_data_stored(&file->header))
576 { file->compressed = 0; return file; }
578 file->compressed = zzip_file_header_csize(&file->header);
/* Z_NULL allocators make zlib fall back to its default allocation */
579 file->zlib.opaque = 0;
580 file->zlib.zalloc = Z_NULL;
581 file->zlib.zfree = Z_NULL;
/* seek becomes the first multiple of sizeof(file->buffer) strictly
 * above file->data (the masking assumes a power-of-two buffer size) */
583 ___ zzip_off_t seek = file->data;
584 seek += sizeof(file->buffer);
585 seek -= seek & (sizeof(file->buffer) - 1);
586 if (file->data >= seek) /* pre-read to next PAGESIZE boundary... */
/* NOTE(review): file->dataoff is used here; its initialisation is not
 * among the statements shown in this function - confirm it is set */
588 if (fseeko(file->entry->diskfile, file->data + file->dataoff, SEEK_SET) == -1)
/* prefetch up to the boundary and hand those bytes to zlib as input */
590 file->zlib.next_in = file->buffer;
591 file->zlib.avail_in = fread(file->buffer, 1, seek - file->data,
592 file->entry->diskfile);
593 file->dataoff += file->zlib.avail_in;
/* only deflate is supported; negative window bits select raw deflate
 * data without a zlib header */
596 if (! zzip_file_header_data_deflated(&file->header)
597 || inflateInit2(&file->zlib, -MAX_WBITS) != Z_OK)
604 zzip_entry_free(entry);
609 /** => zzip_entry_fopen
611 * This function opens a file found by name, so it does a search into
612 * the zip central directory with => zzip_entry_findfile and whatever
613 * is found first is given to => zzip_entry_fopen
/* Convenience opener: looks up "filename" in the archive on "disk" and
 * opens the entry that is found. */
615 zzip__new__ ZZIP_ENTRY_FILE *
616 zzip_entry_ffile(FILE * disk, char *filename)
/* no previous entry and no compare function: the search starts at the
 * first entry and findfile falls back to strcmp */
618 ZZIP_ENTRY *entry = zzip_entry_findfile(disk, filename, 0, 0);
/* takeover is passed as 1 for the entry returned by the search */
621 return zzip_entry_fopen(entry, 1);
625 /** => zzip_entry_fopen
627 * This function reads more bytes into the output buffer specified as
628 * arguments. The return value is null on eof or error, the stdio-like
629 * interface can not distinguish between these so you need to check
630 * with => zzip_entry_feof for the difference.
/* Reads up to sized * nmemb bytes of member data into "ptr". Stored
 * members are read straight from the diskfile; compressed members are
 * inflated from the work buffer, which is refilled from the diskfile as
 * needed. The value returned on the inflate path is a byte count (the
 * growth of zlib.total_out), not an item count. */
633 zzip_entry_fread(void *ptr, zzip_size_t sized, zzip_size_t nmemb,
634 ZZIP_ENTRY_FILE * file)
/* NOTE(review): the product is not checked for overflow */
638 ___ zzip_size_t size = sized * nmemb;
/* compressed == 0 marks stored data (set by zzip_entry_fopen) */
639 if (! file->compressed)
/* compares the request against the bytes still available */
641 if (size > file->avail)
/* a short read on the stored path is reported as 0 */
643 if (fread(ptr, 1, size, file->entry->diskfile) != size) return 0;
644 file->dataoff += size;
/* inflate path: point zlib's output at the caller's buffer */
649 file->zlib.avail_out = size;
650 file->zlib.next_out = ptr;
/* remembered so the bytes produced by this call can be computed */
651 ___ zzip_size_t total_old = file->zlib.total_out;
/* refill the work buffer when zlib has consumed all pending input */
654 if (! file->zlib.avail_in)
/* compressed bytes not yet read, capped at the work buffer size */
656 size = file->compressed - file->dataoff;
657 if (size > sizeof(file->buffer))
658 size = sizeof(file->buffer);
659 /* fseek (file->data + file->dataoff, file->entry->diskfile); */
660 file->zlib.avail_in = fread(file->buffer, 1, size,
661 file->entry->diskfile);
662 file->zlib.next_in = file->buffer;
663 file->dataoff += file->zlib.avail_in;
/* still no input after the refill attempt */
665 if (! file->zlib.avail_in)
668 ___ int err = inflate(&file->zlib, Z_NO_FLUSH);
669 if (err == Z_STREAM_END)
671 else if (err == Z_OK)
/* reduce the remaining-bytes counter by what has been produced */
672 file->avail -= file->zlib.total_out - total_old;
/* output space is left but the input is used up */
676 if (file->zlib.avail_out && ! file->zlib.avail_in)
/* number of bytes written to ptr during this call */
678 return file->zlib.total_out - total_old;
684 /** => zzip_entry_fopen
685 * This function releases any zlib decoder info needed for decompression
686 * and dumps the ZZIP_ENTRY_FILE struct then.
/* Closes a read handle: the zlib state is torn down if one was in use
 * and the entry owned by the handle is released. */
689 zzip_entry_fclose(ZZIP_ENTRY_FILE * file)
/* compressed is non-zero only for handles that use the inflater */
693 if (file->compressed)
694 inflateEnd(&file->zlib);
695 zzip_entry_free(file->entry);
700 /** => zzip_entry_fopen
702 * This function allows to distinguish an error from an eof condition.
703 * Actually, if we found an error but we did already reach eof then we
704 * just keep on saying that it was an eof, so the app can just continue.
/* Reports end-of-file: non-zero when "file" is null or when no
 * available bytes remain (file->avail == 0). */
707 zzip_entry_feof(ZZIP_ENTRY_FILE * file)
709 return ! file || ! file->avail;