2 * These routines are fully independent from the traditional zzip
3 * implementation. They assume a readonly seekable stdio handle
4 * representing a complete zip file. The functions show how to
5 * parse the structure, find files and return a decoded bytestream.
7 * These routines are a bit simple and really here for documenting
8 * the way to access a zip file. The complexity of zip access comes
9 * from staggered reading of bytes and reposition of a filepointer in
10 * a big archive with lots of files and long compressed datastreams.
11 * Plus variants of drop-in stdio replacements, obfuscation routines,
12 * auto fileextensions, drop-in dirent replacements, and so on...
14 * btw, we can _not_ use fgetpos/fsetpos since an fpos_t has no asserted
15 * relation to a linear seek value as specified in zip info headers. In
16 * general it is not a problem if your system has no fseeko/ftello pair
17 * since we can fallback to fseek/ftell which limits the zip disk size
18 * to 2GiB but the zip-storable seek values are 32bit limited anyway.
21 * Guido Draheim <guidod@gmx.de>
23 * Copyright (c) 2003,2004 Guido Draheim
24 * All rights reserved,
25 * use under the restrictions of the
26 * Lesser GNU General Public License
27 * or alternatively the restrictions
28 * of the Mozilla Public License 1.1
31 #define _LARGEFILE_SOURCE
33 #include <zzip/fseeko.h>
38 #ifdef ZZIP_HAVE_FNMATCH_H
42 #if defined ZZIP_HAVE_STRING_H
44 #elif defined ZZIP_HAVE_STRINGS_H
49 #include <zzip/format.h>
50 #include <zzip/fetch.h>
51 #include <zzip/__mmap.h>
53 #if __STDC_VERSION__+0 > 199900L
61 #ifndef ZZIP_HAVE_FSEEKO
66 /* note that the struct zzip_entry inherits the zzip_disk_entry values
67 * and usually carries a copy of its values (in disk format!). To make the
68 * following code more readable, we use a shorthand notation for the
69 * upcast needed in C (not needed in C++) as "disk_(entry)".
72 #define disk_(_entry_) _entry_
74 #define disk_(_entry_) (& (_entry_)->head)
/* Cursor over one central-directory entry of a zip archive opened through
 * stdio. Two spellings of the same record follow: the first one inherits
 * the on-disk zzip_disk_entry, the second one embeds it as "head" so that
 * disk_(entry) can yield a pointer to it.
 * NOTE(review): the member list visible here looks incomplete (the original
 * line numbering has gaps between headseek/zz_csize and after zz_offset) -
 * confirm against the full file before relying on this layout. */
78 struct zzip_entry : public struct zzip_disk_entry
80 char* _zzip_restrict tail; /* heap buffer holding name + comment + extras */
81 zzip_off_t tailalloc; /* the allocated size of tail */
82 FILE* diskfile; /* a file reference (the stdio handle of the zip) */
83 zzip_off_t disksize; /* the size of the file */
84 zzip_off_t headseek; /* the offset of this entry header within the file */
86 zzip_off_t zz_csize; /* items scanned from header */
87 zzip_off_t zz_offset; /* or zip64 extension block */
91 struct zzip_entry /* : struct zzip_disk_entry */
93 struct zzip_disk_entry head; /* copy of the entry header in disk format */
94 char* _zzip_restrict tail; /* heap buffer holding name + comment + extras */
95 zzip_off_t tailalloc; /* the allocated size of tail */
96 FILE* diskfile; /* a file reference (the stdio handle of the zip) */
97 zzip_off_t disksize; /* the size of the file */
98 zzip_off_t headseek; /* the offset of this entry header within the file */
100 zzip_off_t zz_csize; /* items scanned from header */
101 zzip_off_t zz_offset; /* or zip64 extension block */
106 /* we try to round all seeks to the pagesize - since we do not use
107 * the sys/mmap interface we have to guess a good value here: */
108 #define PAGESIZE 8192
110 /* ====================================================================== */
111 /* helper functions */
113 /** => zzip_entry_data_offset
114 * This functions read the correspoding struct zzip_file_header from
115 * the zip disk of the given "entry". The returned off_t points to the
116 * end of the file_header where the current fseek pointer has stopped.
117 * This is used to immediatly parse out any filename/extras block following
118 * the file_header. The return value is null on error.
121 zzip_entry_fread_file_header (ZZIP_ENTRY* entry,
122 struct zzip_file_header* file_header)
124 if (! entry || ! file_header) return 0;
125 ___ zzip_off_t offset = zzip_disk_entry_fileoffset (disk_(entry));
126 if (0 > offset || offset >= entry->disksize) return 0;
128 fseeko (entry->diskfile, offset, SEEK_SET);
129 return (fread (file_header, sizeof(*file_header), 1, entry->diskfile)
130 ? offset+sizeof(*file_header) : 0 ); ____;
133 /** helper functions for (fseeko) zip access api
135 * This functions returns the seekval offset of the data portion of the
136 * file referenced by the given zzip_entry. It requires an intermediate
137 * check of the file_header structure (i.e. it reads it from disk). After
138 * this call, the contained diskfile readposition is already set to the
139 * data_offset returned here. On error -1 is returned.
142 zzip_entry_data_offset(ZZIP_ENTRY* entry)
144 struct zzip_file_header file_header;
145 if (! entry) return -1;
146 ___ zzip_off_t offset =
147 zzip_entry_fread_file_header (entry, & file_header);
148 if (! offset) return -1;
149 offset += zzip_file_header_sizeof_tails (& file_header);
150 fseeko (entry->diskfile, offset, SEEK_SET);
154 /** => zzip_entry_data_offset
155 * This function is a big helper despite its little name: in a zip file the
156 * encoded filenames are usually NOT zero-terminated but for common usage
157 * with libc we need it that way. Secondly, the filename SHOULD be present
158 * in the zip central directory but if not then we fallback to the filename
159 * given in the file_header of each compressed data portion.
162 zzip_entry_strdup_name(ZZIP_ENTRY* entry)
164 if (! entry) return 0;
167 if ((len = zzip_disk_entry_namlen (disk_(entry)))) {
168 char* name = malloc (len+1);
169 if (! name) return 0;
170 memcpy (name, entry->tail, len);
174 ___ auto struct zzip_file_header header;
175 if (zzip_entry_fread_file_header (entry, &header)
176 && ( len = zzip_file_header_namlen(&header) )) {
177 char* name = malloc (len+1);
178 if (! name) return 0;
179 fread (name, 1, len, entry->diskfile);
188 prescan_entry(ZZIP_ENTRY* entry)
191 ___ zzip_off_t tailsize = zzip_disk_entry_sizeof_tails (disk_(entry));
192 if (tailsize+1 > entry->tailalloc) {
193 char* newtail = realloc (entry->tail, tailsize+1);
194 if (! newtail) return ENOMEM;
195 entry->tail = newtail;
196 entry->tailalloc = tailsize+1;
198 fread (entry->tail, 1, tailsize, entry->diskfile);
199 /* name + comment + extras */
204 prescan_clear(ZZIP_ENTRY* entry)
207 if (entry->tail) free (entry->tail);
208 entry->tail = 0; entry->tailalloc = 0;
211 /* ====================================================================== */
213 /** => zzip_entry_findfile
215 * This function is the first call of all the zip access functions here.
216 * It contains the code to find the first entry of the zip central directory.
217 * Here we require the stdio handle to represent a real zip file where the
218 * disk_trailer is _last_ in the file area, so that its position would be at
219 * a fixed offset from the end of the file area if not for the comment field
220 * allowed to be of variable length (which needs us to do a little search
221 * for the disk_tailer). However, in this simple implementation we disregard
222 * any disk_trailer info telling about multidisk archives, so we just return
223 * a pointer to the first entry in the zip central directory of that file.
225 * For an actual means, we are going to search backwards from the end
226 * of the mmaped block looking for the PK-magic signature of a
227 * disk_trailer. If we see one then we check the rootseek value to
228 * find the first disk_entry of the root central directory. If we find
229 * the correct PK-magic signature of a disk_entry over there then we
230 * assume we are done and we are going to return a pointer to that label.
232 * The return value is a pointer to the first zzip_disk_entry being checked
233 * to be within the bounds of the file area specified by the arguments. If
234 * no disk_trailer was found then null is returned, and likewise we only
235 * accept a disk_trailer with a seekvalue that points to a disk_entry and
236 * both parts have valid PK-magic parts. Beyond some sanity check we try to
237 * catch a common brokeness with zip archives that still allows us to find
238 * the start of the zip central directory.
240 ZZIP_ENTRY* _zzip_new
241 zzip_entry_findfirst(FILE* disk)
243 if (! disk) return 0;
244 fseeko (disk, 0, SEEK_END);
245 ___ zzip_off_t disksize = ftello (disk);
246 if (disksize < (zzip_off_t) sizeof(struct zzip_disk_trailer)) return 0;
247 /* we read out chunks of 8 KiB in the hope to match disk granularity */
248 ___ zzip_off_t pagesize = PAGESIZE; /* getpagesize() */
249 ___ ZZIP_ENTRY* entry = malloc (sizeof(*entry)); if (! entry) return 0;
250 ___ char* buffer = malloc (pagesize); if (! buffer) goto nomem;
252 assert (pagesize/2 > (zzip_off_t) sizeof (struct zzip_disk_trailer));
253 /* at each step, we will fread a pagesize block which overlaps with the
254 * previous read by means of pagesize/2 step at the end of the while(1) */
255 ___ zzip_off_t mapoffs = disksize &~ (pagesize-1);
256 ___ zzip_off_t mapsize = disksize - mapoffs;
257 if (mapoffs && mapsize < pagesize/2) {
258 mapoffs -= pagesize/2; mapsize += pagesize/2; }
260 fseeko (disk, mapoffs, SEEK_SET);
261 fread (buffer, 1, mapsize, disk);
262 ___ char* p = buffer + mapsize - sizeof(struct zzip_disk_trailer);
263 for (; p >= buffer ; p--)
265 zzip_off_t root; /* (struct zzip_disk_entry*) */
266 if (zzip_disk_trailer_check_magic(p)) {
267 root = zzip_disk_trailer_rootseek (
268 (struct zzip_disk_trailer*)p);
269 if (root > disksize - (long)sizeof(struct zzip_disk_trailer)) {
270 /* first disk_entry is after the disk_trailer? can't be! */
271 zzip_off_t rootsize = zzip_disk_trailer_rootsize (
272 (struct zzip_disk_trailer*)p);
273 if (rootsize > mapoffs) continue;
274 /* a common brokeness that can be fixed: we just assume the
275 * central directory was written directly before : */
276 root = mapoffs - rootsize;
278 } else if (zzip_disk64_trailer_check_magic(p)) {
279 if (sizeof(zzip_off_t) < 8) return 0;
280 root = zzip_disk64_trailer_rootseek (
281 (struct zzip_disk64_trailer*)p);
284 assert (0 <= root && root < mapsize);
285 fseeko (disk, root, SEEK_SET);
286 fread (disk_(entry), 1, sizeof(*disk_(entry)), disk);
287 if (zzip_disk_entry_check_magic(entry)) {
289 entry->headseek = root;
290 entry->diskfile = disk;
291 entry->disksize = disksize;
292 if (prescan_entry(entry)) goto nomem;
296 if (! mapoffs) break; assert (mapsize >= pagesize/2);
297 mapoffs -= pagesize/2; /* mapsize += pagesize/2; */
298 mapsize = pagesize; /* if (mapsize > pagesize) ... */
299 if (disksize - mapoffs > 64*1024) break;
303 free (entry); ____;____;____;____;____;____;
307 /** => zzip_entry_findfile
309 * This function takes an existing "entry" in the central root directory
310 * (e.g. from zzip_entry_findfirst) and moves it to point to the next entry.
311 * On error it returns 0, otherwise the old entry. If no further match is
312 * found then null is returned and the entry already free()d. If you want
313 * to stop searching for matches before that case then please call
314 * => zzip_entry_free on the cursor struct ZZIP_ENTRY.
316 ZZIP_ENTRY* _zzip_new
317 zzip_entry_findnext(ZZIP_ENTRY* _zzip_restrict entry)
319 if (! entry) return entry;
320 if (! zzip_disk_entry_check_magic (entry)) goto err;
321 ___ zzip_off_t seek =
322 entry->headseek + zzip_disk_entry_sizeto_end (disk_(entry));
323 if (seek + (zzip_off_t) sizeof(*disk_(entry)) > entry->disksize) goto err;
325 fseeko (entry->diskfile, seek, SEEK_SET);
326 fread (disk_(entry), 1, sizeof(*disk_(entry)), entry->diskfile);
327 entry->headseek = seek;
328 if (! zzip_disk_entry_check_magic (entry)) goto err;
329 if (prescan_entry(entry)) goto err;
332 zzip_entry_free (entry);
336 /** => zzip_entry_findfile
337 * this function releases the malloc()ed areas needed for zzip_entry, the
338 * pointer is invalid afterwards. This function has #define synonyms of
339 * zzip_entry_findlast(), zzip_entry_findlastfile(), zzip_entry_findlastmatch()
342 zzip_entry_free(ZZIP_ENTRY* entry)
344 if (! entry) return 0;
345 prescan_clear (entry);
350 /** search for files in the (fseeko) zip central directory
352 * This function is given a filename as an additional argument, to find the
353 * disk_entry matching a given filename. The compare-function is usually
354 * strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used.
355 * - use null as argument for "old"-entry when searching the first
356 * matching entry, otherwise the last returned value if you look for other
357 * entries with a special "compare" function (if null then a doubled search
358 * is rather useless with this variant of _findfile). If no further entry is
359 * found then null is returned and any "old"-entry gets already free()d.
361 ZZIP_ENTRY* _zzip_new
362 zzip_entry_findfile(FILE* disk, char* filename,
363 ZZIP_ENTRY* _zzip_restrict entry,
364 zzip_strcmp_fn_t compare)
366 if (! filename || ! disk) return 0;
367 entry = ( ! entry ) ? zzip_entry_findfirst (disk)
368 : zzip_entry_findnext (entry);
369 if (! compare) compare = (zzip_strcmp_fn_t)(strcmp);
371 for (; entry ; entry = zzip_entry_findnext (entry))
372 { /* filenames within zip files are often not null-terminated! */
373 char* realname = zzip_entry_strdup_name (entry);
374 if (! realname) continue;
375 if (! compare (filename, realname)) {
376 free (realname); return entry;
378 free (realname); continue;
384 #ifdef ZZIP_HAVE_FNMATCH_H
385 #define _zzip_fnmatch fnmatch
387 # define _zzip_fnmatch_CASEFOLD FNM_CASEFOLD
389 # define _zzip_fnmatch_CASEFOLD 0
392 # define _zzip_fnmatch_CASEFOLD 0
/* if your system does not have fnmatch, we fall back to strcmp: the
 * "pattern" is then compared literally and the "flags" are ignored.
 * Nothing is printed here since callers may stream data to stdout. */
static int _zzip_fnmatch(char* pattern, char* string, int flags)
{
    (void) flags; /* only there to match the fnmatch(3) signature */
    return strcmp (pattern, string);
}
401 /** => zzip_entry_findfile
403 * This function uses a compare-function with an additional argument
404 * and it is called just like fnmatch(3) from POSIX.2 AD:1993), i.e.
405 * the argument filespec first and the ziplocal filename second with
406 * the integer-flags put in as third to the indirect call. If the
407 * platform has fnmatch available then null-compare will use that one
408 * and otherwise we fall back to mere strcmp, so if you need fnmatch
409 * searching then please provide an implementation somewhere else.
410 * - use null as argument for "after"-entry when searching the first
411 * matching entry, or the last disk_entry return-value to find the
412 * next entry matching the given filespec. If no further entry is
413 * found then null is returned and any "old"-entry gets already free()d.
415 ZZIP_ENTRY* _zzip_new
416 zzip_entry_findmatch(FILE* disk, char* filespec,
417 ZZIP_ENTRY* _zzip_restrict entry,
418 zzip_fnmatch_fn_t compare, int flags)
420 if (! filespec || ! disk) return 0;
421 entry = ( ! entry ) ? zzip_entry_findfirst (disk)
422 : zzip_entry_findnext (entry);
423 if (! compare) compare = (zzip_fnmatch_fn_t) _zzip_fnmatch;
425 for (; entry ; entry = zzip_entry_findnext (entry))
426 { /* filenames within zip files are often not null-terminated! */
427 char* realname = zzip_entry_strdup_name (entry);
428 if (! realname) continue;
429 if (! compare (filespec, realname, flags))
430 free (realname); return entry;
432 free (realname); continue;
438 /* ====================================================================== */
441 * typedef struct zzip_disk_file ZZIP_ENTRY_FILE;
443 struct zzip_entry_file /* : zzip_file_header */
445 struct zzip_file_header header; /* fopen detected header */
446 ZZIP_ENTRY* entry; /* fopen entry */
447 zzip_off_t data; /* for stored blocks */
448 zzip_size_t avail; /* memorized for checks on EOF */
449 zzip_size_t compressed; /* compressed flag and datasize */
450 zzip_size_t dataoff; /* offset from data start */
451 z_stream zlib; /* for inflated blocks */
452 char buffer[PAGESIZE]; /* work buffer for inflate algorithm */
455 /** open a file within a zip disk for reading
457 * This function does take an "entry" argument and copies it (or just takes
458 * it over as owner) to a new ZZIP_ENTRY_FILE handle structure. That
459 * structure contains also a zlib buffer for decoding. This function does
460 * seek to the file_header of the given "entry" and validates it for the
461 * data buffer following it. We do also prefetch some data from the data
462 * buffer thereby trying to match the disk pagesize for faster access later.
463 * The => zzip_entry_fread will then read in chunks of pagesizes which is
464 * the size of the internal readahead buffer. If an error occurs then null
467 ZZIP_ENTRY_FILE* _zzip_new
468 zzip_entry_fopen (ZZIP_ENTRY* entry, int takeover)
470 if (! entry) return 0;
472 ZZIP_ENTRY* found = malloc (sizeof(*entry));
473 if (! found) return 0;
474 memcpy (found, entry, sizeof(*entry)); /* prescan_copy */
475 found->tail = malloc (found->tailalloc);
476 if (! found->tail) { free (found); return 0; }
477 memcpy (found->tail, entry->tail, entry->tailalloc);
480 ___ ZZIP_ENTRY_FILE* file = malloc(sizeof(*file));
481 if (! file) goto fail1;
483 if (! zzip_entry_fread_file_header (entry, &file->header))
485 file->avail = zzip_file_header_usize (&file->header);
486 file->data = zzip_entry_data_offset (entry);
489 if (! file->avail || zzip_file_header_data_stored (&file->header))
490 { file->compressed = 0; return file; }
492 file->compressed = zzip_file_header_csize (&file->header);
493 file->zlib.opaque = 0;
494 file->zlib.zalloc = Z_NULL;
495 file->zlib.zfree = Z_NULL;
497 ___ zzip_off_t seek = file->data;
498 seek += sizeof(file->buffer); seek -= seek & (sizeof(file->buffer)-1);
499 assert (file->data < seek); /* pre-read to next PAGESIZE boundary... */
500 fseeko (file->entry->diskfile, file->data + file->dataoff, SEEK_SET);
501 file->zlib.next_in = file->buffer;
502 file->zlib.avail_in = fread (file->buffer, 1, seek - file->data,
503 file->entry->diskfile);
504 file->dataoff += file->zlib.avail_in; ____;
506 if (! zzip_file_header_data_deflated (&file->header)
507 || inflateInit2 (& file->zlib, -MAX_WBITS) != Z_OK) goto fail2;
513 zzip_entry_free (entry);
517 /** => zzip_entry_fopen
519 * This function opens a file found by name, so it does a search into
520 * the zip central directory with => zzip_entry_findfile and whatever
521 * is found first is given to => zzip_entry_fopen
523 ZZIP_ENTRY_FILE* _zzip_new
524 zzip_entry_ffile (FILE* disk, char* filename)
526 ZZIP_ENTRY* entry = zzip_entry_findfile (disk, filename, 0, 0);
527 if (! entry) return 0;
528 return zzip_entry_fopen (entry, 1);
532 /** => zzip_entry_fopen
534 * This function reads more bytes into the output buffer specified as
535 * arguments. The return value is null on eof or error, the stdio-like
536 * interface can not distinguish between these so you need to check
537 * with => zzip_entry_feof for the difference.
540 zzip_entry_fread (void* ptr, zzip_size_t sized, zzip_size_t nmemb,
541 ZZIP_ENTRY_FILE* file)
543 if (! file) return 0;
544 ___ zzip_size_t size = sized*nmemb;
545 if (! file->compressed) {
546 if (size > file->avail) size = file->avail;
547 fread (ptr, 1, size, file->entry->diskfile);
548 file->dataoff += size;
553 file->zlib.avail_out = size;
554 file->zlib.next_out = ptr;
555 ___ zzip_size_t total_old = file->zlib.total_out;
557 if (! file->zlib.avail_in) {
558 size = file->compressed - file->dataoff;
559 if (size > sizeof(file->buffer)) size = sizeof(file->buffer);
560 /* fseek (file->data + file->dataoff, file->entry->diskfile); */
561 file->zlib.avail_in = fread (file->buffer, 1, size,
562 file->entry->diskfile);
563 file->zlib.next_in = file->buffer;
564 file->dataoff += file->zlib.avail_in;
566 if (! file->zlib.avail_in) return 0;
568 ___ int err = inflate (& file->zlib, Z_NO_FLUSH);
569 if (err == Z_STREAM_END)
571 else if (err == Z_OK)
572 file->avail -= file->zlib.total_out - total_old;
576 if (file->zlib.avail_out && ! file->zlib.avail_in) continue;
577 return file->zlib.total_out - total_old;
581 /** => zzip_entry_fopen
582 * This function releases any zlib decoder info needed for decompression
583 * and dumps the ZZIP_ENTRY_FILE struct then.
586 zzip_entry_fclose (ZZIP_ENTRY_FILE* file)
588 if (! file) return 0;
589 if (file->compressed)
590 inflateEnd (& file->zlib);
591 zzip_entry_free (file->entry);
596 /** => zzip_entry_fopen
598 * This function allows to distinguish an error from an eof condition.
599 * Actually, if we found an error but we did already reach eof then we
600 * just keep on saying that it was an eof, so the app can just continue.
603 zzip_entry_feof (ZZIP_ENTRY_FILE* file)
605 return ! file || ! file->avail;