2 * These routines are fully independent from the traditional zzip
3 * implementation. They assume a readonly mmapped sharedmem block
4 * representing a complete zip file. The functions show how to
5 * parse the structure, find files and return a decoded bytestream.
7 * These routines are a bit simple and really here for documenting
8 * the way to access a zip file. The complexity of zip access comes
9 * from staggered reading of bytes and reposition of a filepointer in
10 * a big archive with lots of files and long compressed datastreams.
11 * Plus variants of drop-in stdio replacements, obfuscation routines,
12 * auto fileextensions, drop-in dirent replacements, and so on...
15 * Guido Draheim <guidod@gmx.de>
17 * Copyright (c) 2003,2004 Guido Draheim
18 * All rights reserved,
19 * use under the restrictions of the
20 * Lesser GNU General Public License
21 * or alternatively the restrictions
22 * of the Mozilla Public License 1.1
25 #include <zzip/mmapped.h>
29 #ifdef ZZIP_HAVE_FNMATCH_H
33 #if defined ZZIP_HAVE_UNISTD_H
35 #elif defined ZZIP_HAVE_IO_H
39 #if defined ZZIP_HAVE_STRING_H
41 #elif defined ZZIP_HAVE_STRINGS_H
46 #include <zzip/format.h>
47 #include <zzip/fetch.h>
48 #include <zzip/__mmap.h>
50 #if __STDC_VERSION__+0 > 199900L
59 * This function does primary initialization of a disk-buffer struct.
/* Bind `disk` to a caller-supplied memory range: `buffer` becomes the first
 * byte and `endbuf` the position one past the last byte (buffer + buflen). */
62 zzip_disk_init(ZZIP_DISK* disk, char* buffer, zzip_size_t buflen)
64 disk->buffer = buffer;
65 disk->endbuf = buffer+buflen;
/* the two fields below are deliberately left as the caller set them */
69 /* do not touch disk->user */
70 /* do not touch disk->code */
75 * This function allocates a new disk-buffer with => malloc(3)
77 ZZIP_DISK* _zzip_restrict
80 ZZIP_DISK* disk = malloc(sizeof(disk));
81 if (! disk) return disk;
82 zzip_disk_init (disk, 0, 0);
86 /** turn a filehandle into a mmapped zip disk archive handle
88 * This function uses the given file-descriptor to detect the length of the
89 * file and calls the system => mmap(2) to put it in main memory. If it is
90 * successful then a newly allocated ZZIP_DISK* is returned with
91 * disk->buffer pointing to the mapview of the zipdisk content.
93 ZZIP_DISK* _zzip_restrict
94 zzip_disk_mmap(int fd)
97 if (fstat (fd, &st) || !st.st_size) return 0;
98 ___ ZZIP_DISK* disk = zzip_disk_new (); if (! disk) return 0;
99 disk->buffer = _zzip_mmap (& zzip->mapped, fd, 0, st.st_size);
100 if (disk->buffer == MAP_FAILED) { free (disk); return 0; }
101 disk->endbuf = disk->buffer + st.st_size;
105 /** => zzip_disk_mmap
106 * This function is the inverse of => zzip_disk_mmap and uses the system
107 * munmap(2) on the buffer area and => free(3) on the ZZIP_DISK structure.
/* Release a disk that was set up by zzip_disk_mmap. A null `disk` is
 * accepted and answered with 0. */
110 zzip_disk_munmap(ZZIP_DISK* disk)
112 if (! disk) return 0;
/* the length to unmap is the distance between the two buffer bounds */
113 _zzip_munmap (disk->mapped, disk->buffer, disk->endbuf-disk->buffer);
118 /** => zzip_disk_mmap
120 * This function opens the given archive by name and turn the filehandle
121 * to => zzip_disk_mmap for bringing it to main memory. If it can not
122 * be => mmap(2)'ed then we slurp the whole file into a newly => malloc(2)'ed
123 * memory block. Only if that fails too then we return null. Since handling
124 * of disk->buffer is ambigous it should not be snatched away please.
126 ZZIP_DISK* _zzip_restrict
127 zzip_disk_open(char* filename)
133 if (stat (filename, &st) || !st.st_size) return 0;
134 ___ int fd = open (filename, O_RDONLY|O_BINARY);
135 if (fd <= 0) return 0;
136 ___ ZZIP_DISK* disk = zzip_disk_mmap (fd);
137 if (disk) return disk;
138 ___ char* buffer = malloc (st.st_size);
139 if (! buffer) return 0;
140 if ((st.st_size == read (fd, buffer, st.st_size)) &&
141 (disk = zzip_disk_new ()))
143 disk->buffer = buffer;
144 disk->endbuf = buffer+st.st_size;
147 return disk; ____;____;____;
150 /** => zzip_disk_mmap
152 * This function will release all data needed to access a (mmapped)
153 * zip archive, including any malloc()ed blocks, sharedmem mappings
154 * and it dumps the handle struct as well.
/* Dispose of a disk handle. A null `disk` is accepted and answered with 0. */
157 zzip_disk_close(ZZIP_DISK* disk)
159 if (! disk) return 0;
/* every `mapped` value other than -1 is delegated to zzip_disk_munmap;
 * presumably -1 marks a malloc()ed buffer - confirm against zzip_disk_open */
160 if (disk->mapped != -1) return zzip_disk_munmap (disk);
166 /* ====================================================================== */
167 /* helper functions */
169 #ifdef ZZIP_HAVE_STRNDUP
170 #define _zzip_strndup strndup
172 /* if your system does not have strndup: */
/* Local stand-in for strndup: copies up to `maxlen` characters of `p` into
 * a fresh block of maxlen+1 bytes. */
173 static char* _zzip_restrict _zzip_strndup(char* p, int maxlen)
176 ___ char* r = malloc (maxlen+1);
/* NOTE(review): strncpy does not write a terminator when `p` holds maxlen
 * or more characters - confirm r[maxlen] is set and `r` is null-checked. */
178 strncpy (r, p, maxlen);
184 #if defined ZZIP_HAVE_STRCASECMP || defined strcasecmp
185 #define _zzip_strcasecmp strcasecmp
187 /* if your system does not have strcasecmp: */
/*
 * Case-insensitive string comparison in the manner of strcasecmp(3).
 *
 * Returns 0 if both strings are equal when folded to lower case, a negative
 * value if `a` sorts before `b` and a positive value otherwise. Two null
 * pointers compare equal; a single null pointer yields a nonzero result
 * (1 for a null `a`, -1 for a null `b`).
 *
 * The parameters carry no restrict qualifier: both strings are only read
 * and a caller may legitimately compare a string with itself.
 */
static int _zzip_strcasecmp(char* a, char* b)
{
    if (! a) return (b) ? 1 : 0;
    if (! b) return -1;
    for (;; a++, b++)
    {
        /* <ctype.h> functions are only defined for unsigned char values */
        int v = tolower ((unsigned char) *a) - tolower ((unsigned char) *b);
        if (v) return v;
        /* v == 0 with a terminator in `a` means `b` ended here as well */
        if (! *a) return 0;
    }
}
203 /** helper functions for (mmapped) zip access api
205 * This function augments the other zzip_disk_entry_* helpers: here we move
206 * a disk_entry pointer (as returned by _find* functions) into a pointer to
207 * the data block right after the file_header. Only disk->buffer would be
208 * needed to perform the seek but we check the mmapped range end as well.
/* Resolve a central-directory entry to its local file header first and, if
 * one was found, let zzip_file_header_to_data step on to the data block. */
211 zzip_disk_entry_to_data(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
213 struct zzip_file_header* file =
214 zzip_disk_entry_to_file_header(disk, entry);
215 if (file) return zzip_file_header_to_data (file);
219 /** => zzip_disk_entry_to_data
220 * This function does half the job of => zzip_disk_entry_to_data where it
221 * can augment with => zzip_file_header_to_data helper from format/fetch.h
223 struct zzip_file_header*
224 zzip_disk_entry_to_file_header(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
226 char* file_header = /* (struct zzip_file_header*) */
227 (disk->buffer + zzip_disk_entry_fileoffset (entry));
228 if (disk->buffer > file_header || file_header >= disk->endbuf)
230 return (struct zzip_file_header*) file_header;
233 /** => zzip_disk_entry_to_data
234 * This function is a big helper despite its little name: in a zip file the
235 * encoded filenames are usually NOT zero-terminated but for common usage
236 * with libc we need it that way. Secondly, the filename SHOULD be present
237 * in the zip central directory but if not then we fallback to the filename
238 * given in the file_header of each compressed data portion.
/* Hand out a copy of the filename belonging to `entry`, made with
 * _zzip_strndup. Null `disk` or `entry` yields 0. */
241 zzip_disk_entry_strdup_name(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
243 if (! disk || ! entry) return 0;
245 ___ char* name; zzip_size_t len;
246 struct zzip_file_header* file;
/* first choice: the name stored in the central directory entry itself */
247 if ((len = zzip_disk_entry_namlen (entry)))
248 name = zzip_disk_entry_to_filename (entry);
/* second choice: the name stored in the local file header */
249 else if ((file = zzip_disk_entry_to_file_header (disk, entry)) &&
250 (len = zzip_file_header_namlen (file)))
251 name = zzip_file_header_to_filename (file);
/* the name bytes [name, name+len) must lie inside the disk buffer */
255 if (disk->buffer > name || name+len > disk->endbuf)
/* the copy comes from _zzip_strndup; presumably the caller has to free(3)
 * it - confirm with the callers */
258 return _zzip_strndup (name, len); ____;
261 /* ====================================================================== */
263 /** => zzip_disk_findfile
265 * This function is the first call of all the zip access functions here.
266 * It contains the code to find the first entry of the zip central directory.
267 * Here we require the mmapped block to represent a real zip file where the
268 * disk_trailer is _last_ in the file area, so that its position would be at
269 * a fixed offset from the end of the file area if not for the comment field
270 * allowed to be of variable length (which needs us to do a little search
271 * for the disk_trailer). However, in this simple implementation we disregard
272 * any disk_trailer info telling about multidisk archives, so we just return
273 * a pointer to the zip central directory.
275 * For an actual means, we are going to search backwards from the end
276 * of the mmapped block looking for the PK-magic signature of a
277 * disk_trailer. If we see one then we check the rootseek value to
278 * find the first disk_entry of the root central directory. If we find
279 * the correct PK-magic signature of a disk_entry over there then we
280 * assume we are done and we are going to return a pointer to that label.
282 * The return value is a pointer to the first zzip_disk_entry being checked
283 * to be within the bounds of the file area specified by the arguments. If
284 * no disk_trailer was found then null is returned, and likewise we only
285 * accept a disk_trailer with a seekvalue that points to a disk_entry and
286 * both parts have valid PK-magic parts. Beyond some sanity check we try to
287 * catch a common brokenness with zip archives that still allows us to find
288 * the start of the zip central directory.
/* Find the first central-directory entry by scanning backwards through the
 * buffer for a disk_trailer signature. */
290 struct zzip_disk_entry*
291 zzip_disk_findfirst(ZZIP_DISK* disk)
/* NOTE(review): endbuf - sizeof(trailer) is computed before it is known
 * that the buffer is at least that large. */
293 if (disk->buffer > disk->endbuf-sizeof(struct zzip_disk_trailer))
/* start at the last position where a whole trailer fits and walk back one
 * byte at a time */
295 ___ char* p = disk->endbuf-sizeof(struct zzip_disk_trailer);
296 for (; p >= disk->buffer ; p--)
/* positions that do not carry the trailer signature are skipped */
298 if (! zzip_disk_trailer_check_magic(p)) continue;
/* candidate start of the central directory: buffer + rootseek */
299 ___ char* root = /* (struct zzip_disk_entry*) */ disk->buffer +
300 zzip_disk_trailer_get_rootseek ((struct zzip_disk_trailer*)p);
302 { /* the first disk_entry is after the disk_trailer? can't be! */
303 zzip_size_t rootsize =
304 zzip_disk_trailer_get_rootsize ((struct zzip_disk_trailer*)p);
/* a directory larger than the space in front of the trailer is bogus */
305 if (disk->buffer+rootsize > p) continue;
306 /* a common brokenness that can be fixed: we just assume that the
307 * central directory was written directly before the trailer: */
/* accept the candidate only if it lies inside the buffer and carries the
 * disk_entry signature */
310 if (root < disk->buffer) continue;
311 if (zzip_disk_entry_check_magic(root))
312 return (struct zzip_disk_entry*) root;
318 /** => zzip_disk_findfile
320 * This function takes an existing disk_entry in the central root directory
321 * (e.g. from zzip_disk_findfirst) and returns the next entry within in
322 * the given bounds of the mmapped file area.
324 struct zzip_disk_entry*
325 zzip_disk_findnext(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
327 if ((char*)entry < disk->buffer ||
328 (char*)entry > disk->endbuf-sizeof(entry) ||
329 zzip_disk_entry_sizeto_end (entry) > 64*1024)
331 entry = zzip_disk_entry_to_next_entry (entry);
332 if ((char*)entry > disk->endbuf-sizeof(entry) ||
333 zzip_disk_entry_sizeto_end (entry) > 64*1024 ||
334 zzip_disk_entry_skipto_end (entry) + sizeof(entry) > disk->endbuf)
340 /** search for files in the (mmapped) zip central directory
342 * This function is given a filename as an additional argument, to find the
343 * disk_entry matching a given filename. The compare-function is usually
344 * strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used.
345 * - use null as argument for "after"-entry when searching the first
346 * matching entry, otherwise the last returned value if you look for other
347 * entries with a special "compare" function (if null then a doubled search
348 * is rather useless with this variant of _findfile).
350 struct zzip_disk_entry*
351 zzip_disk_findfile(ZZIP_DISK* disk, char* filename,
352 struct zzip_disk_entry* after, zzip_strcmp_fn_t compare)
354 struct zzip_disk_entry* entry = (! after ? zzip_disk_findfirst (disk)
355 : zzip_disk_findnext (disk, after));
357 compare = (zzip_strcmp_fn_t)( (disk->flags&1) ?
358 (_zzip_strcasecmp) : (strcmp));
359 for (; entry ; entry = zzip_disk_findnext (disk, entry))
361 /* filenames within zip files are often not null-terminated! */
362 char* realname = zzip_disk_entry_strdup_name (disk, entry);
363 if (realname && ! compare(filename, realname))
373 #ifdef ZZIP_HAVE_FNMATCH_H
374 #define _zzip_fnmatch fnmatch
376 # define _zzip_fnmatch_CASEFOLD FNM_CASEFOLD
378 # define _zzip_fnmatch_CASEFOLD 0
381 # define _zzip_fnmatch_CASEFOLD 0
382 /* if your system does not have fnmatch, we fall back to strcmp: */
/* Stand-in with the fnmatch call signature: `flags` is not looked at and
 * the result is that of a plain strcmp of pattern and string. */
383 static int _zzip_fnmatch(char* pattern, char* string, int flags)
/* NOTE(review): this prints a marker line to stdout on every call */
385 puts ("<zzip:mmapped:strcmp>");
386 return strcmp (pattern, string);
390 /** => zzip_disk_findfile
392 * This function uses a compare-function with an additional argument
393 * and it is called just like fnmatch(3) from POSIX.2 AD:1993), i.e.
394 * the argument filespec first and the ziplocal filename second with
395 * the integer-flags put in as third to the indirect call. If the
396 * platform has fnmatch available then null-compare will use that one
397 * and otherwise we fall back to mere strcmp, so if you need fnmatch
398 * searching then please provide an implementation somewhere else.
399 * - use null as argument for "after"-entry when searching the first
400 * matching entry, or the last disk_entry return-value to find the
401 * next entry matching the given filespec.
403 struct zzip_disk_entry*
404 zzip_disk_findmatch(ZZIP_DISK* disk, char* filespec,
405 struct zzip_disk_entry* after,
406 zzip_fnmatch_fn_t compare, int flags)
408 struct zzip_disk_entry* entry = (! after ? zzip_disk_findfirst (disk)
409 : zzip_disk_findnext (disk, after));
411 compare = (zzip_fnmatch_fn_t) _zzip_fnmatch;
412 if (disk->flags&1) disk->flags |= _zzip_fnmatch_CASEFOLD;
414 for (; entry ; entry = zzip_disk_findnext (disk, entry))
416 /* filenames within zip files are often not null-terminated! */
417 char* realname = zzip_disk_entry_strdup_name(disk, entry);
418 if (realname && ! compare(filespec, realname, flags))
428 /* ====================================================================== */
431 * typedef struct zzip_disk_file ZZIP_DISK_FILE;
/* Read state of one file part that was opened inside a zip disk; it is
 * filled in by zzip_disk_entry_fopen. */
433 struct zzip_disk_file
435 char* buffer; /* fopen disk->buffer */
436 char* endbuf; /* fopen disk->endbuf */
437 struct zzip_file_header* header; /* fopen detected header */
438 zzip_size_t avail; /* memorized for checks on EOF */
439 z_stream zlib; /* for inflated blocks */
/* read position in the raw data; set by fopen for stored (or empty) files */
440 char* stored; /* for stored blocks */
443 /** => zzip_disk_fopen
445 * the ZZIP_DISK_FILE* is rather simple in just encapsulating the
446 * arguments given to this function plus a zlib deflate buffer.
447 * Note that the ZZIP_DISK pointer does already contain the full
448 * mmapped file area of a zip disk, so open()ing a file part within
449 * that area happens to be a lookup of its bounds and encoding. That
450 * information is memorized on the ZZIP_DISK_FILE so that subsequent
451 * _read() operations will be able to get the next data portion or
452 * return an eof condition for that file part wrapped in the zip archive.
454 ZZIP_DISK_FILE* _zzip_restrict
455 zzip_disk_entry_fopen (ZZIP_DISK* disk, ZZIP_DISK_ENTRY* entry)
457 ZZIP_DISK_FILE* file = malloc(sizeof(ZZIP_DISK_FILE));
458 if (! file) return file;
459 file->buffer = disk->buffer;
460 file->endbuf = disk->endbuf;
461 file->header = zzip_disk_entry_to_file_header (disk, entry);
462 if (! file->header) { free (file); return 0; }
463 file->avail = zzip_file_header_usize (file->header);
465 if (! file->avail || zzip_file_header_data_stored (file->header))
466 { file->stored = zzip_file_header_to_data (file->header); return file; }
469 file->zlib.opaque = 0;
470 file->zlib.zalloc = Z_NULL;
471 file->zlib.zfree = Z_NULL;
472 file->zlib.avail_in = zzip_file_header_csize (file->header);
473 file->zlib.next_in = zzip_file_header_to_data (file->header);
475 if (! zzip_file_header_data_deflated (file->header) ||
476 inflateInit2 (& file->zlib, -MAX_WBITS) != Z_OK)
477 { free (file); return 0; }
482 /** opening a file part wrapped within a (mmapped) zip archive
484 * This function opens a file found by name, so it does a search into
485 * the zip central directory with => zzip_disk_findfile and whatever
486 * is found first is given to => zzip_disk_entry_fopen
/* Open a file part by name: the first entry that zzip_disk_findfile reports
 * for `filename` is handed to zzip_disk_entry_fopen. */
488 ZZIP_DISK_FILE* _zzip_restrict
489 zzip_disk_fopen (ZZIP_DISK* disk, char* filename)
/* null "after" and null "compare": search from the start with the default
 * compare function of zzip_disk_findfile */
491 ZZIP_DISK_ENTRY* entry = zzip_disk_findfile (disk, filename, 0, 0);
492 if (! entry) return 0; else return zzip_disk_entry_fopen (disk, entry);
496 /** => zzip_disk_fopen
498 * This function reads more bytes into the output buffer specified as
499 * arguments. The return value is null on eof or error, the stdio-like
500 * interface can not distinguish between these so you need to check
501 * with => zzip_disk_feof for the difference.
504 zzip_disk_fread (void* ptr, zzip_size_t sized, zzip_size_t nmemb,
505 ZZIP_DISK_FILE* file)
507 zzip_size_t size = sized*nmemb;
508 if (size > file->avail) size = file->avail;
511 memcpy (ptr, file->stored, size);
512 file->stored += size;
517 file->zlib.avail_out = sized*nmemb;
518 file->zlib.next_out = ptr;
519 ___ zzip_size_t total_old = file->zlib.total_out;
520 ___ int err = inflate (& file->zlib, Z_NO_FLUSH);
521 if (err == Z_STREAM_END)
523 else if (err == Z_OK)
524 file->avail -= file->zlib.total_out - total_old;
527 return file->zlib.total_out - total_old;
531 /** => zzip_disk_fopen
532 * This function releases any zlib decoder info needed for decompression
533 * and dumps the ZZIP_DISK_FILE* then.
/* Dispose of a file handle created by zzip_disk_entry_fopen. */
536 zzip_disk_fclose (ZZIP_DISK_FILE* file)
/* give the inflate state back to zlib; NOTE(review): confirm this is only
 * reached for handles that went through inflateInit2 */
539 inflateEnd (& file->zlib);
544 /** => zzip_disk_fopen
546 * This function allows to distinguish an error from an eof condition.
547 * Actually, if we found an error but we did already reach eof then we
548 * just keep on saying that it was an eof, so the app can just continue.
/* Report the eof condition: nonzero for a null `file` as well as for a
 * file with no bytes left in `avail`. */
551 zzip_disk_feof (ZZIP_DISK_FILE* file)
553 return ! file || ! file->avail;