/*
+ * NOTE: this is part of libzzipfseeko (i.e. it is not libzzip).
+ *
* These routines are fully independent from the traditional zzip
* implementation. They assume a readonly seekable stdio handle
* representing a complete zip file. The functions show how to
* of the Mozilla Public License 1.1
*/
-/* we want fseeko/ftello ... */
-#define _LARGEFILE_SOURCE
+#define _LARGEFILE_SOURCE 1
+#define _ZZIP_ENTRY_STRUCT 1
+
+#include <zzip/types.h>
-#include <zzip/fseeko.h>
#include <assert.h>
#include <stdlib.h>
#include <sys/stat.h>
-#ifdef ZZIP_HAVE_FNMATCH_H
-#include <fnmatch.h>
-#endif
-
#if defined ZZIP_HAVE_STRING_H
#include <string.h>
#elif defined ZZIP_HAVE_STRINGS_H
#include <zlib.h>
#include <zzip/format.h>
+#include <zzip/fseeko.h>
#include <zzip/fetch.h>
#include <zzip/__mmap.h>
+#include <zzip/__fnmatch.h>
#if __STDC_VERSION__+0 > 199900L
#define ___
* following code more readable, we use a shorthand notation for the
* upcast needed in C (not needed in C++) as "disk_(entry)".
*/
-#ifdef __cplusplus
+#ifdef __zzip_entry_extends_zzip_disk_entry
#define disk_(_entry_) _entry_
#else
#define disk_(_entry_) (& (_entry_)->head)
#endif
-#ifdef __cplusplus
-struct zzip_entry : public struct zzip_disk_entry
-{
- char* _zzip_restrict tail;
- zzip_off_t tailalloc; /* the allocated size of tail */
- FILE* diskfile; /* a file reference */
- zzip_off_t disksize; /* the size of the file */
- zzip_off_t headseek; /* the offset within the file */
-};
-#else
-struct zzip_entry /* : struct zzip_disk_entry */
-{
- struct zzip_disk_entry head;
- char* _zzip_restrict tail;
- zzip_off_t tailalloc; /* the allocated size of tail */
- FILE* diskfile; /* a file reference */
- zzip_off_t disksize; /* the size of the file */
- zzip_off_t headseek; /* the offset within the file */
-};
-#endif
-
/* we try to round all seeks to the pagesize - since we do not use
* the sys/mmap interface we have to guess a good value here: */
#define PAGESIZE 8192
/* ====================================================================== */
/* helper functions */
+/** => zzip_entry_data_offset
+ * This function reads the corresponding struct zzip_file_header from
+ * the zip disk of the given "entry". The returned off_t points to the
+ * end of the file_header where the current fseek pointer has stopped.
+ * This is used to immediately parse out any filename/extras block following
+ * the file_header. The return value is null on error.
+ */
static zzip_off_t
zzip_entry_fread_file_header (ZZIP_ENTRY* entry,
struct zzip_file_header* file_header)
{
- zzip_off_t offset = zzip_disk_entry_fileoffset (disk_(entry));
+ if (! entry || ! file_header) return 0; /* null arguments are reported as an error */
+ ___ zzip_off_t offset = zzip_disk_entry_fileoffset (disk_(entry));
if (0 > offset || offset >= entry->disksize) return 0;
+
fseeko (entry->diskfile, offset, SEEK_SET);
return (fread (file_header, sizeof(*file_header), 1, entry->diskfile)
- ? offset+sizeof(*file_header) : 0 );
+ ? offset+sizeof(*file_header) : 0 ); ____;
}
/** helper functions for (fseeko) zip access api
return offset; ____;
}
-/** => zzip_entry_to_data
+/** => zzip_entry_data_offset
* This function is a big helper despite its little name: in a zip file the
* encoded filenames are usually NOT zero-terminated but for common usage
* with libc we need it that way. Secondly, the filename SHOULD be present
* in the zip central directory but if not then we fallback to the filename
* given in the file_header of each compressed data portion.
*/
-char* _zzip_restrict
+char* _zzip_new
zzip_entry_strdup_name(ZZIP_ENTRY* entry)
{
if (! entry) return 0;
___ zzip_size_t len;
- if ((len = zzip_disk_entry_namlen (disk_(entry))))
- {
+ if ((len = zzip_disk_entry_namlen (disk_(entry)))) {
char* name = malloc (len+1);
if (! name) return 0;
memcpy (name, entry->tail, len);
return name;
}
___ auto struct zzip_file_header header;
- if (zzip_entry_fread_file_header (entry, &header) &&
- (( len = zzip_file_header_namlen(&header) )))
- {
+ if (zzip_entry_fread_file_header (entry, &header)
+ && ( len = zzip_file_header_namlen(&header) )) {
char* name = malloc (len+1);
if (! name) return 0;
fread (name, 1, len, entry->diskfile);
____;____;
}
+/* Load the variable-length tail (name + comment + extras) that follows
+ * the fixed-size central directory record already held in "entry".
+ * The tail buffer is grown with realloc when it is too small; it is never
+ * shrunk, so it can be reused while the entry is moved through the
+ * central directory. The data is read from the current position of
+ * entry->diskfile, i.e. the caller must just have read the record itself.
+ *
+ * Returns 0 on success, ENOMEM if the buffer could not be grown, and EIO
+ * if fewer bytes than announced by the record could be read (for example
+ * on a truncated archive) - in that case the tail content is not valid.
+ */
+static int
+prescan_entry(ZZIP_ENTRY* entry)
+{
+    assert (entry);
+    ___ zzip_off_t tailsize = zzip_disk_entry_sizeof_tails (disk_(entry));
+    if (tailsize+1 > entry->tailalloc) {
+        /* keep the old buffer untouched when realloc fails */
+        char* newtail = realloc (entry->tail, tailsize+1);
+        if (! newtail) return ENOMEM;
+        entry->tail = newtail;
+        entry->tailalloc = tailsize+1;
+    }
+    /* name + comment + extras */
+    if (fread (entry->tail, 1, tailsize, entry->diskfile)
+        != (zzip_size_t) tailsize) return EIO; /* short read: do not report success */
+    return 0; ____;
+}
+
+/* Release the tail buffer owned by "entry" and reset the bookkeeping
+ * fields, so that the entry does not keep a pointer to freed memory. */
+static void
+prescan_clear(ZZIP_ENTRY* entry)
+{
+    assert (entry);
+    free (entry->tail); /* free() accepts a null pointer */
+    entry->tailalloc = 0;
+    entry->tail = 0;
+}
+
/* ====================================================================== */
/** => zzip_entry_findfile
* catch a common brokeness with zip archives that still allows us to find
* the start of the zip central directory.
*/
-ZZIP_ENTRY* _zzip_restrict
+ZZIP_ENTRY* _zzip_new
zzip_entry_findfirst(FILE* disk)
{
if (! disk) return 0;
___ zzip_off_t mapsize = disksize - mapoffs;
if (mapoffs && mapsize < pagesize/2) {
mapoffs -= pagesize/2; mapsize += pagesize/2; }
- while(1)
- {
+ while(1) {
fseeko (disk, mapoffs, SEEK_SET);
fread (buffer, 1, mapsize, disk);
- char* p = buffer + mapsize - sizeof(struct zzip_disk_trailer);
+ ___ char* p = buffer + mapsize - sizeof(struct zzip_disk_trailer);
for (; p >= buffer ; p--)
{
- if (! zzip_disk_trailer_check_magic(p)) continue;
- ___ zzip_off_t root =
- zzip_disk_trailer_rootseek ((struct zzip_disk_trailer*)p);
- if ((char*) root > p)
- { /* the first disk_entry is after the disk_trailer? can't be! */
- zzip_off_t rootsize =
- zzip_disk_trailer_rootsize ((struct zzip_disk_trailer*)p);
- if (rootsize > mapoffs) continue;
- /* a common brokeness that can be fixed: we just assume that
- * the central directory was written directly before : */
- root = mapoffs - rootsize;
- }
+ zzip_off_t root; /* (struct zzip_disk_entry*) */
+ if (zzip_disk_trailer_check_magic(p)) {
+ root = zzip_disk_trailer_rootseek (
+ (struct zzip_disk_trailer*)p);
+ if (root > disksize - (long)sizeof(struct zzip_disk_trailer)) {
+ /* first disk_entry is after the disk_trailer? can't be! */
+ zzip_off_t rootsize = zzip_disk_trailer_rootsize (
+ (struct zzip_disk_trailer*)p);
+ if (rootsize > mapoffs) continue;
+ /* a common brokenness that can be fixed: we just assume the
+ * central directory was written directly before : */
+ root = mapoffs - rootsize;
+ }
+ } else if (zzip_disk64_trailer_check_magic(p)) {
+ if (sizeof(zzip_off_t) < 8) return 0;
+ root = zzip_disk64_trailer_rootseek (
+ (struct zzip_disk64_trailer*)p);
+ } else continue;
+
assert (0 <= root && root < mapsize);
fseeko (disk, root, SEEK_SET);
fread (disk_(entry), 1, sizeof(*disk_(entry)), disk);
- if (zzip_disk_entry_check_magic(entry))
- {
+ if (zzip_disk_entry_check_magic(entry)) {
free (buffer);
entry->headseek = root;
entry->diskfile = disk;
entry->disksize = disksize;
- ___ zzip_size_t tailsize =
- zzip_disk_entry_sizeof_tails (disk_(entry));
- if (!( entry->tail = malloc (tailsize+1) )) goto nomem;
- fread (entry->tail, 1, tailsize, disk);
- entry->tailalloc = tailsize+1;
- return entry; ____;
+ if (prescan_entry(entry)) goto nomem;
+ return entry;
}
- ____;
- }
+ } ____;
if (! mapoffs) break; assert (mapsize >= pagesize/2);
mapoffs -= pagesize/2; /* mapsize += pagesize/2; */
mapsize = pagesize; /* if (mapsize > pagesize) ... */
* This function takes an existing "entry" in the central root directory
* (e.g. from zzip_entry_findfirst) and moves it to point to the next entry.
* On error it returns 0, otherwise the old entry. If no further match is
- * found then null is returned and the entry already free()d.
+ * found then null is returned and the entry already free()d. If you want
+ * to stop searching for matches before that case then please call
+ * => zzip_entry_free on the cursor struct ZZIP_ENTRY.
*/
-ZZIP_ENTRY* _zzip_restrict
+ZZIP_ENTRY* _zzip_new
zzip_entry_findnext(ZZIP_ENTRY* _zzip_restrict entry)
{
if (! entry) return entry;
+ if (! zzip_disk_entry_check_magic (entry)) goto err;
___ zzip_off_t seek =
entry->headseek + zzip_disk_entry_sizeto_end (disk_(entry));
if (seek + (zzip_off_t) sizeof(*disk_(entry)) > entry->disksize) goto err;
+
fseeko (entry->diskfile, seek, SEEK_SET);
fread (disk_(entry), 1, sizeof(*disk_(entry)), entry->diskfile);
entry->headseek = seek;
- ___ zzip_off_t tailsize = zzip_disk_entry_sizeof_tails (disk_(entry));
- if (tailsize+1 > entry->tailalloc)
- {
- char* newtail = realloc (entry->tail, tailsize+1);
- if (! newtail) goto err;
- entry->tail = newtail;
- entry->tailalloc = tailsize+1;
- }
- fread (entry->tail, 1, tailsize, entry->diskfile);
- return entry; ____;
+ if (! zzip_disk_entry_check_magic (entry)) goto err;
+ if (prescan_entry(entry)) goto err;
+ return entry;
err:
zzip_entry_free (entry);
return 0; ____;
zzip_entry_free(ZZIP_ENTRY* entry)
{
if (! entry) return 0;
- free (entry->tail);
+ prescan_clear (entry);
free (entry);
return 1;
}
* is rather useless with this variant of _findfile). If no further entry is
* found then null is returned and any "old"-entry gets already free()d.
*/
-ZZIP_ENTRY* _zzip_restrict
+ZZIP_ENTRY* _zzip_new
zzip_entry_findfile(FILE* disk, char* filename,
ZZIP_ENTRY* _zzip_restrict entry,
zzip_strcmp_fn_t compare)
{
if (! filename || ! disk) return 0;
- entry = ! entry ? zzip_entry_findfirst (disk)
+ entry = ( ! entry ) ? zzip_entry_findfirst (disk) /* no cursor given: start at the first entry */
: zzip_entry_findnext (entry);
-
if (! compare) compare = (zzip_strcmp_fn_t)(strcmp);
+
for (; entry ; entry = zzip_entry_findnext (entry))
- {
- /* filenames within zip files are often not null-terminated! */
+ { /* filenames within zip files are often not null-terminated! */
char* realname = zzip_entry_strdup_name (entry);
- if (realname && ! compare(filename, realname))
- {
- free (realname);
- return entry;
+ if (! realname) continue; /* no name copy available: move on to the next entry */
+ if (! compare (filename, realname)) {
+ free (realname); return entry; /* compare() gave 0: hand this entry back */
+ } else {
+ free (realname); continue; /* no match: release the copy and keep scanning */
}
- free (realname);
}
return 0;
}
* next entry matching the given filespec. If no further entry is
* found then null is returned and any "old"-entry gets already free()d.
*/
-ZZIP_ENTRY* _zzip_restrict
+ZZIP_ENTRY* _zzip_new
zzip_entry_findmatch(FILE* disk, char* filespec,
ZZIP_ENTRY* _zzip_restrict entry,
zzip_fnmatch_fn_t compare, int flags)
{
if (! filespec || ! disk) return 0;
- entry = ! entry ? zzip_entry_findfirst (disk)
+ entry = ( ! entry ) ? zzip_entry_findfirst (disk) /* no cursor given: start at the first entry */
: zzip_entry_findnext (entry);
-
if (! compare) compare = (zzip_fnmatch_fn_t) _zzip_fnmatch;
+
for (; entry ; entry = zzip_entry_findnext (entry))
- {
- /* filenames within zip files are often not null-terminated! */
- char* realname = zzip_entry_strdup_name(entry);
- if (realname && ! compare(filespec, realname, flags))
- {
- free (realname);
- return entry;
+ { /* filenames within zip files are often not null-terminated! */
+ char* realname = zzip_entry_strdup_name (entry);
+ if (! realname) continue; /* no name copy available: move on to the next entry */
+ if (! compare (filespec, realname, flags)) {
+ free (realname); return entry; /* compare() gave 0: hand this entry back */
+ } else {
+ free (realname); continue; /* no match: release the copy and keep scanning */
}
- free (realname);
}
return 0;
}
char buffer[PAGESIZE]; /* work buffer for inflate algorithm */
};
-/** => zzip_disk_fopen
+/** open a file within a zip disk for reading
*
- * the ZZIP_DISK_FILE* is rather simple in just encapsulating the
- * arguments given to this function plus a zlib deflate buffer.
- * Note that the ZZIP_DISK pointer does already contain the full
- * mmapped file area of a zip disk, so open()ing a file part within
- * that area happens to be a lookup of its bounds and encoding. That
- * information is memorized on the ZZIP_DISK_FILE so that subsequent
- * _read() operations will be able to get the next data portion or
- * return an eof condition for that file part wrapped in the zip archive.
+ * This function does take an "entry" argument and copies it (or just takes
+ * it over as owner) to a new ZZIP_ENTRY_FILE handle structure. That
+ * structure contains also a zlib buffer for decoding. This function does
+ * seek to the file_header of the given "entry" and validates it for the
+ * data buffer following it. We do also prefetch some data from the data
+ * buffer thereby trying to match the disk pagesize for faster access later.
+ * The => zzip_entry_fread will then read in chunks of pagesizes which is
+ * the size of the internal readahead buffer. If an error occurs then null
+ * is returned.
*/
-ZZIP_ENTRY_FILE* _zzip_restrict
+ZZIP_ENTRY_FILE* _zzip_new
zzip_entry_fopen (ZZIP_ENTRY* entry, int takeover)
{
if (! entry) return 0;
- if (! takeover)
- {
+ if (! takeover) {
ZZIP_ENTRY* found = malloc (sizeof(*entry));
if (! found) return 0;
- memcpy (found, entry, sizeof(*entry));
+ memcpy (found, entry, sizeof(*entry)); /* prescan_copy */
found->tail = malloc (found->tailalloc);
if (! found->tail) { free (found); return 0; }
memcpy (found->tail, entry->tail, entry->tailalloc);
entry = found;
}
- ZZIP_ENTRY_FILE* file = malloc(sizeof(*file));
+ ___ ZZIP_ENTRY_FILE* file = malloc(sizeof(*file));
if (! file) goto fail1;
file->entry = entry;
if (! zzip_entry_fread_file_header (entry, &file->header))
file->entry->diskfile);
file->dataoff += file->zlib.avail_in; ____;
- if (! zzip_file_header_data_deflated (&file->header) ||
- inflateInit2 (& file->zlib, -MAX_WBITS) != Z_OK) goto fail2;
+ if (! zzip_file_header_data_deflated (&file->header)
+ || inflateInit2 (& file->zlib, -MAX_WBITS) != Z_OK) goto fail2;
return file;
fail2:
free (file);
fail1:
zzip_entry_free (entry);
- return 0;
+ return 0; ____;
}
-/** openening a file part wrapped within a (mmapped) zip archive
+/** => zzip_entry_fopen
*
* This function opens a file found by name, so it does a search into
- * the zip central directory with => zzip_disk_findfile and whatever
- * is found first is given to => zzip_disk_entry_fopen
+ * the zip central directory with => zzip_entry_findfile and whatever
+ * is found first is given to => zzip_entry_fopen
*/
-ZZIP_ENTRY_FILE* _zzip_restrict
+ZZIP_ENTRY_FILE* _zzip_new
zzip_entry_ffile (FILE* disk, char* filename)
{
ZZIP_ENTRY* entry = zzip_entry_findfile (disk, filename, 0, 0);
- if (! entry) return 0; else return zzip_entry_fopen (entry, 1);
+ if (! entry) return 0; /* no entry found for that name */
+ return zzip_entry_fopen (entry, 1); /* takeover=1: the entry is handed over, not copied */
}
-/** => zzip_disk_fopen
+/** => zzip_entry_fopen
*
* This function reads more bytes into the output buffer specified as
* arguments. The return value is null on eof or error, the stdio-like
* interface can not distinguish between these so you need to check
- * with => zzip_disk_feof for the difference.
+ * with => zzip_entry_feof for the difference.
*/
zzip_size_t
zzip_entry_fread (void* ptr, zzip_size_t sized, zzip_size_t nmemb,
ZZIP_ENTRY_FILE* file)
{
- zzip_size_t size = sized*nmemb;
- if (! file->compressed)
- {
+ if (! file) return 0;
+ ___ zzip_size_t size = sized*nmemb;
+ if (! file->compressed) {
if (size > file->avail) size = file->avail;
fread (ptr, 1, size, file->entry->diskfile);
file->dataoff += size;
file->zlib.avail_out = size;
file->zlib.next_out = ptr;
___ zzip_size_t total_old = file->zlib.total_out;
- while (1)
- {
- if (! file->zlib.avail_in)
- {
+ while (1) {
+ if (! file->zlib.avail_in) {
size = file->compressed - file->dataoff;
if (size > sizeof(file->buffer)) size = sizeof(file->buffer);
/* fseek (file->data + file->dataoff, file->entry->diskfile); */
____;
if (file->zlib.avail_out && ! file->zlib.avail_in) continue;
return file->zlib.total_out - total_old;
- }____;
+ }____;____;
}
/** => zzip_entry_fopen
* This function releases any zlib decoder info needed for decompression
- * and dumps the ZZIP_ENTRY_FILE* then.
+ * and dumps the ZZIP_ENTRY_FILE struct then.
*/
int
zzip_entry_fclose (ZZIP_ENTRY_FILE* file)
{
+ if (! file) return 0;
if (file->compressed)
inflateEnd (& file->zlib);
zzip_entry_free (file->entry);