#include "list.h"
#include "packfile.h"
#include "object-store.h"
+#include "dir.h"
+ #define IN_PACK(obj) oe_in_pack(&to_pack, obj) /* every macro in this group forwards to the matching oe_*() call, passing &to_pack as the first argument */
+ #define SIZE(obj) oe_size(&to_pack, obj)
+ #define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size) /* SET_* variants pass the new value as the last argument */
+ #define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
+ #define DELTA(obj) oe_delta(&to_pack, obj)
+ #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
+ #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
+ #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
+ #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
+ #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
+ #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
+
static const char *pack_usage[] = {
N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
unuse_pack(&w_curs);
}
- entry->type = oid_object_info(the_repository, &entry->idx.oid,
- &entry->size);
- /*
- * The error condition is checked in prepare_pack(). This is
- * to permit a missing preferred base object to be ignored
- * as a preferred base. Doing so can result in a larger
- * pack file, but the transfer will still take place.
- */
- oe_set_type(entry, oid_object_info(&entry->idx.oid, &canonical_size));
++ oe_set_type(entry,
++ oid_object_info(the_repository, &entry->idx.oid, &canonical_size));
+ if (entry->type_valid) {
+ SET_SIZE(entry, canonical_size);
+ } else {
+ /*
+ * Bad object type is checked in prepare_pack(). This is
+ * to permit a missing preferred base object to be ignored
+ * as a preferred base. Doing so can result in a larger
+ * pack file, but the transfer will still take place.
+ */
+ }
}
static int pack_offset_sort(const void *_a, const void *_b)
*/
static void drop_reused_delta(struct object_entry *entry)
{
- struct object_entry **p = &entry->delta->delta_child;
+ unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx; /* first-child link of entry's delta base; assumes entry->delta_idx != 0 -- TODO confirm with callers */
struct object_info oi = OBJECT_INFO_INIT;
+ enum object_type type;
+ unsigned long size;
+
/*
 * Unlink entry from its base's child chain. Each link holds an index
 * into to_pack.objects plus one, so a stored 0 terminates the walk.
 */
+ while (*idx) {
+ struct object_entry *oe = &to_pack.objects[*idx - 1];
- while (*p) {
- if (*p == entry)
- *p = (*p)->delta_sibling;
+ if (oe == entry)
+ *idx = oe->delta_sibling_idx; /* splice entry out; the same link is re-read on the next pass */
else
- p = &(*p)->delta_sibling;
+ idx = &oe->delta_sibling_idx; /* move on to the next sibling link */
}
- entry->delta = NULL;
+ SET_DELTA(entry, NULL);
entry->depth = 0;
- oi.sizep = &entry->size;
- oi.typep = &entry->type;
- if (packed_object_info(the_repository, entry->in_pack,
- entry->in_pack_offset, &oi) < 0) {
+ oi.sizep = &size; /* type and size are read back from these locals below */
+ oi.typep = &type;
- if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
++ if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
/*
* We failed to get the info from this pack for some reason;
* fall back to oid_object_info, which may find another copy.
- * And if that fails, the error will be recorded in entry->type
+ * And if that fails, the error will be recorded in oe_type(entry)
* and dealt with in prepare_pack().
*/
- entry->type = oid_object_info(the_repository, &entry->idx.oid,
- &entry->size);
- oe_set_type(entry, oid_object_info(&entry->idx.oid, &size));
++ oe_set_type(entry,
++ oid_object_info(the_repository, &entry->idx.oid, &size));
+ } else {
+ oe_set_type(entry, type);
}
+ SET_SIZE(entry, size); /* stored on both paths, whichever lookup supplied size */
}
/*
for (i = 0; i < to_pack.nr_objects; i++) {
struct object_entry *entry = sorted_by_offset[i];
check_object(entry);
- if (big_file_threshold < entry->size)
+ if (entry->type_valid &&
+ oe_size_greater_than(&to_pack, entry, big_file_threshold))
entry->no_try_delta = 1;
+ display_progress(progress_state, i + 1);
}
+ stop_progress(&progress_state);
/*
* This must happen in a second pass, since we rely on the delta
#endif
- if (oid_object_info(&e->idx.oid, &size) < 0)
+ /*
+ * Return the size of the object without doing any delta
+ * reconstruction (so non-deltas are true object sizes, but deltas
+ * return the size of the delta data).
+ *
+ * When e->type_ is neither OBJ_OFS_DELTA nor OBJ_REF_DELTA, the size
+ * is obtained from oid_object_info(). Otherwise it is parsed from the
+ * object header located at e->in_pack_offset in the pack returned by
+ * oe_in_pack(pack, e). Both lookups run between read_lock() and
+ * read_unlock(). A failed lookup or header parse is reported through
+ * die(); a delta entry that has no pack is reported through BUG().
+ */
+ unsigned long oe_get_size_slow(struct packing_data *pack,
+ const struct object_entry *e)
+ {
+ struct packed_git *p;
+ struct pack_window *w_curs;
+ unsigned char *buf;
+ enum object_type type; /* filled in by the header parser; not read afterwards */
+ unsigned long used, avail, size;
+
+ if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
+ read_lock();
++ if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
+ die(_("unable to get size of %s"), /* NOTE(review): reached with the read lock still held; presumably fine because die() does not return -- confirm */
+ oid_to_hex(&e->idx.oid));
+ read_unlock();
+ return size;
+ }
+
+ p = oe_in_pack(pack, e);
+ if (!p)
+ BUG("when e->type is a delta, it must belong to a pack");
+
+ read_lock();
+ w_curs = NULL;
+ buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
+ used = unpack_object_header_buffer(buf, avail, &type, &size);
+ if (used == 0) /* a zero return is treated as a header parse failure */
+ die(_("unable to parse object header of %s"),
+ oid_to_hex(&e->idx.oid));
+
+ unuse_pack(&w_curs); /* release the pack window before dropping the lock */
+ read_unlock();
+ return size;
+ }
+
static int try_delta(struct unpacked *trg, struct unpacked *src,
unsigned max_depth, unsigned long *mem_usage)
{
}
}
+ prepare_packing_data(&to_pack);
+
if (progress)
- progress_state = start_progress(_("Counting objects"), 0);
+ progress_state = start_progress(_("Enumerating objects"), 0);
if (!use_internal_rev_list)
read_object_list_from_stdin();
else {
#ifndef PACK_OBJECTS_H
#define PACK_OBJECTS_H
+ #include "object-store.h"
+
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024) /* = 268435456; presumably a byte count -- confirm at the use site */
+
+ #define OE_DFS_STATE_BITS 2 /* two bits cover every enum dfs_state value (0..3) */
+ #define OE_DEPTH_BITS 12
+ #define OE_IN_PACK_BITS 10 /* width of the object_entry.in_pack_idx bit-field */
+ #define OE_Z_DELTA_BITS 20 /* width of the object_entry.z_delta_size bit-field */
+ /*
+ * Note that oe_set_size() becomes expensive when the given size is
+ * above this limit. Don't lower it too much.
+ */
+ #define OE_SIZE_BITS 31 /* width of the object_entry.size_ bit-field */
+ #define OE_DELTA_SIZE_BITS 20 /* width of the object_entry.delta_size_ bit-field */
+
+ /*
+ * State flags for depth-first search used for analyzing delta cycles.
+ *
+ * The depth is measured in delta-links to the base (so if A is a delta
+ * against B, then A has a depth of 1, and B a depth of 0).
+ *
+ * DFS_NONE is explicitly 0 and the remaining enumerators count up from
+ * it, so DFS_NUM_STATES evaluates to 3: the number of real states, and
+ * the largest value in the enum. That still fits in the two bits given
+ * by OE_DFS_STATE_BITS.
+ */
+ enum dfs_state {
+ DFS_NONE = 0, /* presumably: entry not visited yet -- confirm against the cycle-breaking code */
+ DFS_ACTIVE, /* presumably: entry is on the path currently being walked -- confirm */
+ DFS_DONE, /* presumably: entry fully processed -- confirm */
+ DFS_NUM_STATES /* count of the states above, not a state itself */
+ };
+
+ /*
+ * The size of this struct largely determines pack-objects' memory
+ * consumption, so it is packed tightly. When you add or reorder
+ * fields in this struct, keep that memory cost in mind.
+ *
+ * basic object info
+ * -----------------
+ * idx.oid is filled up before delta searching starts. idx.crc32 is
+ * only valid after the object is written out and will be used for
+ * generating the index. idx.offset will be both gradually set and
+ * used in writing phase (base objects get offset first, then deltas
+ * refer to them)
+ *
+ * "size" is the uncompressed object size. Compressed size of the raw
+ * data for an object in a pack is not stored anywhere but is computed
+ * and made available when reverse .idx is made. Note that when a
+ * delta is reused, "size" is the uncompressed _delta_ size, not the
+ * canonical one after the delta has been applied.
+ *
+ * "hash" contains a path name hash which is used for sorting the
+ * delta list and also during delta searching. Once prepare_pack()
+ * returns it's no longer needed.
+ *
+ * source pack info
+ * ----------------
+ * The (in_pack, in_pack_offset) tuple contains the location of the
+ * object in the source pack. in_pack_header_size allows quickly
+ * skipping the header and going straight to the zlib stream.
+ *
+ * "type" and "in_pack_type" both describe object type. in_pack_type
+ * may contain a delta type, while type is always the canonical type.
+ *
+ * deltas
+ * ------
+ * Delta links (delta, delta_child and delta_sibling) are created to
+ * reflect that delta graph from the source pack then updated or added
+ * during delta searching phase when we find better deltas.
+ *
+ * delta_child and delta_sibling are last needed in
+ * compute_write_order(). "delta" and "delta_size" must remain valid
+ * at object writing phase in case the delta is not cached.
+ *
+ * If a delta is cached in memory and is compressed, delta_data points
+ * to the data and z_delta_size contains the compressed size. If it's
+ * uncompressed [1], z_delta_size must be zero. delta_size is always
+ * the uncompressed size and must be valid even if the delta is not
+ * cached.
+ *
+ * [1] during try_delta phase we don't bother with compressing because
+ * the delta could be quickly replaced with a better one.
+ */
struct object_entry {
struct pack_idx_entry idx;
- unsigned long size; /* uncompressed size */
- struct packed_git *in_pack; /* already in pack */
- off_t in_pack_offset;
- struct object_entry *delta; /* delta base object */
- struct object_entry *delta_child; /* deltified objects who bases me */
- struct object_entry *delta_sibling; /* other deltified objects who
- * uses the same base as me
- */
void *delta_data; /* cached delta (uncompressed) */
- unsigned long delta_size; /* delta data size (uncompressed) */
- unsigned long z_delta_size; /* delta data size (compressed) */
- enum object_type type;
- enum object_type in_pack_type; /* could be delta */
+ off_t in_pack_offset;
uint32_t hash; /* name hint hash */
- unsigned int in_pack_pos;
- unsigned char in_pack_header_size;
+ unsigned size_:OE_SIZE_BITS;
+ unsigned size_valid:1;
+ uint32_t delta_idx; /* delta base object */
+ uint32_t delta_child_idx; /* deltified objects who bases me */
+ uint32_t delta_sibling_idx; /* other deltified objects who
+ * uses the same base as me
+ */
+ unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
+ unsigned delta_size_valid:1;
+ unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */
+ unsigned z_delta_size:OE_Z_DELTA_BITS;
+ unsigned type_valid:1;
+ unsigned type_:TYPE_BITS;
+ unsigned no_try_delta:1;
+ unsigned in_pack_type:TYPE_BITS; /* could be delta */
unsigned preferred_base:1; /*
* we do not pack this, but is available
* to be used as the base object to delta