static struct packed_git *reuse_packfile;
static uint32_t reuse_packfile_objects;
-static struct bitmap *reuse_packfile_bitmap;
+static off_t reuse_packfile_offset;
static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
-static int allow_pack_reuse = 1;
static enum {
WRITE_BITMAP_FALSE = 0,
WRITE_BITMAP_QUIET,
return wo;
}
-/*
- * Record the offsets needed in our reused packfile chunks due to
- * "gaps" where we omitted some objects.
- */
-static struct reused_chunk {
- off_t start;
- off_t offset;
-} *reused_chunks;
-static int reused_chunks_nr;
-static int reused_chunks_alloc;
-
-static void record_reused_object(off_t where, off_t offset)
-{
- if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].offset == offset)
- return;
-
- ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
- reused_chunks_alloc);
- reused_chunks[reused_chunks_nr].start = where;
- reused_chunks[reused_chunks_nr].offset = offset;
- reused_chunks_nr++;
-}
-
-/*
- * Binary search to find the chunk that "where" is in. Note
- * that we're not looking for an exact match, just the first
- * chunk that contains it (which implicitly ends at the start
- * of the next chunk).
- */
-static off_t find_reused_offset(off_t where)
+static off_t write_reused_pack(struct hashfile *f)
{
- int lo = 0, hi = reused_chunks_nr;
- while (lo < hi) {
- int mi = lo + ((hi - lo) / 2);
- if (where == reused_chunks[mi].start)
- return reused_chunks[mi].offset;
- if (where < reused_chunks[mi].start)
- hi = mi;
- else
- lo = mi + 1;
- }
-
- /*
- * The first chunk starts at zero, so we can't have gone below
- * there.
- */
- assert(lo);
- return reused_chunks[lo-1].offset;
-}
-
-static void write_reused_pack_one(size_t pos, struct hashfile *out,
- struct pack_window **w_curs)
-{
- off_t offset, next, cur;
- enum object_type type;
- unsigned long size;
-
- offset = reuse_packfile->revindex[pos].offset;
- next = reuse_packfile->revindex[pos + 1].offset;
-
- record_reused_object(offset, offset - hashfile_total(out));
-
- cur = offset;
- type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
- assert(type >= 0);
-
- if (type == OBJ_OFS_DELTA) {
- off_t base_offset;
- off_t fixup;
-
- unsigned char header[MAX_PACK_OBJECT_HEADER];
- unsigned len;
-
- base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
- assert(base_offset != 0);
-
- /* Convert to REF_DELTA if we must... */
- if (!allow_ofs_delta) {
- int base_pos = find_revindex_position(reuse_packfile, base_offset);
- const unsigned char *base_sha1 =
- nth_packed_object_sha1(reuse_packfile,
- reuse_packfile->revindex[base_pos].nr);
-
- len = encode_in_pack_object_header(header, sizeof(header),
- OBJ_REF_DELTA, size);
- hashwrite(out, header, len);
- hashwrite(out, base_sha1, 20);
- copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
- return;
- }
+ unsigned char buffer[8192];
+ off_t to_write, total;
+ int fd;
- /* Otherwise see if we need to rewrite the offset... */
- fixup = find_reused_offset(offset) -
- find_reused_offset(base_offset);
- if (fixup) {
- unsigned char ofs_header[10];
- unsigned i, ofs_len;
- off_t ofs = offset - base_offset - fixup;
-
- len = encode_in_pack_object_header(header, sizeof(header),
- OBJ_OFS_DELTA, size);
-
- i = sizeof(ofs_header) - 1;
- ofs_header[i] = ofs & 127;
- while (ofs >>= 7)
- ofs_header[--i] = 128 | (--ofs & 127);
-
- ofs_len = sizeof(ofs_header) - i;
-
- if (0) {
- off_t expected_size = cur - offset;
-
- if (len + ofs_len < expected_size) {
- unsigned max_pad = (len >= 4) ? 9 : 5;
- header[len - 1] |= 0x80;
- while (len < max_pad && len + ofs_len < expected_size)
- header[len++] = 0x80;
- header[len - 1] &= 0x7F;
- }
- }
+ if (!is_pack_valid(reuse_packfile))
+ die(_("packfile is invalid: %s"), reuse_packfile->pack_name);
- hashwrite(out, header, len);
- hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
- copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
- return;
- }
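+ /*
+ * Open the on-disk packfile directly; its contents are copied
+ * byte-for-byte rather than re-encoded object by object.
+ */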
+ fd = git_open(reuse_packfile->pack_name);
+ if (fd < 0)
+ die_errno(_("unable to open packfile for reuse: %s"),
+ reuse_packfile->pack_name);
- /* ...otherwise we have no fixup, and can write it verbatim */
- }
+ if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
+ die_errno(_("unable to seek in reused packfile"));
- copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
-}
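+ /*
+ * A negative offset means the whole pack is reused: copy everything
+ * after the header up to, but not including, the trailing pack
+ * checksum.
+ */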
+ if (reuse_packfile_offset < 0)
+ reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz;
-static size_t write_reused_pack_verbatim(struct hashfile *out,
- struct pack_window **w_curs)
-{
- size_t pos = 0;
+ total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
- while (pos < reuse_packfile_bitmap->word_alloc &&
- reuse_packfile_bitmap->words[pos] == (eword_t)~0)
- pos++;
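+ /*
+ * Stream the reused bytes through the hashfile in fixed-size chunks
+ * so they are covered by the checksum of the pack we are writing.
+ */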
+ while (to_write) {
+ int read_pack = xread(fd, buffer, sizeof(buffer));
- if (pos) {
- off_t to_write;
+ if (read_pack <= 0)
+ die_errno(_("unable to read from reused packfile"));
- written = (pos * BITS_IN_EWORD);
- to_write = reuse_packfile->revindex[written].offset
- - sizeof(struct pack_header);
+ if (read_pack > to_write)
+ read_pack = to_write;
- record_reused_object(sizeof(struct pack_header), 0);
- hashflush(out);
- copy_pack_data(out, reuse_packfile, w_curs,
- sizeof(struct pack_header), to_write);
+ hashwrite(f, buffer, read_pack);
+ to_write -= read_pack;
+ /*
+ * We don't know the actual number of objects written so far,
+ * only how many bytes we have written, how many bytes there are
+ * in total, and how many objects there are in total. So we can
+ * fake it by pretending all objects we are writing are the same
+ * size. This gives us a smooth progress meter, and at the end it
+ * matches the true answer.
+ */
+ written = reuse_packfile_objects *
+ (((double)(total - to_write)) / total);
display_progress(progress_state, written);
}
- return pos;
-}
-
-static void write_reused_pack(struct hashfile *f)
-{
- size_t i = 0;
- uint32_t offset;
- struct pack_window *w_curs = NULL;
-
- if (allow_ofs_delta)
- i = write_reused_pack_verbatim(f, &w_curs);
- for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
- eword_t word = reuse_packfile_bitmap->words[i];
- size_t pos = (i * BITS_IN_EWORD);
-
- for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
- if ((word >> offset) == 0)
- break;
-
- offset += ewah_bit_ctz64(word >> offset);
- write_reused_pack_one(pos + offset, f, &w_curs);
- display_progress(progress_state, ++written);
- }
- }
-
- unuse_pack(&w_curs);
+ close(fd);
+ written = reuse_packfile_objects;
+ display_progress(progress_state, written);
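+ /* return the number of reused bytes written, excluding the pack header */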
+ return reuse_packfile_offset - sizeof(struct pack_header);
}
static const char no_split_warning[] = N_(
offset = write_pack_header(f, nr_remaining);
if (reuse_packfile) {
+ off_t packfile_size;
assert(pack_to_stdout);
- write_reused_pack(f);
- offset = hashfile_total(f);
+
+ packfile_size = write_reused_pack(f);
+ offset += packfile_size;
}
nr_written = 0;
{
struct object_entry *entry;
- if (reuse_packfile_bitmap &&
- bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
- return 1;
-
entry = packlist_find(&to_pack, oid);
if (!entry)
return 0;
create_object_entry(oid, type, pack_name_hash(name),
exclude, name && no_try_delta(name),
found_pack, found_offset);
-
return 1;
}
free(p);
}
-static int obj_is_packed(const struct object_id *oid)
-{
- return packlist_find(&to_pack, oid) ||
- (reuse_packfile_bitmap &&
- bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
-}
-
static void add_tag_chain(const struct object_id *oid)
{
struct tag *tag;
* it was included via bitmaps, we would not have parsed it
* previously).
*/
- if (obj_is_packed(oid))
+ if (packlist_find(&to_pack, oid))
return;
tag = lookup_tag(the_repository, oid);
if (starts_with(path, "refs/tags/") && /* is a tag? */
!peel_ref(path, &peeled) && /* peelable? */
- obj_is_packed(&peeled)) /* object packed? */
+ packlist_find(&to_pack, &peeled)) /* object packed? */
add_tag_chain(oid);
return 0;
}
if (nr_deltas && n > 1) {
unsigned nr_done = 0;
-
if (progress)
progress_state = start_progress(_("Compressing objects"),
nr_deltas);
use_bitmap_index_default = git_config_bool(k, v);
return 0;
}
- if (!strcmp(k, "pack.allowpackreuse")) {
- allow_pack_reuse = git_config_bool(k, v);
- return 0;
- }
if (!strcmp(k, "pack.threads")) {
delta_search_threads = git_config_int(k, v);
if (delta_search_threads < 0)
static int pack_options_allow_reuse(void)
{
return pack_to_stdout &&
+ allow_ofs_delta &&
!ignore_packed_keep_on_disk &&
!ignore_packed_keep_in_core &&
(!local || !have_non_local_packs) &&
if (!(bitmap_git = prepare_bitmap_walk(revs)))
return -1;
- if (allow_pack_reuse &&
- pack_options_allow_reuse() &&
+ if (pack_options_allow_reuse() &&
!reuse_partial_packfile_from_bitmap(
bitmap_git,
&reuse_packfile,
&reuse_packfile_objects,
- &reuse_packfile_bitmap)) {
+ &reuse_packfile_offset)) {
assert(reuse_packfile_objects);
nr_result += reuse_packfile_objects;
display_progress(progress_state, nr_result);
if (progress)
fprintf_ln(stderr,
_("Total %"PRIu32" (delta %"PRIu32"),"
- " reused %"PRIu32" (delta %"PRIu32"),"
- " pack-reused %"PRIu32),
- written, written_delta, reused, reused_delta,
- reuse_packfile_objects);
+ " reused %"PRIu32" (delta %"PRIu32")"),
+ written, written_delta, reused, reused_delta);
return 0;
}
munmap(bitmap_git->map, bitmap_git->map_size);
bitmap_git->map = NULL;
bitmap_git->map_size = 0;
-
- kh_destroy_oid_map(bitmap_git->bitmaps);
- bitmap_git->bitmaps = NULL;
-
- kh_destroy_oid_pos(bitmap_git->ext_index.positions);
- bitmap_git->ext_index.positions = NULL;
-
return -1;
}
enum object_type object_type,
show_reachable_fn show_reach)
{
- size_t i = 0;
+ size_t pos = 0, i = 0;
uint32_t offset;
struct ewah_iterator it;
struct bitmap *objects = bitmap_git->result;
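+ /*
+ * If every object in the pack is being reused verbatim, there is
+ * nothing left for this function to enumerate.
+ */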
+ if (bitmap_git->reuse_objects == bitmap_git->pack->num_objects)
+ return;
+
ewah_iterator_init(&it, type_filter);
- for (i = 0; i < objects->word_alloc &&
- ewah_iterator_next(&filter, &it); i++) {
+ while (i < objects->word_alloc && ewah_iterator_next(&filter, &it)) {
eword_t word = objects->words[i] & filter;
- size_t pos = (i * BITS_IN_EWORD);
-
- if (!word)
- continue;
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
struct object_id oid;
offset += ewah_bit_ctz64(word >> offset);
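+ /*
+ * Objects that fall within the reused prefix of the pack have
+ * already been handled by the pack-reuse path; skip them here.
+ */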
+ if (pos + offset < bitmap_git->reuse_objects)
+ continue;
+
entry = &bitmap_git->pack->revindex[pos + offset];
nth_packed_object_oid(&oid, bitmap_git->pack, entry->nr);
show_reach(&oid, object_type, 0, hash, bitmap_git->pack, entry->offset);
}
+
+ pos += BITS_IN_EWORD;
+ i++;
}
}
return NULL;
}
-static void try_partial_reuse(struct bitmap_index *bitmap_git,
- size_t pos,
- struct bitmap *reuse,
- struct pack_window **w_curs)
+int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
+ struct packed_git **packfile,
+ uint32_t *entries,
+ off_t *up_to)
{
- struct revindex_entry *revidx;
- off_t offset;
- enum object_type type;
- unsigned long size;
-
- if (pos >= bitmap_git->pack->num_objects)
- return; /* not actually in the pack */
-
- revidx = &bitmap_git->pack->revindex[pos];
- offset = revidx->offset;
- type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size);
- if (type < 0)
- return; /* broken packfile, punt */
-
- if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) {
- off_t base_offset;
- int base_pos;
-
- /*
- * Find the position of the base object so we can look it up
- * in our bitmaps. If we can't come up with an offset, or if
- * that offset is not in the revidx, the pack is corrupt.
- * There's nothing we can do, so just punt on this object,
- * and the normal slow path will complain about it in
- * more detail.
- */
- base_offset = get_delta_base(bitmap_git->pack, w_curs,
- &offset, type, revidx->offset);
- if (!base_offset)
- return;
- base_pos = find_revindex_position(bitmap_git->pack, base_offset);
- if (base_pos < 0)
- return;
-
- /*
- * We assume delta dependencies always point backwards. This
- * lets us do a single pass, and is basically always true
- * due to the way OFS_DELTAs work. You would not typically
- * find REF_DELTA in a bitmapped pack, since we only bitmap
- packs we write fresh, and OFS_DELTA is the default. But
- * let's double check to make sure the pack wasn't written with
- * odd parameters.
- */
- if (base_pos >= pos)
- return;
-
- /*
- * And finally, if we're not sending the base as part of our
- * reuse chunk, then don't send this object either. The base
- * would come after us, along with other objects not
- * necessarily in the pack, which means we'd need to convert
- * to REF_DELTA on the fly. Better to just let the normal
- * object_entry code path handle it.
- */
- if (!bitmap_get(reuse, base_pos))
- return;
- }
-
/*
- * If we got here, then the object is OK to reuse. Mark it.
+ * Reuse the packfile content if we need more than
+ * 90% of its objects
*/
- bitmap_set(reuse, pos);
-}
+ static const double REUSE_PERCENT = 0.9;
-int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
- struct packed_git **packfile_out,
- uint32_t *entries,
- struct bitmap **reuse_out)
-{
struct bitmap *result = bitmap_git->result;
- struct bitmap *reuse;
- struct pack_window *w_curs = NULL;
- size_t i = 0;
- uint32_t offset;
+ uint32_t reuse_threshold;
+ uint32_t i, reuse_objects = 0;
assert(result);
- while (i < result->word_alloc && result->words[i] == (eword_t)~0)
- i++;
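+ /*
+ * Count the leading run of set bits, i.e. how many objects at the
+ * start of the pack are all needed and can be reused verbatim.
+ */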
+ for (i = 0; i < result->word_alloc; ++i) {
+ if (result->words[i] != (eword_t)~0) {
+ reuse_objects += ewah_bit_ctz64(~result->words[i]);
+ break;
+ }
- /* Don't mark objects not in the packfile */
- if (i > bitmap_git->pack->num_objects / BITS_IN_EWORD)
- i = bitmap_git->pack->num_objects / BITS_IN_EWORD;
+ reuse_objects += BITS_IN_EWORD;
+ }
- reuse = bitmap_word_alloc(i);
- memset(reuse->words, 0xFF, i * sizeof(eword_t));
+#ifdef GIT_BITMAP_DEBUG
+ {
+ const unsigned char *sha1;
+ struct revindex_entry *entry;
- for (; i < result->word_alloc; ++i) {
- eword_t word = result->words[i];
- size_t pos = (i * BITS_IN_EWORD);
+ entry = &bitmap_git->reverse_index->revindex[reuse_objects];
+ sha1 = nth_packed_object_sha1(bitmap_git->pack, entry->nr);
- for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
- if ((word >> offset) == 0)
- break;
-
- offset += ewah_bit_ctz64(word >> offset);
- try_partial_reuse(bitmap_git, pos + offset, reuse, &w_curs);
- }
+ fprintf(stderr, "Failed to reuse at %d (%016llx)\n",
+ reuse_objects, result->words[i]);
+ fprintf(stderr, " %s\n", hash_to_hex(sha1));
}
+#endif
- unuse_pack(&w_curs);
-
- *entries = bitmap_popcount(reuse);
- if (!*entries) {
- bitmap_free(reuse);
+ if (!reuse_objects)
return -1;
+
+ if (reuse_objects >= bitmap_git->pack->num_objects) {
+ bitmap_git->reuse_objects = *entries = bitmap_git->pack->num_objects;
+ *up_to = -1; /* reuse the full pack */
+ *packfile = bitmap_git->pack;
+ return 0;
}
- /*
- * Drop any reused objects from the result, since they will not
- * need to be handled separately.
- */
- bitmap_and_not(result, reuse);
- *packfile_out = bitmap_git->pack;
- *reuse_out = reuse;
- return 0;
-}
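+ /*
+ * Partial reuse is only worthwhile when the reusable prefix covers
+ * at least REUSE_PERCENT of the objects we are going to send.
+ */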
+ reuse_threshold = bitmap_popcount(bitmap_git->result) * REUSE_PERCENT;
-int bitmap_walk_contains(struct bitmap_index *bitmap_git,
- struct bitmap *bitmap, const struct object_id *oid)
-{
- int idx;
+ if (reuse_objects < reuse_threshold)
+ return -1;
- if (!bitmap)
- return 0;
+ bitmap_git->reuse_objects = *entries = reuse_objects;
+ *up_to = bitmap_git->pack->revindex[reuse_objects].offset;
+ *packfile = bitmap_git->pack;
- idx = bitmap_position(bitmap_git, oid);
- return idx >= 0 && bitmap_get(bitmap, idx);
+ return 0;
}
void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git,