From: Junio C Hamano Date: Wed, 23 Oct 2019 04:58:22 +0000 (+0900) Subject: Revert "Merge branch 'jk/packfile-reuse-cleanup' into next" X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=88234ef216714321fafc6e9c9d28ec975dfdd180;p=git Revert "Merge branch 'jk/packfile-reuse-cleanup' into next" This reverts commit dc60b31833b45e3dd027531a7d21c774dad948ec, reversing changes made to 1bb1c0846ff9a7ac2334df39cc4232a6ae8a419c. --- diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1929470221..5876583220 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -92,11 +92,10 @@ static struct progress *progress_state; static struct packed_git *reuse_packfile; static uint32_t reuse_packfile_objects; -static struct bitmap *reuse_packfile_bitmap; +static off_t reuse_packfile_offset; static int use_bitmap_index_default = 1; static int use_bitmap_index = -1; -static int allow_pack_reuse = 1; static enum { WRITE_BITMAP_FALSE = 0, WRITE_BITMAP_QUIET, @@ -785,189 +784,57 @@ static struct object_entry **compute_write_order(void) return wo; } -/* - * Record the offsets needed in our reused packfile chunks due to - * "gaps" where we omitted some objects. - */ -static struct reused_chunk { - off_t start; - off_t offset; -} *reused_chunks; -static int reused_chunks_nr; -static int reused_chunks_alloc; - -static void record_reused_object(off_t where, off_t offset) -{ - if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].offset == offset) - return; - - ALLOC_GROW(reused_chunks, reused_chunks_nr + 1, - reused_chunks_alloc); - reused_chunks[reused_chunks_nr].start = where; - reused_chunks[reused_chunks_nr].offset = offset; - reused_chunks_nr++; -} - -/* - * Binary search to find the chunk that "where" is in. Note - * that we're not looking for an exact match, just the first - * chunk that contains it (which implicitly ends at the start - * of the next chunk. - */ -static off_t find_reused_offset(off_t where) +static off_t write_reused_pack(struct hashfile *f) { - int lo = 0, hi = reused_chunks_nr; - while (lo < hi) { - int mi = lo + ((hi - lo) / 2); - if (where == reused_chunks[mi].start) - return reused_chunks[mi].offset; - if (where < reused_chunks[mi].start) - hi = mi; - else - lo = mi + 1; - } - - /* - * The first chunk starts at zero, so we can't have gone below - * there. - */ - assert(lo); - return reused_chunks[lo-1].offset; -} - -static void write_reused_pack_one(size_t pos, struct hashfile *out, - struct pack_window **w_curs) -{ - off_t offset, next, cur; - enum object_type type; - unsigned long size; - - offset = reuse_packfile->revindex[pos].offset; - next = reuse_packfile->revindex[pos + 1].offset; - - record_reused_object(offset, offset - hashfile_total(out)); - - cur = offset; - type = unpack_object_header(reuse_packfile, w_curs, &cur, &size); - assert(type >= 0); - - if (type == OBJ_OFS_DELTA) { - off_t base_offset; - off_t fixup; - - unsigned char header[MAX_PACK_OBJECT_HEADER]; - unsigned len; - - base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset); - assert(base_offset != 0); - - /* Convert to REF_DELTA if we must... */ - if (!allow_ofs_delta) { - int base_pos = find_revindex_position(reuse_packfile, base_offset); - const unsigned char *base_sha1 = - nth_packed_object_sha1(reuse_packfile, - reuse_packfile->revindex[base_pos].nr); - - len = encode_in_pack_object_header(header, sizeof(header), - OBJ_REF_DELTA, size); - hashwrite(out, header, len); - hashwrite(out, base_sha1, 20); - copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur); - return; - } + unsigned char buffer[8192]; + off_t to_write, total; + int fd; - /* Otherwise see if we need to rewrite the offset... */ - fixup = find_reused_offset(offset) - - find_reused_offset(base_offset); - if (fixup) { - unsigned char ofs_header[10]; - unsigned i, ofs_len; - off_t ofs = offset - base_offset - fixup; - - len = encode_in_pack_object_header(header, sizeof(header), - OBJ_OFS_DELTA, size); - - i = sizeof(ofs_header) - 1; - ofs_header[i] = ofs & 127; - while (ofs >>= 7) - ofs_header[--i] = 128 | (--ofs & 127); - - ofs_len = sizeof(ofs_header) - i; - - if (0) { - off_t expected_size = cur - offset; - - if (len + ofs_len < expected_size) { - unsigned max_pad = (len >= 4) ? 9 : 5; - header[len - 1] |= 0x80; - while (len < max_pad && len + ofs_len < expected_size) - header[len++] = 0x80; - header[len - 1] &= 0x7F; - } - } + if (!is_pack_valid(reuse_packfile)) + die(_("packfile is invalid: %s"), reuse_packfile->pack_name); - hashwrite(out, header, len); - hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len); - copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur); - return; - } + fd = git_open(reuse_packfile->pack_name); + if (fd < 0) + die_errno(_("unable to open packfile for reuse: %s"), + reuse_packfile->pack_name); - /* ...otherwise we have no fixup, and can write it verbatim */ - } + if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1) + die_errno(_("unable to seek in reused packfile")); - copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset); -} + if (reuse_packfile_offset < 0) + reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz; -static size_t write_reused_pack_verbatim(struct hashfile *out, - struct pack_window **w_curs) -{ - size_t pos = 0; + total = to_write = reuse_packfile_offset - sizeof(struct pack_header); - while (pos < reuse_packfile_bitmap->word_alloc && - reuse_packfile_bitmap->words[pos] == (eword_t)~0) - pos++; + while (to_write) { + int read_pack = xread(fd, buffer, sizeof(buffer)); - if (pos) { - off_t to_write; + if (read_pack <= 0) + die_errno(_("unable to read from reused packfile")); - written = (pos * BITS_IN_EWORD); - to_write = reuse_packfile->revindex[written].offset - - sizeof(struct pack_header); + if (read_pack > to_write) + read_pack = to_write; - record_reused_object(sizeof(struct pack_header), 0); - hashflush(out); - copy_pack_data(out, reuse_packfile, w_curs, - sizeof(struct pack_header), to_write); + hashwrite(f, buffer, read_pack); + to_write -= read_pack; + /* + * We don't know the actual number of objects written, + * only how many bytes written, how many bytes total, and + * how many objects total. So we can fake it by pretending all + * objects we are writing are the same size. This gives us a + * smooth progress meter, and at the end it matches the true + * answer. + */ + written = reuse_packfile_objects * + (((double)(total - to_write)) / total); display_progress(progress_state, written); } - return pos; -} - -static void write_reused_pack(struct hashfile *f) -{ - size_t i = 0; - uint32_t offset; - struct pack_window *w_curs = NULL; - - if (allow_ofs_delta) - i = write_reused_pack_verbatim(f, &w_curs); - for (; i < reuse_packfile_bitmap->word_alloc; ++i) { - eword_t word = reuse_packfile_bitmap->words[i]; - size_t pos = (i * BITS_IN_EWORD); - - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) - break; - - offset += ewah_bit_ctz64(word >> offset); - write_reused_pack_one(pos + offset, f, &w_curs); - display_progress(progress_state, ++written); - } - } - - unuse_pack(&w_curs); + close(fd); + written = reuse_packfile_objects; + display_progress(progress_state, written); + return reuse_packfile_offset - sizeof(struct pack_header); } static const char no_split_warning[] = N_( @@ -1000,9 +867,11 @@ static void write_pack_file(void) offset = write_pack_header(f, nr_remaining); if (reuse_packfile) { + off_t packfile_size; assert(pack_to_stdout); - write_reused_pack(f); - offset = hashfile_total(f); + + packfile_size = write_reused_pack(f); + offset += packfile_size; } nr_written = 0; @@ -1131,10 +1000,6 @@ static int have_duplicate_entry(const struct object_id *oid, { struct object_entry *entry; - if (reuse_packfile_bitmap && - bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid)) - return 1; - entry = packlist_find(&to_pack, oid); if (!entry) return 0; @@ -1323,7 +1188,6 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, create_object_entry(oid, type, pack_name_hash(name), exclude, name && no_try_delta(name), found_pack, found_offset); - return 1; } @@ -2688,13 +2552,6 @@ static void ll_find_deltas(struct object_entry **list, unsigned list_size, free(p); } -static int obj_is_packed(const struct object_id *oid) -{ - return packlist_find(&to_pack, oid) || - (reuse_packfile_bitmap && - bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid)); -} - static void add_tag_chain(const struct object_id *oid) { struct tag *tag; @@ -2706,7 +2563,7 @@ static void add_tag_chain(const struct object_id *oid) * it was included via bitmaps, we would not have parsed it * previously). */ - if (obj_is_packed(oid)) + if (packlist_find(&to_pack, oid)) return; tag = lookup_tag(the_repository, oid); @@ -2730,7 +2587,7 @@ static int add_ref_tag(const char *path, const struct object_id *oid, int flag, if (starts_with(path, "refs/tags/") && /* is a tag? */ !peel_ref(path, &peeled) && /* peelable? */ - obj_is_packed(&peeled)) /* object packed? */ + packlist_find(&to_pack, &peeled)) /* object packed? */ add_tag_chain(oid); return 0; } @@ -2798,7 +2655,6 @@ static void prepare_pack(int window, int depth) if (nr_deltas && n > 1) { unsigned nr_done = 0; - if (progress) progress_state = start_progress(_("Compressing objects"), nr_deltas); @@ -2843,10 +2699,6 @@ static int git_pack_config(const char *k, const char *v, void *cb) use_bitmap_index_default = git_config_bool(k, v); return 0; } - if (!strcmp(k, "pack.allowpackreuse")) { - allow_pack_reuse = git_config_bool(k, v); - return 0; - } if (!strcmp(k, "pack.threads")) { delta_search_threads = git_config_int(k, v); if (delta_search_threads < 0) @@ -3179,6 +3031,7 @@ static void loosen_unused_packed_objects(void) static int pack_options_allow_reuse(void) { return pack_to_stdout && + allow_ofs_delta && !ignore_packed_keep_on_disk && !ignore_packed_keep_in_core && (!local || !have_non_local_packs) && @@ -3190,13 +3043,12 @@ static int get_object_list_from_bitmap(struct rev_info *revs) if (!(bitmap_git = prepare_bitmap_walk(revs))) return -1; - if (allow_pack_reuse && - pack_options_allow_reuse() && + if (pack_options_allow_reuse() && !reuse_partial_packfile_from_bitmap( bitmap_git, &reuse_packfile, &reuse_packfile_objects, - &reuse_packfile_bitmap)) { + &reuse_packfile_offset)) { assert(reuse_packfile_objects); nr_result += reuse_packfile_objects; display_progress(progress_state, nr_result); @@ -3657,9 +3509,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (progress) fprintf_ln(stderr, _("Total %"PRIu32" (delta %"PRIu32")," - " reused %"PRIu32" (delta %"PRIu32")," - " pack-reused %"PRIu32), - written, written_delta, reused, reused_delta, - reuse_packfile_objects); + " reused %"PRIu32" (delta %"PRIu32")"), + written, written_delta, reused, reused_delta); return 0; } diff --git a/csum-file.h b/csum-file.h index f9cbd317fb..a98b1eee53 100644 --- a/csum-file.h +++ b/csum-file.h @@ -42,15 +42,6 @@ void hashflush(struct hashfile *f); void crc32_begin(struct hashfile *); uint32_t crc32_end(struct hashfile *); -/* - * Returns the total number of bytes fed to the hashfile so far (including ones - * that have not been written out to the descriptor yet). - */ -static inline off_t hashfile_total(struct hashfile *f) -{ - return f->total + f->offset; -} - static inline void hashwrite_u8(struct hashfile *f, uint8_t data) { hashwrite(f, &data, sizeof(data)); diff --git a/ewah/bitmap.c b/ewah/bitmap.c index eac05485f1..52f1178db4 100644 --- a/ewah/bitmap.c +++ b/ewah/bitmap.c @@ -22,26 +22,21 @@ #define EWAH_MASK(x) ((eword_t)1 << (x % BITS_IN_EWORD)) #define EWAH_BLOCK(x) (x / BITS_IN_EWORD) -struct bitmap *bitmap_word_alloc(size_t word_alloc) +struct bitmap *bitmap_new(void) { struct bitmap *bitmap = xmalloc(sizeof(struct bitmap)); - bitmap->words = xcalloc(word_alloc, sizeof(eword_t)); - bitmap->word_alloc = word_alloc; + bitmap->words = xcalloc(32, sizeof(eword_t)); + bitmap->word_alloc = 32; return bitmap; } -struct bitmap *bitmap_new(void) -{ - return bitmap_word_alloc(32); -} - void bitmap_set(struct bitmap *self, size_t pos) { size_t block = EWAH_BLOCK(pos); if (block >= self->word_alloc) { size_t old_size = self->word_alloc; - self->word_alloc = (block + 1) * 2; + self->word_alloc = block * 2; REALLOC_ARRAY(self->words, self->word_alloc); memset(self->words + old_size, 0x0, (self->word_alloc - old_size) * sizeof(eword_t)); diff --git a/ewah/ewok.h b/ewah/ewok.h index 1b98b57c8b..84b2a29faa 100644 --- a/ewah/ewok.h +++ b/ewah/ewok.h @@ -172,7 +172,6 @@ struct bitmap { }; struct bitmap *bitmap_new(void); -struct bitmap *bitmap_word_alloc(size_t word_alloc); void bitmap_set(struct bitmap *self, size_t pos); int bitmap_get(struct bitmap *self, size_t pos); void bitmap_reset(struct bitmap *self); diff --git a/pack-bitmap.c b/pack-bitmap.c index cbfc544411..e07c798879 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -326,13 +326,6 @@ failed: munmap(bitmap_git->map, bitmap_git->map_size); bitmap_git->map = NULL; bitmap_git->map_size = 0; - - kh_destroy_oid_map(bitmap_git->bitmaps); - bitmap_git->bitmaps = NULL; - - kh_destroy_oid_pos(bitmap_git->ext_index.positions); - bitmap_git->ext_index.positions = NULL; - return -1; } @@ -629,7 +622,7 @@ static void show_objects_for_type( enum object_type object_type, show_reachable_fn show_reach) { - size_t i = 0; + size_t pos = 0, i = 0; uint32_t offset; struct ewah_iterator it; @@ -637,15 +630,13 @@ static void show_objects_for_type( struct bitmap *objects = bitmap_git->result; + if (bitmap_git->reuse_objects == bitmap_git->pack->num_objects) + return; + ewah_iterator_init(&it, type_filter); - for (i = 0; i < objects->word_alloc && - ewah_iterator_next(&filter, &it); i++) { + while (i < objects->word_alloc && ewah_iterator_next(&filter, &it)) { eword_t word = objects->words[i] & filter; - size_t pos = (i * BITS_IN_EWORD); - - if (!word) - continue; for (offset = 0; offset < BITS_IN_EWORD; ++offset) { struct object_id oid; @@ -657,6 +648,9 @@ static void show_objects_for_type( offset += ewah_bit_ctz64(word >> offset); + if (pos + offset < bitmap_git->reuse_objects) + continue; + entry = &bitmap_git->pack->revindex[pos + offset]; nth_packed_object_oid(&oid, bitmap_git->pack, entry->nr); @@ -665,6 +659,9 @@ static void show_objects_for_type( show_reach(&oid, object_type, 0, hash, bitmap_git->pack, entry->offset); } + + pos += BITS_IN_EWORD; + i++; } } @@ -771,139 +768,66 @@ cleanup: return NULL; } -static void try_partial_reuse(struct bitmap_index *bitmap_git, - size_t pos, - struct bitmap *reuse, - struct pack_window **w_curs) +int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, + struct packed_git **packfile, + uint32_t *entries, + off_t *up_to) { - struct revindex_entry *revidx; - off_t offset; - enum object_type type; - unsigned long size; - - if (pos >= bitmap_git->pack->num_objects) - return; /* not actually in the pack */ - - revidx = &bitmap_git->pack->revindex[pos]; - offset = revidx->offset; - type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size); - if (type < 0) - return; /* broken packfile, punt */ - - if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { - off_t base_offset; - int base_pos; - - /* - * Find the position of the base object so we can look it up - * in our bitmaps. If we can't come up with an offset, or if - * that offset is not in the revidx, the pack is corrupt. - * There's nothing we can do, so just punt on this object, - * and the normal slow path will complain about it in - * more detail. - */ - base_offset = get_delta_base(bitmap_git->pack, w_curs, - &offset, type, revidx->offset); - if (!base_offset) - return; - base_pos = find_revindex_position(bitmap_git->pack, base_offset); - if (base_pos < 0) - return; - - /* - * We assume delta dependencies always point backwards. This - * lets us do a single pass, and is basically always true - * due to the way OFS_DELTAs work. You would not typically - * find REF_DELTA in a bitmapped pack, since we only bitmap - * packs we write fresh, and OFS_DELTA is the default). But - * let's double check to make sure the pack wasn't written with - * odd parameters. - */ - if (base_pos >= pos) - return; - - /* - * And finally, if we're not sending the base as part of our - * reuse chunk, then don't send this object either. The base - * would come after us, along with other objects not - * necessarily in the pack, which means we'd need to convert - * to REF_DELTA on the fly. Better to just let the normal - * object_entry code path handle it. - */ - if (!bitmap_get(reuse, base_pos)) - return; - } - /* - * If we got here, then the object is OK to reuse. Mark it. + * Reuse the packfile content if we need more than + * 90% of its objects */ - bitmap_set(reuse, pos); -} + static const double REUSE_PERCENT = 0.9; -int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, - struct packed_git **packfile_out, - uint32_t *entries, - struct bitmap **reuse_out) -{ struct bitmap *result = bitmap_git->result; - struct bitmap *reuse; - struct pack_window *w_curs = NULL; - size_t i = 0; - uint32_t offset; + uint32_t reuse_threshold; + uint32_t i, reuse_objects = 0; assert(result); - while (i < result->word_alloc && result->words[i] == (eword_t)~0) - i++; + for (i = 0; i < result->word_alloc; ++i) { + if (result->words[i] != (eword_t)~0) { + reuse_objects += ewah_bit_ctz64(~result->words[i]); + break; + } - /* Don't mark objects not in the packfile */ - if (i > bitmap_git->pack->num_objects / BITS_IN_EWORD) - i = bitmap_git->pack->num_objects / BITS_IN_EWORD; + reuse_objects += BITS_IN_EWORD; + } - reuse = bitmap_word_alloc(i); - memset(reuse->words, 0xFF, i * sizeof(eword_t)); +#ifdef GIT_BITMAP_DEBUG + { + const unsigned char *sha1; + struct revindex_entry *entry; - for (; i < result->word_alloc; ++i) { - eword_t word = result->words[i]; - size_t pos = (i * BITS_IN_EWORD); + entry = &bitmap_git->reverse_index->revindex[reuse_objects]; + sha1 = nth_packed_object_sha1(bitmap_git->pack, entry->nr); - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) - break; - - offset += ewah_bit_ctz64(word >> offset); - try_partial_reuse(bitmap_git, pos + offset, reuse, &w_curs); - } + fprintf(stderr, "Failed to reuse at %d (%016llx)\n", + reuse_objects, result->words[i]); + fprintf(stderr, " %s\n", hash_to_hex(sha1)); } +#endif - unuse_pack(&w_curs); - - *entries = bitmap_popcount(reuse); - if (!*entries) { - bitmap_free(reuse); + if (!reuse_objects) return -1; + + if (reuse_objects >= bitmap_git->pack->num_objects) { + bitmap_git->reuse_objects = *entries = bitmap_git->pack->num_objects; + *up_to = -1; /* reuse the full pack */ + *packfile = bitmap_git->pack; + return 0; } - /* - * Drop any reused objects from the result, since they will not - * need to be handled separately. - */ - bitmap_and_not(result, reuse); - *packfile_out = bitmap_git->pack; - *reuse_out = reuse; - return 0; -} + reuse_threshold = bitmap_popcount(bitmap_git->result) * REUSE_PERCENT; -int bitmap_walk_contains(struct bitmap_index *bitmap_git, - struct bitmap *bitmap, const struct object_id *oid) -{ - int idx; + if (reuse_objects < reuse_threshold) + return -1; - if (!bitmap) - return 0; + bitmap_git->reuse_objects = *entries = reuse_objects; + *up_to = bitmap_git->pack->revindex[reuse_objects].offset; + *packfile = bitmap_git->pack; - idx = bitmap_position(bitmap_git, oid); - return idx >= 0 && bitmap_get(bitmap, idx); + return 0; } void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git, diff --git a/pack-bitmap.h b/pack-bitmap.h index ab64270e06..466c5afa09 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -3,7 +3,6 @@ #include "ewah/ewok.h" #include "khash.h" -#include "pack.h" #include "pack-objects.h" struct commit; @@ -50,13 +49,10 @@ void test_bitmap_walk(struct rev_info *revs); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs); int reuse_partial_packfile_from_bitmap(struct bitmap_index *, struct packed_git **packfile, - uint32_t *entries, - struct bitmap **bitmap); + uint32_t *entries, off_t *up_to); int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); -int bitmap_walk_contains(struct bitmap_index *, - struct bitmap *bitmap, const struct object_id *oid); /* * After a traversal has been performed by prepare_bitmap_walk(), this can be diff --git a/packfile.c b/packfile.c index 81e66847bf..355066de17 100644 --- a/packfile.c +++ b/packfile.c @@ -1173,11 +1173,11 @@ const struct packed_git *has_packed_and_bad(struct repository *r, return NULL; } -off_t get_delta_base(struct packed_git *p, - struct pack_window **w_curs, - off_t *curpos, - enum object_type type, - off_t delta_obj_offset) +static off_t get_delta_base(struct packed_git *p, + struct pack_window **w_curs, + off_t *curpos, + enum object_type type, + off_t delta_obj_offset) { unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL); off_t base_offset; diff --git a/packfile.h b/packfile.h index ec536a4ae5..fc7904ec81 100644 --- a/packfile.h +++ b/packfile.h @@ -151,9 +151,6 @@ void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); -off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs, - off_t *curpos, enum object_type type, - off_t delta_obj_offset); void release_pack_memory(size_t);