From 65c2e0c349aa5c7f605defb52dc67f1b3658a1b9 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 2 Jun 2005 15:20:54 -0700
Subject: [PATCH] [PATCH] Find size of SHA1 object without inflating
 everything.

This adds the sha1_file_size() helper function and uses it in the
rename/copy similarity estimator.  The helper function handles
deltified objects as well.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 cache.h     |  1 +
 diff.c      | 11 +++++-----
 sha1_file.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/cache.h b/cache.h
index e54176b4b5..481f7c7870 100644
--- a/cache.h
+++ b/cache.h
@@ -154,6 +154,7 @@ extern void * map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern int unpack_sha1_header(z_stream *stream, void *map, unsigned long mapsize, void *buffer, unsigned long size);
 extern int parse_sha1_header(char *hdr, char *type, unsigned long *sizep);
 extern int sha1_delta_base(const unsigned char *, unsigned char *);
+extern int sha1_file_size(const unsigned char *, unsigned long *);
 extern void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size);
 extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size);
 extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
diff --git a/diff.c b/diff.c
index 7cf40daee5..5513632b9f 100644
--- a/diff.c
+++ b/diff.c
@@ -333,7 +333,6 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		close(fd);
 	}
 	else {
-		/* We cannot do size only for SHA1 blobs */
 		char type[20];
 		struct sha1_size_cache *e;
 
@@ -343,11 +342,13 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 				s->size = e->size;
 				return 0;
 			}
+			if (!sha1_file_size(s->sha1, &s->size))
+				locate_size_cache(s->sha1, s->size);
+		}
+		else {
+			s->data = read_sha1_file(s->sha1, type, &s->size);
+			s->should_free = 1;
 		}
-		s->data = read_sha1_file(s->sha1, type, &s->size);
-		s->should_free = 1;
-		if (s->data && size_only)
-			locate_size_cache(s->sha1, s->size);
 	}
 	return 0;
 }
diff --git a/sha1_file.c b/sha1_file.c
index ccfcca07c7..a2ba4c81db 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -432,6 +432,66 @@ int sha1_delta_base(const unsigned char *sha1, unsigned char *base_sha1)
 	return ret;
 }
 
+/* Store in *sizep the size of the object named by sha1 without
+ * inflating all of it; for a delta, the result size it records.
+ * Returns 0 on success and -1 on failure.
+ */
+int sha1_file_size(const unsigned char *sha1, unsigned long *sizep)
+{
+	int ret, status = -1;
+	unsigned long mapsize, size, pos;
+	unsigned int i;
+	void *map;
+	z_stream stream;
+	char hdr[64], type[20];
+	unsigned char cmd;
+
+	map = map_sha1_file(sha1, &mapsize);
+	if (!map)
+		return -1;
+	ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
+	if (ret < Z_OK || parse_sha1_header(hdr, type, &size) < 0)
+		goto out;
+	if (strcmp(type, "delta")) {
+		*sizep = size;
+		status = 0;
+		goto out;
+	}
+
+	/* We are dealing with a delta object.  Inflated, the header is
+	 * followed by the 20-byte base object SHA1 and then the delta
+	 * data, which starts with the source size and the result size
+	 * in the encoding patch-delta reads.  Every access below is
+	 * kept inside hdr[]; running off its end is reported as failure.
+	 */
+	pos = strlen(hdr) + 1 + 20;
+
+	/* Skip over the source size; we cannot verify it because we
+	 * do not want to read the base object.
+	 */
+	if (pos >= sizeof(hdr))
+		goto out;
+	for (cmd = hdr[pos++]; cmd; cmd >>= 1)
+		pos += cmd & 1;
+	/* Read the result size, least significant byte first */
+	if (pos >= sizeof(hdr))
+		goto out;
+	size = 0;
+	for (cmd = hdr[pos++], i = 0; cmd; cmd >>= 1, i += 8) {
+		if (!(cmd & 1))
+			continue;
+		if (pos >= sizeof(hdr) || i >= 8 * sizeof(size))
+			goto out;
+		size |= (unsigned long)(unsigned char)hdr[pos++] << i;
+	}
+	*sizep = size;
+	status = 0;
+ out:
+	inflateEnd(&stream);
+	munmap(map, mapsize);
+	return status;
+}
+
 void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size)
 {
 	unsigned long mapsize;
-- 
2.40.0