From 0cc30e0e842a25846e76e09f62a1d425a25ee556 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Apr 2015 05:01:38 -0400 Subject: [PATCH] strbuf_getwholeline: use getdelim if it is available We spend a lot of time in strbuf_getwholeline in a tight loop reading characters from a stdio handle into a buffer. The libc getdelim() function can do this for us with less overhead. It's in POSIX.1-2008, and was a GNU extension before that. Therefore we can't rely on it, but can fall back to the existing getc loop when it is not available. The HAVE_GETDELIM knob is turned on automatically for Linux, where we have glibc. We don't need to set any new feature-test macros, because we already define _GNU_SOURCE. Other systems that implement getdelim may need to define other macros (probably _POSIX_C_SOURCE >= 200809L), but we can address that along with setting the Makefile knob after testing the feature on those systems. Running "git rev-parse refs/heads/does-not-exist" on a repo with an extremely large (1.6GB) packed-refs file went from (best-of-5): real 0m8.601s user 0m8.084s sys 0m0.524s to: real 0m6.768s user 0m6.340s sys 0m0.432s for a wall-clock speedup of 21%. Based on a patch from Rasmus Villemoes. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Makefile | 6 ++++++ config.mak.uname | 1 + strbuf.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/Makefile b/Makefile index 5f3987fe3b..36655d5a16 100644 --- a/Makefile +++ b/Makefile @@ -359,6 +359,8 @@ all:: # compiler is detected to support it. # # Define HAVE_BSD_SYSCTL if your platform has a BSD-compatible sysctl function. +# +# Define HAVE_GETDELIM if your system has the getdelim() function. 
GIT-VERSION-FILE: FORCE @$(SHELL_PATH) ./GIT-VERSION-GEN @@ -1437,6 +1439,10 @@ ifdef HAVE_BSD_SYSCTL BASIC_CFLAGS += -DHAVE_BSD_SYSCTL endif +ifdef HAVE_GETDELIM + BASIC_CFLAGS += -DHAVE_GETDELIM +endif + ifeq ($(TCLTK_PATH),) NO_TCLTK = NoThanks endif diff --git a/config.mak.uname b/config.mak.uname index f4e77cb9e5..d26665fa54 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -36,6 +36,7 @@ ifeq ($(uname_S),Linux) HAVE_DEV_TTY = YesPlease HAVE_CLOCK_GETTIME = YesPlease HAVE_CLOCK_MONOTONIC = YesPlease + HAVE_GETDELIM = YesPlease endif ifeq ($(uname_S),GNU/kFreeBSD) HAVE_ALLOCA_H = YesPlease diff --git a/strbuf.c b/strbuf.c index 921619ea68..0d4f4e54ec 100644 --- a/strbuf.c +++ b/strbuf.c @@ -435,6 +435,47 @@ int strbuf_getcwd(struct strbuf *sb) return -1; } +#ifdef HAVE_GETDELIM +int strbuf_getwholeline(struct strbuf *sb, FILE *fp, int term) +{ + ssize_t r; + + if (feof(fp)) + return EOF; + + strbuf_reset(sb); + + /* Translate slopbuf to NULL, as we cannot call realloc on it */ + if (!sb->alloc) + sb->buf = NULL; + r = getdelim(&sb->buf, &sb->alloc, term, fp); + + if (r > 0) { + sb->len = r; + return 0; + } + assert(r == -1); + + /* + * Normally we would have called xrealloc, which will try to free + * memory and recover. But we have no way to tell getdelim() to do so. + * Worse, we cannot try to recover ENOMEM ourselves, because we have + * no idea how many bytes were read by getdelim. + * + * Dying here is reasonable. It mirrors what xrealloc would do on + * catastrophic memory failure. We skip the opportunity to free pack + * memory and retry, but that's unlikely to help for a malloc small + * enough to hold a single line of input, anyway. 
+ */ + if (errno == ENOMEM) + die("Out of memory, getdelim failed"); + + /* Restore slopbuf that we moved out of the way before */ + if (!sb->buf) + strbuf_init(sb, 0); + return EOF; +} +#else int strbuf_getwholeline(struct strbuf *sb, FILE *fp, int term) { int ch; @@ -458,6 +499,7 @@ int strbuf_getwholeline(struct strbuf *sb, FILE *fp, int term) sb->buf[sb->len] = '\0'; return 0; } +#endif int strbuf_getline(struct strbuf *sb, FILE *fp, int term) { -- 2.40.0