From: Greg Stein <gstein@apache.org>
Date: Wed, 8 Nov 2000 11:22:07 +0000 (+0000)
Subject: fix the byterange filter.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5bdee5c69490c39d731d5d92e8bb4ef32b000aac;p=apache

fix the byterange filter.

there is still some bogosity in there (huge buffer allocs!), and some
optimizations to be made, but this appears to fix byterange handling.


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@86865 13f79535-47bb-0310-9956-ffa450edef68
---

diff --git a/include/httpd.h b/include/httpd.h
index 2768ff00dd..76a0ea3ca9 100644
--- a/include/httpd.h
+++ b/include/httpd.h
@@ -694,10 +694,8 @@ struct request_rec {
 
     /** sending chunked transfer-coding */
     int chunked;
-    /** number of byte ranges */
-    int byterange;
     /** multipart/byteranges boundary */
-    char *boundary;
+    const char *boundary;
     /** The Range: header */
     const char *range;
     /** The "real" content length */
diff --git a/modules/http/http_protocol.c b/modules/http/http_protocol.c
index 8e1c062327..6993df842e 100644
--- a/modules/http/http_protocol.c
+++ b/modules/http/http_protocol.c
@@ -181,37 +181,24 @@ static int parse_byterange(char *range, apr_off_t clength,
 
 typedef struct byterange_ctx {
     ap_bucket_brigade *bb;
+    const char *bound_head;
+    int num_ranges;
 } byterange_ctx;
 
-/* If we are dealing with a file bucket, there are some interesting
- * optimizations that can be made later, but for now this should work
- * for all bucket types, and later we should optimize this for
- * file buckets.
- */
-AP_CORE_DECLARE(apr_status_t) ap_byterange_filter(ap_filter_t *f, ap_bucket_brigade *bb)
+#define BYTERANGE_FMT "%" APR_OFF_T_FMT "-%" APR_OFF_T_FMT "/%" APR_OFF_T_FMT
+
+AP_CORE_DECLARE_NONSTD(apr_status_t) ap_byterange_filter(
+    ap_filter_t *f,
+    ap_bucket_brigade *bb)
 {
 #define MIN_LENGTH(len1, len2) ((len1 > len2) ? len2 : len1)
     request_rec *r = f->r;
-    byterange_ctx *ctx;
+    byterange_ctx *ctx = f->ctx;
     ap_bucket *e;
-    apr_off_t curr_offset = 0;
-    apr_size_t curr_length = 0;
-    ap_bucket_brigade *bsend = ap_brigade_create(r->pool);
-    long range_start, range_end;
-    char *range;
-    char *ts;
+    ap_bucket_brigade *bsend;
+    apr_off_t range_start;
+    apr_off_t range_end;
     char *current;
-    char *end = apr_pstrcat(r->pool, CRLF "--", r->boundary, "--" CRLF, NULL);
-    char *bound = apr_pstrcat(r->pool, CRLF "--", r->boundary,
-                                    CRLF "Content-type: ", 
-                                    make_content_type(r, r->content_type),
-                                    CRLF "Content-range: bytes ", 
-                                    NULL);
-
-    ctx = f->ctx;
-    if (ctx == NULL) {
-        f->ctx = ctx = apr_pcalloc(r->pool, sizeof(*ctx));
-    }
 
     /* We can't actually deal with byte-ranges until we have the whole brigade
      * because the byte-ranges can be in any order, and according to the RFC,
@@ -221,74 +208,117 @@ AP_CORE_DECLARE(apr_status_t) ap_byterange_filter(ap_filter_t *f, ap_bucket_brig
         ap_save_brigade(f, &ctx->bb, &bb);
         return APR_SUCCESS;
     }
-    AP_BRIGADE_CONCAT(ctx->bb, bb);
 
+    /* concat the passed brigade with our saved brigade */
+    AP_BRIGADE_CONCAT(ctx->bb, bb);
     bb = ctx->bb;
+    ctx->bb = NULL;     /* ### strictly necessary? */
+
+    /* this brigade holds what we will be sending */
+    bsend = ap_brigade_create(r->pool);
+
     while ((current = ap_getword(r->pool, &r->range, ',')) &&
            parse_byterange(current, r->clength, &range_start, &range_end)) {
         const char *str;
         apr_size_t n;
+        const char *range;
         char *loc;
+        apr_size_t range_length = (apr_size_t)(range_end - range_start + 1);
+        apr_size_t curr_length = range_length;
+        apr_size_t segment_length;
+        apr_off_t curr_offset = 0;
 
-        curr_length = range_end - range_start + 1;
-        loc = range = apr_pcalloc(r->pool, curr_length + 1);
+        /* ### this is so bogus, but not dealing with it right now */
+        range = loc = apr_pcalloc(r->pool, range_length + 1);
 
         e = AP_BRIGADE_FIRST(bb);
-        curr_offset = 0;
+
+        /* ### we should split() buckets rather than read() them. this
+           ### will allow us to avoid reading files or custom buckets
+           ### into memory. for example: we REALLY don't want to cycle
+           ### a 10gig file into memory just to send out 100 bytes from
+           ### the end of the file.
+           ###
+           ### content generators that used to call ap_each_byterange()
+           ### manually (thus, optimizing the output) can replace their
+           ### behavior with a new bucket type that understands split().
+           ### they can then defer reading actual content until a read()
+           ### occurs, using the split() as an internal "seek".
+        */
+
         ap_bucket_read(e, &str, &n, AP_NONBLOCK_READ);
+        /* ### using e->length doesn't account for pipes */
         while (range_start > (curr_offset + e->length)) {
             curr_offset += e->length;
             e = AP_BUCKET_NEXT(e);
+
             if (e == AP_BRIGADE_SENTINEL(bb)) {
                 break;
             }
+
+            /* ### eventually we can avoid this */
+            ap_bucket_read(e, &str, &n, AP_NONBLOCK_READ);
         }
         if (range_start != curr_offset) {
             /* If we get here, then we know that the beginning of this 
              * byte-range occurs someplace in the middle of the current bucket
              */
-
-            apr_cpystrn(loc, str + (range_start - curr_offset), 
-                        MIN_LENGTH(curr_length + 1, e->length));
-            loc += MIN_LENGTH(curr_length + 1, e->length);
-            curr_length -= MIN_LENGTH(curr_length + 1, e->length);
+            /* ### when we split above, we should read here */
+            segment_length = MIN_LENGTH(curr_length + 1, e->length);
+            memcpy(loc, str + (range_start - curr_offset), segment_length);
+            loc += segment_length;
+            curr_length -= segment_length;
             e = AP_BUCKET_NEXT(e);
         }
-        
-        do {
+
+        while (e != AP_BRIGADE_SENTINEL(bb)) {
             if (curr_length == 0) {
                 break;
             }
             ap_bucket_read(e, &str, &n, AP_NONBLOCK_READ);
-            apr_cpystrn(loc, str + (range_start - curr_offset), 
-                        MIN_LENGTH(curr_length + 1, e->length));
-            loc += MIN_LENGTH(curr_length + 1, e->length);
-            curr_length -= MIN_LENGTH(curr_length + 1, e->length);
+
+            /* ### we should use 'n', not e->length */
+            segment_length = MIN_LENGTH(curr_length + 1, e->length);
+
+            memcpy(loc, str, segment_length);
+            loc += segment_length;
+            curr_length -= segment_length;
             e = AP_BUCKET_NEXT(e);
-        } while (e != AP_BRIGADE_SENTINEL(bb));
+        }
+
+        if (ctx->num_ranges > 1) {
+            const char *ts;
 
-        if (r->byterange > 1) {
-            e = ap_bucket_create_pool(bound, strlen(bound), r->pool);
+            e = ap_bucket_create_pool(ctx->bound_head,
+                                      strlen(ctx->bound_head), r->pool);
             AP_BRIGADE_INSERT_TAIL(bsend, e);
 
-            ts = apr_pcalloc(r->pool, MAX_STRING_LEN);
-            apr_snprintf(ts, MAX_STRING_LEN, "%ld-%ld/%ld" CRLF CRLF, 
-                         range_start, range_end, r->clength);
+            ts = apr_psprintf(r->pool, BYTERANGE_FMT CRLF CRLF,
+                              range_start, range_end, r->clength);
             e = ap_bucket_create_pool(ts, strlen(ts), r->pool);
             AP_BRIGADE_INSERT_TAIL(bsend, e);
         }
         
-        e = ap_bucket_create_pool(range, strlen(range), r->pool);
+        e = ap_bucket_create_pool(range, range_length, r->pool);
         AP_BRIGADE_INSERT_TAIL(bsend, e);
     }
-    if (r->byterange > 1) {
+
+    if (ctx->num_ranges > 1) {
+        const char *end;
+
+        /* add the final boundary */
+        end = apr_pstrcat(r->pool, CRLF "--", r->boundary, "--" CRLF, NULL);
         e = ap_bucket_create_pool(end, strlen(end), r->pool);
         AP_BRIGADE_INSERT_TAIL(bsend, e);
     }
+
     e = ap_bucket_create_eos();
     AP_BRIGADE_INSERT_TAIL(bsend, e);
 
+    /* we're done with the original content */
     ap_brigade_destroy(bb);
+
+    /* send our multipart output */
     return ap_pass_brigade(f->next, bsend); 
 }    
 
@@ -2239,8 +2269,13 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_content_length_filter(ap_filter_t *f,
 
 static int ap_set_byterange(request_rec *r)
 {
-    const char *range, *if_range, *match;
-    long range_start, range_end;
+    const char *range;
+    const char *if_range;
+    const char *match;
+    const char *ct;
+    apr_off_t range_start;
+    apr_off_t range_end;
+    int num_ranges;
 
     if (!r->clength || r->assbackwards)
         return 0;
@@ -2277,54 +2312,95 @@ static int ap_set_byterange(request_rec *r)
             return 0;
     }
 
+    /* ### would be nice to pick this up from f->ctx */
+    ct = make_content_type(r, r->content_type);
+
     if (!ap_strchr_c(range, ',')) {
         /* A single range */
-        if (!parse_byterange(apr_pstrdup(r->pool, range + 6), r->clength, &range_start, &range_end)) {
+
+        /* parse_byterange() modifies the contents, so make a copy */
+        if (!parse_byterange(apr_pstrdup(r->pool, range + 6), r->clength,
+                             &range_start, &range_end)) {
             return 0;
         }
         apr_table_setn(r->headers_out, "Content-Range",
-                       apr_psprintf(r->pool,
-                                    "bytes %" APR_OFF_T_FMT "-%" APR_OFF_T_FMT
-                                    "/%" APR_OFF_T_FMT,
+                       apr_psprintf(r->pool, "bytes " BYTERANGE_FMT,
                                     range_start, range_end, r->clength));
         apr_table_setn(r->headers_out, "Content-Length",
                        apr_psprintf(r->pool, "%" APR_OFF_T_FMT,
                                     range_end - range_start + 1));
+	apr_table_setn(r->headers_out, "Content-Type", ct);
 
-        r->byterange = 1;
+        num_ranges = 1;
     }
     else {
-#if 0
-        const char *temp = apr_pstrdup(r->pool, range + 6);
+        const char *work_range = range;
         char *current;
-        long tlength = 0;
-#endif
+        apr_off_t tlength = 0;
+
         /* a multiple range */
 
-        r->byterange = 2;
+        num_ranges = 2;
+
+        /* ### it would be nice if r->boundary was in f->ctx */
         r->boundary = apr_psprintf(r->pool, "%qx%lx",
-				r->request_time, (long) getpid());
-#if 0
-        /* This computes the content-length for the actual data, but it does
-         * not add the length of the byte-range headers, so it is an invalid
-         * content-length.  I am leaving the code here, so that other people
-         * can see what I have done. rbb
-         */
-        while ((current = ap_getword(r->pool, &temp, ',')) &&
-               parse_byterange(current, r->clength, &range_start, &range_end)) {
-            tlength += (range_end - range_start + 2);
+                                   r->request_time, (long) getpid());
+
+        /* compute the length for the actual data plus the boundary headers */
+        while ((current = ap_getword(r->pool, &work_range, ','))
+               && parse_byterange(current, r->clength,
+                                  &range_start, &range_end)) {
+            char rng[MAX_STRING_LEN];
+
+            apr_snprintf(rng, sizeof(rng), BYTERANGE_FMT,
+                         range_start, range_end, r->clength);
+            /* CRLF "--" boundary
+               CRLF "Content-type: " ct
+               CRLF "Content-range: bytes " rng
+               CRLF CRLF
+               content
+            */
+            tlength += (4 + strlen(r->boundary) + 16 + strlen(ct) +
+                        23 + strlen(rng) + 4 +
+                        range_end - range_start + 1);
         }
+
+        /* add final boundary length: CRLF "--" boundary "--" CRLF */
+        tlength += 4 + strlen(r->boundary) + 4;
+
         apr_table_setn(r->headers_out, "Content-Length",
-            apr_psprintf(r->pool, "%ld", tlength));
-#endif
+                       apr_psprintf(r->pool, "%" APR_OFF_T_FMT, tlength));
+        apr_table_setn(r->headers_out, "Content-Type",
+		       apr_pstrcat(r->pool,
+                                   "multipart", use_range_x(r) ? "/x-" : "/",
+				   "byteranges; boundary=", r->boundary,
+                                   NULL));
     }
 
     r->status = HTTP_PARTIAL_CONTENT;
     r->range = range + 6;
 
-    return 1;
+    return num_ranges;
 }
 
+static void add_byterange_filter(request_rec *r, int num_ranges)
+{
+    byterange_ctx *ctx = apr_pcalloc(r->pool, sizeof(*ctx));
+    const char *ct = make_content_type(r, r->content_type);
+
+    /* we will always need this */
+    ctx->bb = ap_brigade_create(r->pool);
+
+    /* compute this once (it is an invariant) and store it away */
+    ctx->bound_head = apr_pstrcat(r->pool, CRLF "--", r->boundary,
+                                  CRLF "Content-type: ", ct,
+                                  CRLF "Content-range: bytes ", 
+                                  NULL);
+
+    ctx->num_ranges = num_ranges;
+
+    ap_add_output_filter("BYTERANGE", ctx, r, r->connection);
+}
 
 AP_CORE_DECLARE_NONSTD(apr_status_t) ap_http_header_filter(ap_filter_t *f, ap_bucket_brigade *b)
 {
@@ -2336,6 +2412,7 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_http_header_filter(ap_filter_t *f, ap_bu
     ap_bucket_brigade *b2;
     apr_size_t len = 0;
     header_struct h;
+    int num_ranges;
 
     AP_DEBUG_ASSERT(!r->main);
 
@@ -2379,23 +2456,7 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_http_header_filter(ap_filter_t *f, ap_bu
 /*      ap_add_output_filter("COALESCE", NULL, r, r->connection); */
     }
 
-    ap_set_byterange(r);
-
-    if (r->byterange > 1) {
-        apr_table_setn(r->headers_out, "Content-Type",
-		       apr_pstrcat(r->pool, "multipart",
-				   use_range_x(r) ? "/x-" : "/",
-				   "byteranges; boundary=",
-				   r->boundary, NULL));
-        /* Remove the content-length until we figure out how to compute
-         * it correctly.
-         */
-        apr_table_unset(r->headers_out, "Content-Length");
-    }
-    else {
-	apr_table_setn(r->headers_out, "Content-Type",
-		       make_content_type(r, r->content_type));
-    }
+    num_ranges = ap_set_byterange(r);
 
     if (r->content_encoding) {
         apr_table_setn(r->headers_out, "Content-Encoding",
@@ -2456,12 +2517,10 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_http_header_filter(ap_filter_t *f, ap_bu
          */
         ap_add_output_filter("CHUNK", NULL, r, r->connection);
     }
-    if (r->byterange) {
-        /* We can't add this filter until we have already sent the headers.
-         * If we add it before this point, then the headers will be chunked
-         * as well, and that is just wrong.
-         */
-        ap_add_output_filter("BYTERANGE", NULL, r, r->connection);
+    if (num_ranges) {
+        /* wait until the headers have been sent before adding the byterange
+           filter */
+        add_byterange_filter(r, num_ranges);
     }
 
     /* Don't remove this filter until after we have added the CHUNK filter.