From: Charles Kerr Date: Sat, 16 Feb 2019 20:19:38 +0000 (-0500) Subject: feat: make multiscrape limits adaptive (#837) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2013772aad85d4adde8fc35e44ca63a3deefb7e9;p=transmission feat: make multiscrape limits adaptive (#837) * feat: make multiscrape limits adaptive Previously hardcoded by TR_MULTISCRAPE_MAX. This change makes that the initial value, then incrementally lowers the value when multiscrapes get "req too long" style errors. * fix: don't log personal announce url * chore: treat HTTP 400 as scrape-too-long * chore: copyediting * chore: copyediting * move 'const' to the right of the type * make conditional tests explicitly boolean * make 'key' const * don't lookup a value we already have * make an array for known too-big scrape error messages * improved multiscrape throttle logging * fix: multiscraping of low numbers of torrents Handle the case of getting a 'multiscrape too big' error message back even if the user fewer than TR_MULTISCRAPE_MAX torrents. * uncrustify * fix oops * refactor: remove TR_MULTISCRAPE_MIN Is there any reason to have a minimum batch size? * make test explicit boolean Co-Authored-By: ckerr * improve declaration of too_long_errors Co-Authored-By: ckerr * make test explicitly boolean Co-Authored-By: ckerr * make test explicitly boolean Co-Authored-By: ckerr * improve looping decl of too_long_errors --- diff --git a/libtransmission/announcer-common.h b/libtransmission/announcer-common.h index 03562adbe..ee162d1da 100644 --- a/libtransmission/announcer-common.h +++ b/libtransmission/announcer-common.h @@ -25,8 +25,12 @@ enum * - ocelot has no upper bound * - opentracker has an upper bound of 64 * - udp protocol has an upper bound of 74 - * - xbtt has no upper bound */ - TR_MULTISCRAPE_MAX = 64 + * - xbtt has no upper bound + * + * This is only an upper bound: if the tracker complains about + * length, announcer will incrementally lower the batch size. + */ + TR_MULTISCRAPE_MAX = 100 }; typedef struct diff --git a/libtransmission/announcer.c b/libtransmission/announcer.c index b9ac1f90b..e59943afd 100644 --- a/libtransmission/announcer.c +++ b/libtransmission/announcer.c @@ -59,7 +59,10 @@ enum /* */ UPKEEP_INTERVAL_SECS = 1, /* this is how often to call the UDP tracker upkeep */ - TAU_UPKEEP_INTERVAL_SECS = 5 + TAU_UPKEEP_INTERVAL_SECS = 5, + + /* how many infohashes to remove when we get a scrape-too-long error */ + TR_MULTISCRAPE_STEP = 5 }; /*** @@ -130,7 +133,7 @@ static int compareStops(void const* va, void const* vb) return i; } - /* tertiary key: the tracker's announec url */ + /* tertiary key: the tracker's announce url */ return tr_strcmp0(a->url, b->url); } @@ -138,12 +141,35 @@ static int compareStops(void const* va, void const* vb) **** ***/ +struct tr_scrape_info +{ + char* url; + + int multiscrape_max; +}; + +static void scrapeInfoFree(void* va) +{ + struct tr_scrape_info* a = va; + + tr_free(a->url); + tr_free(a); +} + +static int compareScrapeInfo(void const* va, void const* vb) +{ + struct tr_scrape_info const* a = va; + struct tr_scrape_info const* b = vb; + return tr_strcmp0(a->url, b->url); +} + /** * "global" (per-tr_session) fields */ typedef struct tr_announcer { tr_ptrArray stops; /* tr_announce_request */ + tr_ptrArray scrape_info; /* struct tr_scrape_info */ tr_session* session; struct event* upkeepTimer; @@ -153,6 +179,31 @@ typedef struct tr_announcer } tr_announcer; +static struct tr_scrape_info* tr_announcerGetScrapeInfo(struct tr_announcer* announcer, char const* url) +{ + struct tr_scrape_info* info = NULL; + + if (url != NULL && *url != '\0') + { + bool found; + struct tr_scrape_info const key = { .url = (char*)url }; + int const pos = tr_ptrArrayLowerBound(&announcer->scrape_info, &key, compareScrapeInfo, &found); + if (found) + { + info = tr_ptrArrayNth(&announcer->scrape_info, pos); + } + else + { + info = tr_new0(struct tr_scrape_info, 1); + info->url = tr_strdup(url); + info->multiscrape_max = TR_MULTISCRAPE_MAX; + tr_ptrArrayInsert(&announcer->scrape_info, info, pos); + } + } + + return info; +} + bool tr_announcerHasBacklog(struct tr_announcer const* announcer) { return announcer->slotsAvailable < 1; @@ -189,6 +240,7 @@ void tr_announcerClose(tr_session* session) announcer->upkeepTimer = NULL; tr_ptrArrayDestruct(&announcer->stops, NULL); + tr_ptrArrayDestruct(&announcer->scrape_info, scrapeInfoFree); session->announcer = NULL; tr_free(announcer); @@ -203,7 +255,7 @@ typedef struct { char* key; char* announce; - char* scrape; + struct tr_scrape_info* scrape_info; char* tracker_id_str; @@ -234,12 +286,12 @@ static char* getKey(char const* url) return ret; } -static void trackerConstruct(tr_tracker* tracker, tr_tracker_info const* inf) +static void trackerConstruct(tr_announcer* announcer, tr_tracker* tracker, tr_tracker_info const* inf) { memset(tracker, 0, sizeof(tr_tracker)); tracker->key = getKey(inf->announce); tracker->announce = tr_strdup(inf->announce); - tracker->scrape = tr_strdup(inf->scrape); + tracker->scrape_info = tr_announcerGetScrapeInfo(announcer, inf->scrape); tracker->id = inf->id; tracker->seederCount = -1; tracker->leecherCount = -1; @@ -249,7 +301,6 @@ static void trackerConstruct(tr_tracker* tracker, tr_tracker_info const* inf) static void trackerDestruct(tr_tracker* tracker) { tr_free(tracker->tracker_id_str); - tr_free(tracker->scrape); tr_free(tracker->announce); tr_free(tracker->key); } @@ -681,7 +732,7 @@ static void addTorrentToTier(tr_torrent_tiers* tt, tr_torrent* tor) for (int i = 0; i < n; ++i) { - trackerConstruct(&tt->trackers[i], &infos[i]); + trackerConstruct(tor->session->announcer, &tt->trackers[i], &infos[i]); } /* count how many tiers there are */ @@ -1195,7 +1246,7 @@ static void on_announce_done(tr_announce_response const* response, void* vdata) /* if the tracker included scrape fields in its announce response, then a separate scrape isn't needed */ - if (scrape_fields >= 3 || (scrape_fields >= 1 && tracker->scrape == NULL)) + if (scrape_fields >= 3 || (scrape_fields >= 1 && tracker->scrape_info == NULL)) { tr_logAddTorDbg(tier->tor, "Announce response contained scrape info; " "rescheduling next scrape to %d seconds from now.", tier->scrapeIntervalSec); @@ -1303,6 +1354,33 @@ static void tierAnnounce(tr_announcer* announcer, tr_tier* tier) **** ***/ +static bool multiscrape_too_big(char const* errmsg) +{ + /* Found a tracker that returns some bespoke string for this case? + Add your patch here and open a PR */ + static char const* const too_long_errors[] = + { + "Bad Request", + "GET string too long", + "Request-URI Too Long" + }; + + if (errmsg == NULL) + { + return false; + } + + for (size_t i = 0; i < TR_N_ELEMENTS(too_long_errors); ++i) + { + if (strstr(errmsg, too_long_errors[i]) != NULL) + { + return true; + } + } + + return false; +} + static void on_scrape_error(tr_session* session, tr_tier* tier, char const* errmsg) { int interval; @@ -1337,7 +1415,9 @@ static tr_tier* find_tier(tr_torrent* tor, char const* scrape) { tr_tracker const* const tracker = tt->tiers[i].currentTracker; - if (tracker != NULL && tr_strcmp0(scrape, tracker->scrape) == 0) + if (tracker != NULL && + tracker->scrape_info != NULL && + tr_strcmp0(scrape, tracker->scrape_info->url) == 0) { return &tt->tiers[i]; } @@ -1434,6 +1514,38 @@ static void on_scrape_done(tr_scrape_response const* response, void* vsession) } } + /* Maybe reduce the number of torrents in a multiscrape req */ + if (multiscrape_too_big(response->errmsg)) + { + char const* url = response->url; + int* multiscrape_max = &tr_announcerGetScrapeInfo(announcer, url)->multiscrape_max; + + /* Lower the max only if it hasn't already lowered for a similar error. + For example if N parallel multiscrapes all have the same `max` and + error out, lower the value once for that batch, not N times. */ + if (*multiscrape_max >= response->row_count) + { + int const n = MAX(1, *multiscrape_max - TR_MULTISCRAPE_STEP); + if (*multiscrape_max != n) + { + char* scheme = NULL; + char* host = NULL; + int port; + if (tr_urlParse(url, strlen(url), &scheme, &host, &port, NULL)) + { + /* don't log the full URL, since that might have a personal announce id */ + char* sanitized_url = tr_strdup_printf("%s://%s:%d", scheme, host, port); + tr_logAddNamedInfo(sanitized_url, "Reducing multiscrape max to %d", n); + tr_free(sanitized_url); + tr_free(host); + tr_free(scheme); + } + + *multiscrape_max = n; + } + } + } + if (announcer) { ++announcer->slotsAvailable; @@ -1471,23 +1583,23 @@ static void multiscrape(tr_announcer* announcer, tr_ptrArray* tiers) for (int i = 0; i < tier_count; ++i) { tr_tier* tier = tr_ptrArrayNth(tiers, i); - char* url = tier->currentTracker->scrape; + struct tr_scrape_info* const scrape_info = tier->currentTracker->scrape_info; uint8_t const* hash = tier->tor->info.hash; bool found = false; - TR_ASSERT(url != NULL); + TR_ASSERT(scrape_info != NULL); /* if there's a request with this scrape URL and a free slot, use it */ for (int j = 0; !found && j < request_count; ++j) { tr_scrape_request* req = &requests[j]; - if (req->info_hash_count >= TR_MULTISCRAPE_MAX) + if (req->info_hash_count >= scrape_info->multiscrape_max) { continue; } - if (tr_strcmp0(req->url, url) != 0) + if (tr_strcmp0(req->url, scrape_info->url) != 0) { continue; } @@ -1502,7 +1614,7 @@ static void multiscrape(tr_announcer* announcer, tr_ptrArray* tiers) if (!found && request_count < max_request_count) { tr_scrape_request* req = &requests[request_count++]; - req->url = url; + req->url = scrape_info->url; tier_build_log_name(tier, req->log_name, sizeof(req->log_name)); memcpy(req->info_hash[req->info_hash_count++], hash, SHA_DIGEST_LENGTH); @@ -1540,7 +1652,7 @@ static bool tierNeedsToAnnounce(tr_tier const* tier, time_t const now) static bool tierNeedsToScrape(tr_tier const* tier, time_t const now) { return !tier->isScraping && tier->scrapeAt != 0 && tier->scrapeAt <= now && tier->currentTracker != NULL && - tier->currentTracker->scrape != NULL; + tier->currentTracker->scrape_info != NULL; } static int compareTiers(void const* va, void const* vb) @@ -1691,9 +1803,9 @@ tr_tracker_stat* tr_announcerStats(tr_torrent const* torrent, int* setmeTrackerC st->isBackup = tracker != tier->currentTracker; st->lastScrapeStartTime = tier->lastScrapeStartTime; - if (tracker->scrape != NULL) + if (tracker->scrape_info != NULL) { - tr_strlcpy(st->scrape, tracker->scrape, sizeof(st->scrape)); + tr_strlcpy(st->scrape, tracker->scrape_info->url, sizeof(st->scrape)); } else {