From: Mike Gelfand <mikedld@mikedld.com>
Date: Sun, 23 Jun 2019 13:23:22 +0000 (+0300)
Subject: Sanitize suspicious path components instead of rejecting them
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f24b202743e8f37df0e4078f104a2619e3ed7067;p=transmission

Sanitize suspicious path components instead of rejecting them

Apply the same rules on all the supported platforms to avoid issues
with network shares and alien file systems.

For future compatibility, explicitly mark adjusted paths as renamed.
---

diff --git a/libtransmission/metainfo-test.c b/libtransmission/metainfo-test.c
index 930d15141..928075001 100644
--- a/libtransmission/metainfo-test.c
+++ b/libtransmission/metainfo-test.c
@@ -9,6 +9,7 @@
 #include "libtransmission-test.h"
 
 #include "transmission.h"
+#include "metainfo.h"
 #include "utils.h"
 
 #include <errno.h>
@@ -66,16 +67,16 @@ static int test_metainfo(void)
         { 0, TR_PARSE_OK, BEFORE_PATH "0:5:a.txt" AFTER_PATH },
         { 0, TR_PARSE_ERR, BEFORE_PATH "0:0:" AFTER_PATH },
 
-        /* don't allow path components in a filename */
-        { 0, TR_PARSE_ERR, BEFORE_PATH "7:a/a.txt" AFTER_PATH },
+        /* allow path separators in a filename (replaced with '_') */
+        { 0, TR_PARSE_OK, BEFORE_PATH "7:a/a.txt" AFTER_PATH },
 
-        /* fail on "." components */
-        { 0, TR_PARSE_ERR, BEFORE_PATH "1:.5:a.txt" AFTER_PATH },
-        { 0, TR_PARSE_ERR, BEFORE_PATH "5:a.txt1:." AFTER_PATH },
+        /* allow "." components (skipped) */
+        { 0, TR_PARSE_OK, BEFORE_PATH "1:.5:a.txt" AFTER_PATH },
+        { 0, TR_PARSE_OK, BEFORE_PATH "5:a.txt1:." AFTER_PATH },
 
-        /* fail on ".." components */
-        { 0, TR_PARSE_ERR, BEFORE_PATH "2:..5:a.txt" AFTER_PATH },
-        { 0, TR_PARSE_ERR, BEFORE_PATH "5:a.txt2:.." AFTER_PATH },
+        /* allow ".." components (replaced with "__") */
+        { 0, TR_PARSE_OK, BEFORE_PATH "2:..5:a.txt" AFTER_PATH },
+        { 0, TR_PARSE_OK, BEFORE_PATH "5:a.txt2:.." AFTER_PATH },
 
         /* fail on empty string */
         { EILSEQ, TR_PARSE_ERR, "" }
@@ -101,12 +102,76 @@ static int test_metainfo(void)
     return 0;
 }
 
+static int test_sanitize(void)
+{
+    struct
+    {
+        char const* str;
+        size_t len;
+        char const* expected_result;
+        bool expected_is_adjusted;
+    }
+    const test_data[] =
+    {
+        /* skipped */
+        { "", 0, NULL, false },
+        { ".", 1, NULL, false },
+        { "..", 2, NULL, true },
+        { ".....", 5, NULL, false },
+        { "  ", 2, NULL, false },
+        { " . ", 3, NULL, false },
+        { ". . .", 5, NULL, false },
+        /* replaced with '_'  */
+        { "/", 1, "_", true },
+        { "////", 4, "____", true },
+        { "\\\\", 2, "__", true },
+        { "/../", 4, "_.._", true },
+        { "foo<bar:baz/boo", 15, "foo_bar_baz_boo", true },
+        { "t\0e\x01s\tt\ri\nn\fg", 13, "t_e_s_t_i_n_g", true },
+        /* appended with '_' */
+        { "con", 3, "con_", true },
+        { "cOm4", 4, "cOm4_", true },
+        { "LPt9.txt", 8, "LPt9_.txt", true },
+        { "NUL.tar.gz", 10, "NUL_.tar.gz", true },
+        /* trimmed */
+        { " foo", 4, "foo", true },
+        { "foo ", 4, "foo", true },
+        { " foo ", 5, "foo", true },
+        { "foo.", 4, "foo", true },
+        { "foo...", 6, "foo", true },
+        { " foo... ", 8, "foo", true },
+        /* unmodified */
+        { "foo", 3, "foo", false },
+        { ".foo", 4, ".foo", false },
+        { "..foo", 5, "..foo", false },
+        { "foo.bar.baz", 11, "foo.bar.baz", false }
+    };
+
+    for (size_t i = 0; i < TR_N_ELEMENTS(test_data); ++i)
+    {
+        bool is_adjusted;
+        char* const result = tr_metainfo_sanitize_path_component(test_data[i].str, test_data[i].len, &is_adjusted);
+
+        check_str(result, ==, test_data[i].expected_result);
+
+        if (test_data[i].expected_result != NULL)
+        {
+            check_bool(is_adjusted, ==, test_data[i].expected_is_adjusted);
+        }
+
+        tr_free(result);
+    }
+
+    return 0;
+}
+
 int main(void)
 {
     testFunc const tests[] =
     {
         test_magnet_link,
-        test_metainfo
+        test_metainfo,
+        test_sanitize
     };
 
     return runTests(tests, NUM_TESTS(tests));
diff --git a/libtransmission/metainfo.c b/libtransmission/metainfo.c
index 9edccd994..18e757a7f 100644
--- a/libtransmission/metainfo.c
+++ b/libtransmission/metainfo.c
@@ -86,21 +86,90 @@ static char* getTorrentFilename(tr_session const* session, tr_info const* inf, e
 ****
 ***/
 
-static bool path_component_is_suspicious(char const* component)
+char* tr_metainfo_sanitize_path_component(char const* str, size_t len, bool* is_adjusted)
 {
-    return component == NULL || strpbrk(component, PATH_DELIMITER_CHARS) != NULL || strcmp(component, ".") == 0 ||
-        strcmp(component, "..") == 0;
+    if (len == 0 || (len == 1 && str[0] == '.'))
+    {
+        return NULL;
+    }
+
+    *is_adjusted = false;
+
+    /* https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file */
+    char const* const reserved_chars = "<>:\"/\\|?*";
+    char const* const reserved_names[] =
+    {
+        "CON", "PRN", "AUX", "NUL",
+        "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
+        "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"
+    };
+
+    char* const ret = tr_new(char, len + 2);
+    memcpy(ret, str, len);
+    ret[len] = '\0';
+
+    for (size_t i = 0; i < len; ++i)
+    {
+        if (strchr(reserved_chars, ret[i]) != NULL || (unsigned char)ret[i] < 0x20)
+        {
+            ret[i] = '_';
+            *is_adjusted = true;
+        }
+    }
+
+    for (size_t i = 0; i < TR_N_ELEMENTS(reserved_names); ++i)
+    {
+        size_t const reserved_name_len = strlen(reserved_names[i]);
+        if (evutil_ascii_strncasecmp(ret, reserved_names[i], reserved_name_len) != 0 ||
+            (ret[reserved_name_len] != '\0' && ret[reserved_name_len] != '.'))
+        {
+            continue;
+        }
+
+        memmove(&ret[reserved_name_len + 1], &ret[reserved_name_len], len - reserved_name_len + 1);
+        ret[reserved_name_len] = '_';
+        *is_adjusted = true;
+        ++len;
+        break;
+    }
+
+    size_t start_pos = 0;
+    size_t end_pos = len;
+
+    while (start_pos < len && ret[start_pos] == ' ')
+    {
+        ++start_pos;
+    }
+
+    while (end_pos > start_pos && (ret[end_pos - 1] == ' ' || ret[end_pos - 1] == '.'))
+    {
+        --end_pos;
+    }
+
+    if (start_pos == end_pos)
+    {
+        tr_free(ret);
+        return NULL;
+    }
+
+    if (start_pos != 0 || end_pos != len)
+    {
+        len = end_pos - start_pos;
+        memmove(ret, &ret[start_pos], len);
+        ret[len] = '\0';
+        *is_adjusted = true;
+    }
+
+    return ret;
 }
 
-static bool getfile(char** setme, char const* root, tr_variant* path, struct evbuffer* buf)
+static bool getfile(char** setme, bool* is_adjusted, char const* root, tr_variant* path, struct evbuffer* buf)
 {
-    /* root's already been checked by caller */
-    TR_ASSERT(!path_component_is_suspicious(root));
-
     bool success = false;
     size_t root_len = 0;
 
     *setme = NULL;
+    *is_adjusted = false;
 
     if (tr_variantIsList(path))
     {
@@ -114,19 +183,25 @@ static bool getfile(char** setme, char const* root, tr_variant* path, struct evb
             size_t len;
             char const* str;
 
-            if (!tr_variantGetStr(tr_variantListChild(path, i), &str, &len) || path_component_is_suspicious(str))
+            if (!tr_variantGetStr(tr_variantListChild(path, i), &str, &len))
             {
                 success = false;
                 break;
             }
 
-            if (*str == '\0')
+            bool is_component_adjusted;
+            char* final_str = tr_metainfo_sanitize_path_component(str, len, &is_component_adjusted);
+            if (final_str == NULL)
             {
                 continue;
             }
 
+            *is_adjusted = *is_adjusted || is_component_adjusted;
+
             evbuffer_add(buf, TR_PATH_DELIMITER_STR, 1);
-            evbuffer_add(buf, str, len);
+            evbuffer_add(buf, final_str, strlen(final_str));
+
+            tr_free(final_str);
         }
     }
 
@@ -137,8 +212,15 @@ static bool getfile(char** setme, char const* root, tr_variant* path, struct evb
 
     if (success)
     {
-        *setme = tr_utf8clean((char*)evbuffer_pullup(buf, -1), evbuffer_get_length(buf));
-        /* fprintf(stderr, "[%s]\n", *setme); */
+        char const* const buf_data = (char*)evbuffer_pullup(buf, -1);
+        size_t const buf_len = evbuffer_get_length(buf);
+
+        *setme = tr_utf8clean(buf_data, buf_len);
+
+        if (!*is_adjusted)
+        {
+            *is_adjusted = buf_len != strlen(*setme) || strncmp(buf_data, *setme, buf_len) != 0;
+        }
     }
 
     return success;
@@ -150,15 +232,18 @@ static char const* parseFiles(tr_info* inf, tr_variant* files, tr_variant const*
 
     inf->totalSize = 0;
 
+    bool is_root_adjusted;
+    char* const root_name = tr_metainfo_sanitize_path_component(inf->name, strlen(inf->name), &is_root_adjusted);
+    if (root_name == NULL)
+    {
+        return "path";
+    }
+
+    char const* result = NULL;
+
     if (tr_variantIsList(files)) /* multi-file mode */
     {
         struct evbuffer* buf;
-        char const* result;
-
-        if (path_component_is_suspicious(inf->name))
-        {
-            return "path";
-        }
 
         buf = evbuffer_new();
         result = NULL;
@@ -189,7 +274,8 @@ static char const* parseFiles(tr_info* inf, tr_variant* files, tr_variant const*
                 }
             }
 
-            if (!getfile(&inf->files[i].name, inf->name, path, buf))
+            bool is_file_adjusted;
+            if (!getfile(&inf->files[i].name, &is_file_adjusted, root_name, path, buf))
             {
                 result = "path";
                 break;
@@ -202,32 +288,29 @@ static char const* parseFiles(tr_info* inf, tr_variant* files, tr_variant const*
             }
 
             inf->files[i].length = len;
+            inf->files[i].is_renamed = is_root_adjusted || is_file_adjusted;
             inf->totalSize += len;
         }
 
         evbuffer_free(buf);
-        return result;
     }
     else if (tr_variantGetInt(length, &len)) /* single-file mode */
     {
-        if (path_component_is_suspicious(inf->name))
-        {
-            return "path";
-        }
-
         inf->isFolder = false;
         inf->fileCount = 1;
         inf->files = tr_new0(tr_file, 1);
-        inf->files[0].name = tr_strdup(inf->name);
+        inf->files[0].name = tr_strdup(root_name);
         inf->files[0].length = len;
+        inf->files[0].is_renamed = is_root_adjusted;
         inf->totalSize += len;
     }
     else
     {
-        return "length";
+        result = "length";
     }
 
-    return NULL;
+    tr_free(root_name);
+    return result;
 }
 
 static char* tr_convertAnnounceToScrape(char const* announce)
diff --git a/libtransmission/metainfo.h b/libtransmission/metainfo.h
index ee00df57a..edb307cc5 100644
--- a/libtransmission/metainfo.h
+++ b/libtransmission/metainfo.h
@@ -30,3 +30,6 @@ char* tr_metainfoGetBasename(tr_info const*, enum tr_metainfo_basename_format fo
 
 void tr_metainfoMigrateFile(tr_session const* session, tr_info const* info, enum tr_metainfo_basename_format old_format,
     enum tr_metainfo_basename_format new_format);
+
+/** @brief Private function that's exposed here only for unit tests */
+char* tr_metainfo_sanitize_path_component(char const* str, size_t len, bool* is_adjusted);