From 66e5351e0adda5891b2ff17ccbafc81f620c0e01 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Sat, 28 Dec 2024 14:47:01 +0100 Subject: [PATCH] urlapi: fix redirect to a new fragment or query (only) The redirect logic was broken when the redirect-to URL was a relative URL only as a fragment or query (starting with '#' or '?'). Extended test 1560 to reproduce, then verify. Reported-by: Jeroen Ooms Fixes #15836 Closes #15848 Conflict:remove doc CURLOPT_PATH_AS_IS.md which is not exist context adapt Reference:https://github.com/curl/curl/commit/66e5351e0adda5891b2ff17ccbafc81f620c0e01 --- lib/urlapi.c | 102 ++++++------------------ tests/data/test391 | 2 +- tests/libtest/lib1560.c | 32 +++++++ 3 files changed, 60 insertions(+), 76 deletions(-) diff --git a/lib/urlapi.c b/lib/urlapi.c index b676c4d..0f8ffbf 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -275,8 +275,6 @@ static char *concat_url(const char *base, const char *relurl) problems in the future... */ char *newest; - char *protsep; - char *pathsep; size_t newlen; bool host_changed = FALSE; @@ -291,66 +289,35 @@ static char *concat_url(const char *base, const char *relurl) return NULL; /* skip out of this NOW */ /* protsep points to the start of the host name */ - protsep = strstr(url_clone, "//"); + char *protsep = strstr(url_clone, "//"); + DEBUGASSERT(protsep); if(!protsep) protsep = url_clone; else protsep += 2; /* pass the slashes */ - if('/' != relurl[0]) { - int level = 0; - - /* First we need to find out if there's a ?-letter in the URL, + if(('/' != relurl[0]) && ('#' != relurl[0])) { + /* First we need to find out if there is a ?-letter in the original URL, and cut it and the right-side of that off */ - pathsep = strchr(protsep, '?'); + char *pathsep = strchr(protsep, '?'); if(pathsep) *pathsep = 0; - - /* we have a relative path to append to the last slash if there's one - available, or if the new URL is just a query string (starts with a - '?') we append the new one at the end of the entire currently worked - out URL */ - if(useurl[0] != '?') { - pathsep = strrchr(protsep, '/'); + else { + /* if not, cut off the potential fragment */ + pathsep = strchr(protsep, '#'); if(pathsep) *pathsep = 0; } - /* Check if there's any slash after the host name, and if so, remember - that position instead */ - pathsep = strchr(protsep, '/'); - if(pathsep) - protsep = pathsep + 1; - else - protsep = NULL; - - /* now deal with one "./" or any amount of "../" in the newurl - and act accordingly */ - - if((useurl[0] == '.') && (useurl[1] == '/')) - useurl += 2; /* just skip the "./" */ - - while((useurl[0] == '.') && - (useurl[1] == '.') && - (useurl[2] == '/')) { - level++; - useurl += 3; /* pass the "../" */ - } - - if(protsep) { - while(level--) { - /* cut off one more level from the right of the original URL */ - pathsep = strrchr(protsep, '/'); - if(pathsep) - *pathsep = 0; - else { - *protsep = 0; - break; - } - } + /* if the redirect-to piece is not just a query, cut the path after the + last slash */ + if(useurl[0] != '?') { + pathsep = strrchr(protsep, '/'); + if(pathsep) + pathsep[1] = 0; /* leave the slash */ } } - else { + else if('/' == relurl[0]) { /* We got a new absolute path for this server */ if(relurl[1] == '/') { @@ -362,29 +329,20 @@ static char *concat_url(const char *base, const char *relurl) host_changed = TRUE; } else { - /* cut off the original URL from the first slash, or deal with URLs - without slash */ - pathsep = strchr(protsep, '/'); - if(pathsep) { - /* When people use badly formatted URLs, such as - "http://www.url.com?dir=/home/daniel" we must not use the first - slash, if there's a ?-letter before it! */ - char *sep = strchr(protsep, '?'); - if(sep && (sep < pathsep)) - pathsep = sep; + /* cut the original URL at first slash */ + char *pathsep = strchr(protsep, '/'); + if(pathsep) *pathsep = 0; - } - else { - /* There was no slash. Now, since we might be operating on a badly - formatted URL, such as "http://www.url.com?id=2380" which doesn't - use a slash separator as it is supposed to, we need to check for a - ?-letter as well! */ - pathsep = strchr(protsep, '?'); - if(pathsep) - *pathsep = 0; - } } } + else { + /* the relative piece starts with '#' */ + + /* If there is a fragment in the original URL, cut it off */ + char *pathsep = strchr(protsep, '#'); + if(pathsep) + *pathsep = 0; + } /* If the new part contains a space, this is a mighty stupid redirect but we still make an effort to do "right". To the left of a '?' @@ -406,12 +364,6 @@ static char *concat_url(const char *base, const char *relurl) /* copy over the root url part */ memcpy(newest, url_clone, urllen); - /* check if we need to append a slash */ - if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0])) - ; - else - newest[urllen++]='/'; - /* then append the new piece on the right side */ strcpy_url(&newest[urllen], useurl, !host_changed); @@ -1463,7 +1415,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, free(redired_url); return CURLUE_OUT_OF_MEMORY; } - result = parseurl(redired_url, handle2, flags); + result = parseurl(redired_url, handle2, flags&~CURLU_PATH_AS_IS); free(redired_url); if(!result) mv_urlhandle(handle2, u); diff --git a/tests/data/test391 b/tests/data/test391 index 24428a08f0f2..279c562de317 100644 --- a/tests/data/test391 +++ b/tests/data/test391 @@ -62,7 +62,7 @@ Host: %HOSTIP:%HTTPPORT User-Agent: curl/%VERSION Accept: */* -GET /../%TESTNUMBER0002 HTTP/1.1 +GET /%TESTNUMBER0002 HTTP/1.1 Host: %HOSTIP:%HTTPPORT User-Agent: curl/%VERSION Accept: */* diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index d5253a739a10..3103c69f2090 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -1143,6 +1143,38 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags) } static struct redircase set_url_list[] = { + {"http://example.org#withs/ash", "/moo#frag", + "http://example.org/moo#frag", + 0, 0, CURLUE_OK}, + {"http://example.org/", "../path/././../././../moo", + "http://example.org/moo", + 0, 0, CURLUE_OK}, + + {"http://example.org?bar/moo", "?weird", + "http://example.org/?weird", 0, 0, CURLUE_OK}, + {"http://example.org/foo?bar", "?weird", + "http://example.org/foo?weird", 0, 0, CURLUE_OK}, + {"http://example.org/foo", "?weird", + "http://example.org/foo?weird", 0, 0, CURLUE_OK}, + {"http://example.org", "?weird", + "http://example.org/?weird", 0, 0, CURLUE_OK}, + {"http://example.org/#original", "?weird#moo", + "http://example.org/?weird#moo", 0, 0, CURLUE_OK}, + + {"http://example.org?bar/moo#yes/path", "#new/slash", + "http://example.org/?bar/moo#new/slash", 0, 0, CURLUE_OK}, + {"http://example.org/foo?bar", "#weird", + "http://example.org/foo?bar#weird", 0, 0, CURLUE_OK}, + {"http://example.org/foo?bar#original", "#weird", + "http://example.org/foo?bar#weird", 0, 0, CURLUE_OK}, + {"http://example.org/foo#original", "#weird", + "http://example.org/foo#weird", 0, 0, CURLUE_OK}, + {"http://example.org/#original", "#weird", + "http://example.org/#weird", 0, 0, CURLUE_OK}, + {"http://example.org#original", "#weird", + "http://example.org/#weird", 0, 0, CURLUE_OK}, + {"http://example.org/foo?bar", "moo?hey#weird", + "http://example.org/moo?hey#weird", 0, 0, CURLUE_OK}, {"http://example.org/static/favicon/wikipedia.ico", "//fake.example.com/licenses/by-sa/3.0/", "http://fake.example.com/licenses/by-sa/3.0/",