From 50e61f579cfc8cf64031845f3e73f82593ad2ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Wed, 31 May 2023 10:36:43 +0200 Subject: [PATCH] Allow special characters in flake paths Support using nix flakes in paths with spaces or arbitrary Unicode characters. This introduces the convention that the path part of the URL should be percent-encoded when dealing with `path:` urls and not when using filepaths (following the convention of Firefox). Co-authored-by: Rendal --- src/libexpr/flake/flakeref.cc | 85 +++++++++++++++++++---------------- src/libutil/tests/url.cc | 9 ++++ src/libutil/url.cc | 2 +- 3 files changed, 57 insertions(+), 39 deletions(-) diff --git a/src/libexpr/flake/flakeref.cc b/src/libexpr/flake/flakeref.cc index e1bce90bc..2155c5f12 100644 --- a/src/libexpr/flake/flakeref.cc +++ b/src/libexpr/flake/flakeref.cc @@ -77,14 +77,6 @@ std::pair parseFlakeRefWithFragment( { using namespace fetchers; - static std::string fnRegex = "[0-9a-zA-Z-._~!$&'\"()*+,;=]+"; - - static std::regex pathUrlRegex( - "(/?" + fnRegex + "(?:/" + fnRegex + ")*/?)" - + "(?:\\?(" + queryRegex + "))?" - + "(?:#(" + queryRegex + "))?", - std::regex::ECMAScript); - static std::regex flakeRegex( "((" + flakeIdRegexS + ")(?:/(?:" + refAndOrRevRegex + "))?)" + "(?:#(" + queryRegex + "))?", @@ -92,26 +84,23 @@ std::pair parseFlakeRefWithFragment( std::smatch match; - /* Check if 'url' is a flake ID. This is an abbreviated syntax for - 'flake:?ref=&rev='. 
*/ - - if (std::regex_match(url, match, flakeRegex)) { - auto parsedURL = ParsedURL{ - .url = url, - .base = "flake:" + match.str(1), - .scheme = "flake", - .authority = "", - .path = match[1], - }; - - return std::make_pair( - FlakeRef(Input::fromURL(parsedURL, isFlake), ""), - percentDecode(match.str(6))); - } - - else if (std::regex_match(url, match, pathUrlRegex)) { - std::string path = match[1]; - std::string fragment = percentDecode(match.str(3)); + auto parsePathFlakeRef = [&]() { + std::string path = url; + std::string fragment = ""; + std::map query = {}; + auto pathEnd = url.find_first_of("#?"); + auto fragmentStart = pathEnd; + if (pathEnd != std::string::npos && url[pathEnd] == '?') + fragmentStart = url.find("#"); + if (pathEnd != std::string::npos) { + path = url.substr(0, pathEnd); + } + if (fragmentStart != std::string::npos) { + fragment = percentDecode(url.substr(fragmentStart+1)); + } + if (fragmentStart != std::string::npos && pathEnd != std::string::npos) { + query = decodeQuery(url.substr(pathEnd+1, fragmentStart)); + } if (baseDir) { /* Check if 'url' is a path (either absolute or relative @@ -163,7 +152,7 @@ std::pair parseFlakeRefWithFragment( .scheme = "git+file", .authority = "", .path = flakeRoot, - .query = decodeQuery(match[2]), + .query = query, }; if (subdir != "") { @@ -188,7 +177,6 @@ std::pair parseFlakeRefWithFragment( } else { if (!hasPrefix(path, "/")) throw BadURL("flake reference '%s' is not an absolute path", url); - auto query = decodeQuery(match[2]); path = canonPath(path + "/" + getOr(query, "dir", "")); } @@ -197,19 +185,40 @@ std::pair parseFlakeRefWithFragment( attrs.insert_or_assign("path", path); return std::make_pair(FlakeRef(Input::fromAttrs(std::move(attrs)), ""), fragment); + }; + + /* Check if 'url' is a flake ID. This is an abbreviated syntax for + 'flake:?ref=&rev='. 
*/ + + if (std::regex_match(url, match, flakeRegex)) { + auto parsedURL = ParsedURL{ + .url = url, + .base = "flake:" + match.str(1), + .scheme = "flake", + .authority = "", + .path = match[1], + }; + + return std::make_pair( + FlakeRef(Input::fromURL(parsedURL), ""), + percentDecode(match.str(6))); } else { - auto parsedURL = parseURL(url); - std::string fragment; - std::swap(fragment, parsedURL.fragment); + try { + auto parsedURL = parseURL(url); + std::string fragment; + std::swap(fragment, parsedURL.fragment); - auto input = Input::fromURL(parsedURL, isFlake); - input.parent = baseDir; + auto input = Input::fromURL(parsedURL, isFlake); + input.parent = baseDir; - return std::make_pair( - FlakeRef(std::move(input), getOr(parsedURL.query, "dir", "")), - fragment); + return std::make_pair( + FlakeRef(std::move(input), getOr(parsedURL.query, "dir", "")), + fragment); + } catch (BadURL &) { + return parsePathFlakeRef(); + } } } diff --git a/src/libutil/tests/url.cc b/src/libutil/tests/url.cc index a908631e6..a678dad20 100644 --- a/src/libutil/tests/url.cc +++ b/src/libutil/tests/url.cc @@ -335,4 +335,13 @@ namespace nix { ASSERT_EQ(d, s); } + TEST(percentEncode, yen) { + // https://en.wikipedia.org/wiki/Percent-encoding#Character_data + std::string s = reinterpret_cast(u8"円"); + std::string e = "%E5%86%86"; + + ASSERT_EQ(percentEncode(s), e); + ASSERT_EQ(percentDecode(e), s); + } + } diff --git a/src/libutil/url.cc b/src/libutil/url.cc index a8f7d39fd..e700c8eaf 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -103,7 +103,7 @@ std::string percentEncode(std::string_view s, std::string_view keep) || keep.find(c) != std::string::npos) res += c; else - res += fmt("%%%02X", (unsigned int) c); + res += fmt("%%%02X", c & 0xFF); return res; }