1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-07-06 21:41:48 +02:00

fetchTree: Allow fetching plain files

Add a new `file` fetcher type, which will fetch a plain file over
http(s), or from the local file.

Because plain `http(s)://` or `file://` urls can already correspond to
`tarball` inputs (if the path ends-up with a know archive extension),
the URL parsing logic is a bit convuluted in that:

- {http,https,file}:// urls will be interpreted as either a tarball or a
  file input, depending on the extensions of the path part (so
  `https://foo.com/bar` will be a `file` input and
  `https://foo.com/bar.tar.gz` as a `tarball` input)
- `file+{something}://` urls will be interpreted as `file` urls (with
  the `file+` part removed)
- `tarball+{something}://` urls will be interpreted as `tarball` urls (with
  the `tarball+` part removed)

Fix #3785

Co-Authored-By: Tony Olagbaiye <me@fron.io>
This commit is contained in:
Tony Olagbaiye 2020-10-16 00:35:24 +01:00 committed by Théophane Hufschmitt
parent 78dc64ec1e
commit 5b8c1deb18
7 changed files with 221 additions and 25 deletions

View file

@ -6,6 +6,7 @@
#include "archive.hh"
#include "tarfile.hh"
#include "types.hh"
#include "split.hh"
namespace nix::fetchers {
@ -168,24 +169,34 @@ std::pair<Tree, time_t> downloadTarball(
};
}
struct TarballInputScheme : InputScheme
// An input scheme corresponding to a curable ressource
struct CurlInputScheme : InputScheme
{
virtual const std::string inputType() const = 0;
const std::set<std::string> transportUrlSchemes = {"file", "http", "https"};
const bool hasTarballExtension(std::string_view path) const
{
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar")
|| hasSuffix(path, ".tgz") || hasSuffix(path, ".tar.gz")
|| hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
|| hasSuffix(path, ".tar.zst");
}
virtual bool isValidURL(const ParsedURL & url) const = 0;
std::optional<Input> inputFromURL(const ParsedURL & url) override
{
if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return {};
if (!hasSuffix(url.path, ".zip")
&& !hasSuffix(url.path, ".tar")
&& !hasSuffix(url.path, ".tgz")
&& !hasSuffix(url.path, ".tar.gz")
&& !hasSuffix(url.path, ".tar.xz")
&& !hasSuffix(url.path, ".tar.bz2")
&& !hasSuffix(url.path, ".tar.zst"))
return {};
if (!isValidURL(url))
return std::nullopt;
Input input;
input.attrs.insert_or_assign("type", "tarball");
input.attrs.insert_or_assign("url", url.to_string());
auto urlWithoutApplicationScheme = url;
urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport;
input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string());
auto narHash = url.query.find("narHash");
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
@ -194,14 +205,17 @@ struct TarballInputScheme : InputScheme
std::optional<Input> inputFromAttrs(const Attrs & attrs) override
{
if (maybeGetStrAttr(attrs, "type") != "tarball") return {};
auto type = maybeGetStrAttr(attrs, "type");
if (type != inputType()) return {};
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"};
for (auto & [name, value] : attrs)
if (name != "type" && name != "url" && /* name != "hash" && */ name != "narHash" && name != "name")
throw Error("unsupported tarball input attribute '%s'", name);
if (!allowedNames.count(name))
throw Error("unsupported %s input attribute '%s'", *type, name);
Input input;
input.attrs = attrs;
//input.locked = (bool) maybeGetStrAttr(input.attrs, "hash");
return input;
}
@ -209,14 +223,9 @@ struct TarballInputScheme : InputScheme
ParsedURL toURL(const Input & input) override
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
// NAR hashes are preferred over file hashes since tar/zip files
// don't have a canonical representation.
// NAR hashes are preferred over file hashes since tar/zip files // don't have a canonical representation.
if (auto narHash = input.getNarHash())
url.query.insert_or_assign("narHash", narHash->to_string(SRI, true));
/*
else if (auto hash = maybeGetStrAttr(input.attrs, "hash"))
url.query.insert_or_assign("hash", Hash(*hash).to_string(SRI, true));
*/
return url;
}
@ -225,6 +234,42 @@ struct TarballInputScheme : InputScheme
return true;
}
};
struct FileInputScheme : CurlInputScheme
{
const std::string inputType() const override { return "file"; }
bool isValidURL(const ParsedURL & url) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application
? parsedUrlScheme.application.value() == inputType()
: !hasTarballExtension(url.path));
}
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
return {std::move(file.storePath), input};
}
};
struct TarballInputScheme : CurlInputScheme
{
const std::string inputType() const override { return "tarball"; }
bool isValidURL(const ParsedURL & url) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application
? parsedUrlScheme.application.value() == inputType()
: hasTarballExtension(url.path));
}
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first;
@ -233,5 +278,6 @@ struct TarballInputScheme : InputScheme
};
static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); });
static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); });
}

View file

@ -1,6 +1,7 @@
#include "url.hh"
#include "url-parts.hh"
#include "util.hh"
#include "split.hh"
namespace nix {
@ -136,4 +137,21 @@ bool ParsedURL::operator ==(const ParsedURL & other) const
&& fragment == other.fragment;
}
/**
* Parse a URL scheme of the form '(applicationScheme\+)?transportScheme'
* into a tuple '(applicationScheme, transportScheme)'
*
* > parseUrlScheme("http") == ParsedUrlScheme{ {}, "http"}
* > parseUrlScheme("tarball+http") == ParsedUrlScheme{ {"tarball"}, "http"}
*/
ParsedUrlScheme parseUrlScheme(std::string_view scheme)
{
auto application = splitPrefixTo(scheme, '+');
auto transport = scheme;
return ParsedUrlScheme {
.application = application,
.transport = transport,
};
}
}

View file

@ -27,4 +27,19 @@ std::map<std::string, std::string> decodeQuery(const std::string & query);
ParsedURL parseURL(const std::string & url);
/*
* Although thats not really standardized anywhere, an number of tools
* use a scheme of the form 'x+y' in urls, where y is the transport layer
* scheme, and x is the application layer scheme.
*
* For example git uses `git+https` to designate remotes using a Git
* protocol over http.
*/
struct ParsedUrlScheme {
std::optional<std::string_view> application;
std::string_view transport;
};
ParsedUrlScheme parseUrlScheme(std::string_view scheme);
}

View file

@ -181,9 +181,17 @@ Currently the `type` attribute can be one of the following:
* `tarball`: Tarballs. The location of the tarball is specified by the
attribute `url`.
In URL form, the schema must be `http://`, `https://` or `file://`
URLs and the extension must be `.zip`, `.tar`, `.tgz`, `.tar.gz`,
`.tar.xz`, `.tar.bz2` or `.tar.zst`.
In URL form, the schema must be `tarball+http://`, `tarball+https://` or `tarball+file://`.
If the extension corresponds to a known archive format (`.zip`, `.tar`,
`.tgz`, `.tar.gz`, `.tar.xz`, `.tar.bz2` or `.tar.zst`), then the `tarball+`
can be dropped.
* `file`: Plain files or directory tarballs, either over http(s) or from the local
disk.
In URL form, the schema must be `file+http://`, `file+https://` or `file+file://`.
If the extension doesnt correspond to a known archive format (as defined by the
`tarball` fetcher), then the `file+` prefix can be dropped.
* `github`: A more efficient way to fetch repositories from
GitHub. The following attributes are required: