1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-25 06:31:14 +02:00
nix/src/libfetchers/git-lfs-fetch.cc
Robert Hensing 55297f865c Format
clang-format: 18.1.8 -> 19.1.7
2025-04-01 15:33:49 +02:00

280 lines
10 KiB
C++

#include "nix/git-lfs-fetch.hh"
#include "nix/git-utils.hh"
#include "nix/filetransfer.hh"
#include "nix/processes.hh"
#include "nix/url.hh"
#include "nix/users.hh"
#include "nix/hash.hh"
#include <git2/attr.h>
#include <git2/config.h>
#include <git2/errors.h>
#include <git2/remote.h>
#include <nlohmann/json.hpp>
namespace nix::lfs {
// if authHeader is "", downloadToSink assumes no auth is expected
static void downloadToSink(
const std::string & url,
const std::string & authHeader,
// FIXME: passing a StringSink is superfluous, we may as well
// return a string. Or use an abstract Sink for streaming.
StringSink & sink,
std::string sha256Expected,
size_t sizeExpected)
{
FileTransferRequest request(url);
Headers headers;
if (!authHeader.empty())
headers.push_back({"Authorization", authHeader});
request.headers = headers;
getFileTransfer()->download(std::move(request), sink);
auto sizeActual = sink.s.length();
if (sizeExpected != sizeActual)
throw Error("size mismatch while fetching %s: expected %d but got %d", url, sizeExpected, sizeActual);
auto sha256Actual = hashString(HashAlgorithm::SHA256, sink.s).to_string(HashFormat::Base16, false);
if (sha256Actual != sha256Expected)
throw Error(
"hash mismatch while fetching %s: expected sha256:%s but got sha256:%s", url, sha256Expected, sha256Actual);
}
static std::string getLfsApiToken(const ParsedURL & url)
{
auto [status, output] = runProgram(
RunOptions{
.program = "ssh",
.args = {*url.authority, "git-lfs-authenticate", url.path, "download"},
});
if (output.empty())
throw Error(
"git-lfs-authenticate: no output (cmd: ssh %s git-lfs-authenticate %s download)",
url.authority.value_or(""),
url.path);
auto queryResp = nlohmann::json::parse(output);
if (!queryResp.contains("header"))
throw Error("no header in git-lfs-authenticate response");
if (!queryResp["header"].contains("Authorization"))
throw Error("no Authorization in git-lfs-authenticate response");
return queryResp["header"]["Authorization"].get<std::string>();
}
typedef std::unique_ptr<git_config, Deleter<git_config_free>> GitConfig;
typedef std::unique_ptr<git_config_entry, Deleter<git_config_entry_free>> GitConfigEntry;
static std::string getLfsEndpointUrl(git_repository * repo)
{
GitConfig config;
if (git_repository_config(Setter(config), repo)) {
GitConfigEntry entry;
if (!git_config_get_entry(Setter(entry), config.get(), "lfs.url")) {
auto value = std::string(entry->value);
if (!value.empty()) {
debug("Found explicit lfs.url value: %s", value);
return value;
}
}
}
git_remote * remote = nullptr;
if (git_remote_lookup(&remote, repo, "origin"))
return "";
const char * url_c_str = git_remote_url(remote);
if (!url_c_str)
return "";
return std::string(url_c_str);
}
static std::optional<Pointer> parseLfsPointer(std::string_view content, std::string_view filename)
{
// https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md
//
// example git-lfs pointer file:
// version https://git-lfs.github.com/spec/v1
// oid sha256:f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf
// size 10000000
// (ending \n)
if (!content.starts_with("version ")) {
// Invalid pointer file
return std::nullopt;
}
if (!content.starts_with("version https://git-lfs.github.com/spec/v1")) {
// In case there's new spec versions in the future, but for now only v1 exists
debug("Invalid version found on potential lfs pointer file, skipping");
return std::nullopt;
}
std::string oid;
std::string size;
for (auto & line : tokenizeString<Strings>(content, "\n")) {
if (line.starts_with("version ")) {
continue;
}
if (line.starts_with("oid sha256:")) {
oid = line.substr(11); // skip "oid sha256:"
continue;
}
if (line.starts_with("size ")) {
size = line.substr(5); // skip "size "
continue;
}
debug("Custom extension '%s' found, ignoring", line);
}
if (oid.length() != 64 || !std::all_of(oid.begin(), oid.end(), ::isxdigit)) {
debug("Invalid sha256 %s, skipping", oid);
return std::nullopt;
}
if (size.length() == 0 || !std::all_of(size.begin(), size.end(), ::isdigit)) {
debug("Invalid size %s, skipping", size);
return std::nullopt;
}
return std::make_optional(Pointer{oid, std::stoul(size)});
}
Fetch::Fetch(git_repository * repo, git_oid rev)
{
this->repo = repo;
this->rev = rev;
const auto remoteUrl = lfs::getLfsEndpointUrl(repo);
this->url = nix::parseURL(nix::fixGitURL(remoteUrl)).canonicalise();
}
bool Fetch::shouldFetch(const CanonPath & path) const
{
const char * attr = nullptr;
git_attr_options opts = GIT_ATTR_OPTIONS_INIT;
opts.attr_commit_id = this->rev;
opts.flags = GIT_ATTR_CHECK_INCLUDE_COMMIT | GIT_ATTR_CHECK_NO_SYSTEM;
if (git_attr_get_ext(&attr, (git_repository *) (this->repo), &opts, path.rel_c_str(), "filter"))
throw Error("cannot get git-lfs attribute: %s", git_error_last()->message);
debug("Git filter for '%s' is '%s'", path, attr ? attr : "null");
return attr != nullptr && !std::string(attr).compare("lfs");
}
static nlohmann::json pointerToPayload(const std::vector<Pointer> & items)
{
nlohmann::json jArray = nlohmann::json::array();
for (const auto & pointer : items)
jArray.push_back({{"oid", pointer.oid}, {"size", pointer.size}});
return jArray;
}
std::vector<nlohmann::json> Fetch::fetchUrls(const std::vector<Pointer> & pointers) const
{
ParsedURL httpUrl(url);
httpUrl.scheme = url.scheme == "ssh" ? "https" : url.scheme;
FileTransferRequest request(httpUrl.to_string() + "/info/lfs/objects/batch");
request.post = true;
Headers headers;
if (this->url.scheme == "ssh")
headers.push_back({"Authorization", lfs::getLfsApiToken(this->url)});
headers.push_back({"Content-Type", "application/vnd.git-lfs+json"});
headers.push_back({"Accept", "application/vnd.git-lfs+json"});
request.headers = headers;
nlohmann::json oidList = pointerToPayload(pointers);
nlohmann::json data = {{"operation", "download"}};
data["objects"] = oidList;
request.data = data.dump();
FileTransferResult result = getFileTransfer()->upload(request);
auto responseString = result.data;
std::vector<nlohmann::json> objects;
// example resp here:
// {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic
// Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]}
try {
auto resp = nlohmann::json::parse(responseString);
if (resp.contains("objects"))
objects.insert(objects.end(), resp["objects"].begin(), resp["objects"].end());
else
throw Error("response does not contain 'objects'");
return objects;
} catch (const nlohmann::json::parse_error & e) {
printMsg(lvlTalkative, "Full response: '%1%'", responseString);
throw Error("response did not parse as json: %s", e.what());
}
}
void Fetch::fetch(
const std::string & content,
const CanonPath & pointerFilePath,
StringSink & sink,
std::function<void(uint64_t)> sizeCallback) const
{
debug("trying to fetch '%s' using git-lfs", pointerFilePath);
if (content.length() >= 1024) {
warn("encountered file '%s' that should have been a git-lfs pointer, but is too large", pointerFilePath);
sizeCallback(content.length());
sink(content);
return;
}
const auto pointer = parseLfsPointer(content, pointerFilePath.rel());
if (pointer == std::nullopt) {
warn("encountered file '%s' that should have been a git-lfs pointer, but is invalid", pointerFilePath);
sizeCallback(content.length());
sink(content);
return;
}
Path cacheDir = getCacheDir() + "/git-lfs";
std::string key = hashString(HashAlgorithm::SHA256, pointerFilePath.rel()).to_string(HashFormat::Base16, false)
+ "/" + pointer->oid;
Path cachePath = cacheDir + "/" + key;
if (pathExists(cachePath)) {
debug("using cache entry %s -> %s", key, cachePath);
sink(readFile(cachePath));
return;
}
debug("did not find cache entry for %s", key);
std::vector<Pointer> pointers;
pointers.push_back(pointer.value());
const auto objUrls = fetchUrls(pointers);
const auto obj = objUrls[0];
try {
std::string sha256 = obj.at("oid"); // oid is also the sha256
std::string ourl = obj.at("actions").at("download").at("href");
std::string authHeader = "";
if (obj.at("actions").at("download").contains("header")
&& obj.at("actions").at("download").at("header").contains("Authorization")) {
authHeader = obj["actions"]["download"]["header"]["Authorization"];
}
const uint64_t size = obj.at("size");
sizeCallback(size);
downloadToSink(ourl, authHeader, sink, sha256, size);
debug("creating cache entry %s -> %s", key, cachePath);
if (!pathExists(dirOf(cachePath)))
createDirs(dirOf(cachePath));
writeFile(cachePath, sink.s);
debug("%s fetched with git-lfs", pointerFilePath);
} catch (const nlohmann::json::out_of_range & e) {
throw Error("bad json from /info/lfs/objects/batch: %s %s", obj, e.what());
}
}
} // namespace nix::lfs