1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-29 23:13:14 +02:00

Fix format

This commit is contained in:
Leandro Reina 2024-11-20 18:24:17 +01:00
parent 79d41062d0
commit 70ffcc83d7
2 changed files with 307 additions and 287 deletions

View file

@ -110,7 +110,6 @@ TEST_F(GitUtilsTest, sink_hardlink)
} }
}; };
namespace lfs { namespace lfs {
TEST_F(GitUtilsTest, parseGitRemoteUrl) TEST_F(GitUtilsTest, parseGitRemoteUrl)
@ -196,7 +195,8 @@ TEST_F(GitUtilsTest, parseGitRemoteUrl)
EXPECT_EQ(result.path, ""); EXPECT_EQ(result.path, "");
} }
} }
TEST_F(GitUtilsTest, gitUrlToHttp) { TEST_F(GitUtilsTest, gitUrlToHttp)
{
{ {
const GitUrl url = parseGitUrl("git@github.com:user/repo.git"); const GitUrl url = parseGitUrl("git@github.com:user/repo.git");
EXPECT_EQ(url.toHttp(), "https://github.com/user/repo.git"); EXPECT_EQ(url.toHttp(), "https://github.com/user/repo.git");
@ -219,7 +219,8 @@ TEST_F(GitUtilsTest, gitUrlToHttp) {
} }
} }
TEST_F(GitUtilsTest, gitUrlToSsh) { TEST_F(GitUtilsTest, gitUrlToSsh)
{
{ {
const GitUrl url = parseGitUrl("https://example.com/user/repo.git"); const GitUrl url = parseGitUrl("https://example.com/user/repo.git");
const auto [host, path] = url.toSsh(); const auto [host, path] = url.toSsh();
@ -234,9 +235,11 @@ TEST_F(GitUtilsTest, gitUrlToSsh) {
} }
} }
class FetchAttributeTest : public ::testing::Test { class FetchAttributeTest : public ::testing::Test
{
protected: protected:
void SetUp() override { void SetUp() override
{
// test literal (non-wildcard) matches too // test literal (non-wildcard) matches too
std::string content1 = "litfile filter=lfs diff=lfs merge=lfs -text"; std::string content1 = "litfile filter=lfs diff=lfs merge=lfs -text";
auto rules1 = parseGitAttrFile(content1); auto rules1 = parseGitAttrFile(content1);
@ -251,23 +254,25 @@ protected:
Fetch fetch2; Fetch fetch2;
}; };
// A literal (non-wildcard) pattern must tag exactly the named path.
// NOTE(review): fetch1 is presumably loaded in SetUp() with the single rule
// "litfile filter=lfs ..." — the assignment itself is not visible here.
TEST_F(FetchAttributeTest, ExactMatch)
{
    const bool listedPath = fetch1.hasAttribute("litfile", "filter", "lfs");
    const bool unlistedPath = fetch1.hasAttribute("other", "filter", "lfs");

    EXPECT_TRUE(listedPath);
    EXPECT_FALSE(unlistedPath);
}
// Only "match.wildcard" is expected to carry filter=lfs: a different
// extension, or extra text after the extension, must not match.
// NOTE(review): fetch2's rules are set up in SetUp(), outside this view.
TEST_F(FetchAttributeTest, WildcardMatch)
{
    const bool matchingName = fetch2.hasAttribute("match.wildcard", "filter", "lfs");
    const bool otherExtension = fetch2.hasAttribute("nomatch.otherext", "filter", "lfs");
    const bool trailingSuffix = fetch2.hasAttribute("nomatch.wildcard.extra", "filter", "lfs");

    EXPECT_TRUE(matchingName);
    EXPECT_FALSE(otherExtension);
    EXPECT_FALSE(trailingSuffix);
}
// The empty path must never be reported as carrying filter=lfs, for either
// fixture.
TEST_F(FetchAttributeTest, EmptyPath)
{
    const bool emptyInFetch1 = fetch1.hasAttribute("", "filter", "lfs");
    const bool emptyInFetch2 = fetch2.hasAttribute("", "filter", "lfs");

    EXPECT_FALSE(emptyInFetch1);
    EXPECT_FALSE(emptyInFetch2);
}
} // namespace lfs } // namespace lfs
} // namespace nix } // namespace nix

View file

@ -16,31 +16,34 @@
#include "processes.hh" #include "processes.hh"
#include "url.hh" #include "url.hh"
namespace fs = std::filesystem; namespace fs = std::filesystem;
namespace nix { namespace nix {
namespace lfs { namespace lfs {
// see Fetch::rules // see Fetch::rules
struct AttrRule { struct AttrRule
{
std::string pattern; std::string pattern;
std::unordered_map<std::string, std::string> attributes; std::unordered_map<std::string, std::string> attributes;
git_pathspec* pathspec = nullptr; git_pathspec * pathspec = nullptr;
AttrRule() = default; AttrRule() = default;
explicit AttrRule(std::string pat) : pattern(std::move(pat)) { explicit AttrRule(std::string pat)
: pattern(std::move(pat))
{
initPathspec(); initPathspec();
} }
~AttrRule() { ~AttrRule()
{
if (pathspec) { if (pathspec) {
git_pathspec_free(pathspec); git_pathspec_free(pathspec);
} }
} }
AttrRule(const AttrRule& other) AttrRule(const AttrRule & other)
: pattern(other.pattern) : pattern(other.pattern)
, attributes(other.attributes) , attributes(other.attributes)
, pathspec(nullptr) , pathspec(nullptr)
@ -50,10 +53,11 @@ struct AttrRule {
} }
} }
void initPathspec() { void initPathspec()
{
git_strarray patterns = {0}; git_strarray patterns = {0};
const char* pattern_str = pattern.c_str(); const char * pattern_str = pattern.c_str();
patterns.strings = const_cast<char**>(&pattern_str); patterns.strings = const_cast<char **>(&pattern_str);
patterns.count = 1; patterns.count = 1;
if (git_pathspec_new(&pathspec, &patterns) != 0) { if (git_pathspec_new(&pathspec, &patterns) != 0) {
@ -63,94 +67,99 @@ struct AttrRule {
}; };
// git-lfs metadata about a file // git-lfs metadata about a file
/**
 * git-lfs metadata about a single file, as read from its pointer file.
 *
 * Remains an aggregate, so `Md{path, oid, size}` keeps working.
 */
struct Md
{
    /// fs path relative to repo root, no ./ prefix
    std::string path;
    /// git-lfs managed object id. you give this to the lfs server for downloads
    std::string oid;
    /// in bytes; zero-initialised so a default-constructed Md never holds an
    /// indeterminate value
    size_t size = 0;
};
/**
 * The pieces of a git remote URL. Any component may be empty.
 */
struct GitUrl
{
    std::string protocol;
    std::string user;
    std::string host;
    std::string port;
    std::string path;

    /**
     * Render as `<scheme>://<host>[:<port>]/<path>`.
     *
     * An "ssh" protocol is replaced by "https"; every other protocol is
     * used as-is. The user component is not included.
     *
     * @return the URL, or "" when protocol or host is empty
     */
    std::string toHttp() const
    {
        if (protocol.empty() || host.empty())
            return "";

        std::string url = (protocol == "ssh") ? "https" : protocol;
        url += "://";
        url += host;
        if (!port.empty()) {
            url += ":";
            url += port;
        }
        url += "/";
        url += path;
        return url;
    }

    /**
     * Split into the two arguments an ssh invocation needs.
     *
     * @return [host, path], where host is `user@host` if a user is set and
     *         plain `host` otherwise; {"", ""} when host is empty
     */
    std::pair<std::string, std::string> toSsh() const
    {
        if (host.empty())
            return {"", ""};

        if (user.empty())
            return {host, path};
        return {user + "@" + host, path};
    }
};
/**
 * State used to turn git-lfs pointer files into the content they stand for.
 */
struct Fetch
{
    /// Only true after init().
    bool ready{false};

    /// Comes from shelling out to ssh. Used for the 2 subsequent fetches:
    /// the list of URLs to fetch from, and fetching the data itself.
    std::string token;

    /// Derived from the git remote url.
    GitUrl gitUrl;

    /// Parsed contents of .gitattributes.
    /// .gitattributes contains a list of path patterns, and a list of
    /// attributes (= key-value tags) for each pattern. Paths tagged with
    /// `filter=lfs` need to be smudged by downloading from the lfs server.
    std::vector<AttrRule> rules;

    void init(git_repository * repo, const std::string & gitattributesContent);
    bool hasAttribute(const std::string & path, const std::string & attrName, const std::string & attrValue) const;
    void fetch(const git_blob * pointerBlob, const std::string & pointerFilePath, Sink & sink) const;
    std::vector<nlohmann::json> fetchUrls(const std::vector<Md> & metadatas) const;
};
/**
 * Write callback (installed via CURLOPT_WRITEFUNCTION) that accumulates
 * received bytes in a std::string.
 *
 * @param contents  start of the received data (not NUL-terminated)
 * @param size      size of one element
 * @param nmemb     number of elements
 * @param s         string the data is appended to
 * @return the number of bytes consumed, i.e. size * nmemb
 */
static size_t writeCallback(void * contents, size_t size, size_t nmemb, std::string * s)
{
    const size_t newLength = size * nmemb;
    // named cast instead of a C-style cast; the buffer is only read
    s->append(static_cast<const char *>(contents), newLength);
    return newLength;
}
struct SinkCallbackData { struct SinkCallbackData
Sink* sink; {
Sink * sink;
std::string_view sha256Expected; std::string_view sha256Expected;
HashSink hashSink; HashSink hashSink;
SinkCallbackData(Sink* sink, std::string_view sha256) SinkCallbackData(Sink * sink, std::string_view sha256)
: sink(sink) : sink(sink)
, sha256Expected(sha256) , sha256Expected(sha256)
, hashSink(HashAlgorithm::SHA256) , hashSink(HashAlgorithm::SHA256)
{} {
}
}; };
static size_t sinkWriteCallback(void *contents, size_t size, size_t nmemb, SinkCallbackData *data) { static size_t sinkWriteCallback(void * contents, size_t size, size_t nmemb, SinkCallbackData * data)
{
size_t totalSize = size * nmemb; size_t totalSize = size * nmemb;
data->hashSink({(char *)contents, totalSize}); data->hashSink({(char *) contents, totalSize});
(*data->sink)({(char *)contents, totalSize}); (*data->sink)({(char *) contents, totalSize});
return totalSize; return totalSize;
} }
// if authHeader is "", downloadToSink assumes no auth is expected // if authHeader is "", downloadToSink assumes no auth is expected
void downloadToSink(const std::string &url, const std::string &authHeader, Sink &sink, std::string_view sha256Expected) { void downloadToSink(
CURL *curl; const std::string & url, const std::string & authHeader, Sink & sink, std::string_view sha256Expected)
{
CURL * curl;
CURLcode res; CURLcode res;
curl = curl_easy_init(); curl = curl_easy_init();
@ -161,7 +170,7 @@ void downloadToSink(const std::string &url, const std::string &authHeader, Sink
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &data); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &data);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
struct curl_slist *headers = nullptr; struct curl_slist * headers = nullptr;
if (!authHeader.empty()) { if (!authHeader.empty()) {
const std::string authHeader_prepend = "Authorization: " + authHeader; const std::string authHeader_prepend = "Authorization: " + authHeader;
headers = curl_slist_append(headers, authHeader_prepend.c_str()); headers = curl_slist_append(headers, authHeader_prepend.c_str());
@ -177,135 +186,140 @@ void downloadToSink(const std::string &url, const std::string &authHeader, Sink
const auto sha256Actual = data.hashSink.finish().first.to_string(HashFormat::Base16, false); const auto sha256Actual = data.hashSink.finish().first.to_string(HashFormat::Base16, false);
if (sha256Actual != data.sha256Expected) { if (sha256Actual != data.sha256Expected) {
throw std::runtime_error("sha256 mismatch: while fetching " + url + ": expected " + std::string(data.sha256Expected) + " but got " + sha256Actual); throw std::runtime_error(
"sha256 mismatch: while fetching " + url + ": expected " + std::string(data.sha256Expected) + " but got "
+ sha256Actual);
} }
curl_slist_free_all(headers); curl_slist_free_all(headers);
curl_easy_cleanup(curl); curl_easy_cleanup(curl);
} }
std::string getLfsApiToken(const GitUrl & u)
{
const auto [maybeUserAndHost, path] = u.toSsh();
auto [status, output] = runProgram(RunOptions{
.program = "ssh",
.args = {maybeUserAndHost, "git-lfs-authenticate", path, "download"},
});
std::string getLfsApiToken(const GitUrl& u) { if (output.empty())
const auto [maybeUserAndHost, path] = u.toSsh(); throw std::runtime_error(
auto [status, output] = runProgram(RunOptions { "git-lfs-authenticate: no output (cmd: ssh " + maybeUserAndHost + " git-lfs-authenticate " + path
.program = "ssh", + " download)");
.args = {maybeUserAndHost, "git-lfs-authenticate", path, "download"},
});
if (output.empty()) nlohmann::json query_resp = nlohmann::json::parse(output);
throw std::runtime_error("git-lfs-authenticate: no output (cmd: ssh " + maybeUserAndHost + " git-lfs-authenticate " + path + " download)"); if (!query_resp.contains("header"))
throw std::runtime_error("no header in git-lfs-authenticate response");
if (!query_resp["header"].contains("Authorization"))
throw std::runtime_error("no Authorization in git-lfs-authenticate response");
nlohmann::json query_resp = nlohmann::json::parse(output); std::string res = query_resp["header"]["Authorization"].get<std::string>();
if (!query_resp.contains("header"))
throw std::runtime_error("no header in git-lfs-authenticate response");
if (!query_resp["header"].contains("Authorization"))
throw std::runtime_error("no Authorization in git-lfs-authenticate response");
std::string res = query_resp["header"]["Authorization"].get<std::string>(); return res;
return res;
} }
/**
 * Look up the URL of the repository's "origin" remote.
 *
 * @param repo  repository to inspect
 * @return the remote URL, or "" if the remote cannot be looked up or has no
 *         URL
 */
std::string getLfsEndpointUrl(git_repository * repo)
{
    git_remote * remote = nullptr;
    if (git_remote_lookup(&remote, repo, "origin") < 0) {
        return "";
    }

    // Copy the URL out before releasing the remote; the handle obtained from
    // git_remote_lookup was previously never freed.
    const char * url_c_str = git_remote_url(remote);
    std::string url = url_c_str ? url_c_str : "";
    git_remote_free(remote);
    return url;
}
std::string git_attr_value_to_string(git_attr_value_t value) {
switch (value) {
case GIT_ATTR_VALUE_UNSPECIFIED:
return "GIT_ATTR_VALUE_UNSPECIFIED";
case GIT_ATTR_VALUE_TRUE:
return "GIT_ATTR_VALUE_TRUE";
case GIT_ATTR_VALUE_FALSE:
return "GIT_ATTR_VALUE_FALSE";
case GIT_ATTR_VALUE_STRING:
return "GIT_ATTR_VALUE_STRING";
default:
return "Unknown value";
}
}
std::optional<Md> parseLfsMetadata(const std::string &content, const std::string &filename) {
// https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md
//
// example git-lfs pointer file:
// version https://git-lfs.github.com/spec/v1
// oid sha256:f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf
// size 10000000
// (ending \n)
if (!content.starts_with("version ")) {
// Invalid pointer file
return std::nullopt;
}
if (!content.starts_with("version https://git-lfs.github.com/spec/v1")) {
// In case there's new spec versions in the future, but for now only v1 exists
debug("Invalid version found on potential lfs pointer file, skipping");
return std::nullopt;
}
std::istringstream iss(content);
std::string line;
std::string oid;
std::string size;
while (getline(iss, line)) {
if (line.starts_with("version ")) {
continue;
}
if (line.starts_with("oid sha256:")) {
oid = line.substr(11); // skip "oid sha256:"
continue;
}
if (line.starts_with("size ")) {
size = line.substr(5); // skip "size "
continue;
} }
debug("Custom extension '%s' found, ignoring", line); const char * url_c_str = git_remote_url(remote);
} if (!url_c_str) {
return "";
}
if (oid.length() != 64 || !std::all_of(oid.begin(), oid.end(), ::isxdigit)) { return std::string(url_c_str);
debug("Invalid sha256 %s, skipping", oid);
return std::nullopt;
}
if (size.length() == 0 || !std::all_of(size.begin(), size.end(), ::isdigit)) {
debug("Invalid size %s, skipping", size);
return std::nullopt;
}
return std::make_optional(Md{filename, oid, std::stoul(size)});
} }
/**
 * Name of a git_attr_value_t enumerator, for diagnostics.
 *
 * @return the enumerator's identifier, or "Unknown value" for anything else
 */
std::string git_attr_value_to_string(git_attr_value_t value)
{
    const char * label = "Unknown value";
    switch (value) {
    case GIT_ATTR_VALUE_UNSPECIFIED:
        label = "GIT_ATTR_VALUE_UNSPECIFIED";
        break;
    case GIT_ATTR_VALUE_TRUE:
        label = "GIT_ATTR_VALUE_TRUE";
        break;
    case GIT_ATTR_VALUE_FALSE:
        label = "GIT_ATTR_VALUE_FALSE";
        break;
    case GIT_ATTR_VALUE_STRING:
        label = "GIT_ATTR_VALUE_STRING";
        break;
    default:
        break;
    }
    return label;
}
// there's already a ParseURL here https://github.com/b-camacho/nix/blob/ef6fa54e05cd4134ec41b0d64c1a16db46237f83/src/libutil/url.cc#L13 std::optional<Md> parseLfsMetadata(const std::string & content, const std::string & filename)
// but that does not handle git's custom scp-like syntax {
GitUrl parseGitUrl(const std::string& url) { // https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md
//
// example git-lfs pointer file:
// version https://git-lfs.github.com/spec/v1
// oid sha256:f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf
// size 10000000
// (ending \n)
if (!content.starts_with("version ")) {
// Invalid pointer file
return std::nullopt;
}
if (!content.starts_with("version https://git-lfs.github.com/spec/v1")) {
// In case there's new spec versions in the future, but for now only v1 exists
debug("Invalid version found on potential lfs pointer file, skipping");
return std::nullopt;
}
std::istringstream iss(content);
std::string line;
std::string oid;
std::string size;
while (getline(iss, line)) {
if (line.starts_with("version ")) {
continue;
}
if (line.starts_with("oid sha256:")) {
oid = line.substr(11); // skip "oid sha256:"
continue;
}
if (line.starts_with("size ")) {
size = line.substr(5); // skip "size "
continue;
}
debug("Custom extension '%s' found, ignoring", line);
}
if (oid.length() != 64 || !std::all_of(oid.begin(), oid.end(), ::isxdigit)) {
debug("Invalid sha256 %s, skipping", oid);
return std::nullopt;
}
if (size.length() == 0 || !std::all_of(size.begin(), size.end(), ::isdigit)) {
debug("Invalid size %s, skipping", size);
return std::nullopt;
}
return std::make_optional(Md{filename, oid, std::stoul(size)});
}
// there's already a ParseURL here
// https://github.com/b-camacho/nix/blob/ef6fa54e05cd4134ec41b0d64c1a16db46237f83/src/libutil/url.cc#L13 but that does
// not handle git's custom scp-like syntax
GitUrl parseGitUrl(const std::string & url)
{
GitUrl result; GitUrl result;
// regular protocols // regular protocols
const std::regex r_url( const std::regex r_url(R"(^(ssh|git|https?|ftps?)://(?:([^@]+)@)?([^:/]+)(?::(\d+))?/(.*))");
R"(^(ssh|git|https?|ftps?)://(?:([^@]+)@)?([^:/]+)(?::(\d+))?/(.*))");
// "alternative scp-like syntax" https://git-scm.com/docs/git-fetch#_git_urls // "alternative scp-like syntax" https://git-scm.com/docs/git-fetch#_git_urls
const std::regex r_scp_like_url( const std::regex r_scp_like_url(R"(^(?:([^@]+)@)?([^:/]+):(/?.*))");
R"(^(?:([^@]+)@)?([^:/]+):(/?.*))");
std::smatch matches; std::smatch matches;
if (std::regex_match(url, matches, r_url)) { if (std::regex_match(url, matches, r_url)) {
@ -314,8 +328,7 @@ GitUrl parseGitUrl(const std::string& url) {
result.host = matches[3].str(); result.host = matches[3].str();
result.port = matches[4].str(); result.port = matches[4].str();
result.path = matches[5].str(); result.path = matches[5].str();
} } else if (std::regex_match(url, matches, r_scp_like_url)) {
else if (std::regex_match(url, matches, r_scp_like_url)) {
result.protocol = "ssh"; result.protocol = "ssh";
result.user = matches[1].str(); result.user = matches[1].str();
@ -326,7 +339,7 @@ GitUrl parseGitUrl(const std::string& url) {
return result; return result;
} }
std::vector<AttrRule> parseGitAttrFile(const std::string& content) std::vector<AttrRule> parseGitAttrFile(const std::string & content)
{ {
std::vector<AttrRule> rules; std::vector<AttrRule> rules;
std::string content_str(content); std::string content_str(content);
@ -348,14 +361,15 @@ std::vector<AttrRule> parseGitAttrFile(const std::string& content)
rule.pattern = line.substr(0, pattern_end); rule.pattern = line.substr(0, pattern_end);
git_strarray patterns = {0}; git_strarray patterns = {0};
const char* pattern_str = rule.pattern.c_str(); const char * pattern_str = rule.pattern.c_str();
patterns.strings = const_cast<char**>(&pattern_str); patterns.strings = const_cast<char **>(&pattern_str);
patterns.count = 1; patterns.count = 1;
if (git_pathspec_new(&rule.pathspec, &patterns) != 0) { if (git_pathspec_new(&rule.pathspec, &patterns) != 0) {
auto error = git_error_last(); auto error = git_error_last();
std::stringstream ss; std::stringstream ss;
ss << "git_pathspec_new parsing '" << line << "': " << (error ? error->message : "unknown error") << std::endl; ss << "git_pathspec_new parsing '" << line << "': " << (error ? error->message : "unknown error")
<< std::endl;
warn(ss.str()); warn(ss.str());
continue; continue;
} }
@ -396,26 +410,25 @@ std::vector<AttrRule> parseGitAttrFile(const std::string& content)
return rules; return rules;
} }
/**
 * Prepare this Fetch for use with the given repository.
 *
 * Parses the remote URL reported by getLfsEndpointUrl, requests an API token
 * when that URL uses the ssh protocol, parses the .gitattributes text into
 * rules, and finally marks the object ready.
 *
 * @param repo                  repository whose remote is queried
 * @param gitattributesContent  text of the repository's .gitattributes
 */
void Fetch::init(git_repository * repo, const std::string & gitattributesContent)
{
    gitUrl = parseGitUrl(lfs::getLfsEndpointUrl(repo));

    if (gitUrl.protocol == "ssh")
        token = lfs::getLfsApiToken(gitUrl);

    rules = lfs::parseGitAttrFile(gitattributesContent);
    ready = true;
}
bool Fetch::hasAttribute(const std::string & path, const std::string & attrName, const std::string & attrValue) const
bool Fetch::hasAttribute(const std::string& path, const std::string& attrName, const std::string& attrValue) const
{ {
for (auto it = rules.rbegin(); it != rules.rend(); ++it) { for (auto it = rules.rbegin(); it != rules.rend(); ++it) {
int match = git_pathspec_matches_path( int match = git_pathspec_matches_path(
it->pathspec, it->pathspec,
0, // no flags 0, // no flags
path.c_str() path.c_str());
);
if (match > 0) { if (match > 0) {
auto attr = it->attributes.find(attrName); auto attr = it->attributes.find(attrName);
@ -428,123 +441,125 @@ bool Fetch::hasAttribute(const std::string& path, const std::string& attrName, c
return false; return false;
} }
/**
 * Build the JSON array of `{"oid": ..., "size": ...}` objects describing the
 * given items.
 */
nlohmann::json mdToPayload(const std::vector<Md> & items)
{
    auto payload = nlohmann::json::array();
    for (const auto & item : items) {
        nlohmann::json entry;
        entry["oid"] = item.oid;
        entry["size"] = item.size;
        payload.push_back(std::move(entry));
    }
    return payload;
}
std::vector<nlohmann::json> Fetch::fetchUrls(const std::vector<Md> & metadatas) const
{
nlohmann::json oidList = mdToPayload(metadatas);
nlohmann::json data = {
{"operation", "download"},
};
data["objects"] = oidList;
auto dataStr = data.dump();
std::vector<nlohmann::json> Fetch::fetchUrls(const std::vector<Md> &metadatas) const { CURL * curl = curl_easy_init();
nlohmann::json oidList = mdToPayload(metadatas); char curlErrBuf[CURL_ERROR_SIZE];
nlohmann::json data = { curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curlErrBuf);
{"operation", "download"}, std::string responseString;
}; std::string headerString;
data["objects"] = oidList; const auto lfsUrlBatch = gitUrl.toHttp() + "/info/lfs/objects/batch";
auto dataStr = data.dump(); curl_easy_setopt(curl, CURLOPT_URL, lfsUrlBatch.c_str());
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, dataStr.c_str());
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
CURL *curl = curl_easy_init(); struct curl_slist * headers = NULL;
char curlErrBuf[CURL_ERROR_SIZE]; if (this->token != "") {
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curlErrBuf); const auto authHeader = "Authorization: " + token;
std::string responseString; headers = curl_slist_append(headers, authHeader.c_str());
std::string headerString;
const auto lfsUrlBatch = gitUrl.toHttp() + "/info/lfs/objects/batch";
curl_easy_setopt(curl, CURLOPT_URL, lfsUrlBatch.c_str());
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, dataStr.c_str());
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
struct curl_slist *headers = NULL;
if (this->token != "") {
const auto authHeader = "Authorization: " + token;
headers = curl_slist_append(headers, authHeader.c_str());
}
headers =
curl_slist_append(headers, "Content-Type: application/vnd.git-lfs+json");
headers = curl_slist_append(headers, "Accept: application/vnd.git-lfs+json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POST, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &responseString);
CURLcode res = curl_easy_perform(curl);
if (res != CURLE_OK) {
std::stringstream ss;
ss << "lfs::fetchUrls: bad response from info/lfs/objects/batch: code " << res << " " << curlErrBuf;
throw std::runtime_error(ss.str());
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
std::vector<nlohmann::json> objects;
// example resp here:
// {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic
// Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]}
try {
auto resp = nlohmann::json::parse(responseString);
if (resp.contains("objects")) {
objects.insert(objects.end(), resp["objects"].begin(),
resp["objects"].end());
} else {
throw std::runtime_error("response does not contain 'objects'");
} }
return objects; headers = curl_slist_append(headers, "Content-Type: application/vnd.git-lfs+json");
} catch (const nlohmann::json::parse_error& e) { headers = curl_slist_append(headers, "Accept: application/vnd.git-lfs+json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POST, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &responseString);
CURLcode res = curl_easy_perform(curl);
if (res != CURLE_OK) {
std::stringstream ss;
ss << "lfs::fetchUrls: bad response from info/lfs/objects/batch: code " << res << " " << curlErrBuf;
throw std::runtime_error(ss.str());
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
std::vector<nlohmann::json> objects;
// example resp here:
// {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic
// Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]}
try {
auto resp = nlohmann::json::parse(responseString);
if (resp.contains("objects")) {
objects.insert(objects.end(), resp["objects"].begin(), resp["objects"].end());
} else {
throw std::runtime_error("response does not contain 'objects'");
}
return objects;
} catch (const nlohmann::json::parse_error & e) {
std::stringstream ss; std::stringstream ss;
ss << "response did not parse as json: " << responseString; ss << "response did not parse as json: " << responseString;
throw std::runtime_error(ss.str()); throw std::runtime_error(ss.str());
}
}
} }
void Fetch::fetch(const git_blob* pointerBlob, const std::string& pointerFilePath, Sink& sink) const { void Fetch::fetch(const git_blob * pointerBlob, const std::string & pointerFilePath, Sink & sink) const
constexpr size_t chunkSize = 128 * 1024; // 128 KiB {
auto size = git_blob_rawsize(pointerBlob); constexpr size_t chunkSize = 128 * 1024; // 128 KiB
auto size = git_blob_rawsize(pointerBlob);
if (size >= 1024) { if (size >= 1024) {
debug("Skip git-lfs, pointer file too large"); debug("Skip git-lfs, pointer file too large");
warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath); warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath);
for (size_t offset = 0; offset < size; offset += chunkSize) { for (size_t offset = 0; offset < size; offset += chunkSize) {
sink(std::string((const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset))); sink(std::string(
(const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset)));
}
return;
} }
return;
}
const auto pointerFileContents = std::string((const char *) git_blob_rawcontent(pointerBlob), size); const auto pointerFileContents = std::string((const char *) git_blob_rawcontent(pointerBlob), size);
const auto md = parseLfsMetadata(std::string(pointerFileContents), std::string(pointerFilePath)); const auto md = parseLfsMetadata(std::string(pointerFileContents), std::string(pointerFilePath));
if (md == std::nullopt) { if (md == std::nullopt) {
debug("Skip git-lfs, invalid pointer file"); debug("Skip git-lfs, invalid pointer file");
warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath); warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath);
for (size_t offset = 0; offset < size; offset += chunkSize) { for (size_t offset = 0; offset < size; offset += chunkSize) {
sink(std::string((const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset))); sink(std::string(
(const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset)));
}
return;
} }
return;
}
std::vector<Md> vMds; std::vector<Md> vMds;
vMds.push_back(md.value()); vMds.push_back(md.value());
const auto objUrls = fetchUrls(vMds); const auto objUrls = fetchUrls(vMds);
const auto obj = objUrls[0]; const auto obj = objUrls[0];
try { try {
std::string oid = obj.at("oid"); std::string oid = obj.at("oid");
std::string ourl = obj.at("actions").at("download").at("href"); std::string ourl = obj.at("actions").at("download").at("href");
std::string authHeader = ""; std::string authHeader = "";
if (obj.at("actions").at("download").contains("header") && obj.at("actions").at("download").at("header").contains("Authorization")) { if (obj.at("actions").at("download").contains("header")
authHeader = obj["actions"]["download"]["header"]["Authorization"]; && obj.at("actions").at("download").at("header").contains("Authorization")) {
authHeader = obj["actions"]["download"]["header"]["Authorization"];
}
// oid is also the sha256
downloadToSink(ourl, authHeader, sink, oid);
} catch (const nlohmann::json::out_of_range & e) {
std::stringstream ss;
ss << "bad json from /info/lfs/objects/batch: " << obj << " " << e.what();
throw std::runtime_error(ss.str());
} }
// oid is also the sha256
downloadToSink(ourl, authHeader, sink, oid);
} catch (const nlohmann::json::out_of_range& e) {
std::stringstream ss;
ss << "bad json from /info/lfs/objects/batch: " << obj << " " << e.what();
throw std::runtime_error(ss.str());
}
} }
} // namespace lfs } // namespace lfs