diff --git a/src/libfetchers-tests/git-utils.cc b/src/libfetchers-tests/git-utils.cc index 4740dbd52..6b3c1ca94 100644 --- a/src/libfetchers-tests/git-utils.cc +++ b/src/libfetchers-tests/git-utils.cc @@ -110,7 +110,6 @@ TEST_F(GitUtilsTest, sink_hardlink) } }; - namespace lfs { TEST_F(GitUtilsTest, parseGitRemoteUrl) @@ -196,7 +195,8 @@ TEST_F(GitUtilsTest, parseGitRemoteUrl) EXPECT_EQ(result.path, ""); } } -TEST_F(GitUtilsTest, gitUrlToHttp) { +TEST_F(GitUtilsTest, gitUrlToHttp) +{ { const GitUrl url = parseGitUrl("git@github.com:user/repo.git"); EXPECT_EQ(url.toHttp(), "https://github.com/user/repo.git"); @@ -219,7 +219,8 @@ TEST_F(GitUtilsTest, gitUrlToHttp) { } } -TEST_F(GitUtilsTest, gitUrlToSsh) { +TEST_F(GitUtilsTest, gitUrlToSsh) +{ { const GitUrl url = parseGitUrl("https://example.com/user/repo.git"); const auto [host, path] = url.toSsh(); @@ -234,9 +235,11 @@ TEST_F(GitUtilsTest, gitUrlToSsh) { } } -class FetchAttributeTest : public ::testing::Test { +class FetchAttributeTest : public ::testing::Test +{ protected: - void SetUp() override { + void SetUp() override + { // test literal (non-wildcard) matches too std::string content1 = "litfile filter=lfs diff=lfs merge=lfs -text"; auto rules1 = parseGitAttrFile(content1); @@ -251,23 +254,25 @@ protected: Fetch fetch2; }; -TEST_F(FetchAttributeTest, ExactMatch) { +TEST_F(FetchAttributeTest, ExactMatch) +{ EXPECT_TRUE(fetch1.hasAttribute("litfile", "filter", "lfs")); EXPECT_FALSE(fetch1.hasAttribute("other", "filter", "lfs")); } -TEST_F(FetchAttributeTest, WildcardMatch) { +TEST_F(FetchAttributeTest, WildcardMatch) +{ EXPECT_TRUE(fetch2.hasAttribute("match.wildcard", "filter", "lfs")); EXPECT_FALSE(fetch2.hasAttribute("nomatch.otherext", "filter", "lfs")); EXPECT_FALSE(fetch2.hasAttribute("nomatch.wildcard.extra", "filter", "lfs")); } -TEST_F(FetchAttributeTest, EmptyPath) { +TEST_F(FetchAttributeTest, EmptyPath) +{ EXPECT_FALSE(fetch1.hasAttribute("", "filter", "lfs")); EXPECT_FALSE(fetch2.hasAttribute("", "filter", "lfs")); } - } // namespace lfs } // namespace nix diff --git a/src/libfetchers/git-lfs-fetch.hh b/src/libfetchers/git-lfs-fetch.hh index 84cc056fa..eaf63393c 100644 --- a/src/libfetchers/git-lfs-fetch.hh +++ b/src/libfetchers/git-lfs-fetch.hh @@ -16,31 +16,34 @@ #include "processes.hh" #include "url.hh" - namespace fs = std::filesystem; namespace nix { namespace lfs { // see Fetch::rules -struct AttrRule { +struct AttrRule +{ std::string pattern; std::unordered_map attributes; - git_pathspec* pathspec = nullptr; + git_pathspec * pathspec = nullptr; AttrRule() = default; - explicit AttrRule(std::string pat) : pattern(std::move(pat)) { + explicit AttrRule(std::string pat) + : pattern(std::move(pat)) + { initPathspec(); } - ~AttrRule() { + ~AttrRule() + { if (pathspec) { git_pathspec_free(pathspec); } } - AttrRule(const AttrRule& other) + AttrRule(const AttrRule & other) : pattern(other.pattern) , attributes(other.attributes) , pathspec(nullptr) @@ -50,10 +53,11 @@ struct AttrRule { } } - void initPathspec() { + void initPathspec() + { git_strarray patterns = {0}; - const char* pattern_str = pattern.c_str(); - patterns.strings = const_cast(&pattern_str); + const char * pattern_str = pattern.c_str(); + patterns.strings = const_cast(&pattern_str); patterns.count = 1; if (git_pathspec_new(&pathspec, &patterns) != 0) { @@ -63,94 +67,99 @@ struct AttrRule { }; // git-lfs metadata about a file -struct Md { - std::string path; // fs path relative to repo root, no ./ prefix - std::string oid; // git-lfs managed object id. you give this to the lfs server - // for downloads - size_t size; // in bytes +struct Md +{ + std::string path; // fs path relative to repo root, no ./ prefix + std::string oid; // git-lfs managed object id. you give this to the lfs server + // for downloads + size_t size; // in bytes }; -struct GitUrl { +struct GitUrl +{ std::string protocol; std::string user; std::string host; std::string port; std::string path; - std::string toHttp() const { + std::string toHttp() const + { if (protocol.empty() || host.empty()) { return ""; } std::string prefix = ((protocol == "ssh") ? "https" : protocol) + "://"; - return prefix + host + - (port.empty() ? "" : ":" + port) + "/" + path; + return prefix + host + (port.empty() ? "" : ":" + port) + "/" + path; } // [host, path] - std::pair toSsh() const { + std::pair toSsh() const + { if (host.empty()) { return {"", ""}; } std::string userPart = user.empty() ? "" : user + "@"; - return { - userPart + host, - path - }; + return {userPart + host, path}; } }; -struct Fetch { - // only true after init() - bool ready = false; +struct Fetch +{ + // only true after init() + bool ready = false; - // from shelling out to ssh, used for 2 subsequent fetches: - // list of URLs to fetch from, and fetching the data itself - std::string token = ""; + // from shelling out to ssh, used for 2 subsequent fetches: + // list of URLs to fetch from, and fetching the data itself + std::string token = ""; - // derived from git remote url - GitUrl gitUrl = GitUrl{}; + // derived from git remote url + GitUrl gitUrl = GitUrl{}; - // parsed contents of .gitattributes - // .gitattributes contains a list of path patterns, and list of attributes (=key-value tags) for each pattern - // paths tagged with `filter=lfs` need to be smudged by downloading from lfs server - std::vector rules = {}; + // parsed contents of .gitattributes + // .gitattributes contains a list of path patterns, and list of attributes (=key-value tags) for each pattern + // paths tagged with `filter=lfs` need to be smudged by downloading from lfs server + std::vector rules = {}; - void init(git_repository* repo, const std::string& gitattributesContent); - bool hasAttribute(const std::string& path, const std::string& attrName, const std::string& attrValue) const; - void fetch(const git_blob* pointerBlob, const std::string& pointerFilePath, Sink& sink) const; - std::vector fetchUrls(const std::vector &metadatas) const; + void init(git_repository * repo, const std::string & gitattributesContent); + bool hasAttribute(const std::string & path, const std::string & attrName, const std::string & attrValue) const; + void fetch(const git_blob * pointerBlob, const std::string & pointerFilePath, Sink & sink) const; + std::vector fetchUrls(const std::vector & metadatas) const; }; - -static size_t writeCallback(void *contents, size_t size, size_t nmemb, - std::string *s) { - size_t newLength = size * nmemb; - s->append((char *)contents, newLength); - return newLength; +static size_t writeCallback(void * contents, size_t size, size_t nmemb, std::string * s) +{ + size_t newLength = size * nmemb; + s->append((char *) contents, newLength); + return newLength; } -struct SinkCallbackData { - Sink* sink; +struct SinkCallbackData +{ + Sink * sink; std::string_view sha256Expected; HashSink hashSink; - SinkCallbackData(Sink* sink, std::string_view sha256) + SinkCallbackData(Sink * sink, std::string_view sha256) : sink(sink) , sha256Expected(sha256) , hashSink(HashAlgorithm::SHA256) - {} + { + } }; -static size_t sinkWriteCallback(void *contents, size_t size, size_t nmemb, SinkCallbackData *data) { +static size_t sinkWriteCallback(void * contents, size_t size, size_t nmemb, SinkCallbackData * data) +{ size_t totalSize = size * nmemb; - data->hashSink({(char *)contents, totalSize}); - (*data->sink)({(char *)contents, totalSize}); + data->hashSink({(char *) contents, totalSize}); + (*data->sink)({(char *) contents, totalSize}); return totalSize; } // if authHeader is "", downloadToSink assumes to auth is expected -void downloadToSink(const std::string &url, const std::string &authHeader, Sink &sink, std::string_view sha256Expected) { - CURL *curl; +void downloadToSink( + const std::string & url, const std::string & authHeader, Sink & sink, std::string_view sha256Expected) +{ + CURL * curl; CURLcode res; curl = curl_easy_init(); @@ -161,7 +170,7 @@ void downloadToSink(const std::string &url, const std::string &authHeader, Sink curl_easy_setopt(curl, CURLOPT_WRITEDATA, &data); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - struct curl_slist *headers = nullptr; + struct curl_slist * headers = nullptr; if (!authHeader.empty()) { const std::string authHeader_prepend = "Authorization: " + authHeader; headers = curl_slist_append(headers, authHeader_prepend.c_str()); @@ -177,135 +186,140 @@ void downloadToSink(const std::string &url, const std::string &authHeader, Sink const auto sha256Actual = data.hashSink.finish().first.to_string(HashFormat::Base16, false); if (sha256Actual != data.sha256Expected) { - throw std::runtime_error("sha256 mismatch: while fetching " + url + ": expected " + std::string(data.sha256Expected) + " but got " + sha256Actual); + throw std::runtime_error( + "sha256 mismatch: while fetching " + url + ": expected " + std::string(data.sha256Expected) + " but got " + + sha256Actual); } curl_slist_free_all(headers); curl_easy_cleanup(curl); } +std::string getLfsApiToken(const GitUrl & u) +{ + const auto [maybeUserAndHost, path] = u.toSsh(); + auto [status, output] = runProgram(RunOptions{ + .program = "ssh", + .args = {maybeUserAndHost, "git-lfs-authenticate", path, "download"}, + }); -std::string getLfsApiToken(const GitUrl& u) { - const auto [maybeUserAndHost, path] = u.toSsh(); - auto [status, output] = runProgram(RunOptions { - .program = "ssh", - .args = {maybeUserAndHost, "git-lfs-authenticate", path, "download"}, - }); + if (output.empty()) + throw std::runtime_error( + "git-lfs-authenticate: no output (cmd: ssh " + maybeUserAndHost + " git-lfs-authenticate " + path + + " download)"); - if (output.empty()) - throw std::runtime_error("git-lfs-authenticate: no output (cmd: ssh " + maybeUserAndHost + " git-lfs-authenticate " + path + " download)"); + nlohmann::json query_resp = nlohmann::json::parse(output); + if (!query_resp.contains("header")) + throw std::runtime_error("no header in git-lfs-authenticate response"); + if (!query_resp["header"].contains("Authorization")) + throw std::runtime_error("no Authorization in git-lfs-authenticate response"); - nlohmann::json query_resp = nlohmann::json::parse(output); - if (!query_resp.contains("header")) - throw std::runtime_error("no header in git-lfs-authenticate response"); - if (!query_resp["header"].contains("Authorization")) - throw std::runtime_error("no Authorization in git-lfs-authenticate response"); + std::string res = query_resp["header"]["Authorization"].get(); - std::string res = query_resp["header"]["Authorization"].get(); - - return res; + return res; } -std::string getLfsEndpointUrl(git_repository *repo) { - int err; - git_remote* remote = NULL; - err = git_remote_lookup(&remote, repo, "origin"); - if (err < 0) { - return ""; - } - - const char *url_c_str = git_remote_url(remote); - if (!url_c_str) { - return ""; - } - - return std::string(url_c_str); -} - -std::string git_attr_value_to_string(git_attr_value_t value) { - switch (value) { - case GIT_ATTR_VALUE_UNSPECIFIED: - return "GIT_ATTR_VALUE_UNSPECIFIED"; - case GIT_ATTR_VALUE_TRUE: - return "GIT_ATTR_VALUE_TRUE"; - case GIT_ATTR_VALUE_FALSE: - return "GIT_ATTR_VALUE_FALSE"; - case GIT_ATTR_VALUE_STRING: - return "GIT_ATTR_VALUE_STRING"; - default: - return "Unknown value"; - } -} - - -std::optional parseLfsMetadata(const std::string &content, const std::string &filename) { - // https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md - // - // example git-lfs pointer file: - // version https://git-lfs.github.com/spec/v1 - // oid sha256:f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf - // size 10000000 - // (ending \n) - - if (!content.starts_with("version ")) { - // Invalid pointer file - return std::nullopt; - } - - if (!content.starts_with("version https://git-lfs.github.com/spec/v1")) { - // In case there's new spec versions in the future, but for now only v1 exists - debug("Invalid version found on potential lfs pointer file, skipping"); - return std::nullopt; - } - - std::istringstream iss(content); - std::string line; - - std::string oid; - std::string size; - - while (getline(iss, line)) { - if (line.starts_with("version ")) { - continue; - } - if (line.starts_with("oid sha256:")) { - oid = line.substr(11); // skip "oid sha256:" - continue; - } - if (line.starts_with("size ")) { - size = line.substr(5); // skip "size " - continue; +std::string getLfsEndpointUrl(git_repository * repo) +{ + int err; + git_remote * remote = NULL; + err = git_remote_lookup(&remote, repo, "origin"); + if (err < 0) { + return ""; } - debug("Custom extension '%s' found, ignoring", line); - } + const char * url_c_str = git_remote_url(remote); + if (!url_c_str) { + return ""; + } - if (oid.length() != 64 || !std::all_of(oid.begin(), oid.end(), ::isxdigit)) { - debug("Invalid sha256 %s, skipping", oid); - return std::nullopt; - } - - if (size.length() == 0 || !std::all_of(size.begin(), size.end(), ::isdigit)) { - debug("Invalid size %s, skipping", size); - return std::nullopt; - } - - return std::make_optional(Md{filename, oid, std::stoul(size)}); + return std::string(url_c_str); } +std::string git_attr_value_to_string(git_attr_value_t value) +{ + switch (value) { + case GIT_ATTR_VALUE_UNSPECIFIED: + return "GIT_ATTR_VALUE_UNSPECIFIED"; + case GIT_ATTR_VALUE_TRUE: + return "GIT_ATTR_VALUE_TRUE"; + case GIT_ATTR_VALUE_FALSE: + return "GIT_ATTR_VALUE_FALSE"; + case GIT_ATTR_VALUE_STRING: + return "GIT_ATTR_VALUE_STRING"; + default: + return "Unknown value"; + } +} -// there's already a ParseURL here https://github.com/b-camacho/nix/blob/ef6fa54e05cd4134ec41b0d64c1a16db46237f83/src/libutil/url.cc#L13 -// but that does not handle git's custom scp-like syntax -GitUrl parseGitUrl(const std::string& url) { +std::optional parseLfsMetadata(const std::string & content, const std::string & filename) +{ + // https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md + // + // example git-lfs pointer file: + // version https://git-lfs.github.com/spec/v1 + // oid sha256:f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf + // size 10000000 + // (ending \n) + + if (!content.starts_with("version ")) { + // Invalid pointer file + return std::nullopt; + } + + if (!content.starts_with("version https://git-lfs.github.com/spec/v1")) { + // In case there's new spec versions in the future, but for now only v1 exists + debug("Invalid version found on potential lfs pointer file, skipping"); + return std::nullopt; + } + + std::istringstream iss(content); + std::string line; + + std::string oid; + std::string size; + + while (getline(iss, line)) { + if (line.starts_with("version ")) { + continue; + } + if (line.starts_with("oid sha256:")) { + oid = line.substr(11); // skip "oid sha256:" + continue; + } + if (line.starts_with("size ")) { + size = line.substr(5); // skip "size " + continue; + } + + debug("Custom extension '%s' found, ignoring", line); + } + + if (oid.length() != 64 || !std::all_of(oid.begin(), oid.end(), ::isxdigit)) { + debug("Invalid sha256 %s, skipping", oid); + return std::nullopt; + } + + if (size.length() == 0 || !std::all_of(size.begin(), size.end(), ::isdigit)) { + debug("Invalid size %s, skipping", size); + return std::nullopt; + } + + return std::make_optional(Md{filename, oid, std::stoul(size)}); +} + +// there's already a ParseURL here +// https://github.com/b-camacho/nix/blob/ef6fa54e05cd4134ec41b0d64c1a16db46237f83/src/libutil/url.cc#L13 but that does +// not handle git's custom scp-like syntax +GitUrl parseGitUrl(const std::string & url) +{ GitUrl result; // regular protocols - const std::regex r_url( - R"(^(ssh|git|https?|ftps?)://(?:([^@]+)@)?([^:/]+)(?::(\d+))?/(.*))"); + const std::regex r_url(R"(^(ssh|git|https?|ftps?)://(?:([^@]+)@)?([^:/]+)(?::(\d+))?/(.*))"); // "alternative scp-like syntax" https://git-scm.com/docs/git-fetch#_git_urls - const std::regex r_scp_like_url( - R"(^(?:([^@]+)@)?([^:/]+):(/?.*))"); + const std::regex r_scp_like_url(R"(^(?:([^@]+)@)?([^:/]+):(/?.*))"); std::smatch matches; if (std::regex_match(url, matches, r_url)) { @@ -314,8 +328,7 @@ GitUrl parseGitUrl(const std::string& url) { result.host = matches[3].str(); result.port = matches[4].str(); result.path = matches[5].str(); - } - else if (std::regex_match(url, matches, r_scp_like_url)) { + } else if (std::regex_match(url, matches, r_scp_like_url)) { result.protocol = "ssh"; result.user = matches[1].str(); @@ -326,7 +339,7 @@ GitUrl parseGitUrl(const std::string& url) { return result; } -std::vector parseGitAttrFile(const std::string& content) +std::vector parseGitAttrFile(const std::string & content) { std::vector rules; std::string content_str(content); @@ -348,14 +361,15 @@ std::vector parseGitAttrFile(const std::string& content) rule.pattern = line.substr(0, pattern_end); git_strarray patterns = {0}; - const char* pattern_str = rule.pattern.c_str(); - patterns.strings = const_cast(&pattern_str); + const char * pattern_str = rule.pattern.c_str(); + patterns.strings = const_cast(&pattern_str); patterns.count = 1; if (git_pathspec_new(&rule.pathspec, &patterns) != 0) { auto error = git_error_last(); std::stringstream ss; - ss << "git_pathspec_new parsing '" << line << "': " << (error ? error->message : "unknown error") << std::endl; + ss << "git_pathspec_new parsing '" << line << "': " << (error ? error->message : "unknown error") + << std::endl; warn(ss.str()); continue; } @@ -396,26 +410,25 @@ std::vector parseGitAttrFile(const std::string& content) return rules; } -void Fetch::init(git_repository* repo, const std::string& gitattributesContent) { - const auto remoteUrl = lfs::getLfsEndpointUrl(repo); +void Fetch::init(git_repository * repo, const std::string & gitattributesContent) +{ + const auto remoteUrl = lfs::getLfsEndpointUrl(repo); - this->gitUrl = parseGitUrl(remoteUrl); - if (this->gitUrl.protocol == "ssh") { - this->token = lfs::getLfsApiToken(this->gitUrl); - } - this->rules = lfs::parseGitAttrFile(gitattributesContent); - this->ready = true; + this->gitUrl = parseGitUrl(remoteUrl); + if (this->gitUrl.protocol == "ssh") { + this->token = lfs::getLfsApiToken(this->gitUrl); + } + this->rules = lfs::parseGitAttrFile(gitattributesContent); + this->ready = true; } - -bool Fetch::hasAttribute(const std::string& path, const std::string& attrName, const std::string& attrValue) const +bool Fetch::hasAttribute(const std::string & path, const std::string & attrName, const std::string & attrValue) const { for (auto it = rules.rbegin(); it != rules.rend(); ++it) { int match = git_pathspec_matches_path( it->pathspec, 0, // no flags - path.c_str() - ); + path.c_str()); if (match > 0) { auto attr = it->attributes.find(attrName); @@ -428,123 +441,125 @@ bool Fetch::hasAttribute(const std::string& path, const std::string& attrName, c return false; } -nlohmann::json mdToPayload(const std::vector &items) { - nlohmann::json jArray = nlohmann::json::array(); - for (const auto &md : items) { - jArray.push_back({{"oid", md.oid}, {"size", md.size}}); - } - return jArray; +nlohmann::json mdToPayload(const std::vector & items) +{ + nlohmann::json jArray = nlohmann::json::array(); + for (const auto & md : items) { + jArray.push_back({{"oid", md.oid}, {"size", md.size}}); + } + return jArray; } +std::vector Fetch::fetchUrls(const std::vector & metadatas) const +{ + nlohmann::json oidList = mdToPayload(metadatas); + nlohmann::json data = { + {"operation", "download"}, + }; + data["objects"] = oidList; + auto dataStr = data.dump(); -std::vector Fetch::fetchUrls(const std::vector &metadatas) const { - nlohmann::json oidList = mdToPayload(metadatas); - nlohmann::json data = { - {"operation", "download"}, - }; - data["objects"] = oidList; - auto dataStr = data.dump(); + CURL * curl = curl_easy_init(); + char curlErrBuf[CURL_ERROR_SIZE]; + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curlErrBuf); + std::string responseString; + std::string headerString; + const auto lfsUrlBatch = gitUrl.toHttp() + "/info/lfs/objects/batch"; + curl_easy_setopt(curl, CURLOPT_URL, lfsUrlBatch.c_str()); + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, dataStr.c_str()); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - CURL *curl = curl_easy_init(); - char curlErrBuf[CURL_ERROR_SIZE]; - curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curlErrBuf); - std::string responseString; - std::string headerString; - const auto lfsUrlBatch = gitUrl.toHttp() + "/info/lfs/objects/batch"; - curl_easy_setopt(curl, CURLOPT_URL, lfsUrlBatch.c_str()); - curl_easy_setopt(curl, CURLOPT_POSTFIELDS, dataStr.c_str()); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - - struct curl_slist *headers = NULL; - if (this->token != "") { - const auto authHeader = "Authorization: " + token; - headers = curl_slist_append(headers, authHeader.c_str()); - } - - headers = - curl_slist_append(headers, "Content-Type: application/vnd.git-lfs+json"); - headers = curl_slist_append(headers, "Accept: application/vnd.git-lfs+json"); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - curl_easy_setopt(curl, CURLOPT_POST, 1L); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &responseString); - - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { - std::stringstream ss; - ss << "lfs::fetchUrls: bad response from info/lfs/objects/batch: code " << res << " " << curlErrBuf; - throw std::runtime_error(ss.str()); - } - - curl_easy_cleanup(curl); - curl_slist_free_all(headers); - - std::vector objects; - // example resp here: - // {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic - // Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]} - - try { - auto resp = nlohmann::json::parse(responseString); - if (resp.contains("objects")) { - objects.insert(objects.end(), resp["objects"].begin(), - resp["objects"].end()); - } else { - throw std::runtime_error("response does not contain 'objects'"); + struct curl_slist * headers = NULL; + if (this->token != "") { + const auto authHeader = "Authorization: " + token; + headers = curl_slist_append(headers, authHeader.c_str()); } - return objects; - } catch (const nlohmann::json::parse_error& e) { + headers = curl_slist_append(headers, "Content-Type: application/vnd.git-lfs+json"); + headers = curl_slist_append(headers, "Accept: application/vnd.git-lfs+json"); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt(curl, CURLOPT_POST, 1L); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &responseString); + + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + std::stringstream ss; + ss << "lfs::fetchUrls: bad response from info/lfs/objects/batch: code " << res << " " << curlErrBuf; + throw std::runtime_error(ss.str()); + } + + curl_easy_cleanup(curl); + curl_slist_free_all(headers); + + std::vector objects; + // example resp here: + // {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic + // Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]} + + try { + auto resp = nlohmann::json::parse(responseString); + if (resp.contains("objects")) { + objects.insert(objects.end(), resp["objects"].begin(), resp["objects"].end()); + } else { + throw std::runtime_error("response does not contain 'objects'"); + } + + return objects; + } catch (const nlohmann::json::parse_error & e) { std::stringstream ss; ss << "response did not parse as json: " << responseString; throw std::runtime_error(ss.str()); - - } + } } -void Fetch::fetch(const git_blob* pointerBlob, const std::string& pointerFilePath, Sink& sink) const { - constexpr size_t chunkSize = 128 * 1024; // 128 KiB - auto size = git_blob_rawsize(pointerBlob); +void Fetch::fetch(const git_blob * pointerBlob, const std::string & pointerFilePath, Sink & sink) const +{ + constexpr size_t chunkSize = 128 * 1024; // 128 KiB + auto size = git_blob_rawsize(pointerBlob); - if (size >= 1024) { - debug("Skip git-lfs, pointer file too large"); - warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath); - for (size_t offset = 0; offset < size; offset += chunkSize) { - sink(std::string((const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset))); + if (size >= 1024) { + debug("Skip git-lfs, pointer file too large"); + warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath); + for (size_t offset = 0; offset < size; offset += chunkSize) { + sink(std::string( + (const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset))); + } + return; } - return; - } - const auto pointerFileContents = std::string((const char *) git_blob_rawcontent(pointerBlob), size); - const auto md = parseLfsMetadata(std::string(pointerFileContents), std::string(pointerFilePath)); - if (md == std::nullopt) { - debug("Skip git-lfs, invalid pointer file"); - warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath); - for (size_t offset = 0; offset < size; offset += chunkSize) { - sink(std::string((const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset))); + const auto pointerFileContents = std::string((const char *) git_blob_rawcontent(pointerBlob), size); + const auto md = parseLfsMetadata(std::string(pointerFileContents), std::string(pointerFilePath)); + if (md == std::nullopt) { + debug("Skip git-lfs, invalid pointer file"); + warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath); + for (size_t offset = 0; offset < size; offset += chunkSize) { + sink(std::string( + (const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset))); + } + return; } - return; - } - std::vector vMds; - vMds.push_back(md.value()); - const auto objUrls = fetchUrls(vMds); + std::vector vMds; + vMds.push_back(md.value()); + const auto objUrls = fetchUrls(vMds); - const auto obj = objUrls[0]; - try { - std::string oid = obj.at("oid"); - std::string ourl = obj.at("actions").at("download").at("href"); - std::string authHeader = ""; - if (obj.at("actions").at("download").contains("header") && obj.at("actions").at("download").at("header").contains("Authorization")) { - authHeader = obj["actions"]["download"]["header"]["Authorization"]; + const auto obj = objUrls[0]; + try { + std::string oid = obj.at("oid"); + std::string ourl = obj.at("actions").at("download").at("href"); + std::string authHeader = ""; + if (obj.at("actions").at("download").contains("header") + && obj.at("actions").at("download").at("header").contains("Authorization")) { + authHeader = obj["actions"]["download"]["header"]["Authorization"]; + } + // oid is also the sha256 + downloadToSink(ourl, authHeader, sink, oid); + } catch (const nlohmann::json::out_of_range & e) { + std::stringstream ss; + ss << "bad json from /info/lfs/objects/batch: " << obj << " " << e.what(); + throw std::runtime_error(ss.str()); } - // oid is also the sha256 - downloadToSink(ourl, authHeader, sink, oid); - } catch (const nlohmann::json::out_of_range& e) { - std::stringstream ss; - ss << "bad json from /info/lfs/objects/batch: " << obj << " " << e.what(); - throw std::runtime_error(ss.str()); - } } } // namespace lfs