1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-27 00:11:17 +02:00

working smudge for GitSourceAccessor

This commit is contained in:
Brian Camacho 2024-11-01 03:37:48 -04:00
parent 87e0bc9e21
commit f4962fe59b
7 changed files with 299 additions and 201 deletions

View file

@ -11,10 +11,17 @@
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
namespace fs = std::filesystem; namespace fs = std::filesystem;
namespace lfs { namespace lfs {
// One parsed line of a .gitattributes file; see Fetch::rules.
struct AttrRule {
// path pattern exactly as written in the first whitespace-delimited field of the line
std::string pattern;
// attribute name -> value, as produced by parseGitAttrLine:
//   `name=value` is stored as value, a bare `name` as "true", and `-name` as "false"
std::map<std::string, std::string> attributes;
};
// git-lfs metadata about a file // git-lfs metadata about a file
struct Md { struct Md {
std::string path; // fs path relative to repo root, no ./ prefix std::string path; // fs path relative to repo root, no ./ prefix
@ -23,6 +30,183 @@ struct Md {
size_t size; // in bytes size_t size; // in bytes
}; };
// State needed to replace git-lfs pointer files with their real content.
// A default-constructed Fetch is not usable until init() has been called;
// callers check `ready` and call init() on first use.
struct Fetch {
// only true after init()
bool ready = false;

// from shelling out to ssh, used for 2 subsequent fetches:
// list of URLs to fetch from, and fetching the data itself
std::string token = "";

// URL of the git remote, as returned by get_lfs_endpoint_url().
// fetch_urls() appends "/info/lfs/objects/batch" to this to build the URL you hit
// to get another list of URLs for subsequent fetches,
// e.g. https://github.com/owner/repo.git/info/lfs/objects/batch
std::string rootUrl = "";

// parsed contents of .gitattributes
// .gitattributes contains a list of path patterns, and list of attributes (=key-value tags) for each pattern
// paths tagged with `filter=lfs` need to be smudged by downloading from lfs server
std::vector<AttrRule> rules = {};

// Fills in rootUrl (from the repo's remote), token and rules (parsed from
// gitattributesContent), then sets ready to true.
void init(git_repository* repo, std::string gitattributesContent);

// Looks through rules from last to first and uses the first matching rule that
// defines attrName: returns true unless that rule's value is "false".
// Returns false when no matching rule defines attrName.
bool hasAttribute(const std::string& path, const std::string& attrName) const;

// Treats `data` as the content of an lfs pointer file for `path`, asks the server
// where the object lives (via fetch_urls) and returns the downloaded bytes.
std::string download(const std::string& data, const std::string& path) const;

// POSTs the oid/size of each metadata entry to the batch endpoint derived from rootUrl.
// NOTE(review): presumably returns one json object per requested oid — confirm
// against the response parsing in the definition.
std::vector<nlohmann::json> fetch_urls(const std::vector<Md> &metadatas) const;
};
// Tests whether `path` matches a (simplified) .gitattributes `pattern`.
//
// Supported syntax:
//   - `dir/**` matches every path located below `dir/` (a bare `/**` matches everything);
//     the directory part is compared literally.
//   - `*` matches any run of characters, including '/' and the empty run, so
//     `*.ext` matches `a/b/file.ext`.
//   - every other character must match literally.
//
// Differences from the previous implementation (all of them bug fixes):
//   - a `*` at the very end of the pattern no longer reads past the end of the view;
//   - `*` backtracks, so `a*b` matches `aXbYb`;
//   - a trailing `*` may match the empty string (`a*` matches `a`);
//   - `dir/**` no longer matches siblings that merely share the prefix (`dir_old/x`).
bool matchesPattern(std::string_view path, std::string_view pattern) {
    constexpr std::string_view recursiveSuffix = "/**";

    if (pattern.size() >= recursiveSuffix.size()
        && pattern.compare(pattern.size() - recursiveSuffix.size(),
                           recursiveSuffix.size(), recursiveSuffix) == 0) {
        const auto dir = pattern.substr(0, pattern.size() - recursiveSuffix.size());
        if (dir.empty())
            return true;
        // Require the '/' after the directory so that "dir/**" does not match "dir_old/x".
        return path.size() > dir.size()
            && path.compare(0, dir.size(), dir) == 0
            && path[dir.size()] == '/';
    }

    size_t patternPos = 0;
    size_t pathPos = 0;
    // Position of the most recent '*' and the path position it is currently
    // assumed to stop at; used to retry with a longer match on mismatch.
    size_t starPos = std::string_view::npos;
    size_t starPathPos = 0;

    while (pathPos < path.size()) {
        if (patternPos < pattern.size() && pattern[patternPos] == '*') {
            starPos = patternPos++;
            starPathPos = pathPos;
        } else if (patternPos < pattern.size() && pattern[patternPos] == path[pathPos]) {
            ++patternPos;
            ++pathPos;
        } else if (starPos != std::string_view::npos) {
            // Mismatch after a '*': let that '*' swallow one more character and retry.
            patternPos = starPos + 1;
            pathPos = ++starPathPos;
        } else {
            return false;
        }
    }

    // Path is exhausted; any remaining '*' can match the empty string.
    while (patternPos < pattern.size() && pattern[patternPos] == '*')
        ++patternPos;

    return patternPos == pattern.size();
}
// libcurl write callback: appends the received chunk (size * nmemb bytes) to
// the std::string passed as user data and reports how many bytes were consumed.
// Returning 0 on allocation failure signals the error to the caller of the callback.
static size_t WriteCallback(void *contents, size_t size, size_t nmemb,
                            std::string *s) {
    const size_t chunkBytes = size * nmemb;
    try {
        s->append(static_cast<const char *>(contents), chunkBytes);
        return chunkBytes;
    } catch (const std::bad_alloc &) {
        // Out of memory while growing the buffer
        return 0;
    }
}
std::string download_to_memory(const std::string &url, const std::string &auth_header) {
CURL *curl;
CURLcode res;
std::string response_string;
curl = curl_easy_init();
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
struct curl_slist *headers = nullptr;
const std::string auth_header_prepend = "Authorization: " + auth_header;
headers = curl_slist_append(headers, auth_header_prepend.c_str());
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
res = curl_easy_perform(curl);
if (res != CURLE_OK) {
std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror(res)
<< std::endl;
}
curl_slist_free_all(headers);
curl_easy_cleanup(curl);
}
return response_string;
}
// Generic parser for a single line of a .gitattributes file,
// e.g. `*.beeg filter=lfs diff=lfs merge=lfs -text`.
//
// (libgit2's own attribute parsing could not be made to work with
// GitSourceAccessor, although it works for GitExportIgnoreSourceAccessor; TODO.)
//
// Returns a rule whose `pattern` is the first whitespace-delimited field and
// whose `attributes` map holds one entry per following field:
//   `name=value` -> value
//   `name`       -> "true"
//   `-name`      -> "false"
//   `!name`      -> "false"  (git's "unspecified"; stored as "false" so that a
//                             later `!name` overrides an earlier rule that set it)
// Leading and trailing blanks are ignored. Blank lines, comment lines (`#`) and
// lines consisting of a pattern only yield a rule with an empty `pattern`.
AttrRule parseGitAttrLine(std::string_view line)
{
    constexpr std::string_view blank = " \t";
    AttrRule rule;

    // Skip leading blanks before deciding whether this is a comment or empty line.
    const size_t start = line.find_first_not_of(blank);
    if (start == std::string_view::npos)
        return rule;
    line.remove_prefix(start);
    if (line.front() == '#')
        return rule;

    const size_t patternEnd = line.find_first_of(blank);
    if (patternEnd == std::string_view::npos)
        return rule;
    rule.pattern = std::string(line.substr(0, patternEnd));

    std::string_view rest = line.substr(patternEnd);
    while (true) {
        const size_t attrStart = rest.find_first_not_of(blank);
        if (attrStart == std::string_view::npos)
            break;
        rest.remove_prefix(attrStart);

        const size_t attrEnd = rest.find_first_of(blank);
        const std::string_view attr = rest.substr(0, attrEnd);

        if (attr.front() == '-' || attr.front() == '!') {
            const auto name = attr.substr(1);
            if (!name.empty())
                rule.attributes[std::string(name)] = "false";
        } else if (const auto equalsPos = attr.find('='); equalsPos != std::string_view::npos) {
            rule.attributes[std::string(attr.substr(0, equalsPos))] =
                std::string(attr.substr(equalsPos + 1));
        } else {
            rule.attributes[std::string(attr)] = "true";
        }

        if (attrEnd == std::string_view::npos)
            break;
        rest.remove_prefix(attrEnd);
    }

    return rule;
}
std::vector<AttrRule> parseGitAttrFile(std::string_view content)
{
std::vector<AttrRule> rules;
size_t pos = 0;
while (pos < content.length()) {
size_t eol = content.find('\n', pos);
std::string_view line;
if (eol == std::string_view::npos) {
line = content.substr(pos);
pos = content.length();
} else {
line = content.substr(pos, eol - pos);
pos = eol + 1;
}
// Trim carriage return if present
if (!line.empty() && line.back() == '\r')
line.remove_suffix(1);
// Skip empty lines and comments
if (line.empty() || line[0] == '#')
continue;
auto rule = parseGitAttrLine(line);
if (!rule.pattern.empty())
rules.push_back(std::move(rule));
}
return rules;
}
std::string exec_command(const std::string &cmd) { std::string exec_command(const std::string &cmd) {
std::cout << cmd << std::endl; std::cout << cmd << std::endl;
std::string data; std::string data;
@ -57,56 +241,46 @@ std::string get_lfs_api_token(const std::string &host,
return res; return res;
} }
std::tuple<std::string, std::string, std::string> std::string get_lfs_endpoint_url(git_repository *repo) {
get_lfs_endpoint_url(git_repository *repo) {
int err; int err;
git_config *config = nullptr; git_remote* remote = NULL;
// using git_repository_config instead of _snapshot makes the err = git_remote_lookup(&remote, repo, "origin"); // TODO: we just created this repo so I think origin is always the only remote, but should double check
// git_config_get_string call fail why????
if (git_repository_config_snapshot(&config, repo) < 0) {
// Handle error if necessary
git_config_free(config);
std::cerr << "no config" << std::endl;
return {"", "", ""};
}
const char *url_c_str;
err = git_config_get_string(&url_c_str, config, "remote.origin.url");
if (err < 0) { if (err < 0) {
// Handle error if necessary std::cerr << " failed git_remote_lookup with: " << err << std::endl;
std::cerr << "no remote.origin.url: " << err << std::endl; return "";
git_config_free(config);
return {"", "", ""};
}
std::string url = std::string(url_c_str);
std::cerr << "url_c_str: " << url_c_str << std::endl;
if (url.back() == '/') {
url.pop_back();
} }
// idk what this was for man
// if (url.compare(url.length() - 4, 4, ".git") != 0) {
// url += "/info/lfs";
//} else {
// url += ".git/info/lfs";
//}
// Parse the URL const char *url_c_str = git_remote_url(remote);
std::string scheme, host, path; if (!url_c_str) {
if (url.find("https://") != 0) { std::cerr << "no remote url ";
size_t at_pos = url.find('@'); return "";
if (at_pos != std::string::npos) {
host = url.substr(at_pos + 1);
size_t colon_pos = host.find(':');
path = host.substr(colon_pos + 1);
host = host.substr(0, colon_pos);
scheme = "https";
url = scheme + "://" + host + "/" + path;
}
} }
return std::make_tuple(url, host, path); return std::string(url_c_str);
}
// splits url into (hostname, path)
std::tuple<std::string, std::string> split_url(const std::string& url_in) {
CURLU *url = curl_url();
if (curl_url_set(url, CURLUPART_URL, url_in.c_str(), 0) != CURLUE_OK) {
std::cerr << "Failed to set URL\n";
return {"", ""};
}
char *hostname;
char *path;
if (curl_url_get(url, CURLUPART_HOST, &hostname, 0) != CURLUE_OK) {
std::cerr << "no hostname" << std::endl;
}
if (curl_url_get(url, CURLUPART_PATH, &path, 0) != CURLUE_OK) {
std::cerr << "no path" << std::endl;
}
return std::make_tuple(std::string(hostname), std::string(path));
} }
std::string git_attr_value_to_string(git_attr_value_t value) { std::string git_attr_value_to_string(git_attr_value_t value) {
@ -201,7 +375,7 @@ std::string get_obj_content(git_repository *repo, std::string path) {
return std::string(static_cast<const char *>(content)); return std::string(static_cast<const char *>(content));
} }
std::tuple<std::string, size_t> parse_lfs_metadata(const std::string &content) { Md parse_lfs_metadata(const std::string &content, const std::string &filename) {
std::istringstream iss(content); std::istringstream iss(content);
std::string line; std::string line;
std::string oid; std::string oid;
@ -223,36 +397,33 @@ std::tuple<std::string, size_t> parse_lfs_metadata(const std::string &content) {
} }
} }
return std::make_tuple(oid, size); return Md{filename, oid, size};
} }
// path, oid, size void Fetch::init(git_repository* repo, std::string gitattributesContent) {
std::vector<Md> parse_lfs_files(git_repository *repo) { this->rootUrl = lfs::get_lfs_endpoint_url(repo);
const auto files = find_lfs_files(repo); const auto [host, path] = lfs::split_url(rootUrl);
std::vector<Md> out; this->token = lfs::get_lfs_api_token(host, path);
for (const auto &file : *files) { this->rules = lfs::parseGitAttrFile(gitattributesContent);
std::cerr << file; this->ready = true;
auto content = get_obj_content(repo, file);
auto [oid, size] = parse_lfs_metadata(content);
out.push_back(Md{file, oid, size});
}
return out;
} }
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, bool Fetch::hasAttribute(const std::string& path, const std::string& attrName) const
std::string *s) { {
size_t newLength = size * nmemb; // Iterate rules in reverse order (last matching rule wins)
try { for (auto it = rules.rbegin(); it != rules.rend(); ++it) {
s->append((char *)contents, newLength); if (matchesPattern(path, it->pattern)) {
} catch (std::bad_alloc &e) { auto attr = it->attributes.find(attrName);
// Handle memory bad_alloc error if (attr != it->attributes.end()) {
return 0; // Found a matching rule with this attribute
} return attr->second != "false";
return newLength; }
}
}
return false;
} }
nlohmann::json oids_to_payload(const std::vector<Md> &items) { nlohmann::json mdToPayload(const std::vector<Md> &items) {
nlohmann::json j_array = nlohmann::json::array(); nlohmann::json j_array = nlohmann::json::array();
for (const auto &md : items) { for (const auto &md : items) {
j_array.push_back({{"oid", md.oid}, {"size", md.size}}); j_array.push_back({{"oid", md.oid}, {"size", md.size}});
@ -260,23 +431,20 @@ nlohmann::json oids_to_payload(const std::vector<Md> &items) {
return j_array; return j_array;
} }
std::vector<nlohmann::json> fetch_urls(const std::string &lfs_url, std::vector<nlohmann::json> Fetch::fetch_urls(const std::vector<Md> &metadatas) const {
const std::string &token,
const std::vector<Md> &metadatas) {
std::vector<nlohmann::json> objects; std::vector<nlohmann::json> objects;
nlohmann::json oid_list = oids_to_payload(metadatas); nlohmann::json oid_list = mdToPayload(metadatas);
nlohmann::json data = { nlohmann::json data = {
{"operation", "download"}, {"operation", "download"},
}; };
data["objects"] = oid_list; data["objects"] = oid_list;
auto data_str = data.dump(); auto data_str = data.dump();
std::cerr << "data_str: " + data_str << std::endl;
CURL *curl = curl_easy_init(); CURL *curl = curl_easy_init();
std::string response_string; std::string response_string;
std::string header_string; std::string header_string;
auto lfs_url_batch = lfs_url + "/info/lfs/objects/batch"; auto lfs_url_batch = rootUrl + "/info/lfs/objects/batch";
curl_easy_setopt(curl, CURLOPT_URL, lfs_url_batch.c_str()); curl_easy_setopt(curl, CURLOPT_URL, lfs_url_batch.c_str());
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data_str.c_str()); curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data_str.c_str());
@ -289,7 +457,6 @@ std::vector<nlohmann::json> fetch_urls(const std::string &lfs_url,
headers = curl_slist_append(headers, "Accept: application/vnd.git-lfs+json"); headers = curl_slist_append(headers, "Accept: application/vnd.git-lfs+json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POST, 1L); curl_easy_setopt(curl, CURLOPT_POST, 1L);
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string);
@ -303,7 +470,6 @@ std::vector<nlohmann::json> fetch_urls(const std::string &lfs_url,
curl_slist_free_all(headers); curl_slist_free_all(headers);
try { try {
std::cerr << "resp: " << response_string << std::endl;
// example resp here: // example resp here:
// {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic // {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic
// Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]} // Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]}
@ -322,77 +488,21 @@ std::vector<nlohmann::json> fetch_urls(const std::string &lfs_url,
return objects; return objects;
} }
static size_t WriteData(void *ptr, size_t size, size_t nmemb, void *stream) { std::string Fetch::download(const std::string& pointer_data, const std::string& path) const {
size_t written = fwrite(ptr, size, nmemb, (FILE *)stream); const auto md = parse_lfs_metadata(pointer_data, path);
return written; std::vector<Md> v_mds;
v_mds.push_back(md);
const auto obj_urls = fetch_urls(v_mds);
const auto obj = obj_urls[0];
std::string oid = obj["oid"];
std::string ourl = obj["actions"]["download"]["href"];
std::string auth_header =
obj["actions"]["download"]["header"]["Authorization"];
const auto data = lfs::download_to_memory(ourl, auth_header);
return data;
} }
void download_file(const std::string &url, const std::string &auth_header,
const std::string &output_filename) {
CURL *curl;
FILE *fp;
CURLcode res;
curl = curl_easy_init();
if (curl) {
fp = fopen(output_filename.c_str(), "wb");
if (fp == nullptr) {
std::cerr << "Failed to open file for writing: " << output_filename
<< std::endl;
return;
}
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteData);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
struct curl_slist *headers = nullptr;
const std::string auth_header_prepend = "Authorization: " + auth_header;
headers = curl_slist_append(headers, auth_header_prepend.c_str());
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
res = curl_easy_perform(curl);
if (res != CURLE_OK) {
std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror(res)
<< std::endl;
}
curl_slist_free_all(headers);
curl_easy_cleanup(curl);
fclose(fp);
}
}
void download_files(nlohmann::json objects, std::string dir) {
for (auto &obj : objects) {
std::string oid = obj["oid"];
std::string url = obj["actions"]["download"]["href"];
std::string auth_header =
obj["actions"]["download"]["header"]["Authorization"];
download_file(url, auth_header, dir + "/" + oid);
}
}
// moves files from temporary download dir to final location
void move_files(const std::vector<Md> &metadata,
const std::string &sourceDir, const std::string &repoRoot) {
namespace fs = std::filesystem;
for (const auto &md : metadata) {
fs::path srcFile =
fs::path(sourceDir) / md.oid; // Construct the source file path
fs::path destFile =
fs::path(repoRoot) / md.path; // Construct the destination file path
// Move the file
try {
fs::rename(srcFile, destFile);
} catch (const fs::filesystem_error &e) {
std::cerr << "Error moving file " << srcFile << " to " << destFile << ": "
<< e.what() << std::endl;
}
}
}
} // namespace lfs } // namespace lfs

View file

@ -328,8 +328,6 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) override; std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) override;
void smudgeLfs() override;
std::string resolveSubmoduleUrl(const std::string & url) override std::string resolveSubmoduleUrl(const std::string & url) override
{ {
git_buf buf = GIT_BUF_INIT; git_buf buf = GIT_BUF_INIT;
@ -358,9 +356,9 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
/** /**
* A 'GitSourceAccessor' with no regard for export-ignore or any other transformations. * A 'GitSourceAccessor' with no regard for export-ignore or any other transformations.
*/ */
ref<GitSourceAccessor> getRawAccessor(const Hash & rev); ref<GitSourceAccessor> getRawAccessor(const Hash & rev, bool smudgeLfs);
ref<SourceAccessor> getAccessor(const Hash & rev, bool exportIgnore) override; ref<SourceAccessor> getAccessor(const Hash & rev, bool exportIgnore, bool smudgeLfs) override;
ref<SourceAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override; ref<SourceAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override;
@ -476,7 +474,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
Hash treeHashToNarHash(const Hash & treeHash) override Hash treeHashToNarHash(const Hash & treeHash) override
{ {
auto accessor = getAccessor(treeHash, false); auto accessor = getAccessor(treeHash, false, false);
fetchers::Cache::Key cacheKey{"treeHashToNarHash", {{"treeHash", treeHash.gitRev()}}}; fetchers::Cache::Key cacheKey{"treeHashToNarHash", {{"treeHash", treeHash.gitRev()}}};
@ -517,24 +515,16 @@ ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create,
* Raw git tree input accessor. * Raw git tree input accessor.
*/ */
static int attr_callback(const char *name, const char *value, void *payload) {
warn("got an attribute! it's %s = %s", name, value);
return 0;
//// Check if the attribute is a filter attribute
//if (strncmp(name, "filter.", 7) == 0) {
// printf("Filter attribute: %s\n", name);
//}
//return 0; // Continue iterating
}
struct GitSourceAccessor : SourceAccessor struct GitSourceAccessor : SourceAccessor
{ {
ref<GitRepoImpl> repo; ref<GitRepoImpl> repo;
Object root; Object root;
std::optional<lfs::Fetch> lfsFetch;
GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev) GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev, std::optional<lfs::Fetch> lfsFetch)
: repo(repo_) : repo(repo_)
, root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get())) , root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get()))
, lfsFetch(lfsFetch)
{ {
} }
@ -542,20 +532,24 @@ struct GitSourceAccessor : SourceAccessor
{ {
auto blob = getBlob(path, symlink); auto blob = getBlob(path, symlink);
int error; auto data = std::string((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get()));
// read filters here, perform smudge
// TODO: fix git_attr_foreach here, it can't seem to parse `.gitattributes` here even though it should if (path != CanonPath(".gitattributes") && lfsFetch) {
warn("on path %s", path.abs().c_str()); auto& _lfsFetch = *lfsFetch;
if ((error = git_attr_foreach(&(*(*repo).repo), GIT_ATTR_CHECK_INCLUDE_HEAD, path.rel_c_str(), attr_callback, NULL)) < 0) { if (!_lfsFetch.ready) {
warn("git_attr_foreach: %s", git_error_last()->message); const auto contents = readFile(CanonPath(".gitattributes"));
} _lfsFetch.init(*repo, contents);
};
if (_lfsFetch.hasAttribute(path.abs(), "filter")) {
return _lfsFetch.download(data, path.abs());
}
}
auto data = std::string_view((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get())); return data;
return std::string(data);
} }
// TODO(b-camacho): implement callback-based readFile override
std::string readFile(const CanonPath & path) override std::string readFile(const CanonPath & path) override
{ {
return readBlob(path, false); return readBlob(path, false);
@ -730,6 +724,7 @@ struct GitSourceAccessor : SourceAccessor
return tree; return tree;
} }
Blob getBlob(const CanonPath & path, bool expectSymlink) Blob getBlob(const CanonPath & path, bool expectSymlink)
{ {
if (!expectSymlink && git_object_type(root.get()) == GIT_OBJECT_BLOB) if (!expectSymlink && git_object_type(root.get()) == GIT_OBJECT_BLOB)
@ -748,6 +743,7 @@ struct GitSourceAccessor : SourceAccessor
auto entry = need(path); auto entry = need(path);
if (git_tree_entry_type(entry) != GIT_OBJECT_BLOB) if (git_tree_entry_type(entry) != GIT_OBJECT_BLOB)
notExpected(); notExpected();
@ -769,6 +765,7 @@ struct GitSourceAccessor : SourceAccessor
}; };
struct GitExportIgnoreSourceAccessor : CachingFilteringSourceAccessor { struct GitExportIgnoreSourceAccessor : CachingFilteringSourceAccessor {
ref<GitRepoImpl> repo; ref<GitRepoImpl> repo;
std::optional<Hash> rev; std::optional<Hash> rev;
@ -1035,16 +1032,24 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
} }
}; };
ref<GitSourceAccessor> GitRepoImpl::getRawAccessor(const Hash & rev) ref<GitSourceAccessor> GitRepoImpl::getRawAccessor(const Hash & rev, bool smudgeLfs)
{ {
auto self = ref<GitRepoImpl>(shared_from_this()); auto self = ref<GitRepoImpl>(shared_from_this());
return make_ref<GitSourceAccessor>(self, rev); if (smudgeLfs) {
auto lfsFetch = lfs::Fetch{};
return make_ref<GitSourceAccessor>(self, rev, std::make_optional(lfsFetch));
}
else {
return make_ref<GitSourceAccessor>(self, rev, std::nullopt);
}
} }
ref<SourceAccessor> GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore) ref<SourceAccessor> GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore, bool smudgeLfs)
{ {
// exportIgnore does not work with git-lfs smudging
//assert(!(exportIgnore && smudgeLfs));
auto self = ref<GitRepoImpl>(shared_from_this()); auto self = ref<GitRepoImpl>(shared_from_this());
ref<GitSourceAccessor> rawGitAccessor = getRawAccessor(rev); ref<GitSourceAccessor> rawGitAccessor = getRawAccessor(rev, smudgeLfs);
if (exportIgnore) { if (exportIgnore) {
return make_ref<GitExportIgnoreSourceAccessor>(self, rawGitAccessor, rev); return make_ref<GitExportIgnoreSourceAccessor>(self, rawGitAccessor, rev);
} }
@ -1083,7 +1088,7 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
/* Read the .gitmodules files from this revision. */ /* Read the .gitmodules files from this revision. */
CanonPath modulesFile(".gitmodules"); CanonPath modulesFile(".gitmodules");
auto accessor = getAccessor(rev, exportIgnore); auto accessor = getAccessor(rev, exportIgnore, false);
if (!accessor->pathExists(modulesFile)) return {}; if (!accessor->pathExists(modulesFile)) return {};
/* Parse it and get the revision of each submodule. */ /* Parse it and get the revision of each submodule. */
@ -1094,7 +1099,7 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
std::vector<std::tuple<Submodule, Hash>> result; std::vector<std::tuple<Submodule, Hash>> result;
auto rawAccessor = getRawAccessor(rev); auto rawAccessor = getRawAccessor(rev, false);
for (auto & submodule : parseSubmodules(pathTemp)) { for (auto & submodule : parseSubmodules(pathTemp)) {
auto rev = rawAccessor->getSubmoduleRev(submodule.path); auto rev = rawAccessor->getSubmoduleRev(submodule.path);
@ -1104,20 +1109,6 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
return result; return result;
} }
void GitRepoImpl::smudgeLfs() {
const auto metadatas = lfs::parse_lfs_files(&(*repo));
const auto [url, host, path] = lfs::get_lfs_endpoint_url(&(*repo));
// TODO: handle public lfs repos without ssh?
const auto token = lfs::get_lfs_api_token(host, path);
auto urls = lfs::fetch_urls(url, token, metadatas);
std::cerr << "Got urls! ";
for (const auto &url : urls) {
std::cerr << url << std::endl;
}
std::cerr << "Fetching actual data" << std::endl;
lfs::download_files(urls, this->path);
}
ref<GitRepo> getTarballCache() ref<GitRepo> getTarballCache()
{ {
static auto repoDir = std::filesystem::path(getCacheDir()) / "nix" / "tarball-cache"; static auto repoDir = std::filesystem::path(getCacheDir()) / "nix" / "tarball-cache";

View file

@ -70,13 +70,11 @@ struct GitRepo
*/ */
virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) = 0; virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) = 0;
virtual void smudgeLfs() = 0;
virtual std::string resolveSubmoduleUrl(const std::string & url) = 0; virtual std::string resolveSubmoduleUrl(const std::string & url) = 0;
virtual bool hasObject(const Hash & oid) = 0; virtual bool hasObject(const Hash & oid) = 0;
virtual ref<SourceAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0; virtual ref<SourceAccessor> getAccessor(const Hash & rev, bool exportIgnore, bool smudgeLfs) = 0;
virtual ref<SourceAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0; virtual ref<SourceAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0;

View file

@ -640,7 +640,8 @@ struct GitInputScheme : InputScheme
verifyCommit(input, repo); verifyCommit(input, repo);
bool exportIgnore = getExportIgnoreAttr(input); bool exportIgnore = getExportIgnoreAttr(input);
auto accessor = repo->getAccessor(rev, exportIgnore); bool smudgeLfs = getLfsAttr(input);
auto accessor = repo->getAccessor(rev, exportIgnore, smudgeLfs);
accessor->setPathDisplay("«" + input.to_string() + "»"); accessor->setPathDisplay("«" + input.to_string() + "»");
@ -677,9 +678,7 @@ struct GitInputScheme : InputScheme
} }
if (getLfsAttr(input)) { if (getLfsAttr(input)) {
warn("lfs attr set on %s", input.to_string()); printTalkative("lfs=1 on %s", input.to_string());
// urlencoded `?lfs=1` param is set,
repo->smudgeLfs();
} }
assert(!origRev || origRev == rev); assert(!origRev || origRev == rev);

View file

@ -293,7 +293,7 @@ struct GitArchiveInputScheme : InputScheme
#endif #endif
input.attrs.insert_or_assign("lastModified", uint64_t(tarballInfo.lastModified)); input.attrs.insert_or_assign("lastModified", uint64_t(tarballInfo.lastModified));
auto accessor = getTarballCache()->getAccessor(tarballInfo.treeHash, false); auto accessor = getTarballCache()->getAccessor(tarballInfo.treeHash, false, false);
accessor->setPathDisplay("«" + input.to_string() + "»"); accessor->setPathDisplay("«" + input.to_string() + "»");

View file

@ -117,7 +117,7 @@ DownloadTarballResult downloadTarball(
.treeHash = treeHash, .treeHash = treeHash,
.lastModified = (time_t) getIntAttr(infoAttrs, "lastModified"), .lastModified = (time_t) getIntAttr(infoAttrs, "lastModified"),
.immutableUrl = maybeGetStrAttr(infoAttrs, "immutableUrl"), .immutableUrl = maybeGetStrAttr(infoAttrs, "immutableUrl"),
.accessor = getTarballCache()->getAccessor(treeHash, false), .accessor = getTarballCache()->getAccessor(treeHash, false, false),
}; };
}; };

View file

@ -78,7 +78,7 @@ TEST_F(GitUtilsTest, sink_basic)
// sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello")); // sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello"));
auto result = repo->dereferenceSingletonDirectory(sink->sync()); auto result = repo->dereferenceSingletonDirectory(sink->sync());
auto accessor = repo->getAccessor(result, false); auto accessor = repo->getAccessor(result, false, false);
auto entries = accessor->readDirectory(CanonPath::root); auto entries = accessor->readDirectory(CanonPath::root);
ASSERT_EQ(entries.size(), 5); ASSERT_EQ(entries.size(), 5);
ASSERT_EQ(accessor->readFile(CanonPath("hello")), "hello world"); ASSERT_EQ(accessor->readFile(CanonPath("hello")), "hello world");