1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-07-07 01:51:47 +02:00

Fix some errors, and add tests for them

This commit is contained in:
Leandro Reina 2024-11-20 17:53:06 +01:00
parent b48dacd50c
commit 79d41062d0
3 changed files with 223 additions and 74 deletions

View file

@ -117,7 +117,7 @@ struct Fetch {
void init(git_repository* repo, const std::string& gitattributesContent);
bool hasAttribute(const std::string& path, const std::string& attrName, const std::string& attrValue) const;
void fetch(const std::string& pointerFileContents, const std::string& pointerFilePath, Sink& sink) const;
void fetch(const git_blob* pointerBlob, const std::string& pointerFilePath, Sink& sink) const;
std::vector<nlohmann::json> fetchUrls(const std::vector<Md> &metadatas) const;
};
@ -185,8 +185,6 @@ void downloadToSink(const std::string &url, const std::string &authHeader, Sink
}
std::string getLfsApiToken(const GitUrl& u) {
const auto [maybeUserAndHost, path] = u.toSsh();
auto [status, output] = runProgram(RunOptions {
@ -216,7 +214,6 @@ std::string getLfsEndpointUrl(git_repository *repo) {
return "";
}
const char *url_c_str = git_remote_url(remote);
if (!url_c_str) {
return "";
@ -241,35 +238,60 @@ std::string git_attr_value_to_string(git_attr_value_t value) {
}
Md parseLfsMetadata(const std::string &content, const std::string &filename) {
std::optional<Md> parseLfsMetadata(const std::string &content, const std::string &filename) {
// https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md
//
// example git-lfs pointer file:
// version https://git-lfs.github.com/spec/v1
// oid sha256:f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf
// size 10000000
std::istringstream iss(content);
std::string line;
std::string oid;
size_t size = 0;
// (ending \n)
while (getline(iss, line)) {
std::size_t pos = line.find("oid sha256:");
if (pos != std::string::npos) {
oid = line.substr(pos + 11); // skip "oid sha256:"
continue;
}
pos = line.find("size ");
if (pos != std::string::npos) {
std::string sizeStr =
line.substr(pos + 5); // skip "size "
size = std::stol(sizeStr);
continue;
}
if (!content.starts_with("version ")) {
// Invalid pointer file
return std::nullopt;
}
return Md{filename, oid, size};
}
if (!content.starts_with("version https://git-lfs.github.com/spec/v1")) {
// In case there's new spec versions in the future, but for now only v1 exists
debug("Invalid version found on potential lfs pointer file, skipping");
return std::nullopt;
}
std::istringstream iss(content);
std::string line;
std::string oid;
std::string size;
while (getline(iss, line)) {
if (line.starts_with("version ")) {
continue;
}
if (line.starts_with("oid sha256:")) {
oid = line.substr(11); // skip "oid sha256:"
continue;
}
if (line.starts_with("size ")) {
size = line.substr(5); // skip "size "
continue;
}
debug("Custom extension '%s' found, ignoring", line);
}
if (oid.length() != 64 || !std::all_of(oid.begin(), oid.end(), ::isxdigit)) {
debug("Invalid sha256 %s, skipping", oid);
return std::nullopt;
}
if (size.length() == 0 || !std::all_of(size.begin(), size.end(), ::isdigit)) {
debug("Invalid size %s, skipping", size);
return std::nullopt;
}
return std::make_optional(Md{filename, oid, std::stoul(size)});
}
// there's already a ParseURL here https://github.com/b-camacho/nix/blob/ef6fa54e05cd4134ec41b0d64c1a16db46237f83/src/libutil/url.cc#L13
@ -386,7 +408,6 @@ void Fetch::init(git_repository* repo, const std::string& gitattributesContent)
}
bool Fetch::hasAttribute(const std::string& path, const std::string& attrName, const std::string& attrValue) const
{
for (auto it = rules.rbegin(); it != rules.rend(); ++it) {
@ -481,10 +502,32 @@ std::vector<nlohmann::json> Fetch::fetchUrls(const std::vector<Md> &metadatas) c
}
}
void Fetch::fetch(const std::string& pointerFileContents, const std::string& pointerFilePath, Sink& sink) const {
void Fetch::fetch(const git_blob* pointerBlob, const std::string& pointerFilePath, Sink& sink) const {
constexpr size_t chunkSize = 128 * 1024; // 128 KiB
auto size = git_blob_rawsize(pointerBlob);
if (size >= 1024) {
debug("Skip git-lfs, pointer file too large");
warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath);
for (size_t offset = 0; offset < size; offset += chunkSize) {
sink(std::string((const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset)));
}
return;
}
const auto pointerFileContents = std::string((const char *) git_blob_rawcontent(pointerBlob), size);
const auto md = parseLfsMetadata(std::string(pointerFileContents), std::string(pointerFilePath));
if (md == std::nullopt) {
debug("Skip git-lfs, invalid pointer file");
warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath);
for (size_t offset = 0; offset < size; offset += chunkSize) {
sink(std::string((const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset)));
}
return;
}
std::vector<Md> vMds;
vMds.push_back(md);
vMds.push_back(md.value());
const auto objUrls = fetchUrls(vMds);
const auto obj = objUrls[0];
@ -507,4 +550,3 @@ void Fetch::fetch(const std::string& pointerFileContents, const std::string& poi
} // namespace lfs
} // namespace nix

View file

@ -676,29 +676,31 @@ struct GitSourceAccessor : SourceAccessor
, root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get()))
, lfsFetch(lfsFetch)
{
if (lfsFetch && !lookup(CanonPath(".gitattributes"))) {
warn("Requested to fetch lfs files, but no .gitattributes file was found, ignoring");
}
}
/**
 * Return the contents of the blob at `path`.
 *
 * When `lfsFetch` is set, `path` is not `.gitattributes` itself, and a
 * `.gitattributes` entry can be looked up, the fetcher is initialised from
 * that file on first use. If `path` then carries the `filter=lfs` attribute,
 * the blob is handed to `lfsFetch->fetch` and whatever it writes to the sink
 * is returned. In every other case the raw blob bytes are returned.
 */
std::string readBlob(const CanonPath & path, bool symlink)
{
    const auto blob = getBlob(path, symlink);

    if (lfsFetch && path != CanonPath(".gitattributes") && lookup(CanonPath(".gitattributes"))) {
        auto& _lfsFetch = *lfsFetch;
        if (!_lfsFetch.ready) {
            // Attribute rules are loaded once, the first time they are needed.
            const auto contents = readFile(CanonPath(".gitattributes"));
            _lfsFetch.init(*repo, contents);
        }

        auto pathStr = std::string(path.rel());
        if (_lfsFetch.hasAttribute(pathStr, "filter", "lfs")) {
            StringSink s;
            _lfsFetch.fetch(blob.get(), pathStr, s);
            return s.s;
        }
    }

    // Either lfs is not in use or this file should not be smudged: only now
    // copy the blob bytes out, so the lfs path above never pays for the copy.
    return std::string(
        static_cast<const char *>(git_blob_rawcontent(blob.get())),
        git_blob_rawsize(blob.get()));
}
void readFile(
@ -709,11 +711,7 @@ struct GitSourceAccessor : SourceAccessor
auto size = git_blob_rawsize(blob.get());
sizeCallback(size);
// if lfs, this is just a pointer file
// if not lfs then it's not big either way
auto contents = std::string((const char *) git_blob_rawcontent(blob.get()), size);
if (lfsFetch && path != CanonPath(".gitattributes")) {
if (lfsFetch && path != CanonPath(".gitattributes") && lookup(CanonPath(".gitattributes"))) {
auto& _lfsFetch = *lfsFetch;
if (!_lfsFetch.ready) {
const auto contents = readFile(CanonPath(".gitattributes"));
@ -722,13 +720,15 @@ struct GitSourceAccessor : SourceAccessor
auto pathStr = std::string(path.rel());
if (_lfsFetch.hasAttribute(pathStr, "filter", "lfs")) {
_lfsFetch.fetch(contents, pathStr, sink);
_lfsFetch.fetch(blob.get(), pathStr, sink);
return;
}
}
// either not using lfs or file should not be smudged
sink(contents);
constexpr size_t chunkSize = 128 * 1024; // 128 KiB
for (size_t offset = 0; offset < size; offset += chunkSize) {
sink(std::string((const char *) git_blob_rawcontent(blob.get()) + offset, std::min(chunkSize, size - offset)));
}
}
std::string readFile(const CanonPath & path) override