mirror of
https://github.com/NixOS/nix
synced 2025-06-24 22:11:15 +02:00
Fix some errors, and add tests for them
This commit is contained in:
parent
b48dacd50c
commit
79d41062d0
3 changed files with 223 additions and 74 deletions
|
@ -117,7 +117,7 @@ struct Fetch {
|
|||
|
||||
void init(git_repository* repo, const std::string& gitattributesContent);
|
||||
bool hasAttribute(const std::string& path, const std::string& attrName, const std::string& attrValue) const;
|
||||
void fetch(const std::string& pointerFileContents, const std::string& pointerFilePath, Sink& sink) const;
|
||||
void fetch(const git_blob* pointerBlob, const std::string& pointerFilePath, Sink& sink) const;
|
||||
std::vector<nlohmann::json> fetchUrls(const std::vector<Md> &metadatas) const;
|
||||
};
|
||||
|
||||
|
@ -185,8 +185,6 @@ void downloadToSink(const std::string &url, const std::string &authHeader, Sink
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
std::string getLfsApiToken(const GitUrl& u) {
|
||||
const auto [maybeUserAndHost, path] = u.toSsh();
|
||||
auto [status, output] = runProgram(RunOptions {
|
||||
|
@ -216,7 +214,6 @@ std::string getLfsEndpointUrl(git_repository *repo) {
|
|||
return "";
|
||||
}
|
||||
|
||||
|
||||
const char *url_c_str = git_remote_url(remote);
|
||||
if (!url_c_str) {
|
||||
return "";
|
||||
|
@ -241,35 +238,60 @@ std::string git_attr_value_to_string(git_attr_value_t value) {
|
|||
}
|
||||
|
||||
|
||||
Md parseLfsMetadata(const std::string &content, const std::string &filename) {
|
||||
std::optional<Md> parseLfsMetadata(const std::string &content, const std::string &filename) {
|
||||
// https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md
|
||||
//
|
||||
// example git-lfs pointer file:
|
||||
// version https://git-lfs.github.com/spec/v1
|
||||
// oid sha256:f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf
|
||||
// size 10000000
|
||||
std::istringstream iss(content);
|
||||
std::string line;
|
||||
std::string oid;
|
||||
size_t size = 0;
|
||||
// (ending \n)
|
||||
|
||||
while (getline(iss, line)) {
|
||||
std::size_t pos = line.find("oid sha256:");
|
||||
if (pos != std::string::npos) {
|
||||
oid = line.substr(pos + 11); // skip "oid sha256:"
|
||||
continue;
|
||||
}
|
||||
|
||||
pos = line.find("size ");
|
||||
if (pos != std::string::npos) {
|
||||
std::string sizeStr =
|
||||
line.substr(pos + 5); // skip "size "
|
||||
size = std::stol(sizeStr);
|
||||
continue;
|
||||
}
|
||||
if (!content.starts_with("version ")) {
|
||||
// Invalid pointer file
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return Md{filename, oid, size};
|
||||
}
|
||||
if (!content.starts_with("version https://git-lfs.github.com/spec/v1")) {
|
||||
// In case there's new spec versions in the future, but for now only v1 exists
|
||||
debug("Invalid version found on potential lfs pointer file, skipping");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::istringstream iss(content);
|
||||
std::string line;
|
||||
|
||||
std::string oid;
|
||||
std::string size;
|
||||
|
||||
while (getline(iss, line)) {
|
||||
if (line.starts_with("version ")) {
|
||||
continue;
|
||||
}
|
||||
if (line.starts_with("oid sha256:")) {
|
||||
oid = line.substr(11); // skip "oid sha256:"
|
||||
continue;
|
||||
}
|
||||
if (line.starts_with("size ")) {
|
||||
size = line.substr(5); // skip "size "
|
||||
continue;
|
||||
}
|
||||
|
||||
debug("Custom extension '%s' found, ignoring", line);
|
||||
}
|
||||
|
||||
if (oid.length() != 64 || !std::all_of(oid.begin(), oid.end(), ::isxdigit)) {
|
||||
debug("Invalid sha256 %s, skipping", oid);
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (size.length() == 0 || !std::all_of(size.begin(), size.end(), ::isdigit)) {
|
||||
debug("Invalid size %s, skipping", size);
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return std::make_optional(Md{filename, oid, std::stoul(size)});
|
||||
}
|
||||
|
||||
|
||||
// there's already a ParseURL here https://github.com/b-camacho/nix/blob/ef6fa54e05cd4134ec41b0d64c1a16db46237f83/src/libutil/url.cc#L13
|
||||
|
@ -386,7 +408,6 @@ void Fetch::init(git_repository* repo, const std::string& gitattributesContent)
|
|||
}
|
||||
|
||||
|
||||
|
||||
bool Fetch::hasAttribute(const std::string& path, const std::string& attrName, const std::string& attrValue) const
|
||||
{
|
||||
for (auto it = rules.rbegin(); it != rules.rend(); ++it) {
|
||||
|
@ -481,10 +502,32 @@ std::vector<nlohmann::json> Fetch::fetchUrls(const std::vector<Md> &metadatas) c
|
|||
}
|
||||
}
|
||||
|
||||
void Fetch::fetch(const std::string& pointerFileContents, const std::string& pointerFilePath, Sink& sink) const {
|
||||
void Fetch::fetch(const git_blob* pointerBlob, const std::string& pointerFilePath, Sink& sink) const {
|
||||
constexpr size_t chunkSize = 128 * 1024; // 128 KiB
|
||||
auto size = git_blob_rawsize(pointerBlob);
|
||||
|
||||
if (size >= 1024) {
|
||||
debug("Skip git-lfs, pointer file too large");
|
||||
warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath);
|
||||
for (size_t offset = 0; offset < size; offset += chunkSize) {
|
||||
sink(std::string((const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset)));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const auto pointerFileContents = std::string((const char *) git_blob_rawcontent(pointerBlob), size);
|
||||
const auto md = parseLfsMetadata(std::string(pointerFileContents), std::string(pointerFilePath));
|
||||
if (md == std::nullopt) {
|
||||
debug("Skip git-lfs, invalid pointer file");
|
||||
warn("Encountered a file that should have been a pointer, but wasn't: %s", pointerFilePath);
|
||||
for (size_t offset = 0; offset < size; offset += chunkSize) {
|
||||
sink(std::string((const char *) git_blob_rawcontent(pointerBlob) + offset, std::min(chunkSize, size - offset)));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Md> vMds;
|
||||
vMds.push_back(md);
|
||||
vMds.push_back(md.value());
|
||||
const auto objUrls = fetchUrls(vMds);
|
||||
|
||||
const auto obj = objUrls[0];
|
||||
|
@ -507,4 +550,3 @@ void Fetch::fetch(const std::string& pointerFileContents, const std::string& poi
|
|||
} // namespace lfs
|
||||
|
||||
} // namespace nix
|
||||
|
||||
|
|
|
@ -676,29 +676,31 @@ struct GitSourceAccessor : SourceAccessor
|
|||
, root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get()))
|
||||
, lfsFetch(lfsFetch)
|
||||
{
|
||||
if (lfsFetch && !lookup(CanonPath(".gitattributes"))) {
|
||||
warn("Requested to fetch lfs files, but no .gitattributes file was found, ignoring");
|
||||
}
|
||||
}
|
||||
|
||||
std::string readBlob(const CanonPath & path, bool symlink)
|
||||
{
|
||||
const auto blob = getBlob(path, symlink);
|
||||
|
||||
const auto data = std::string((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get()));
|
||||
|
||||
if (path != CanonPath(".gitattributes") && lfsFetch) {
|
||||
if (lfsFetch && path != CanonPath(".gitattributes") && lookup(CanonPath(".gitattributes"))) {
|
||||
auto& _lfsFetch = *lfsFetch;
|
||||
if (!_lfsFetch.ready) {
|
||||
const auto contents = readFile(CanonPath(".gitattributes"));
|
||||
_lfsFetch.init(*repo, contents);
|
||||
};
|
||||
}
|
||||
|
||||
auto pathStr = std::string(path.rel());
|
||||
if (_lfsFetch.hasAttribute(pathStr, "filter", "lfs")) {
|
||||
StringSink s;
|
||||
_lfsFetch.fetch(data, pathStr, s);
|
||||
_lfsFetch.fetch(blob.get(), pathStr, s);
|
||||
return s.s;
|
||||
}
|
||||
}
|
||||
|
||||
return data;
|
||||
return std::string((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get()));
|
||||
}
|
||||
|
||||
void readFile(
|
||||
|
@ -709,11 +711,7 @@ struct GitSourceAccessor : SourceAccessor
|
|||
auto size = git_blob_rawsize(blob.get());
|
||||
sizeCallback(size);
|
||||
|
||||
// if lfs, this is just a pointer file
|
||||
// if not lfs then it's not big either way
|
||||
auto contents = std::string((const char *) git_blob_rawcontent(blob.get()), size);
|
||||
|
||||
if (lfsFetch && path != CanonPath(".gitattributes")) {
|
||||
if (lfsFetch && path != CanonPath(".gitattributes") && lookup(CanonPath(".gitattributes"))) {
|
||||
auto& _lfsFetch = *lfsFetch;
|
||||
if (!_lfsFetch.ready) {
|
||||
const auto contents = readFile(CanonPath(".gitattributes"));
|
||||
|
@ -722,13 +720,15 @@ struct GitSourceAccessor : SourceAccessor
|
|||
|
||||
auto pathStr = std::string(path.rel());
|
||||
if (_lfsFetch.hasAttribute(pathStr, "filter", "lfs")) {
|
||||
_lfsFetch.fetch(contents, pathStr, sink);
|
||||
_lfsFetch.fetch(blob.get(), pathStr, sink);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// either not using lfs or file should not be smudged
|
||||
sink(contents);
|
||||
constexpr size_t chunkSize = 128 * 1024; // 128 KiB
|
||||
for (size_t offset = 0; offset < size; offset += chunkSize) {
|
||||
sink(std::string((const char *) git_blob_rawcontent(blob.get()) + offset, std::min(chunkSize, size - offset)));
|
||||
}
|
||||
}
|
||||
|
||||
std::string readFile(const CanonPath & path) override
|
||||
|
|
|
@ -1,72 +1,179 @@
|
|||
{
|
||||
# mostly copied from https://github.com/NixOS/nix/blob/358c26fd13a902d9a4032a00e6683571be07a384/tests/nixos/fetch-git/test-cases/fetchTree-shallow/default.nix#L1
|
||||
# ty @DavHau
|
||||
description = "fetchGit smudges LFS pointers iff lfs=true";
|
||||
description = "fetchGit smudges LFS pointers if lfs=true";
|
||||
script = ''
|
||||
expected_max_size_lfs_pointer = 1024 # 1 KiB (values >= than this cannot be pointers, and test files are >= 1 MiB)
|
||||
|
||||
# purge nix git cache to make sure we start with a clean slate
|
||||
client.succeed("rm -rf ~/.cache/nix")
|
||||
|
||||
# add an lfs-enrolled file to the repo:
|
||||
client.succeed(f"dd if=/dev/urandom of={repo.path}/beeg bs=1M count=1")
|
||||
client.succeed(f"{repo.git} lfs install")
|
||||
client.succeed(f"{repo.git} lfs track --filename \"beeg\"")
|
||||
client.succeed(f"{repo.git} add .gitattributes")
|
||||
client.succeed(f"{repo.git} add beeg")
|
||||
client.succeed(f"{repo.git} commit -m 'commit1'")
|
||||
client.succeed(f"{repo.git} push origin main")
|
||||
|
||||
# memoize the revision
|
||||
commit1_rev = client.succeed(f"""
|
||||
{repo.git} rev-parse HEAD
|
||||
""").strip()
|
||||
# Request lfs fetch without any .gitattributes file
|
||||
############################################################################
|
||||
|
||||
client.succeed(f"dd if=/dev/urandom of={repo.path}/regular bs=1M count=1 >&2")
|
||||
client.succeed(f"{repo.git} add : >&2")
|
||||
client.succeed(f"{repo.git} commit -m 'no .gitattributes' >&2")
|
||||
client.succeed(f"{repo.git} push origin main >&2")
|
||||
|
||||
# memorize the revision
|
||||
no_gitattributes_rev = client.succeed(f"{repo.git} rev-parse HEAD").strip()
|
||||
|
||||
# fetch with lfs=true, and check that the lack of .gitattributes does not break anything
|
||||
fetchGit_no_gitattributes_expr = f"""
|
||||
builtins.fetchGit {{
|
||||
url = "{repo.remote}";
|
||||
rev = "{no_gitattributes_rev}";
|
||||
ref = "main";
|
||||
lfs = true;
|
||||
}}
|
||||
"""
|
||||
fetched_no_gitattributes = client.succeed(f"""
|
||||
nix eval --impure --raw --expr '({fetchGit_no_gitattributes_expr}).outPath'
|
||||
""")
|
||||
client.succeed(f"cmp {repo.path}/regular {fetched_no_gitattributes}/regular >&2")
|
||||
|
||||
|
||||
# Add a file that should be tracked by lfs, but isn't
|
||||
# (git lfs cli only throws a warning "Encountered 1 file that should have
|
||||
# been a pointer, but wasn't")
|
||||
############################################################################
|
||||
|
||||
client.succeed(f"dd if=/dev/urandom of={repo.path}/black_sheep bs=1M count=1 >&2")
|
||||
client.succeed(f"echo 'black_sheep filter=lfs -text' >>{repo.path}/.gitattributes")
|
||||
client.succeed(f"{repo.git} add : >&2")
|
||||
client.succeed(f"{repo.git} commit -m 'add misleading file' >&2")
|
||||
client.succeed(f"{repo.git} push origin main >&2")
|
||||
|
||||
# memorize the revision
|
||||
bad_lfs_rev = client.succeed(f"{repo.git} rev-parse HEAD").strip()
|
||||
|
||||
# prove that it can be cloned with regular git first
|
||||
# (here we see the warning as stated above)
|
||||
from tempfile import TemporaryDirectory
|
||||
with TemporaryDirectory() as tempdir:
|
||||
client.succeed(f"git clone -n {repo.remote} {tempdir} >&2")
|
||||
client.succeed(f"git -C {tempdir} lfs install >&2")
|
||||
client.succeed(f"git -C {tempdir} checkout {bad_lfs_rev} >&2")
|
||||
|
||||
# check that the file is not a pointer, as expected
|
||||
file_size_git = client.succeed(f"stat -c %s {tempdir}/black_sheep").strip()
|
||||
assert int(file_size_git) >= expected_max_size_lfs_pointer, \
|
||||
f"non lfs file is {file_size_git}b (<{expected_max_size_lfs_pointer}b), probably a test implementation error"
|
||||
|
||||
lfs_files = client.succeed(f"git -C {tempdir} lfs ls-files").strip()
|
||||
assert lfs_files == "", "non lfs file is tracked by lfs, probably a test implementation error"
|
||||
|
||||
client.succeed(f"cmp {repo.path}/black_sheep {tempdir}/black_sheep >&2")
|
||||
|
||||
# now fetch without lfs, check that the file is not a pointer
|
||||
fetchGit_bad_lfs_without_lfs_expr = f"""
|
||||
builtins.fetchGit {{
|
||||
url = "{repo.remote}";
|
||||
rev = "{bad_lfs_rev}";
|
||||
ref = "main";
|
||||
lfs = false;
|
||||
}}
|
||||
"""
|
||||
fetched_bad_lfs_without_lfs = client.succeed(f"""
|
||||
nix eval --impure --raw --expr '({fetchGit_bad_lfs_without_lfs_expr}).outPath'
|
||||
""")
|
||||
|
||||
# check that file was not somehow turned into a pointer
|
||||
file_size_bad_lfs_without_lfs = client.succeed(f"stat -c %s {fetched_bad_lfs_without_lfs}/black_sheep").strip()
|
||||
|
||||
assert int(file_size_bad_lfs_without_lfs) >= expected_max_size_lfs_pointer, \
|
||||
f"non lfs-enrolled file is {file_size_bad_lfs_without_lfs}b (<{expected_max_size_lfs_pointer}b), probably a test implementation error"
|
||||
client.succeed(f"cmp {repo.path}/black_sheep {fetched_bad_lfs_without_lfs}/black_sheep >&2")
|
||||
|
||||
# finally fetch with lfs=true, and check that the bad file does not break anything
|
||||
fetchGit_bad_lfs_with_lfs_expr = f"""
|
||||
builtins.fetchGit {{
|
||||
url = "{repo.remote}";
|
||||
rev = "{bad_lfs_rev}";
|
||||
ref = "main";
|
||||
lfs = true;
|
||||
}}
|
||||
"""
|
||||
fetchGit_bad_lfs_with_lfs = client.succeed(f"""
|
||||
nix eval --impure --raw --expr '({fetchGit_bad_lfs_with_lfs_expr}).outPath'
|
||||
""")
|
||||
|
||||
client.succeed(f"cmp {repo.path}/black_sheep {fetchGit_bad_lfs_with_lfs}/black_sheep >&2")
|
||||
|
||||
|
||||
# Add an lfs-enrolled file to the repo
|
||||
############################################################################
|
||||
|
||||
client.succeed(f"dd if=/dev/urandom of={repo.path}/beeg bs=1M count=1 >&2")
|
||||
client.succeed(f"{repo.git} lfs install >&2")
|
||||
client.succeed(f"{repo.git} lfs track --filename beeg >&2")
|
||||
client.succeed(f"{repo.git} add : >&2")
|
||||
client.succeed(f"{repo.git} commit -m 'add lfs file' >&2")
|
||||
client.succeed(f"{repo.git} push origin main >&2")
|
||||
|
||||
# memorize the revision
|
||||
lfs_file_rev = client.succeed(f"{repo.git} rev-parse HEAD").strip()
|
||||
|
||||
# first fetch without lfs, check that we did not smudge the file
|
||||
fetchGit_nolfs_expr = f"""
|
||||
builtins.fetchGit {{
|
||||
url = "{repo.remote}";
|
||||
rev = "{commit1_rev}";
|
||||
rev = "{lfs_file_rev}";
|
||||
ref = "main";
|
||||
lfs = false;
|
||||
}}
|
||||
"""
|
||||
|
||||
# fetch the repo via nix
|
||||
fetched_nolfs = client.succeed(f"""
|
||||
nix eval --impure --raw --expr '({fetchGit_nolfs_expr}).outPath'
|
||||
""")
|
||||
|
||||
# check that file was not smudged
|
||||
file_size_nolfs = client.succeed(f"""
|
||||
stat -c %s {fetched_nolfs}/beeg
|
||||
""").strip()
|
||||
file_size_nolfs = client.succeed(f"stat -c %s {fetched_nolfs}/beeg").strip()
|
||||
|
||||
expected_max_size_lfs = 1024
|
||||
assert int(file_size_nolfs) < expected_max_size_lfs, f"did not set lfs=true, yet lfs-enrolled file is {file_size_nolfs}b (>{expected_max_size_lfs}b), probably smudged when we should not have"
|
||||
assert int(file_size_nolfs) < expected_max_size_lfs_pointer, \
|
||||
f"did not set lfs=true, yet lfs-enrolled file is {file_size_nolfs}b (>{expected_max_size_lfs_pointer}b), probably smudged when we should not have"
|
||||
|
||||
# now fetch with lfs=true and check that the file was smudged
|
||||
fetchGit_lfs_expr = f"""
|
||||
builtins.fetchGit {{
|
||||
url = "{repo.remote}";
|
||||
rev = "{commit1_rev}";
|
||||
rev = "{lfs_file_rev}";
|
||||
ref = "main";
|
||||
lfs = true;
|
||||
}}
|
||||
"""
|
||||
|
||||
|
||||
# fetch the repo via nix
|
||||
fetched_lfs = client.succeed(f"""
|
||||
nix eval --impure --raw --expr '({fetchGit_lfs_expr}).outPath'
|
||||
""")
|
||||
|
||||
assert fetched_lfs != fetched_nolfs, f"fetching with and without lfs yielded the same store path {fetched_lfs}, fingerprinting error?"
|
||||
assert fetched_lfs != fetched_nolfs, \
|
||||
f"fetching with and without lfs yielded the same store path {fetched_lfs}, fingerprinting error?"
|
||||
|
||||
# check that file was smudged
|
||||
file_size_lfs = client.succeed(f"""
|
||||
stat -c %s {fetched_lfs}/beeg
|
||||
""").strip()
|
||||
file_size_lfs = client.succeed(f"stat -c %s {fetched_lfs}/beeg").strip()
|
||||
assert int(file_size_lfs) >= expected_max_size_lfs_pointer, \
|
||||
f"set lfs=true, yet lfs-enrolled file is {file_size_lfs}b (<{expected_max_size_lfs_pointer}), probably did not smudge when we should have"
|
||||
|
||||
expected_min_size_lfs = 1024 * 1024 # 1MB
|
||||
assert int(file_size_lfs) >= expected_min_size_lfs, f"set lfs=true, yet lfs-enrolled file is {file_size_lfs}b (<{expected_min_size_lfs}), probably did not smudge when we should have"
|
||||
|
||||
# Check that default is lfs=false
|
||||
############################################################################
|
||||
fetchGit_default_expr = f"""
|
||||
builtins.fetchGit {{
|
||||
url = "{repo.remote}";
|
||||
rev = "{lfs_file_rev}";
|
||||
ref = "main";
|
||||
}}
|
||||
"""
|
||||
fetched_default = client.succeed(f"""
|
||||
nix eval --impure --raw --expr '({fetchGit_default_expr}).outPath'
|
||||
""")
|
||||
|
||||
# check that file was not smudged
|
||||
file_size_default = client.succeed(f"stat -c %s {fetched_default}/beeg").strip()
|
||||
|
||||
assert int(file_size_default) < expected_max_size_lfs_pointer, \
|
||||
f"did not set lfs, yet lfs-enrolled file is {file_size_default}b (>{expected_max_size_lfs_pointer}b), probably bad default value"
|
||||
'';
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue