1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-07-06 05:01:48 +02:00

Add Git object hashing to the store layer

Part of RFC 133

Extracted from our old IPFS branches.

Co-Authored-By: Matthew Bauer <mjbauer95@gmail.com>
Co-Authored-By: Carlo Nucera <carlo.nucera@protonmail.com>
Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
Co-authored-by: Florian Klink <flokli@flokli.de>
This commit is contained in:
John Ericson 2023-09-04 09:51:23 -04:00
parent 04836c73e5
commit 201551c937
27 changed files with 484 additions and 86 deletions

View file

@ -1,16 +1,53 @@
#include "file-content-address.hh"
#include "archive.hh"
#include "git.hh"
namespace nix {
FileIngestionMethod parseFileIngestionMethod(std::string_view input)
static std::optional<FileSerialisationMethod> parseFileSerialisationMethodOpt(std::string_view input)
{
if (input == "flat") {
return FileIngestionMethod::Flat;
return FileSerialisationMethod::Flat;
} else if (input == "nar") {
return FileIngestionMethod::Recursive;
return FileSerialisationMethod::Recursive;
} else {
throw UsageError("Unknown file ingestion method '%s', expect `flat` or `nar`");
return std::nullopt;
}
}
/**
 * Parse a `FileSerialisationMethod` from its name (`flat` or `nar`).
 *
 * @param input The name to parse.
 * @return The matching `FileSerialisationMethod`.
 * @throws UsageError if `input` is not a known name; the offending
 * input is included in the message.
 */
FileSerialisationMethod parseFileSerialisationMethod(std::string_view input)
{
    if (auto ret = parseFileSerialisationMethodOpt(input))
        return *ret;

    // Pass `input` so the `%s` placeholder is actually filled in.
    throw UsageError("Unknown file serialisation method '%s', expect `flat` or `nar`", input);
}
/**
 * Parse a `FileIngestionMethod` from its name (`flat`, `nar`, or `git`).
 *
 * @param input The name to parse.
 * @return The matching `FileIngestionMethod`.
 * @throws UsageError if `input` is not a known name; the offending
 * input is included in the message.
 */
FileIngestionMethod parseFileIngestionMethod(std::string_view input)
{
    if (input == "git")
        return FileIngestionMethod::Git;

    // The remaining names are exactly the serialisation method names,
    // so defer to that parser and convert the result.
    if (auto ret = parseFileSerialisationMethodOpt(input))
        return static_cast<FileIngestionMethod>(*ret);

    // Pass `input` so the `%s` placeholder is actually filled in.
    throw UsageError("Unknown file ingestion method '%s', expect `flat`, `nar`, or `git`", input);
}
/**
 * Render a `FileSerialisationMethod` as its name (`flat` or `nar`).
 *
 * @param method The method to render.
 * @return The name of `method`.
 *
 * Aborts if `method` is not one of the enumerators handled below.
 */
std::string_view renderFileSerialisationMethod(FileSerialisationMethod method)
{
    switch (method) {
    case FileSerialisationMethod::Flat:
        return "flat";
    case FileSerialisationMethod::Recursive:
        return "nar";
    default:
        // Not `assert(false)`: with `NDEBUG` that compiles to nothing
        // and control would flow off the end of a non-void function.
        abort();
    }
}
@ -19,9 +56,11 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method)
{
switch (method) {
case FileIngestionMethod::Flat:
return "flat";
case FileIngestionMethod::Recursive:
return "nar";
return renderFileSerialisationMethod(
static_cast<FileSerialisationMethod>(method));
case FileIngestionMethod::Git:
return "git";
default:
abort();
}
@ -31,14 +70,14 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method)
void dumpPath(
SourceAccessor & accessor, const CanonPath & path,
Sink & sink,
FileIngestionMethod method,
FileSerialisationMethod method,
PathFilter & filter)
{
switch (method) {
case FileIngestionMethod::Flat:
case FileSerialisationMethod::Flat:
accessor.readFile(path, sink);
break;
case FileIngestionMethod::Recursive:
case FileSerialisationMethod::Recursive:
accessor.dumpPath(path, sink, filter);
break;
}
@ -48,13 +87,13 @@ void dumpPath(
void restorePath(
const Path & path,
Source & source,
FileIngestionMethod method)
FileSerialisationMethod method)
{
switch (method) {
case FileIngestionMethod::Flat:
case FileSerialisationMethod::Flat:
writeFile(path, source);
break;
case FileIngestionMethod::Recursive:
case FileSerialisationMethod::Recursive:
restorePath(path, source);
break;
}
@ -63,7 +102,7 @@ void restorePath(
HashResult hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ha,
FileSerialisationMethod method, HashAlgorithm ha,
PathFilter & filter)
{
HashSink sink { ha };
@ -71,4 +110,20 @@ HashResult hashPath(
return sink.finish();
}
/**
 * Hash a file system object according to an arbitrary
 * `FileIngestionMethod`.
 *
 * `Flat` and `Recursive` are forwarded to the
 * `FileSerialisationMethod` overload of `hashPath`, keeping only the
 * hash (`.first`) of its result. `Git` is hashed via `git::dumpHash`.
 *
 * Aborts if `method` is not one of the enumerators handled below.
 */
Hash hashPath(
    SourceAccessor & accessor, const CanonPath & path,
    FileIngestionMethod method, HashAlgorithm ha,
    PathFilter & filter)
{
    switch (method) {
    case FileIngestionMethod::Flat:
    case FileIngestionMethod::Recursive:
        return hashPath(
            accessor, path,
            static_cast<FileSerialisationMethod>(method),
            ha, filter).first;
    case FileIngestionMethod::Git:
        return git::dumpHash(ha, accessor, path, filter).hash;
    }

    // No `default:` above, so the compiler can still warn about
    // unhandled enumerators; this guards against flowing off the end
    // of a non-void function for an out-of-range value.
    abort();
}
}

View file

@ -8,37 +8,38 @@
namespace nix {
/**
* An enumeration of the main ways we can serialize file system
* An enumeration of the ways we can serialize file system
* objects.
*/
enum struct FileIngestionMethod : uint8_t {
enum struct FileSerialisationMethod : uint8_t {
/**
* Flat-file hashing. Directly ingest the contents of a single file
* Flat-file. The contents of a single file exactly.
*/
Flat = 0,
Flat,
/**
* Recursive (or NAR) hashing. Serializes the file-system object in
* Nix Archive format and ingest that.
* Nix Archive. Serializes the file-system object in
* Nix Archive format.
*/
Recursive = 1,
Recursive,
};
/**
* Parse a `FileIngestionMethod` by name. Choice of:
* Parse a `FileSerialisationMethod` by name. Choice of:
*
* - `flat`: `FileIngestionMethod::Flat`
* - `nar`: `FileIngestionMethod::Recursive`
* - `flat`: `FileSerialisationMethod::Flat`
* - `nar`: `FileSerialisationMethod::Recursive`
*
* Oppostite of `renderFileIngestionMethod`.
* Opposite of `renderFileSerialisationMethod`.
*/
FileIngestionMethod parseFileIngestionMethod(std::string_view input);
FileSerialisationMethod parseFileSerialisationMethod(std::string_view input);
/**
* Render a `FileIngestionMethod` by name.
* Render a `FileSerialisationMethod` by name.
*
* Oppostite of `parseFileIngestionMethod`.
* Opposite of `parseFileSerialisationMethod`.
*/
std::string_view renderFileIngestionMethod(FileIngestionMethod method);
std::string_view renderFileSerialisationMethod(FileSerialisationMethod method);
/**
* Dump a serialization of the given file system object.
@ -46,26 +47,97 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method);
void dumpPath(
SourceAccessor & accessor, const CanonPath & path,
Sink & sink,
FileIngestionMethod method,
FileSerialisationMethod method,
PathFilter & filter = defaultPathFilter);
/**
* Restore a serialization of the given file system object.
* Restore a serialisation of the given file system object.
*
* @TODO use an arbitrary `FileSystemObjectSink`.
*/
void restorePath(
const Path & path,
Source & source,
FileIngestionMethod method);
FileSerialisationMethod method);
/**
* Compute the hash of the given file system object according to the
* given method.
*
* The hash is defined as (essentially) hashString(ha, dumpPath(path)).
* The hash is defined as (in pseudocode):
*
* ```
* hashString(ha, dumpPath(...))
* ```
*/
HashResult hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileSerialisationMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter);
/**
* An enumeration of the ways we can ingest file system
* objects, producing a hash or digest.
*/
enum struct FileIngestionMethod : uint8_t {
/**
* Hash `FileSerialisationMethod::Flat` serialisation.
*/
Flat,
/**
* Hash `FileSerialisationMethod::Recursive` serialisation.
*/
Recursive,
/**
* Git hashing. In particular files are hashed as git "blobs", and
* directories are hashed as git "trees".
*
* Unlike `Flat` and `Recursive`, this is not a hash of a single
* serialisation but a [Merkle
* DAG](https://en.wikipedia.org/wiki/Merkle_tree) of multiple
* rounds of serialisation and hashing.
*
* @note Git's data model is slightly different, in that a plain
* file doesn't have an executable bit, directory entries do
* instead. We decide to treat a bare file as non-executable by fiat,
* as we do with `FileIngestionMethod::Flat` which also lacks this
* information. Thus, Git can encode some but not all of Nix's "File
* System Objects", and this sort of hashing is likewise partial.
*/
Git,
};
/**
* Parse a `FileIngestionMethod` by name. Choice of:
*
* - `flat`: `FileIngestionMethod::Flat`
* - `nar`: `FileIngestionMethod::Recursive`
* - `git`: `FileIngestionMethod::Git`
*
* Opposite of `renderFileIngestionMethod`.
*
* @throws UsageError if `input` is not one of the names above.
*/
FileIngestionMethod parseFileIngestionMethod(std::string_view input);
/**
* Render a `FileIngestionMethod` by name.
*
* Opposite of `parseFileIngestionMethod`.
*/
std::string_view renderFileIngestionMethod(FileIngestionMethod method);
/**
* Compute the hash of the given file system object according to the
* given method.
*
* Unlike the other `hashPath`, this works on an arbitrary
* `FileIngestionMethod` instead of `FileSerialisationMethod`, but
* doesn't return the size, as a size is not both simply and usefully
* defined for a Merkle format.
*/
Hash hashPath(
SourceAccessor & accessor, const CanonPath & path,
FileIngestionMethod method, HashAlgorithm ha,
PathFilter & filter = defaultPathFilter);