diff --git a/doc/manual/rl-next/store-object-info.md b/doc/manual/rl-next/store-object-info.md new file mode 100644 index 000000000..ab8f5fec0 --- /dev/null +++ b/doc/manual/rl-next/store-object-info.md @@ -0,0 +1,11 @@ +--- +synopsis: Store object info JSON format now uses `null` rather than omitting fields. +prs: 9995 +--- + +The [store object info JSON format](@docroot@/protocols/json/store-object-info.md), used for e.g. `nix path-info`, no longer omits fields to indicate absent information, but instead includes the fields with a `null` value. +For example, `"ca": null` is used to to indicate a store object that isn't content-addressed rather than omitting the `ca` field entirely. +This makes records of this sort more self-describing, and easier to consume programmatically. + +We will follow this design principle going forward; +the [JSON guidelines](@docroot@/contributing/json-guideline.md) in the contributing section have been updated accordingly. diff --git a/doc/manual/src/SUMMARY.md.in b/doc/manual/src/SUMMARY.md.in index 2516cc3db..18e7e8380 100644 --- a/doc/manual/src/SUMMARY.md.in +++ b/doc/manual/src/SUMMARY.md.in @@ -121,6 +121,7 @@ - [Documentation](contributing/documentation.md) - [Experimental Features](contributing/experimental-features.md) - [CLI guideline](contributing/cli-guideline.md) + - [JSON guideline](contributing/json-guideline.md) - [C++ style guide](contributing/cxx.md) - [Releases](release-notes/index.md) {{#include ./SUMMARY-rl-next.md}} diff --git a/doc/manual/src/contributing/cli-guideline.md b/doc/manual/src/contributing/cli-guideline.md index e90d6de8d..23df844ec 100644 --- a/doc/manual/src/contributing/cli-guideline.md +++ b/doc/manual/src/contributing/cli-guideline.md @@ -389,88 +389,6 @@ colors, no emojis and using ASCII instead of Unicode symbols). The same should happen when TTY is not detected on STDERR. We should not display progress / status section, but only print warnings and errors. -## Returning future proof JSON - -The schema of JSON output should allow for backwards compatible extension. This section explains how to achieve this. - -Two definitions are helpful here, because while JSON only defines one "key-value" -object type, we use it to cover two use cases: - - - **dictionary**: a map from names to value that all have the same type. In - C++ this would be a `std::map` with string keys. - - **record**: a fixed set of attributes each with their own type. In C++, this - would be represented by a `struct`. - -It is best not to mix these use cases, as that may lead to incompatibilities when the schema changes. For example, adding a record field to a dictionary breaks consumers that assume all JSON object fields to have the same meaning and type. - -This leads to the following guidelines: - - - The top-level (root) value must be a record. - - Otherwise, one can not change the structure of a command's output. - - - The value of a dictionary item must be a record. - - Otherwise, the item type can not be extended. - - - List items should be records. - - Otherwise, one can not change the structure of the list items. - - If the order of the items does not matter, and each item has a unique key that is a string, consider representing the list as a dictionary instead. If the order of the items needs to be preserved, return a list of records. - - - Streaming JSON should return records. - - An example of a streaming JSON format is [JSON lines](https://jsonlines.org/), where each line represents a JSON value. These JSON values can be considered top-level values or list items, and they must be records. - -### Examples - - -This is bad, because all keys must be assumed to be store types: - -```json -{ - "local": { ... }, - "remote": { ... }, - "http": { ... } -} -``` - -This is good, because the it is extensible at the root, and is somewhat self-documenting: - -```json -{ - "storeTypes": { "local": { ... }, ... }, - "pluginSupport": true -} -``` - -While the dictionary of store types seems like a very complete response at first, a use case may arise that warrants returning additional information. -For example, the presence of plugin support may be crucial information for a client to proceed when their desired store type is missing. - - - -The following representation is bad because it is not extensible: - -```json -{ "outputs": [ "out" "bin" ] } -``` - -However, simply converting everything to records is not enough, because the order of outputs must be preserved: - -```json -{ "outputs": { "bin": {}, "out": {} } } -``` - -The first item is the default output. Deriving this information from the outputs ordering is not great, but this is how Nix currently happens to work. -While it is possible for a JSON parser to preserve the order of fields, we can not rely on this capability to be present in all JSON libraries. - -This representation is extensible and preserves the ordering: - -```json -{ "outputs": [ { "outputName": "out" }, { "outputName": "bin" } ] } -``` - ## Dialog with the user CLIs don't always make it clear when an action has taken place. For every diff --git a/doc/manual/src/contributing/json-guideline.md b/doc/manual/src/contributing/json-guideline.md new file mode 100644 index 000000000..b4bc92af9 --- /dev/null +++ b/doc/manual/src/contributing/json-guideline.md @@ -0,0 +1,128 @@ +# JSON guideline + +Nix consumes and produces JSON in a variety of contexts. +These guidelines ensure consistent practices for all our JSON interfaces, for ease of use, and so that experience in one part carries over to another. + +## Extensibility + +The schema of JSON input and output should allow for backwards compatible extension. +This section explains how to achieve this. + +Two definitions are helpful here, because while JSON only defines one "key-value" object type, we use it to cover two use cases: + + - **dictionary**: a map from names to value that all have the same type. + In C++ this would be a `std::map` with string keys. + + - **record**: a fixed set of attributes each with their own type. + In C++, this would be represented by a `struct`. + +It is best not to mix these use cases, as that may lead to incompatibilities when the schema changes. +For example, adding a record field to a dictionary breaks consumers that assume all JSON object fields to have the same meaning and type, and dictionary items with a colliding name can not be represented anymore. + +This leads to the following guidelines: + + - The top-level (root) value must be a record. + + Otherwise, one can not change the structure of a command's output. + + - The value of a dictionary item must be a record. + + Otherwise, the item type can not be extended. + + - List items should be records. + + Otherwise, one can not change the structure of the list items. + + If the order of the items does not matter, and each item has a unique key that is a string, consider representing the list as a dictionary instead. + If the order of the items needs to be preserved, return a list of records. + + - Streaming JSON should return records. + + An example of a streaming JSON format is [JSON lines](https://jsonlines.org/), where each line represents a JSON value. + These JSON values can be considered top-level values or list items, and they must be records. + +### Examples + +This is bad, because all keys must be assumed to be store types: + +```json +{ + "local": { ... }, + "remote": { ... }, + "http": { ... } +} +``` + +This is good, because the it is extensible at the root, and is somewhat self-documenting: + +```json +{ + "storeTypes": { "local": { ... }, ... }, + "pluginSupport": true +} +``` + +While the dictionary of store types seems like a very complete response at first, a use case may arise that warrants returning additional information. +For example, the presence of plugin support may be crucial information for a client to proceed when their desired store type is missing. + + + +The following representation is bad because it is not extensible: + +```json +{ "outputs": [ "out" "bin" ] } +``` + +However, simply converting everything to records is not enough, because the order of outputs must be preserved: + +```json +{ "outputs": { "bin": {}, "out": {} } } +``` + +The first item is the default output. Deriving this information from the outputs ordering is not great, but this is how Nix currently happens to work. +While it is possible for a JSON parser to preserve the order of fields, we can not rely on this capability to be present in all JSON libraries. + +This representation is extensible and preserves the ordering: + +```json +{ "outputs": [ { "outputName": "out" }, { "outputName": "bin" } ] } +``` + +## Self-describing values + +As described in the previous section, it's crucial that schemas can be extended with with new fields without breaking compatibility. +However, that should *not* mean we use the presence/absence of fields to indicate optional information *within* a version of the schema. +Instead, always include the field, and use `null` to indicate the "nothing" case. + +### Examples + +Here are two JSON objects: + +```json +{ + "foo": {} +} +``` +```json +{ + "foo": {}, + "bar": {} +} +``` + +Since they differ in which fields they contain, they should *not* both be valid values of the same schema. +At most, they can match two different schemas where the second (with `foo` and `bar`) is considered a newer version of the first (with just `foo`). +Within each version, all fields are mandatory (always `foo`, and always `foo` and `bar`). +Only *between* each version, `bar` gets added as a new mandatory field. + +Here are another two JSON objects: + +```json +{ "foo": null } +``` +```json +{ "foo": { "bar": 1 } } +``` + +Since they both contain a `foo` field, they could be valid values of the same schema. +The schema would have `foo` has an optional field, which is either `null` or an object where `bar` is an integer. diff --git a/doc/manual/src/glossary.md b/doc/manual/src/glossary.md index 080b25d30..55ad9e1c2 100644 --- a/doc/manual/src/glossary.md +++ b/doc/manual/src/glossary.md @@ -12,7 +12,7 @@ For how Nix uses content addresses, see: - [Content-Addressing File System Objects](@docroot@/store/file-system-object/content-address.md) - - [content-addressed store object](#gloss-content-addressed-store-object) + - [Content-Addressing Store Objects](@docroot@/store/store-object/content-address.md) - [content-addressed derivation](#gloss-content-addressed-derivation) Software Heritage's writing on [*Intrinsic and Extrinsic identifiers*](https://www.softwareheritage.org/2020/07/09/intrinsic-vs-extrinsic-identifiers) is also a good introduction to the value of content-addressing over other referencing schemes. @@ -137,9 +137,12 @@ - [content-addressed store object]{#gloss-content-addressed-store-object} - A [store object] whose [store path] is determined by its contents. + A [store object] which is [content-addressed](#gloss-content-address), + i.e. whose [store path] is determined by its contents. This includes derivations, the outputs of [content-addressed derivations](#gloss-content-addressed-derivation), and the outputs of [fixed-output derivations](#gloss-fixed-output-derivation). + See [Content-Addressing Store Objects](@docroot@/store/store-object/content-address.md) for details. + - [substitute]{#gloss-substitute} A substitute is a command invocation stored in the [Nix database] that diff --git a/doc/manual/src/protocols/json/store-object-info.md b/doc/manual/src/protocols/json/store-object-info.md index 22a14715f..9f647a96c 100644 --- a/doc/manual/src/protocols/json/store-object-info.md +++ b/doc/manual/src/protocols/json/store-object-info.md @@ -24,9 +24,11 @@ Info about a [store object]. An array of [store paths][store path], possibly including this one. -* `ca` (optional): +* `ca`: - Content address of this store object's file system object, used to compute its store path. + If the store object is [content-addressed], + this is the content address of this store object's file system object, used to compute its store path. + Otherwise (i.e. if it is [input-addressed]), this is `null`. [store path]: @docroot@/store/store-path.md [file system object]: @docroot@/store/file-system-object.md @@ -37,28 +39,30 @@ Info about a [store object]. These are not intrinsic properties of the store object. In other words, the same store object residing in different store could have different values for these properties. -* `deriver` (optional): +* `deriver`: - The path to the [derivation] from which this store object is produced. + If known, the path to the [derivation] from which this store object was produced. + Otherwise `null`. [derivation]: @docroot@/glossary.md#gloss-store-derivation * `registrationTime` (optional): - When this derivation was added to the store. + If known, when this derivation was added to the store. + Otherwise `null`. -* `ultimate` (optional): +* `ultimate`: Whether this store object is trusted because we built it ourselves, rather than substituted a build product from elsewhere. -* `signatures` (optional): +* `signatures`: Signatures claiming that this store object is what it claims to be. Not relevant for [content-addressed] store objects, but useful for [input-addressed] store objects. - [content-addressed]: @docroot@/glossary.md#gloss-content-addressed-store-object - [input-addressed]: @docroot@/glossary.md#gloss-input-addressed-store-object +[content-addressed]: @docroot@/store/store-object/content-address.md +[input-addressed]: @docroot@/glossary.md#gloss-input-addressed-store-object ### `.narinfo` extra fields diff --git a/src/libstore/parsed-derivations.cc b/src/libstore/parsed-derivations.cc index a29281953..d8459d4d7 100644 --- a/src/libstore/parsed-derivations.cc +++ b/src/libstore/parsed-derivations.cc @@ -135,18 +135,37 @@ static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*"); /** * Write a JSON representation of store object metadata, such as the * hash and the references. + * + * @note Do *not* use `ValidPathInfo::toJSON` because this function is + * subject to stronger stability requirements since it is used to + * prepare build environments. Perhaps someday we'll have a versionining + * mechanism to allow this to evolve again and get back in sync, but for + * now we must not change - not even extend - the behavior. */ static nlohmann::json pathInfoToJSON( Store & store, const StorePathSet & storePaths) { - nlohmann::json::array_t jsonList = nlohmann::json::array(); + using nlohmann::json; + + nlohmann::json::array_t jsonList = json::array(); for (auto & storePath : storePaths) { auto info = store.queryPathInfo(storePath); - auto & jsonPath = jsonList.emplace_back( - info->toJSON(store, false, HashFormat::Nix32)); + auto & jsonPath = jsonList.emplace_back(json::object()); + + jsonPath["narHash"] = info->narHash.to_string(HashFormat::Nix32, true); + jsonPath["narSize"] = info->narSize; + + { + auto & jsonRefs = jsonPath["references"] = json::array(); + for (auto & ref : info->references) + jsonRefs.emplace_back(store.printStorePath(ref)); + } + + if (info->ca) + jsonPath["ca"] = renderContentAddress(info->ca); // Add the path to the object whose metadata we are including. jsonPath["path"] = store.printStorePath(storePath); diff --git a/src/libstore/path-info.cc b/src/libstore/path-info.cc index 6523cb425..ddd7f50d9 100644 --- a/src/libstore/path-info.cc +++ b/src/libstore/path-info.cc @@ -161,28 +161,23 @@ nlohmann::json UnkeyedValidPathInfo::toJSON( jsonObject["narSize"] = narSize; { - auto& jsonRefs = (jsonObject["references"] = json::array()); + auto & jsonRefs = jsonObject["references"] = json::array(); for (auto & ref : references) jsonRefs.emplace_back(store.printStorePath(ref)); } - if (ca) - jsonObject["ca"] = renderContentAddress(ca); + jsonObject["ca"] = ca ? (std::optional { renderContentAddress(*ca) }) : std::nullopt; if (includeImpureInfo) { - if (deriver) - jsonObject["deriver"] = store.printStorePath(*deriver); + jsonObject["deriver"] = deriver ? (std::optional { store.printStorePath(*deriver) }) : std::nullopt; - if (registrationTime) - jsonObject["registrationTime"] = registrationTime; + jsonObject["registrationTime"] = registrationTime ? (std::optional { registrationTime }) : std::nullopt; - if (ultimate) - jsonObject["ultimate"] = ultimate; + jsonObject["ultimate"] = ultimate; - if (!sigs.empty()) { - for (auto & sig : sigs) - jsonObject["signatures"].push_back(sig); - } + auto & sigsObj = jsonObject["signatures"] = json::array(); + for (auto & sig : sigs) + sigsObj.push_back(sig); } return jsonObject; @@ -210,20 +205,25 @@ UnkeyedValidPathInfo UnkeyedValidPathInfo::fromJSON( throw; } + // New format as this as nullable but mandatory field; handling + // missing is for back-compat. if (json.contains("ca")) - res.ca = ContentAddress::parse(getString(valueAt(json, "ca"))); + if (auto * rawCa = getNullable(valueAt(json, "ca"))) + res.ca = ContentAddress::parse(getString(*rawCa)); if (json.contains("deriver")) - res.deriver = store.parseStorePath(getString(valueAt(json, "deriver"))); + if (auto * rawDeriver = getNullable(valueAt(json, "deriver"))) + res.deriver = store.parseStorePath(getString(*rawDeriver)); if (json.contains("registrationTime")) - res.registrationTime = getInteger(valueAt(json, "registrationTime")); + if (auto * rawRegistrationTime = getNullable(valueAt(json, "registrationTime"))) + res.registrationTime = getInteger(*rawRegistrationTime); if (json.contains("ultimate")) res.ultimate = getBoolean(valueAt(json, "ultimate")); if (json.contains("signatures")) - res.sigs = valueAt(json, "signatures"); + res.sigs = getStringSet(valueAt(json, "signatures")); return res; } diff --git a/src/libutil/json-utils.cc b/src/libutil/json-utils.cc index 1b911bf75..dff068e07 100644 --- a/src/libutil/json-utils.cc +++ b/src/libutil/json-utils.cc @@ -39,12 +39,9 @@ std::optional optionalValueAt(const nlohmann::json::object_t & m } -std::optional getNullable(const nlohmann::json & value) +const nlohmann::json * getNullable(const nlohmann::json & value) { - if (value.is_null()) - return std::nullopt; - - return value.get(); + return value.is_null() ? nullptr : &value; } /** diff --git a/src/libutil/json-utils.hh b/src/libutil/json-utils.hh index 08c98cc8c..fe7a406cf 100644 --- a/src/libutil/json-utils.hh +++ b/src/libutil/json-utils.hh @@ -29,7 +29,7 @@ std::optional optionalValueAt(const nlohmann::json::object_t & v * Downcast the json object, failing with a nice error if the conversion fails. * See https://json.nlohmann.me/features/types/ */ -std::optional getNullable(const nlohmann::json & value); +const nlohmann::json * getNullable(const nlohmann::json & value); const nlohmann::json::object_t & getObject(const nlohmann::json & value); const nlohmann::json::array_t & getArray(const nlohmann::json & value); const nlohmann::json::string_t & getString(const nlohmann::json & value); diff --git a/tests/functional/signing.sh b/tests/functional/signing.sh index bcbd3b675..cf84ab377 100755 --- a/tests/functional/signing.sh +++ b/tests/functional/signing.sh @@ -15,9 +15,9 @@ outPath=$(nix-build dependencies.nix --no-out-link --secret-key-files "$TEST_ROO # Verify that the path got signed. info=$(nix path-info --json $outPath) -[[ $info =~ '"ultimate":true' ]] -[[ $info =~ 'cache1.example.org' ]] -[[ $info =~ 'cache2.example.org' ]] +echo $info | jq -e '.[] | .ultimate == true' +echo $info | jq -e '.[] | .signatures.[] | select(startswith("cache1.example.org"))' +echo $info | jq -e '.[] | .signatures.[] | select(startswith("cache2.example.org"))' # Test "nix store verify". nix store verify -r $outPath @@ -39,8 +39,8 @@ nix store verify -r $outPath # Verify that the path did not get signed but does have the ultimate bit. info=$(nix path-info --json $outPath2) -[[ $info =~ '"ultimate":true' ]] -(! [[ $info =~ 'signatures' ]]) +echo $info | jq -e '.[] | .ultimate == true' +echo $info | jq -e '.[] | .signatures == []' # Test "nix store verify". nix store verify -r $outPath2 @@ -57,7 +57,7 @@ nix store verify -r $outPath2 --sigs-needed 1 --trusted-public-keys $pk1 # Build something content-addressed. outPathCA=$(IMPURE_VAR1=foo IMPURE_VAR2=bar nix-build ./fixed.nix -A good.0 --no-out-link) -[[ $(nix path-info --json $outPathCA) =~ '"ca":"fixed:md5:' ]] +nix path-info --json $outPathCA | jq -e '.[] | .ca | startswith("fixed:md5:")' # Content-addressed paths don't need signatures, so they verify # regardless of --sigs-needed. @@ -73,15 +73,15 @@ nix copy --to file://$cacheDir $outPath2 # Verify that signatures got copied. info=$(nix path-info --store file://$cacheDir --json $outPath2) -(! [[ $info =~ '"ultimate":true' ]]) -[[ $info =~ 'cache1.example.org' ]] -(! [[ $info =~ 'cache2.example.org' ]]) +echo $info | jq -e '.[] | .ultimate == false' +echo $info | jq -e '.[] | .signatures.[] | select(startswith("cache1.example.org"))' +echo $info | expect 4 jq -e '.[] | .signatures.[] | select(startswith("cache2.example.org"))' # Verify that adding a signature to a path in a binary cache works. nix store sign --store file://$cacheDir --key-file $TEST_ROOT/sk2 $outPath2 info=$(nix path-info --store file://$cacheDir --json $outPath2) -[[ $info =~ 'cache1.example.org' ]] -[[ $info =~ 'cache2.example.org' ]] +echo $info | jq -e '.[] | .signatures.[] | select(startswith("cache1.example.org"))' +echo $info | jq -e '.[] | .signatures.[] | select(startswith("cache2.example.org"))' # Copying to a diverted store should fail due to a lack of signatures by trusted keys. chmod -R u+w $TEST_ROOT/store0 || true diff --git a/tests/unit/libstore/data/path-info/empty_impure.json b/tests/unit/libstore/data/path-info/empty_impure.json new file mode 100644 index 000000000..be982dcef --- /dev/null +++ b/tests/unit/libstore/data/path-info/empty_impure.json @@ -0,0 +1,10 @@ +{ + "ca": null, + "deriver": null, + "narHash": "sha256-FePFYIlMuycIXPZbWi7LGEiMmZSX9FMbaQenWBzm1Sc=", + "narSize": 0, + "references": [], + "registrationTime": null, + "signatures": [], + "ultimate": false +} diff --git a/tests/unit/libstore/data/path-info/empty_pure.json b/tests/unit/libstore/data/path-info/empty_pure.json new file mode 100644 index 000000000..10d9f508a --- /dev/null +++ b/tests/unit/libstore/data/path-info/empty_pure.json @@ -0,0 +1,6 @@ +{ + "ca": null, + "narHash": "sha256-FePFYIlMuycIXPZbWi7LGEiMmZSX9FMbaQenWBzm1Sc=", + "narSize": 0, + "references": [] +} diff --git a/tests/unit/libstore/path-info.cc b/tests/unit/libstore/path-info.cc index 80d6fcfed..06c662b74 100644 --- a/tests/unit/libstore/path-info.cc +++ b/tests/unit/libstore/path-info.cc @@ -19,7 +19,15 @@ class PathInfoTest : public CharacterizationTest, public LibStoreTest } }; -static UnkeyedValidPathInfo makePathInfo(const Store & store, bool includeImpureInfo) { +static UnkeyedValidPathInfo makeEmpty() +{ + return { + Hash::parseSRI("sha256-FePFYIlMuycIXPZbWi7LGEiMmZSX9FMbaQenWBzm1Sc="), + }; +} + +static UnkeyedValidPathInfo makeFull(const Store & store, bool includeImpureInfo) +{ UnkeyedValidPathInfo info = ValidPathInfo { store, "foo", @@ -50,22 +58,21 @@ static UnkeyedValidPathInfo makePathInfo(const Store & store, bool includeImpure return info; } -#define JSON_TEST(STEM, PURE) \ +#define JSON_TEST(STEM, OBJ, PURE) \ TEST_F(PathInfoTest, PathInfo_ ## STEM ## _from_json) { \ readTest(#STEM, [&](const auto & encoded_) { \ auto encoded = json::parse(encoded_); \ UnkeyedValidPathInfo got = UnkeyedValidPathInfo::fromJSON( \ *store, \ encoded); \ - auto expected = makePathInfo(*store, PURE); \ + auto expected = OBJ; \ ASSERT_EQ(got, expected); \ }); \ } \ \ TEST_F(PathInfoTest, PathInfo_ ## STEM ## _to_json) { \ writeTest(#STEM, [&]() -> json { \ - return makePathInfo(*store, PURE) \ - .toJSON(*store, PURE, HashFormat::SRI); \ + return OBJ.toJSON(*store, PURE, HashFormat::SRI); \ }, [](const auto & file) { \ return json::parse(readFile(file)); \ }, [](const auto & file, const auto & got) { \ @@ -73,7 +80,10 @@ static UnkeyedValidPathInfo makePathInfo(const Store & store, bool includeImpure }); \ } -JSON_TEST(pure, false) -JSON_TEST(impure, true) +JSON_TEST(empty_pure, makeEmpty(), false) +JSON_TEST(empty_impure, makeEmpty(), true) + +JSON_TEST(pure, makeFull(*store, false), false) +JSON_TEST(impure, makeFull(*store, true), true) } diff --git a/tests/unit/libutil/json-utils.cc b/tests/unit/libutil/json-utils.cc index c9370a74b..704a4acb0 100644 --- a/tests/unit/libutil/json-utils.cc +++ b/tests/unit/libutil/json-utils.cc @@ -175,13 +175,16 @@ TEST(optionalValueAt, empty) { TEST(getNullable, null) { auto json = R"(null)"_json; - ASSERT_EQ(getNullable(json), std::nullopt); + ASSERT_EQ(getNullable(json), nullptr); } TEST(getNullable, empty) { auto json = R"({})"_json; - ASSERT_EQ(getNullable(json), std::optional { R"({})"_json }); + auto * p = getNullable(json); + + ASSERT_NE(p, nullptr); + ASSERT_EQ(*p, R"({})"_json); } } /* namespace nix */