From a44e9dd1ea664a9616d7dabb548095bb3f375f7e Mon Sep 17 00:00:00 2001 From: Philipp Otterbein Date: Sat, 4 Jan 2025 16:14:06 +0100 Subject: [PATCH] correctly parse strings with null bytes and throw error --- src/libexpr/eval.cc | 8 ++++-- src/libexpr/lexer.l | 25 +++++++++++------- src/libexpr/value.hh | 2 +- .../lang/eval-fail-string-nul-1.err.exp | Bin 0 -> 209 bytes .../lang/eval-fail-string-nul-1.nix | Bin 0 -> 10 bytes .../lang/eval-fail-string-nul-2.err.exp | Bin 0 -> 256 bytes .../lang/eval-fail-string-nul-2.nix | Bin 0 -> 22 bytes 7 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 tests/functional/lang/eval-fail-string-nul-1.err.exp create mode 100644 tests/functional/lang/eval-fail-string-nul-1.nix create mode 100644 tests/functional/lang/eval-fail-string-nul-2.err.exp create mode 100644 tests/functional/lang/eval-fail-string-nul-2.nix diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index fe5f05ab8..21dd5a294 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -3185,12 +3185,16 @@ std::ostream & operator << (std::ostream & str, const ExternalValueBase & v) { return v.print(str); } -void forceNoNullByte(std::string_view s) +void forceNoNullByte(std::string_view s, std::function pos) { if (s.find('\0') != s.npos) { using namespace std::string_view_literals; auto str = replaceStrings(std::string(s), "\0"sv, "␀"sv); - throw Error("input string '%s' cannot be represented as Nix string because it contains null bytes", str); + Error error("input string '%s' cannot be represented as Nix string because it contains null bytes", str); + if (pos) { + error.atPos(pos()); + } + throw error; } } diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index a7e44cb72..067f86e01 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -41,16 +41,18 @@ namespace nix { // we make use of the fact that the parser receives a private copy of the input // string and can munge around in it. -static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length) +// getting the position is expensive and thus it is implemented lazily. +static StringToken unescapeStr(char * const s, size_t length, std::function && pos) { - char * result = s; + bool noNullByte = true; char * t = s; - char c; // the input string is terminated with *two* NULs, so we can safely take // *one* character after the one being checked against. - while ((c = *s++)) { + for (size_t i = 0; i < length; t++) { + char c = s[i++]; + noNullByte &= c != '\0'; if (c == '\\') { - c = *s++; + c = s[i++]; if (c == 'n') *t = '\n'; else if (c == 'r') *t = '\r'; else if (c == 't') *t = '\t'; @@ -59,12 +61,14 @@ static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length) else if (c == '\r') { /* Normalise CR and CR/LF into LF. */ *t = '\n'; - if (*s == '\n') s++; /* cr/lf */ + if (s[i] == '\n') i++; /* cr/lf */ } else *t = c; - t++; } - return {result, size_t(t - result)}; + if (!noNullByte) { + forceNoNullByte({s, size_t(t - s)}, std::move(pos)); + } + return {s, size_t(t - s)}; } static void requireExperimentalFeature(const ExperimentalFeature & feature, const Pos & pos) @@ -175,7 +179,7 @@ or { return OR_KW; } /* It is impossible to match strings ending with '$' with one regex because trailing contexts are only valid at the end of a rule. (A sane but undocumented limitation.) */ - yylval->str = unescapeStr(state->symbols, yytext, yyleng); + yylval->str = unescapeStr(yytext, yyleng, [&]() { return state->positions[CUR_POS]; }); return STR; } \$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } @@ -191,6 +195,7 @@ or { return OR_KW; } \'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; } ([^\$\']|\$[^\{\']|\'[^\'\$])+ { yylval->str = {yytext, (size_t) yyleng, true}; + forceNoNullByte(yylval->str, [&]() { return state->positions[CUR_POS]; }); return IND_STR; } \'\'\$ | @@ -203,7 +208,7 @@ or { return OR_KW; } return IND_STR; } \'\'\\{ANY} { - yylval->str = unescapeStr(state->symbols, yytext + 2, yyleng - 2); + yylval->str = unescapeStr(yytext + 2, yyleng - 2, [&]() { return state->positions[CUR_POS]; }); return IND_STR; } \$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } diff --git a/src/libexpr/value.hh b/src/libexpr/value.hh index 88fcae986..8925693e3 100644 --- a/src/libexpr/value.hh +++ b/src/libexpr/value.hh @@ -510,6 +510,6 @@ typedef std::shared_ptr RootValue; RootValue allocRootValue(Value * v); -void forceNoNullByte(std::string_view s); +void forceNoNullByte(std::string_view s, std::function = nullptr); } diff --git a/tests/functional/lang/eval-fail-string-nul-1.err.exp b/tests/functional/lang/eval-fail-string-nul-1.err.exp new file mode 100644 index 0000000000000000000000000000000000000000..2dfbea0635c8c2ea159249879c4d89523c2a7366 GIT binary patch literal 209 zcmY+8F%E+;6hk|63X6p)MKIDq_H+{VbbPipj#u zG%KVo2L&^b4`$yq8*iGc_{BE1wMXaQY*v)pqs#WEJOx;R`^jn7;s?AsKQI6Q literal 0 HcmV?d00001 diff --git a/tests/functional/lang/eval-fail-string-nul-1.nix b/tests/functional/lang/eval-fail-string-nul-1.nix new file mode 100644 index 0000000000000000000000000000000000000000..3689409171139a7d310eac52df0be4315b6ba786 GIT binary patch literal 10 RcmY#N%g<*>N-R?10ss()0>}UW literal 0 HcmV?d00001 diff --git a/tests/functional/lang/eval-fail-string-nul-2.err.exp b/tests/functional/lang/eval-fail-string-nul-2.err.exp new file mode 100644 index 0000000000000000000000000000000000000000..b1cae5325d90a2c05f7304db89bca0826c2f98f0 GIT binary patch literal 256 zcmY+9K?=e!5Jg>kioeS$meySk;Sq#MJ1v1QDM_joy6^~|(UaIJK{~rX^XBtE#~7l4 zlX1#OIXW4jfIfsH%Di9CzpxaGP-sacWLQSzF>+$b+<_7t7IF*eHqEjgb_F(Y!F5NS IH|lWw0QY`GT>t<8 literal 0 HcmV?d00001 diff --git a/tests/functional/lang/eval-fail-string-nul-2.nix b/tests/functional/lang/eval-fail-string-nul-2.nix new file mode 100644 index 0000000000000000000000000000000000000000..fd6b3258a5e414e6e4812113aed4ece422aa76bc GIT binary patch literal 22 acmY#a=TcBe%g+ar3_zTeSj45S&IJH2l>`U? literal 0 HcmV?d00001