1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-28 01:11:15 +02:00

Merge branch 'parser-improvements' of https://github.com/pennae/nix

This commit is contained in:
Eelco Dolstra 2022-02-02 12:45:44 +01:00
commit 17e3f353df
8 changed files with 138 additions and 90 deletions

View file

@ -16,6 +16,8 @@
#ifndef BISON_HEADER
#define BISON_HEADER
#include <variant>
#include "util.hh"
#include "nixexpr.hh"
@ -39,8 +41,22 @@ namespace nix {
{ };
};
struct ParserFormals {
std::vector<Formal> formals;
bool ellipsis = false;
};
}
// using C a struct allows us to avoid having to define the special
// members that using string_view here would implicitly delete.
struct StringToken {
const char * p;
size_t l;
bool hasIndentation;
operator std::string_view() const { return {p, l}; }
};
#define YY_DECL int yylex \
(YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data)
@ -140,21 +156,46 @@ static void addAttr(ExprAttrs * attrs, AttrPath & attrPath,
}
static void addFormal(const Pos & pos, Formals * formals, const Formal & formal)
static Formals * toFormals(ParseData & data, ParserFormals * formals,
Pos pos = noPos, Symbol arg = {})
{
if (!formals->argNames.insert(formal.name).second)
std::sort(formals->formals.begin(), formals->formals.end(),
[] (const auto & a, const auto & b) {
return std::tie(a.name, a.pos) < std::tie(b.name, b.pos);
});
std::optional<std::pair<Symbol, Pos>> duplicate;
for (size_t i = 0; i + 1 < formals->formals.size(); i++) {
if (formals->formals[i].name != formals->formals[i + 1].name)
continue;
std::pair thisDup{formals->formals[i].name, formals->formals[i + 1].pos};
duplicate = std::min(thisDup, duplicate.value_or(thisDup));
}
if (duplicate)
throw ParseError({
.msg = hintfmt("duplicate formal function argument '%1%'",
formal.name),
.msg = hintfmt("duplicate formal function argument '%1%'", duplicate->first),
.errPos = duplicate->second
});
Formals result;
result.ellipsis = formals->ellipsis;
result.formals = std::move(formals->formals);
if (arg.set() && result.has(arg))
throw ParseError({
.msg = hintfmt("duplicate formal function argument '%1%'", arg),
.errPos = pos
});
formals->formals.push_front(formal);
delete formals;
return new Formals(std::move(result));
}
static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<std::pair<Pos, Expr *> > & es)
static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols,
vector<std::pair<Pos, std::variant<Expr *, StringToken> > > & es)
{
if (es.empty()) return new ExprString(symbols.create(""));
if (es.empty()) return new ExprString("");
/* Figure out the minimum indentation. Note that by design
whitespace-only final lines are not taken into account. (So
@ -163,20 +204,20 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
size_t minIndent = 1000000;
size_t curIndent = 0;
for (auto & [i_pos, i] : es) {
ExprIndStr * e = dynamic_cast<ExprIndStr *>(i);
if (!e) {
/* Anti-quotations end the current start-of-line whitespace. */
auto * str = std::get_if<StringToken>(&i);
if (!str || !str->hasIndentation) {
/* Anti-quotations and escaped characters end the current start-of-line whitespace. */
if (atStartOfLine) {
atStartOfLine = false;
if (curIndent < minIndent) minIndent = curIndent;
}
continue;
}
for (size_t j = 0; j < e->s.size(); ++j) {
for (size_t j = 0; j < str->l; ++j) {
if (atStartOfLine) {
if (e->s[j] == ' ')
if (str->p[j] == ' ')
curIndent++;
else if (e->s[j] == '\n') {
else if (str->p[j] == '\n') {
/* Empty line, doesn't influence minimum
indentation. */
curIndent = 0;
@ -184,7 +225,7 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
atStartOfLine = false;
if (curIndent < minIndent) minIndent = curIndent;
}
} else if (e->s[j] == '\n') {
} else if (str->p[j] == '\n') {
atStartOfLine = true;
curIndent = 0;
}
@ -196,33 +237,31 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
atStartOfLine = true;
size_t curDropped = 0;
size_t n = es.size();
for (vector<std::pair<Pos, Expr *> >::iterator i = es.begin(); i != es.end(); ++i, --n) {
ExprIndStr * e = dynamic_cast<ExprIndStr *>(i->second);
if (!e) {
atStartOfLine = false;
curDropped = 0;
es2->push_back(*i);
continue;
}
auto i = es.begin();
const auto trimExpr = [&] (Expr * e) {
atStartOfLine = false;
curDropped = 0;
es2->emplace_back(i->first, e);
};
const auto trimString = [&] (const StringToken & t) {
string s2;
for (size_t j = 0; j < e->s.size(); ++j) {
for (size_t j = 0; j < t.l; ++j) {
if (atStartOfLine) {
if (e->s[j] == ' ') {
if (t.p[j] == ' ') {
if (curDropped++ >= minIndent)
s2 += e->s[j];
s2 += t.p[j];
}
else if (e->s[j] == '\n') {
else if (t.p[j] == '\n') {
curDropped = 0;
s2 += e->s[j];
s2 += t.p[j];
} else {
atStartOfLine = false;
curDropped = 0;
s2 += e->s[j];
s2 += t.p[j];
}
} else {
s2 += e->s[j];
if (e->s[j] == '\n') atStartOfLine = true;
s2 += t.p[j];
if (t.p[j] == '\n') atStartOfLine = true;
}
}
@ -234,7 +273,10 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
s2 = string(s2, 0, p + 1);
}
es2->emplace_back(i->first, new ExprString(symbols.create(s2)));
es2->emplace_back(i->first, new ExprString(s2));
};
for (; i != es.end(); ++i, --n) {
std::visit(overloaded { trimExpr, trimString }, i->second);
}
/* If this is a single string, then don't do a concatenation. */
@ -269,22 +311,17 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
nix::Expr * e;
nix::ExprList * list;
nix::ExprAttrs * attrs;
nix::Formals * formals;
nix::ParserFormals * formals;
nix::Formal * formal;
nix::NixInt n;
nix::NixFloat nf;
// using C a struct allows us to avoid having to define the special
// members that using string_view here would implicitly delete.
struct StringToken {
const char * p;
size_t l;
operator std::string_view() const { return {p, l}; }
};
StringToken id; // !!! -> Symbol
StringToken path;
StringToken uri;
StringToken str;
std::vector<nix::AttrName> * attrNames;
std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
std::vector<std::pair<nix::Pos, std::variant<nix::Expr *, StringToken> > > * ind_string_parts;
}
%type <e> start expr expr_function expr_if expr_op
@ -294,11 +331,12 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
%type <formals> formals
%type <formal> formal
%type <attrNames> attrs attrpath
%type <string_parts> string_parts_interpolated ind_string_parts
%type <string_parts> string_parts_interpolated
%type <ind_string_parts> ind_string_parts
%type <e> path_start string_parts string_attr
%type <id> attr
%token <id> ID ATTRPATH
%token <e> STR IND_STR
%token <str> STR IND_STR
%token <n> INT
%token <nf> FLOAT
%token <path> PATH HPATH SPATH PATH_END
@ -331,11 +369,17 @@ expr_function
: ID ':' expr_function
{ $$ = new ExprLambda(CUR_POS, data->symbols.create($1), 0, $3); }
| '{' formals '}' ':' expr_function
{ $$ = new ExprLambda(CUR_POS, data->symbols.create(""), $2, $5); }
{ $$ = new ExprLambda(CUR_POS, data->symbols.create(""), toFormals(*data, $2), $5); }
| '{' formals '}' '@' ID ':' expr_function
{ $$ = new ExprLambda(CUR_POS, data->symbols.create($5), $2, $7); }
{
Symbol arg = data->symbols.create($5);
$$ = new ExprLambda(CUR_POS, arg, toFormals(*data, $2, CUR_POS, arg), $7);
}
| ID '@' '{' formals '}' ':' expr_function
{ $$ = new ExprLambda(CUR_POS, data->symbols.create($1), $4, $7); }
{
Symbol arg = data->symbols.create($1);
$$ = new ExprLambda(CUR_POS, arg, toFormals(*data, $4, CUR_POS, arg), $7);
}
| ASSERT expr ';' expr_function
{ $$ = new ExprAssert(CUR_POS, $2, $4); }
| WITH expr ';' expr_function
@ -426,7 +470,7 @@ expr_simple
$$ = new ExprCall(CUR_POS,
new ExprVar(data->symbols.create("__findFile")),
{new ExprVar(data->symbols.create("__nixPath")),
new ExprString(data->symbols.create(path))});
new ExprString(path)});
}
| URI {
static bool noURLLiterals = settings.isExperimentalFeatureEnabled(Xp::NoUrlLiterals);
@ -435,7 +479,7 @@ expr_simple
.msg = hintfmt("URL literals are disabled"),
.errPos = CUR_POS
});
$$ = new ExprString(data->symbols.create($1));
$$ = new ExprString(string($1));
}
| '(' expr ')' { $$ = $2; }
/* Let expressions `let {..., body = ...}' are just desugared
@ -450,18 +494,19 @@ expr_simple
;
string_parts
: STR
: STR { $$ = new ExprString(string($1)); }
| string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); }
| { $$ = new ExprString(data->symbols.create("")); }
| { $$ = new ExprString(""); }
;
string_parts_interpolated
: string_parts_interpolated STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
: string_parts_interpolated STR
{ $$ = $1; $1->emplace_back(makeCurPos(@2, data), new ExprString(string($2))); }
| string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
| DOLLAR_CURLY expr '}' { $$ = new vector<std::pair<Pos, Expr *> >; $$->emplace_back(makeCurPos(@1, data), $2); }
| STR DOLLAR_CURLY expr '}' {
$$ = new vector<std::pair<Pos, Expr *> >;
$$->emplace_back(makeCurPos(@1, data), $1);
$$->emplace_back(makeCurPos(@1, data), new ExprString(string($1)));
$$->emplace_back(makeCurPos(@2, data), $3);
}
;
@ -483,7 +528,7 @@ path_start
ind_string_parts
: ind_string_parts IND_STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
| ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
| { $$ = new vector<std::pair<Pos, Expr *> >; }
| { $$ = new vector<std::pair<Pos, std::variant<Expr *, StringToken> > >; }
;
binds
@ -515,7 +560,7 @@ attrs
{ $$ = $1;
ExprString * str = dynamic_cast<ExprString *>($2);
if (str) {
$$->push_back(AttrName(str->s));
$$->push_back(AttrName(data->symbols.create(str->s)));
delete str;
} else
throw ParseError({
@ -532,7 +577,7 @@ attrpath
{ $$ = $1;
ExprString * str = dynamic_cast<ExprString *>($3);
if (str) {
$$->push_back(AttrName(str->s));
$$->push_back(AttrName(data->symbols.create(str->s)));
delete str;
} else
$$->push_back(AttrName($3));
@ -542,7 +587,7 @@ attrpath
{ $$ = new vector<AttrName>;
ExprString *str = dynamic_cast<ExprString *>($1);
if (str) {
$$->push_back(AttrName(str->s));
$$->push_back(AttrName(data->symbols.create(str->s)));
delete str;
} else
$$->push_back(AttrName($1));
@ -566,13 +611,13 @@ expr_list
formals
: formal ',' formals
{ $$ = $3; addFormal(CUR_POS, $$, *$1); }
{ $$ = $3; $$->formals.push_back(*$1); }
| formal
{ $$ = new Formals; addFormal(CUR_POS, $$, *$1); $$->ellipsis = false; }
{ $$ = new ParserFormals; $$->formals.push_back(*$1); $$->ellipsis = false; }
|
{ $$ = new Formals; $$->ellipsis = false; }
{ $$ = new ParserFormals; $$->ellipsis = false; }
| ELLIPSIS
{ $$ = new Formals; $$->ellipsis = true; }
{ $$ = new ParserFormals; $$->ellipsis = true; }
;
formal