From 07cd7197e12f37e5e55fd19d18ad2bb0f306958d Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 6 May 2024 17:06:46 +0100 Subject: [PATCH] Uniformize destruct and pattern matching logic (#1907) * Uniformize destruct and pattern matching logic When patterns were first introduced, only let-destructuring existed, and match expressions weren't a thing. Rather than to manually generate code that would check that a pattern does match the destructured value, we piggy-backed on contracts instead, which was easier and provided better error messages. Up to now, this was still how destructuring worked: a contract is elaborated from the pattern and applied to the matched value, and should report any error if the value doesn't match. Then, destructuring is desugared to a series of let-bindings that assume that the destucture value has the right shape. The generated code is thus quite simple, as it doesn't have any error handling to do. When pattern matching landed, we kept the old destructuring infrastructure, because it was there and worked well. However, there is duplication: the compilation of match expressions does a work quite similar to destructuring. What's more, we're at risk of having a diverging semantics. Some questions aren't trivial to answer for destructuring, in particular with respect to lazyness: should `let _ = x in null` force `x`? What about `let 'Foo _ = x in null`, or `let 'Foo 5 = x in null`? After some discussion, we decided to give destructuring a simple semantics in term of pattern matching: `let = in ` should be strictly equivalent to ` |> match { => }`. Indeed, `match` has a clear semantics around forcing values in lazy languages. This also agrees with intuition, or at least gives a reasonable semantics for most cases. This commit implements this change in practice, by getting rid of the ad-hoc desugaring of destructuring and rewriting let-destructuring to actual pattern matching instead. * Update core/src/term/pattern/compile.rs Co-authored-by: jneem --------- Co-authored-by: jneem --- ..._stderr_destructuring_closed_fail.ncl.snap | 18 +- core/src/parser/grammar.lalrpop | 7 +- core/src/parser/uniterm.rs | 9 +- core/src/parser/utils.rs | 2 +- core/src/position.rs | 40 +++- core/src/term/pattern/compile.rs | 8 +- core/src/term/pattern/mod.rs | 130 +--------- core/src/transform/desugar_destructuring.rs | 223 ++---------------- .../adt_enum_tag_tag_mismatch.ncl | 2 +- ...rg_contract_fail.ncl => adt_extra_arg.ncl} | 2 +- ..._contract_fail.ncl => adt_missing_arg.ncl} | 2 +- ...ag_contract_fail.ncl => adt_wrong_tag.ncl} | 2 +- ...e_contract_fail.ncl => adt_wrong_type.ncl} | 2 +- .../inputs/destructuring/closed_fail.ncl | 2 +- .../destructuring/constant_bool_fail.ncl | 2 +- .../destructuring/constant_null_fail.ncl | 2 +- .../destructuring/constant_number_fail.ncl | 2 +- .../destructuring/constant_string_fail.ncl | 2 +- .../destructuring/nested_duplicated_ident.ncl | 4 - core/tests/integration/main.rs | 7 + doc/manual/syntax.md | 2 +- lsp/nls/src/requests/symbols.rs | 3 +- 22 files changed, 97 insertions(+), 376 deletions(-) rename core/tests/integration/inputs/destructuring/{adt_extra_arg_contract_fail.ncl => adt_extra_arg.ncl} (61%) rename core/tests/integration/inputs/destructuring/{adt_missing_arg_contract_fail.ncl => adt_missing_arg.ncl} (60%) rename core/tests/integration/inputs/destructuring/{adt_wrong_tag_contract_fail.ncl => adt_wrong_tag.ncl} (61%) rename core/tests/integration/inputs/destructuring/{adt_wrong_type_contract_fail.ncl => adt_wrong_type.ncl} (62%) delete mode 100644 core/tests/integration/inputs/destructuring/nested_duplicated_ident.ncl diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_destructuring_closed_fail.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_destructuring_closed_fail.ncl.snap index 0b7eda263a..eb3c6adbf7 100644 --- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_destructuring_closed_fail.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_destructuring_closed_fail.ncl.snap @@ -2,17 +2,11 @@ source: cli/tests/snapshot/main.rs expression: err --- -error: contract broken by a value - extra field `b` - ┌─ [INPUTS_PATH]/errors/destructuring_closed_fail.ncl:3:11 +error: unmatched pattern + ┌─ [INPUTS_PATH]/errors/destructuring_closed_fail.ncl:3:5 │ 3 │ let {a} = {a=1, b=2} - │ --- ^^^^^^^^^^ applied to this expression - │ │ - │ expected type - │ - = Have you misspelled a field? - = The record contract might also be too strict. By default, record contracts exclude any field which is not listed. - Append `, ..` at the end of the record contract, as in `{some_field | SomeContract, ..}`, to make it accept extra fields. - - + │ ^^^^^^^^^^^^^^^^ + │ │ │ + │ │ this value doesn't match any branch + │ in this match expression diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 5155a1b409..9368a8931f 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -404,10 +404,11 @@ RecordField: FieldDef = { ?> in ... -//! ``` -//! -//! will be transformed to: -//! -//! ```text -//! let f = fun x %unnamed% => ( -//! let {a, b=c} = x in -//! let {d ? 2, ..w} = %unnamed% in -//! -//! ) in ... -//! ``` -use crate::identifier::LocIdent; -use crate::match_sharedterm; -use crate::mk_app; -use crate::term::pattern::*; -use crate::term::{ - make as mk_term, BinaryOp::DynRemove, LetAttrs, RecordOpKind, RichTerm, Term, TypeAnnotation, - UnaryOp, +//! A destructuring function `fun => ` is rewritten to +//! `fun x => let = x in `, and then the inner let is recursively desugared. +use crate::{ + identifier::LocIdent, + match_sharedterm, + term::{pattern::*, MatchData, RichTerm, Term}, }; /// Entry point of the destructuring desugaring transformation. @@ -79,165 +45,22 @@ pub fn desugar_fun(mut pat: Pattern, body: RichTerm) -> Term { ) } -/// Elaborate a contract from the pattern if it is a record pattern and apply it to the value before -/// actually destructuring it. Then convert the let pattern to a sequence of normal let-bindings. +/// Desugar a destructuring let-binding. +/// +/// A let-binding `let = bound in body` is desugared to ` |> match { => body }`. pub fn desugar_let(pat: Pattern, bound: RichTerm, body: RichTerm) -> Term { - let contract = pat.elaborate_contract(); + // the position of the match expression is used during error reporting, so we try to provide a + // sensible one. + let match_expr_pos = pat.pos.fuse(bound.pos); - let annotated = { - let t_pos = bound.pos; + // `(match { => }) ` + Term::App( RichTerm::new( - Term::Annotated( - TypeAnnotation { - contracts: contract.into_iter().collect(), - ..Default::default() - }, - bound, - ), - t_pos, - ) - }; - - pat.desugar(annotated, body) -} - -trait Desugar { - /// Elaborate a destructuring let-binding matching a pattern `self` against a value `destr` to - /// a sequence of normal let-bindings and primitive operations. - /// - /// This function ignores the user-supplied contracts of the pattern and doesn't generate a - /// safety contract to check that the value has the expected shape: this guarding contract must - /// be introduced separately and prior to calling to [Desugar::desugar]. In practice, this - /// contract is introduced by [desugar_let]. [Desugar::desugar] is only concerned with - /// destructuring the value and binding its parts to appropriate variables. - fn desugar(self, destr: RichTerm, body: RichTerm) -> Term; -} - -impl Desugar for Pattern { - fn desugar(self, destr: RichTerm, body: RichTerm) -> Term { - // If the pattern is aliased, `x @ ` matching `destr`, we introduce a heading - // let-binding `let x = destruct in `, where `` is the desugaring - // of `` matching `x` followed by the original `body`. - if let Some(alias) = self.alias { - let pos = body.pos; - let inner = RichTerm::new( - self.data - .desugar(RichTerm::new(Term::Var(alias), alias.pos), body), - pos, - ); - - Term::Let(alias, destr, inner, LetAttrs::default()) - } else { - self.data.desugar(destr, body) - } - } -} - -impl Desugar for PatternData { - fn desugar(self, destr: RichTerm, body: RichTerm) -> Term { - match self { - PatternData::Wildcard => body.into(), - // If the pattern is an unconstrained identifier, we just bind it to the value. - PatternData::Any(id) => Term::Let(id, destr, body, LetAttrs::default()), - PatternData::Record(pat) => pat.desugar(destr, body), - PatternData::Enum(pat) => pat.desugar(destr, body), - PatternData::Constant(pat) => pat.desugar(destr, body), - } - } -} - -impl Desugar for ConstantPattern { - fn desugar(self, destr: RichTerm, body: RichTerm) -> Term { - // See [^seq-patterns] - mk_app!(mk_term::op1(UnaryOp::Seq(), destr), body).into() - } -} - -impl Desugar for FieldPattern { - // For a field pattern, we assume that the `destr` argument is the whole record being - // destructured. We extract the field from `destr`, or use the default value is the field isn't - // present, and desugar the rest of the pattern against `destr.matched_id`. - fn desugar(self, destr: RichTerm, body: RichTerm) -> Term { - let destr = if let Some(default) = self.default { - // We put `destr` inside a let-binding, because with_default_value uses it several - // time. - let record_id = LocIdent::fresh(); - let with_default = compile::with_default_value(record_id, self.matched_id, default); - mk_term::let_in(record_id, destr, with_default) - } else { - destr - }; - - let extracted = mk_term::op1(UnaryOp::StaticAccess(self.matched_id), destr); - self.pattern.desugar(extracted, body) - } -} - -impl Desugar for RecordPattern { - fn desugar(self, destr: RichTerm, body: RichTerm) -> Term { - let pos = body.pos; - // The body is the rest of the term being transformed, which contains the code that uses - // the bindings introduced by the pattern. After having extracted all fields from the - // value, we potentially need to capture the rest in a variable for patterns with a - // capturing tail. For example, `let {foo, bar, ..rest} = destr in body` should be - // desugared as `let foo = destr.foo in let bar = destr.bar in let rest = in body` - // (where `` is some expression removing `foo` and `bar` from `destr`). - // - // Because body is the continuation, we need to first append the rest capture to the - // original body before passing it to the [Desugar::desugar] of each individual field. - let body_with_rest = bind_rest(&self, destr.clone(), body); - - self.patterns - .into_iter() - .fold(body_with_rest, |acc, field_pat| { - RichTerm::new(field_pat.desugar(destr.clone(), acc), pos) - }) - .term - .into_owned() - } -} - -impl Desugar for EnumPattern { - fn desugar(self, destr: RichTerm, body: RichTerm) -> Term { - if let Some(arg_pat) = self.pattern { - let extracted = mk_term::op1(UnaryOp::EnumUnwrapVariant(), destr.clone()); - arg_pat.desugar(extracted, body) - } - // [^seq-patterns]: If the pattern doesn't bind any argument, it's transparent, and we just - // proceed with the body. However, because of lazyness, the associated contract will never - // be checked, because body doesn't depend on `destr`. - // - // For patterns that bind variables, it's reasonable to keep them lazy: that is, in `let - // 'Foo x = destr in body`, `destr` is checked to be an enum only when `x` is evaluated. - // However, for patterns that don't bind anything, the pattern is useless. Arguably, users - // would expect that writing `let 'Foo = x in body` would check that `x` is indeed equal to - // `'Foo`. We thus introduce an artificial dependency: it's exactly the role of `seq` - else { - mk_app!(mk_term::op1(UnaryOp::Seq(), destr), body).into() - } - } -} - -fn bind_rest(pat: &RecordPattern, destr: RichTerm, body: RichTerm) -> RichTerm { - let capture_var = match pat { - RecordPattern { - tail: RecordPatternTail::Capture(x), - .. - } => *x, - _ => return body, - }; - - Term::Let( - capture_var, - pat.patterns.iter().fold(destr, |acc, field_pat| { - mk_term::op2( - DynRemove(RecordOpKind::default()), - Term::Str(field_pat.matched_id.ident().into()), - acc, - ) - }), - body, - Default::default(), + Term::Match(MatchData { + branches: vec![(pat, body)], + }), + match_expr_pos, + ), + bound, ) - .into() } diff --git a/core/tests/integration/inputs/destructuring/adt_enum_tag_tag_mismatch.ncl b/core/tests/integration/inputs/destructuring/adt_enum_tag_tag_mismatch.ncl index de58d11e1f..09ab21593b 100644 --- a/core/tests/integration/inputs/destructuring/adt_enum_tag_tag_mismatch.ncl +++ b/core/tests/integration/inputs/destructuring/adt_enum_tag_tag_mismatch.ncl @@ -1,6 +1,6 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveEnumMatch' let 'Foo = 'Bar in true diff --git a/core/tests/integration/inputs/destructuring/adt_extra_arg_contract_fail.ncl b/core/tests/integration/inputs/destructuring/adt_extra_arg.ncl similarity index 61% rename from core/tests/integration/inputs/destructuring/adt_extra_arg_contract_fail.ncl rename to core/tests/integration/inputs/destructuring/adt_extra_arg.ncl index 6ab8dc4886..facfe98c0e 100644 --- a/core/tests/integration/inputs/destructuring/adt_extra_arg_contract_fail.ncl +++ b/core/tests/integration/inputs/destructuring/adt_extra_arg.ncl @@ -1,6 +1,6 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NAryPrimopTypeError' let 'Foo = 'Foo 5 in true diff --git a/core/tests/integration/inputs/destructuring/adt_missing_arg_contract_fail.ncl b/core/tests/integration/inputs/destructuring/adt_missing_arg.ncl similarity index 60% rename from core/tests/integration/inputs/destructuring/adt_missing_arg_contract_fail.ncl rename to core/tests/integration/inputs/destructuring/adt_missing_arg.ncl index 2202b6b671..8e844917fd 100644 --- a/core/tests/integration/inputs/destructuring/adt_missing_arg_contract_fail.ncl +++ b/core/tests/integration/inputs/destructuring/adt_missing_arg.ncl @@ -1,6 +1,6 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveMatch' let 'Foo x = 'Foo in x diff --git a/core/tests/integration/inputs/destructuring/adt_wrong_tag_contract_fail.ncl b/core/tests/integration/inputs/destructuring/adt_wrong_tag.ncl similarity index 61% rename from core/tests/integration/inputs/destructuring/adt_wrong_tag_contract_fail.ncl rename to core/tests/integration/inputs/destructuring/adt_wrong_tag.ncl index 10d194d5ad..b7e49ae75e 100644 --- a/core/tests/integration/inputs/destructuring/adt_wrong_tag_contract_fail.ncl +++ b/core/tests/integration/inputs/destructuring/adt_wrong_tag.ncl @@ -1,6 +1,6 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveMatch' let 'Foo x = 'Bar 5 in x diff --git a/core/tests/integration/inputs/destructuring/adt_wrong_type_contract_fail.ncl b/core/tests/integration/inputs/destructuring/adt_wrong_type.ncl similarity index 62% rename from core/tests/integration/inputs/destructuring/adt_wrong_type_contract_fail.ncl rename to core/tests/integration/inputs/destructuring/adt_wrong_type.ncl index fc7ad158f0..8518877740 100644 --- a/core/tests/integration/inputs/destructuring/adt_wrong_type_contract_fail.ncl +++ b/core/tests/integration/inputs/destructuring/adt_wrong_type.ncl @@ -1,6 +1,6 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveMatch' let 'Foo x = {bar = 1} in x diff --git a/core/tests/integration/inputs/destructuring/closed_fail.ncl b/core/tests/integration/inputs/destructuring/closed_fail.ncl index a934a5bca5..b04e2a387e 100644 --- a/core/tests/integration/inputs/destructuring/closed_fail.ncl +++ b/core/tests/integration/inputs/destructuring/closed_fail.ncl @@ -1,6 +1,6 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveMatch' let {a} = {a=1, b=2} in a == 1 diff --git a/core/tests/integration/inputs/destructuring/constant_bool_fail.ncl b/core/tests/integration/inputs/destructuring/constant_bool_fail.ncl index 80b39dc642..78d64cd2f5 100644 --- a/core/tests/integration/inputs/destructuring/constant_bool_fail.ncl +++ b/core/tests/integration/inputs/destructuring/constant_bool_fail.ncl @@ -1,5 +1,5 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveMatch' let true = false in true diff --git a/core/tests/integration/inputs/destructuring/constant_null_fail.ncl b/core/tests/integration/inputs/destructuring/constant_null_fail.ncl index 3e38d6ac8b..1cb75351d9 100644 --- a/core/tests/integration/inputs/destructuring/constant_null_fail.ncl +++ b/core/tests/integration/inputs/destructuring/constant_null_fail.ncl @@ -1,5 +1,5 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveMatch' let null = 0 in true diff --git a/core/tests/integration/inputs/destructuring/constant_number_fail.ncl b/core/tests/integration/inputs/destructuring/constant_number_fail.ncl index f78dae2617..bdf99a428a 100644 --- a/core/tests/integration/inputs/destructuring/constant_number_fail.ncl +++ b/core/tests/integration/inputs/destructuring/constant_number_fail.ncl @@ -1,5 +1,5 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveMatch' let 1 = 1 + 1 in true diff --git a/core/tests/integration/inputs/destructuring/constant_string_fail.ncl b/core/tests/integration/inputs/destructuring/constant_string_fail.ncl index 6708214d8f..7a6760ea66 100644 --- a/core/tests/integration/inputs/destructuring/constant_string_fail.ncl +++ b/core/tests/integration/inputs/destructuring/constant_string_fail.ncl @@ -1,5 +1,5 @@ # test.type = 'error' # # [test.metadata] -# error = 'EvalError::BlameError' +# error = 'EvalError::NonExhaustiveMatch' let "ab" = "a" ++ "b" ++ "c" in true diff --git a/core/tests/integration/inputs/destructuring/nested_duplicated_ident.ncl b/core/tests/integration/inputs/destructuring/nested_duplicated_ident.ncl deleted file mode 100644 index 34816f77f3..0000000000 --- a/core/tests/integration/inputs/destructuring/nested_duplicated_ident.ncl +++ /dev/null @@ -1,4 +0,0 @@ -# test.type = 'pass' -let f = fun { x = { y }, z = { y } } => y in -let result = f { x = { y = 1 }, z = { y = 2 } } in -result == 1 \ No newline at end of file diff --git a/core/tests/integration/main.rs b/core/tests/integration/main.rs index bebcb71dd7..1f399eda53 100644 --- a/core/tests/integration/main.rs +++ b/core/tests/integration/main.rs @@ -167,6 +167,8 @@ enum ErrorExpectation { EvalMergeIncompatibleArgs, #[serde(rename = "EvalError::NonExhaustiveMatch")] EvalNonExhaustiveMatch, + #[serde(rename = "EvalError::NonExhaustiveEnumMatch")] + EvalNonExhaustiveEnumMatch, #[serde(rename = "TypecheckError::UnboundIdentifier")] TypecheckUnboundIdentifier { identifier: String }, #[serde(rename = "TypecheckError::UnboundTypeVariable")] @@ -233,6 +235,10 @@ impl PartialEq for ErrorExpectation { ) | (EvalOther, Error::EvalError(EvalError::Other(..))) | (EvalNonExhaustiveMatch, Error::EvalError(EvalError::NonExhaustiveMatch { .. })) + | ( + EvalNonExhaustiveEnumMatch, + Error::EvalError(EvalError::NonExhaustiveEnumMatch { .. }), + ) | ( TypecheckRecordRowMismatch, Error::TypecheckError(TypecheckError::RecordRowMismatch { .. }), @@ -390,6 +396,7 @@ impl std::fmt::Display for ErrorExpectation { format!("EvalError::MissingFieldDef({field})") } EvalNonExhaustiveMatch => "EvalError::NonExhaustiveMatch".to_owned(), + EvalNonExhaustiveEnumMatch => "EvalError::NonExhaustiveEnumMatch".to_owned(), TypecheckUnboundIdentifier { identifier } => { format!("TypecheckError::UnboundIdentifier({identifier})") } diff --git a/doc/manual/syntax.md b/doc/manual/syntax.md index 43c64bfb70..1c1b5d301f 100644 --- a/doc/manual/syntax.md +++ b/doc/manual/syntax.md @@ -775,7 +775,7 @@ Examples: 'Hello > let 'Invalid x = {} in x -error: contract broken by a value +error: unmatched pattern [...] ``` diff --git a/lsp/nls/src/requests/symbols.rs b/lsp/nls/src/requests/symbols.rs index 145912b972..38eb3823da 100644 --- a/lsp/nls/src/requests/symbols.rs +++ b/lsp/nls/src/requests/symbols.rs @@ -1,6 +1,5 @@ use lsp_server::{RequestId, Response, ResponseError}; use lsp_types::{DocumentSymbol, DocumentSymbolParams, SymbolKind}; -use nickel_lang_core::position::RawSpan; use nickel_lang_core::term::RichTerm; use nickel_lang_core::typ::Type; @@ -53,7 +52,7 @@ fn symbols( .iter() .filter_map(|ty| ty.typ.pos.into_opt()) .chain(field.value.as_ref().and_then(|val| val.pos.into_opt())) - .fold(id_pos, |a, b| RawSpan::fuse(a, b).unwrap_or(a)); + .fold(id_pos, |a, b| a.fuse(b).unwrap_or(a)); let selection_range = crate::codespan_lsp::byte_span_to_range( world.cache.files(),