Skip to content

Commit

Permalink
feat(scala): parse indented while and end markers (semgrep#7700)
Browse files Browse the repository at this point in the history
## What:
This PR makes it so we can parse `while` bodies that are indented (it also adds parsing of Scala 3 `end` markers such as `end extension`), like:
```
while (true)
  val x = 2
  val y = 3
```

## Why:
Scala 3 allows indentation-based (braceless) bodies and `end` markers, so the parser needs to accept them.

## How:
Added a call to `opportunisticIndent` right before parsing the `while` body, similar to what we did for semgrep#7498.

## Test plan:
Tests are included, and the parse rate goes from `0.9806002679202549` to
`0.9810696267622084`.

PR checklist:

- [X] Purpose of the code is [evident to future
readers](https://semgrep.dev/docs/contributing/contributing-code/#explaining-code)
- [X] Tests included or PR comment includes a reproducible test plan
- [X] Documentation is up-to-date
- [ ] A changelog entry was [added to
changelog.d](https://semgrep.dev/docs/contributing/contributing-code/#adding-a-changelog-entry)
for any user-facing change
- [X] Change has no security implications (otherwise, ping security
team)

If you're unsure about any of this, please see:

- [Contribution
guidelines](https://semgrep.dev/docs/contributing/contributing-code)!
- [One of the more specific guides located
here](https://semgrep.dev/docs/contributing/contributing/)
  • Loading branch information
brandonspark authored May 2, 2023
1 parent fb8ddc6 commit 098b5a0
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 1 deletion.
6 changes: 6 additions & 0 deletions languages/scala/ast/AST_scala.ml
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ and block_stat =
| I of import
| Ex of import
| Ext of extension
| End of end_marker
| E of expr
(* just at the beginning of top_stat *)
| Package of package
Expand Down Expand Up @@ -468,6 +469,11 @@ and definition_kind =
(* class/traits/objects *)
| Template of template_definition

(*****************************************************************************)
(* End Marker *)
(*****************************************************************************)
(* A Scala 3 end marker, e.g. 'end extension' or 'end foo'.
 * [end_tok] is the token of the 'end' soft keyword itself; [end_kind] is the
 * single token that follows it (the marker tag: an identifier or a keyword
 * such as 'if', 'while', 'extension', ...). The tag is kept as a bare token
 * rather than as a dedicated variant.
 *)
and end_marker = { end_tok : tok; end_kind : tok }

(*****************************************************************************)
(* Extensions *)
(*****************************************************************************)
Expand Down
6 changes: 6 additions & 0 deletions languages/scala/generic/scala_to_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,9 @@ and v_block_stat x : G.item list =
| E v1 ->
let v1 = v_expr_for_stmt v1 in
[ v1 ]
| End v1 ->
let v1 = v_end_marker v1 in
[ v1 ]
| Ext v1 -> v_extension v1
| Package v1 ->
let ipak, ids = v_package v1 in
Expand Down Expand Up @@ -914,6 +917,9 @@ and v_given_definition { gsig; gkind } =
G.OtherDef (todo_kind, v1 @ [ G.Anys v2 ]) );
]

(* Translate an end marker to the generic AST. There is no dedicated generic
 * construct for it, so both tokens ('end' and its tag) are kept, in source
 * order, under an OS_Todo statement.
 *)
and v_end_marker { end_tok; end_kind } : G.stmt =
  let marker_toks = [ G.Tk end_tok; G.Tk end_kind ] in
  G.s (G.OtherStmt (OS_Todo, marker_toks))

and v_extension { ext_tok = _; ext_tparams; ext_using; ext_param; ext_methods }
: G.stmt list =
let tparams =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2488,6 +2488,7 @@ and parseWhile in_ : stmt =
accept (Kdo ab) in_;
fb (Tok.unsafe_fake_tok "") e
in
opportunisticIndent in_;
let body = expr in_ in
(* ast: makeWhile(cond, body) *)
While (ii, cond, body)
Expand Down Expand Up @@ -3651,6 +3652,27 @@ let localDef implicitMod in_ : definition =
defs
(* AST: if RBRACE | CASE defs :+ literalUnit *))

(* ------------------------------------------------------------------------- *)
(* End Marker *)
(* ------------------------------------------------------------------------- *)

(** {{{
* EndMarker ::= ‘end’ EndMarkerTag -- when followed by EOL
* EndMarkerTag ::= id | ‘if’ | ‘while’ | ‘for’ | ‘match’ | ‘try’
| ‘new’ | ‘this’ | ‘given’ | ‘extension’ | ‘val’
* }}}
*)

let endMarker in_ : end_marker =
  (* Record the info of the current token, then move past it using
   * [advance]. The info must be read before advancing.
   *)
  let take advance =
    let info = TH.info_of_tok in_.token in
    advance in_;
    info
  in
  (* First the 'end' soft keyword, which must be the current token. *)
  let end_tok = take (accept (ID_LOWER ("end", ab))) in
  (* Then the tag. Every alternative of EndMarkerTag is exactly one token, so
   * rather than dispatching on each case we keep whatever single token comes
   * next.
   * NOTE(review): the tag is not checked against the EndMarkerTag grammar
   * here; presumably skipToken advances unconditionally -- confirm.
   *)
  let end_kind = take skipToken in
  { end_tok; end_kind }

(* ------------------------------------------------------------------------- *)
(* Extension *)
(* ------------------------------------------------------------------------- *)
Expand Down Expand Up @@ -3777,6 +3799,7 @@ let statSeq ?(errorMsg = "illegal start of definition") ?(rev = false) stat in_
* | Annotations [implicit] [lazy] Def
* | Annotations LocalModifiers TmplDef
* | Expr1
* | EndMarker
* | Extension
* |
* }}}
Expand All @@ -3795,6 +3818,9 @@ let blockStatSeqInner in_ : top_stat option =
if not (TH.isCaseDefEnd in_.token) then acceptStatSepOpt in_
in
match in_.token with
| ID_LOWER ("end", _) ->
let x = endMarker in_ in
Some (End x)
| ID_LOWER ("extension", _) ->
let x = extension in_ in
Some (Ext x)
Expand Down Expand Up @@ -3915,6 +3941,7 @@ let indentedExprOrBlockStatSeqUntil ?(until = None) in_ =
* | Annotations Modifiers Dcl
* | Expr1
* | super ArgumentExprs {ArgumentExprs}
* | EndMarker
* | Extension
* |
* | Annotations Modifiers EnumCase
Expand All @@ -3929,6 +3956,9 @@ let templateStat in_ : template_stat option =
| Kexport _ ->
let x = exportClause in_ in
Some (Ex x)
| ID_LOWER ("end", _) ->
let x = endMarker in_ in
Some (End x)
| ID_LOWER ("extension", _) ->
let x = extension in_ in
Some (Ext x)
Expand All @@ -3953,6 +3983,7 @@ let templateStats in_ : template_stat list = statSeq templateStat in_
* | package object ObjectDef
* | Import
* | Export
* | EndMarker
* | Extension
* |
* }}}
Expand All @@ -3969,6 +4000,9 @@ let topStat in_ : top_stat option =
| Kexport _ ->
let x = exportClause in_ in
Some (Ex x)
| ID_LOWER ("end", _) ->
let x = endMarker in_ in
Some (End x)
| ID_LOWER ("extension", _) ->
let x = extension in_ in
Some (Ext x)
Expand Down Expand Up @@ -4011,7 +4045,12 @@ let templateStatSeq ~isPre in_ : self_type option * block =
if
TH.isExprIntro in_.token
&& (not (is_modifier in_))
&& not (in_.token =~= ID_LOWER ("extension", ab))
(* We add this here, because there are some "soft" modifiers that can start
a templateIntro, such as "extension", "end", etc.
If we did not do this, template stats that start with those would always
enter this expression case, even though they denote something else.
*)
&& not (TH.isTemplateIntro in_.token)
then (
let first = expr ~location:InTemplate in_ in
match in_.token with
Expand Down
1 change: 1 addition & 0 deletions languages/scala/recursive_descent/Token_helpers_scala.ml
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ let isTemplateIntro = function
| Kenum _
| Ktrait _
| ID_LOWER ("given", _)
| ID_LOWER ("end", _)
| ID_LOWER ("extension", _) ->
true
(*TODO | Kcaseobject | | Kcaseclass *)
Expand Down
10 changes: 10 additions & 0 deletions tests/parsing/scala/end_markers.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

object A:
extension (a: File)
inline def x: String = "hi"
end extension

def foo(x: T) = x
end def

end foo
4 changes: 4 additions & 0 deletions tests/parsing/scala/indented_while.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

while (true)
val x = 2
val y = 3

0 comments on commit 098b5a0

Please sign in to comment.