forked from semgrep/semgrep
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRule_options.atd
194 lines (160 loc) · 8.2 KB
/
Rule_options.atd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
(* Semgrep matching engine configuration.
The goal of this module is to gather in one place all the possible
ways to configure the semgrep matching engine. We now let
the user enable/disable certain features on a per-rule basis (could we even
do it on a per-pattern basis?). For example, constant propagation may be too
powerful sometimes and prevent people from finding certain code.
Those features are called 'options' in the rule.
Note that each option in this file will change the matching results;
for non-functional settings such as optimizations (e.g., using a
cache) use instead Flag_semgrep.ml
We're using atdgen to specify the list of configuration options
because atdgen generates automatically the boilerplate to read
the configuration options from a rule file.
TODO: before this can work with Python, all the custom default field values
must be specified for Python as well. The same must be done
for any target language that we want to support.
A better way would be to modify atd to handle this better.
Ideas for improvements include:
- make 'atdpy --no-implicit-defaults' treat '~field: bool'
as '?field: bool option' unless a default value is set for
Python with an annotation like '<python default="True">'.
- make atd understand a syntax for default values that can be
used to specify defaults in all target languages.
A default value would be set with '<field default="true">'
and this would be translated into 'True' for Python and whatever
means "true" in other languages.
coupling: with semgrep-interfaces/rule_schema_v2.atd
*)
(* !!Do not rename the fields because they can be referenced in rules!! *)
(* Record of all the matching-engine options that a rule can set.
   Every field is optional in the input:
   - a field prefixed with '~' takes a default value when absent, either the
     one given in its <ocaml default=...> annotation or, without annotation,
     the implicit ATD default for its type (false for bool, [] for list);
   - a field prefixed with '?' has an option type and is None when absent.
*)
type t = {
(* enabled by default; the header comment of this file explains why a rule
 * may want to turn it off *)
~constant_propagation <ocaml default="true"> : bool;
(* symbolic_propagation requires constant_propagation to have effect *)
~symbolic_propagation <ocaml default="false"> : bool;
(* either `Source or `Sink (the default); see the 'taint_match_on' type *)
~taint_match_on <ocaml default="`Sink">: taint_match_on;
(* metavariables common to a source and sink will be unified *)
~taint_unify_mvars <ocaml default="false"> : bool;
(* NOTE(review): the five taint_assume_safe_* options below are not
 * documented in this file; presumably each one makes the taint analysis
 * treat that kind of expression as not carrying taint -- confirm against
 * the taint engine. All of them are off by default. *)
~taint_assume_safe_functions <ocaml default="false"> : bool;
~taint_assume_safe_indexes <ocaml default="false"> : bool;
~taint_assume_safe_comparisons <ocaml default="false"> : bool;
~taint_assume_safe_booleans <ocaml default="false"> : bool;
~taint_assume_safe_numbers <ocaml default="false"> : bool;
(* when you are paranoid about minimizing FPs, and probably useful for
 * writing secret detection rules *)
~taint_only_propagate_through_assignments <ocaml default="false"> : bool;
(* 'ac' stands for associative-commutative matching *)
~ac_matching <ocaml default="true"> : bool;
(* pretend && and || are commutative *)
(* THINK: Should we merge ac_matching and commutative_boolop into a single setting? *)
~commutative_boolop <ocaml default="false"> : bool;
(* DEPRECATED: remove me after 1.60.0 *)
~commutative_compop <ocaml default="false"> : bool;
(* NOTE(review): not documented here; presumably the replacement for the
 * deprecated commutative_compop above -- confirm. Off by default. *)
~symmetric_eq <ocaml default="false"> : bool;
(* assign-patterns (e.g. `$X = $E`) will match var-defs (e.g. `var x = 1;`) *)
~vardef_assign <ocaml default="true"> : bool;
(* assign-patterns (e.g. `$X = function() { ... }`) will match fld-defs
 * (e.g. `class Foo { x = function() { return; } }`).
 *)
~flddef_assign <ocaml default="false"> : bool;
(* expression patterns (e.g. `f($X)`) will match attributes (e.g. `@f(a)`) *)
~attr_expr <ocaml default="true"> : bool;
(* treat arrows (a.k.a short lambdas) like other functions *)
~arrow_is_function <ocaml default="true">: bool;
(* treat let/const as equivalent to var (JS-specific) *)
~let_is_var <ocaml default="true">: bool;
(* !experimental: a bit hacky, and may introduce big perf regressions! *)
(* should be used with DeepEllipsis; doing it implicitly has issues *)
~go_deeper_expr <ocaml default="true"> : bool;
(* this ultimately should go away once '...' works on the CFG *)
~go_deeper_stmt <ocaml default="true"> : bool;
(* implicitly assume deep ellipsis in expression statement *)
(* TODO: eventually the default should be 'false'. *)
~implicit_deep_exprstmt <ocaml default="true"> : bool;
(* implicitly assume ellipsis in record patterns *)
(* TODO: eventually the default should be 'false'. *)
~implicit_ellipsis <ocaml default="true"> : bool;
(* let `<foo />` singleton also match `<foo></foo>` *)
~xml_singleton_loose_matching <ocaml default="true"> : bool;
(* implicitly assume ellipsis in xml attributes *)
(* TODO: eventually the default should be 'false'. *)
~xml_attrs_implicit_ellipsis <ocaml default="true"> : bool;
(* if false, xml children are matched in any order *)
~xml_children_ordered <ocaml default="true"> : bool;
(* which generic engine to use: `Spacegrep (default, legacy) or `Aliengrep;
 * see the 'generic_engine' type *)
~generic_engine <ocaml default="`Spacegrep">: generic_engine;
(* NOTE(review): not documented here; judging by the constructor names of
 * 'cpp_parsing_opt', presumably chooses between parsing a C++ construct as
 * a function definition or as a variable definition with a constructor
 * call -- confirm against the C++ parser. *)
~cpp_parsing_pref <ocaml default="`AsVarDefWithCtor">: cpp_parsing_opt;
(* Enable/disable multiline mode in aliengrep. This option is only
relevant to the 'aliengrep' generic engine.
Multiline mode is the default (true); setting this option to false
selects the line mode.
The line mode differs from the default multiline mode in
these aspects:
- Regular ellipses ('...' or '$...FOO') don't match newlines.
- Long ellipses using four dots ('....' or '$....FOO') must be used
to match newlines.
- Newlines in patterns must match newlines in the target code.
- Double quotes and single quotes are treated as additional brace
pairs by default (see 'generic_braces').
This is the only field that also carries a Python default; see the TODO
in the header comment of this file.
*)
~generic_multiline
<ocaml default="true">
<python default="True">: bool;
(* The list of matching braces to be considered by the aliengrep engine.
Each element is a pair (opening brace, closing brace).
The default braces in the default, multiline mode are:
- parentheses;
- square brackets;
- curly braces.
= ["(", ")"], ["[", "]"], ["{", "}"]
The default braces in the line mode are:
- all the braces valid in multiline mode;
- double quotes;
- single quotes.
= ["(", ")"], ["[", "]"], ["{", "}"], ["\"", "\""], ["'", "'"]
*)
?generic_braces: (string * string) list option;
(* This option extends the set of braces instead of
replacing it like 'generic_braces' does. *)
~generic_extra_braces: (string * string) list;
(* Extra characters allowed in a so-called word when using the aliengrep
engine. A word is a sequence of characters that can be captured by
a metavariable. By default, a word follows the pattern [A-Za-z0-9_]+.
It may not be a substring of a longer word.
This option adds word characters. For example, the list ["-"] adds
a dash to the word characters, allowing '$FOO' to match 'e-mail'. *)
~generic_extra_word_characters: string list;
(* Perform case-insensitive matching according to Unicode rules.
The default is false.
This is supported only by the aliengrep engine.
Back-references, however, must still respect the case of the first
match e.g. "$A $A" matches "Hello Hello" but not "Hello hello". *)
~generic_caseless: bool;
(* Maximum number of newlines that an ellipsis can match with the spacegrep
engine. Use 0 to contain the match within a single line. *)
~generic_ellipsis_max_span <ocaml default="10">: int;
(* Preprocess comments away to facilitate matching with spacegrep.
The supported comment syntaxes are listed in the 'generic_comment_style'
type. *)
?generic_comment_style: generic_comment_style option;
(* Whether a rule should be considered for interfile analysis. *)
~interfile <ocaml default="false"> : bool;
(* Whether last expression executed in a function is treated the
same as a return statement. This option is specific only to
languages that treat the last statement of a block inside
functions as a return statement, such as Ruby and Julia. *)
~implicit_return <ocaml default="true"> : bool;
(* TODO: equivalences:
 * - require_to_import (but need pass config to Js_to_generic)
 *)
}
(* Possible values of the 'cpp_parsing_pref' field of 't'.
   NOTE(review): judging by the constructor names, this presumably selects
   whether a C++ construct is parsed as a function definition or as a
   variable definition with a constructor call -- confirm against the
   C++ parser. *)
type cpp_parsing_opt = [
| AsFunDef <json name="as_fundef">
| AsVarDefWithCtor <json name="as_vardef_with_ctor"> (* default *)
]
(* Possible values of the 'generic_engine' field of 't'. Several other
   'generic_*' fields of 't' apply to only one of these two engines, as
   stated in their comments. *)
type generic_engine = [
| Aliengrep <json name="aliengrep">
| Spacegrep <json name="spacegrep"> (* default, legacy *)
]
(* Possible values of the 'generic_comment_style' field of 't': the comment
   syntax to preprocess away before matching with spacegrep. The comment next
   to each case shows the syntax it covers. *)
type generic_comment_style = [
| C <json name="c"> (* /* ... */ *)
| Cpp <json name="cpp"> (* /* ... */ or // ... *)
| Shell <json name="shell"> (* # ... *)
]
(* Possible values of the 'taint_match_on' field of 't'. *)
type taint_match_on = [
| Source <json name="source">
| Sink <json name="sink"> (* default *)
]