Skip to content

Commit

Permalink
Implement regex.find_n (open-policy-agent#1001)
Browse files Browse the repository at this point in the history
Add regex.find_n built-in function

Fixes open-policy-agent#747

Signed-off-by: Kim Christensen <[email protected]>
Signed-off-by: Torin Sandall <[email protected]>
  • Loading branch information
kichristensen authored and tsandall committed Oct 12, 2018
1 parent 0052dc2 commit 0820ef6
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 0 deletions.
15 changes: 15 additions & 0 deletions ast/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ var DefaultBuiltins = [...]*Builtin{
RegexSplit,
GlobsMatch,
RegexTemplateMatch,
RegexFind,

// Sets
SetDiff,
Expand Down Expand Up @@ -623,6 +624,20 @@ var RegexSplit = &Builtin{
),
}

// RegexFind declares the regex.find_n built-in. Its operands are, in order,
// the pattern (string), the value to search (string), and the number of
// match values to return (number); the result is an array of strings.
// Passing -1 as the number requests all match values.
var RegexFind = &Builtin{
	Name: "regex.find_n",
	Decl: types.NewFunction(
		types.Args(types.S, types.S, types.N),
		types.NewArray(nil, types.S),
	),
}

// GlobsMatch takes two strings regexp-style strings and evaluates to true if their
// intersection matches a non-empty set of non-empty strings.
// Examples:
Expand Down
1 change: 1 addition & 0 deletions docs/book/language-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ complex types.
| <span class="opa-keep-it-together">``regex.split(pattern, string, output)``</span> | 2 | ``output`` is ``array[string]`` representing elements of ``string`` separated by ``pattern`` |
| <span class="opa-keep-it-together">``regex.globs_match(glob1, glob2)``</span> | 2 | true if the intersection of regex-style globs ``glob1`` and ``glob2`` matches a non-empty set of non-empty strings. The set of regex symbols is limited for this builtin: only ``.``, ``*``, ``+``, ``[``, ``-``, ``]`` and ``\`` are treated as special symbols. |
| <span class="opa-keep-it-together">``regex.template_match(pattern, string, delimiter_start, delimiter_end, output)``</span> | 4 | ``output`` is true if ``string`` matches ``pattern``. ``pattern`` is a string containing ``0..n`` regular expressions delimited by ``delimiter_start`` and ``delimiter_end``. Example ``regex.template_match("urn:foo:{.*}", "urn:foo:bar:baz", "{", "}", x)`` returns ``true`` for ``x``. |
| <span class="opa-keep-it-together">``regex.find_n(pattern, string, number)``</span> | 3 | returns an ``array[string]`` containing at most ``number`` substrings of ``string`` that match ``pattern``. A ``number`` of ``-1`` returns all matches. |

### Types

Expand Down
27 changes: 27 additions & 0 deletions topdown/regex.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,37 @@ func builtinGlobsMatch(a, b ast.Value) (ast.Value, error) {
return ast.Boolean(ne), nil
}

// builtinRegexFind implements the regex.find_n built-in.
//
// Operand a is the pattern and operand b is the string to search; both must
// be strings. Operand c is the integer passed to FindAllString as the match
// limit (a negative value such as -1 requests every match). The matches are
// returned as an ast.Array of string terms; the array is empty when nothing
// matches. An error is returned if an operand has the wrong type or the
// pattern cannot be obtained from getRegexp.
func builtinRegexFind(a, b, c ast.Value) (ast.Value, error) {
	pattern, err := builtins.StringOperand(a, 1)
	if err != nil {
		return nil, err
	}

	value, err := builtins.StringOperand(b, 2)
	if err != nil {
		return nil, err
	}

	limit, err := builtins.IntOperand(c, 3)
	if err != nil {
		return nil, err
	}

	// The pattern is resolved only after all operands have been validated,
	// so operand type errors take precedence over pattern errors.
	compiled, err := getRegexp(string(pattern))
	if err != nil {
		return nil, err
	}

	matches := compiled.FindAllString(string(value), limit)

	// Allocate with an explicit length so a nil match slice still yields a
	// non-nil, empty array.
	result := make(ast.Array, len(matches))
	for idx, match := range matches {
		result[idx] = ast.StringTerm(match)
	}
	return result, nil
}

// init prepares the package-level regexp cache and registers the regex
// built-in implementations under the names declared in the ast package.
func init() {
// Start with an empty cache of compiled regular expressions
// (presumably keyed by pattern string; confirm against getRegexp).
regexpCache = map[string]*regexp.Regexp{}
// The numeric suffix of each Register helper corresponds to the number of
// operands the implementation accepts, e.g. builtinRegexFind takes three
// operands and is registered with RegisterFunctionalBuiltin3.
RegisterFunctionalBuiltin2(ast.RegexMatch.Name, builtinRegexMatch)
RegisterFunctionalBuiltin2(ast.RegexSplit.Name, builtinRegexSplit)
RegisterFunctionalBuiltin2(ast.GlobsMatch.Name, builtinGlobsMatch)
RegisterFunctionalBuiltin4(ast.RegexTemplateMatch.Name, builtinRegexMatchTemplate)
RegisterFunctionalBuiltin3(ast.RegexFind.Name, builtinRegexFind)
}
16 changes: 16 additions & 0 deletions topdown/regex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,19 @@ func TestRegexMatchTemplate(t *testing.T) {
runTopDownTestCase(t, map[string]interface{}{}, tc.note, tc.rules, tc.expected)
}
}

// TestRegexFind exercises regex.find_n through the top-down evaluator,
// covering an unlimited match count (-1), a bounded match count, and a
// pattern that matches nothing.
func TestRegexFind(t *testing.T) {
	type findCase struct {
		note     string
		rules    []string
		expected interface{}
	}

	cases := []findCase{
		{
			note:     "finds all match values",
			rules:    []string{`p[x] { x = regex.find_n("a.", "paranormal", -1) }`},
			expected: `[["ar", "an", "al"]]`,
		},
		{
			note:     "finds specified number of match values",
			rules:    []string{`p[x] { x = regex.find_n("a.", "paranormal", 2) }`},
			expected: `[["ar", "an"]]`,
		},
		{
			note:     "finds no matching values",
			rules:    []string{`p[x] { x = regex.find_n("bork", "paranormal", -1) }`},
			expected: `[[]]`,
		},
	}

	// Every case runs against an empty data document.
	for i := range cases {
		c := cases[i]
		runTopDownTestCase(t, map[string]interface{}{}, c.note, c.rules, c.expected)
	}
}

0 comments on commit 0820ef6

Please sign in to comment.