Skip to content

Commit

Permalink
bsdgrep(1): various fixes of empty pattern/exit code/-c behavior
Browse files Browse the repository at this point in the history
When an empty pattern is encountered in the pattern list, I had previously
broken bsdgrep to count that as a "match all" and ignore any other patterns
in the list. This commit rectifies that mistake, among others:

- The -v flag semantics were not quite right; lines matched should have been
  counted differently based on whether the -v flag was set or not. procline
  now definitively returns whether it's matched or not, and interpreting
  that result has been kicked up a level.
- Empty patterns with the -x flag was broken similarly to empty patterns
  with the -w flag. The former is a whole-line match and should be more
  strict, only matching blank lines. No -x and no -w will will match the
  empty string at the beginning of each line.
- The exit code with -L was broken, w.r.t. modern grep. Modern grap will
  exit(0) if any file that didn't match was output, so our interpretation
  was simply backwards. The new interpretation makes sense to me.

Tests updated and added to try and catch some of this.

This misbehavior was found by autoconf while fixing ports found in PR 229925
expecting either a more sane or a more GNU-like sed.

MFC after:	1 week
  • Loading branch information
kevans91 committed Sep 25, 2019
1 parent 98e8ac8 commit fcd2c31
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 30 deletions.
64 changes: 63 additions & 1 deletion contrib/netbsd-tests/usr.bin/grep/t_grep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,60 @@ wflag_emptypat_body()
atf_check -o file:test4 grep -w -e "" test4
}

atf_test_case xflag_emptypat
xflag_emptypat_body()
{
printf "" > test1
printf "\n" > test2
printf "qaz" > test3
printf " qaz\n" > test4

# -x is whole-line, more strict than -w.
atf_check -s exit:1 -o empty grep -x -e "" test1

atf_check -o file:test2 grep -x -e "" test2

atf_check -s exit:1 -o empty grep -x -e "" test3

atf_check -s exit:1 -o empty grep -x -e "" test4

total=$(wc -l /COPYRIGHT | sed 's/[^0-9]//g')

# Simple checks that grep -x with an empty pattern isn't matching every
# line. The exact counts aren't important, as long as they don't
# match the total line count and as long as they don't match each other.
atf_check -o save:xpositive.count grep -Fxc '' /COPYRIGHT
atf_check -o save:xnegative.count grep -Fvxc '' /COPYRIGHT

atf_check -o not-inline:"${total}" cat xpositive.count
atf_check -o not-inline:"${total}" cat xnegative.count

atf_check -o not-file:xnegative.count cat xpositive.count
}

atf_test_case xflag_emptypat_plus
xflag_emptypat_plus_body()
{
printf "foo\n\nbar\n\nbaz\n" > target
printf "foo\n \nbar\n \nbaz\n" > target_spacelines
printf "foo\nbar\nbaz\n" > matches
printf " \n \n" > spacelines

printf "foo\n\nbar\n\nbaz\n" > patlist1
printf "foo\n\nba\n\nbaz\n" > patlist2

sed -e '/bar/d' target > matches_not2

# Normal handling first
atf_check -o file:target grep -Fxf patlist1 target
atf_check -o file:matches grep -Fxf patlist1 target_spacelines
atf_check -o file:matches_not2 grep -Fxf patlist2 target

# -v handling
atf_check -s exit:1 -o empty grep -Fvxf patlist1 target
atf_check -o file:spacelines grep -Fxvf patlist1 target_spacelines
}

atf_test_case excessive_matches
excessive_matches_head()
{
Expand Down Expand Up @@ -551,6 +605,12 @@ grep_nomatch_flags_head()

grep_nomatch_flags_body()
{
grep_type

if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then
atf_expect_fail "this test does not pass with GNU grep in base"
fi

printf "A\nB\nC\n" > test1

atf_check -o inline:"1\n" grep -c -C 1 -e "B" test1
Expand All @@ -563,7 +623,7 @@ grep_nomatch_flags_body()
atf_check -o inline:"test1\n" grep -l -A 1 -e "B" test1
atf_check -o inline:"test1\n" grep -l -C 1 -e "B" test1

atf_check -s exit:1 -o inline:"test1\n" grep -L -e "D" test1
atf_check -o inline:"test1\n" grep -L -e "D" test1

atf_check -o empty grep -q -e "B" test1
atf_check -o empty grep -q -B 1 -e "B" test1
Expand Down Expand Up @@ -777,6 +837,8 @@ atf_init_test_cases()
atf_add_test_case egrep_empty_invalid
atf_add_test_case zerolen
atf_add_test_case wflag_emptypat
atf_add_test_case xflag_emptypat
atf_add_test_case xflag_emptypat_plus
atf_add_test_case excessive_matches
atf_add_test_case wv_combo_break
atf_add_test_case fgrep_sanity
Expand Down
27 changes: 10 additions & 17 deletions usr.bin/grep/grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,20 +218,9 @@ static void
add_pattern(char *pat, size_t len)
{

/* Do not add further pattern is we already match everything */
if (matchall)
return;

/* Check if we can do a shortcut */
if (len == 0) {
matchall = true;
for (unsigned int i = 0; i < patterns; i++) {
free(pattern[i].pat);
}
pattern = grep_realloc(pattern, sizeof(struct pat));
pattern[0].pat = NULL;
pattern[0].len = 0;
patterns = 1;
return;
}
/* Increase size if necessary */
Expand Down Expand Up @@ -654,7 +643,7 @@ main(int argc, char *argv[])
aargv += optind;

/* Empty pattern file matches nothing */
if (!needpattern && (patterns == 0))
if (!needpattern && (patterns == 0) && !matchall)
exit(1);

/* Fail if we don't have any pattern */
Expand Down Expand Up @@ -701,11 +690,10 @@ main(int argc, char *argv[])

r_pattern = grep_calloc(patterns, sizeof(*r_pattern));

/* Don't process any patterns if we have a blank one */
#ifdef WITH_INTERNAL_NOSPEC
if (!matchall && grepbehave != GREP_FIXED) {
if (grepbehave != GREP_FIXED) {
#else
if (!matchall) {
{
#endif
/* Check if cheating is allowed (always is for fgrep). */
for (i = 0; i < patterns; ++i) {
Expand Down Expand Up @@ -737,7 +725,12 @@ main(int argc, char *argv[])
matched = true;
}

/* Find out the correct return value according to the
results and the command line option. */
if (Lflag)
matched = !matched;

/*
* Calculate the correct return value according to the
* results and the command line option.
*/
exit(matched ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));
}
37 changes: 25 additions & 12 deletions usr.bin/grep/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ procmatch_match(struct mprintc *mc, struct parsec *pc)
while (pc->matchidx >= MAX_MATCHES) {
/* Reset matchidx and try again */
pc->matchidx = 0;
if (procline(pc))
if (procline(pc) == !vflag)
printline(pc, ':');
else
break;
Expand Down Expand Up @@ -355,7 +355,7 @@ procfile(const char *fn)
return (0);
}

line_matched = procline(&pc);
line_matched = procline(&pc) == !vflag;
if (line_matched)
++lines;

Expand Down Expand Up @@ -469,17 +469,32 @@ procline(struct parsec *pc)

matchidx = pc->matchidx;

/* Special case: empty pattern with -w flag, check first character */
if (matchall && wflag) {
/*
* With matchall (empty pattern), we can try to take some shortcuts.
* Emtpy patterns trivially match every line except in the -w and -x
* cases. For -w (whole-word) cases, we only match if the first
* character isn't a word-character. For -x (whole-line) cases, we only
* match if the line is empty.
*/
if (matchall) {
if (pc->ln.len == 0)
return (true);
wend = L' ';
if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend))
return (false);
else
if (wflag) {
wend = L' ';
if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 &&
!iswword(wend))
return (true);
} else if (!xflag)
return (true);
} else if (matchall)
return (true);

/*
* If we don't have any other patterns, we really don't match.
* If we do have other patterns, we must fall through and check
* them.
*/
if (patterns == 0)
return (false);
}

matched = false;
st = pc->lnstart;
Expand Down Expand Up @@ -609,8 +624,6 @@ procline(struct parsec *pc)

/* Reflect the new matchidx in the context */
pc->matchidx = matchidx;
if (vflag)
matched = !matched;
return matched;
}

Expand Down

0 comments on commit fcd2c31

Please sign in to comment.