Skip to content

Commit

Permalink
folly: adding folly::ltrimWhitespace / folly::rtrimWhitespace
Browse files Browse the repository at this point in the history
Summary:
folly: adding folly::trimWhitespace

This extends the idea of folly::skipWhitespace.
The reason for adding it is that we want to be able to write:

auto s = skipWhitespace(trimWhitespace(" aaaa "))

This is very similar to Python's str.strip().

Test Plan:
adding unit tests:

# make ; _bin/folly/test/string_test --gtest_filter="String.whitespace"
# fbmake --ccache off --distcc off dbg -j 16
fbmake dbg -j 16
Fbmake run id: G62i4cDP42U
Note: Google Test filter = String.whitespace
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from String
[ RUN      ] String.whitespace
[       OK ] String.whitespace (0 ms)
[----------] 1 test from String (0 ms total)

[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (0 ms total)
[  PASSED  ] 1 test.

Reviewed By: [email protected]

Subscribers: ldbrandy, folly-diffs@, yfeldblum, chalfant, azhavnerchik

FB internal diff: D2109364

Signature: t1:2109364:1433192444:862e237bba1928fcb94be1f95c57a68d457939e9
  • Loading branch information
Haim Grosman authored and bugok committed Jun 3, 2015
1 parent d9e08fa commit 5a2591e
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 6 deletions.
25 changes: 24 additions & 1 deletion folly/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,30 @@ std::string join(const Delim& delimiter, Iterator begin, Iterator end) {
* Returns a subpiece with all whitespace removed from the front of @sp.
* Whitespace means any of [' ', '\n', '\r', '\t'].
*/
StringPiece skipWhitespace(StringPiece sp);
StringPiece ltrimWhitespace(StringPiece sp);

/**
 * Returns a subpiece with all whitespace removed from the back of @sp.
 * Whitespace means any of [' ', '\n', '\r', '\t'].
 *
 * Only the trailing end is trimmed; whitespace at the front of @sp is kept.
 *
 * @param sp  The piece to trim. It is taken by value, so the caller's
 *            object is not modified.
 * @return    The piece with trailing whitespace removed; empty if @sp is
 *            empty or consists solely of whitespace.
 */
StringPiece rtrimWhitespace(StringPiece sp);

/**
 * Strips whitespace from both ends of @sp and returns what is left.
 * Trailing whitespace is removed first (rtrimWhitespace), then leading
 * whitespace (ltrimWhitespace); whitespace in the middle is kept.
 * Whitespace means any of [' ', '\n', '\r', '\t'].
 */
inline StringPiece trimWhitespace(StringPiece sp) {
  const StringPiece withoutTrailing = rtrimWhitespace(sp);
  return ltrimWhitespace(withoutTrailing);
}

/**
 * Returns a subpiece with all whitespace removed from the front of @sp.
 * Whitespace means any of [' ', '\n', '\r', '\t'].
 *
 * DEPRECATED: this is a thin wrapper that forwards to ltrimWhitespace(sp)
 * and returns its result unchanged. Call ltrimWhitespace directly instead.
 * @see ltrimWhitespace
 * @see rtrimWhitespace
 */
inline StringPiece skipWhitespace(StringPiece sp) {
return ltrimWhitespace(sp);
}

/**
* Fast, in-place lowercasing of ASCII alphabetic characters in strings.
Expand Down
27 changes: 22 additions & 5 deletions folly/StringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,40 @@

namespace folly {

StringPiece skipWhitespace(StringPiece sp) {
// True for the whitespace characters other than the plain space:
// newline, horizontal tab and carriage return. ' ' itself is not
// matched here.
static inline bool is_oddspace(char c) {
  switch (c) {
    case '\n':
    case '\t':
    case '\r':
      return true;
    default:
      return false;
  }
}

// Removes every leading ' ', '\n', '\t' and '\r' from sp and returns the
// remainder. sp is a by-value copy, so the caller's piece is untouched.
// Returns an empty piece when sp is empty or entirely whitespace.
StringPiece ltrimWhitespace(StringPiece sp) {
  // Spaces other than ' ' characters are less common but should be
  // checked. This configuration where we loop on the ' '
  // separately from oddspaces was empirically fastest.
  while (true) {
    // Fast path: consume a run of plain spaces.
    while (!sp.empty() && sp.front() == ' ') {
      sp.pop_front();
    }
    // Slow path: a single '\n', '\t' or '\r', then go back to the
    // space loop.
    if (!sp.empty() && is_oddspace(sp.front())) {
      sp.pop_front();
      continue;
    }

    return sp;
  }
}

// Removes every trailing ' ', '\n', '\t' and '\r' from sp and returns the
// remainder. sp is a by-value copy, so the caller's piece is untouched.
StringPiece rtrimWhitespace(StringPiece sp) {
  // Spaces other than ' ' characters are less common but should be
  // checked. This configuration where we loop on the ' '
  // separately from oddspaces was empirically fastest.
  while (true) {
    // Drop the run of plain spaces at the end.
    while (!sp.empty() && sp.back() == ' ') {
      sp.pop_back();
    }
    // Then at most one '\n', '\t' or '\r' before re-checking for spaces.
    if (!sp.empty() && is_oddspace(sp.back())) {
      sp.pop_back();
      continue;
    }

    return sp;
  }
}

}
47 changes: 47 additions & 0 deletions folly/test/StringTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1290,6 +1290,53 @@ BENCHMARK(joinInt, iters) {
}
}

// Exercises trimWhitespace / ltrimWhitespace / rtrimWhitespace on inputs
// with no whitespace, whitespace on one or both ends, whitespace-only
// input and empty input.
TEST(String, whitespace) {
  // trimWhitespace:
  EXPECT_EQ("kavabanga",
            trimWhitespace("kavabanga"));
  EXPECT_EQ("kavabanga",
            trimWhitespace("kavabanga \t \n "));
  EXPECT_EQ("kavabanga",
            trimWhitespace(" \t \r \n \n kavabanga"));
  EXPECT_EQ("kavabanga",
            trimWhitespace("\t \r \n kavabanga \t \n "));
  // Same shapes as above, but with runs of consecutive spaces.
  EXPECT_EQ("kavabanga",
            trimWhitespace("   \t  \r \n \n  kavabanga"));
  EXPECT_EQ("kavabanga",
            trimWhitespace("\t \r \n   kavabanga  \t \n   "));
  // Whitespace in the middle must be preserved.
  EXPECT_EQ("kava \t banga",
            trimWhitespace(" kava \t banga "));
  // Trimming left-then-right and right-then-left must agree.
  EXPECT_EQ(
      ltrimWhitespace(rtrimWhitespace("kavabanga")),
      rtrimWhitespace(ltrimWhitespace("kavabanga")));
  EXPECT_EQ(
      ltrimWhitespace(rtrimWhitespace("kavabanga \r\t\n")),
      rtrimWhitespace(ltrimWhitespace("kavabanga \r\t\n")));
  EXPECT_EQ("", trimWhitespace("\t \r \n \t \n "));
  EXPECT_EQ("", trimWhitespace(""));
  EXPECT_EQ("", trimWhitespace("\t"));
  EXPECT_EQ("", trimWhitespace("\r"));
  EXPECT_EQ("", trimWhitespace("\n"));
  EXPECT_EQ("", trimWhitespace("\t "));
  EXPECT_EQ("", trimWhitespace("\r "));
  EXPECT_EQ("", trimWhitespace("\n "));
  EXPECT_EQ("", trimWhitespace(" \t"));
  EXPECT_EQ("", trimWhitespace(" \r"));
  EXPECT_EQ("", trimWhitespace(" \n"));

  // ltrimWhitespace:
  EXPECT_EQ("kavabanga", ltrimWhitespace("\t kavabanga"));
  EXPECT_EQ("kavabanga \r\n", ltrimWhitespace("\t kavabanga \r\n"));
  EXPECT_EQ("", ltrimWhitespace("\r "));
  EXPECT_EQ("", ltrimWhitespace("\n "));
  EXPECT_EQ("", ltrimWhitespace("\t "));

  // rtrimWhitespace:
  EXPECT_EQ("\t kavabanga", rtrimWhitespace("\t kavabanga"));
  EXPECT_EQ("\t kavabanga", rtrimWhitespace("\t kavabanga \r\n"));
  EXPECT_EQ("", rtrimWhitespace("\r "));
  EXPECT_EQ("", rtrimWhitespace("\n "));
  EXPECT_EQ("", rtrimWhitespace("\t "));
}

int main(int argc, char *argv[]) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
Expand Down

0 comments on commit 5a2591e

Please sign in to comment.