diff --git a/blobstamper/stamp_dict.h b/blobstamper/stamp_dict.h index 49408db..f4b80d8 100644 --- a/blobstamper/stamp_dict.h +++ b/blobstamper/stamp_dict.h @@ -47,4 +47,10 @@ class StampDict: public StampBaseStr int maxSize() override {return stamp_size;} }; +class StampDictLCAlphaSmall : public StampDict /* StampDict subclass added by this hunk; its only member is a default constructor that passes the result of std::make_shared() to the StampDict constructor. */+{ + public: + StampDictLCAlphaSmall (): StampDict(std::make_shared()) {}; /* NOTE(review): std::make_shared needs an explicit template argument and has none here, so this line cannot compile as shown. The dictionary type seems to have been lost from this copy of the patch - restore it from the original commit. */+}; + #endif /* STAMP_DICT_H */ diff --git a/blobstamper/stamp_text.cpp b/blobstamper/stamp_text.cpp new file mode 100644 index 0000000..a7c1890 --- /dev/null +++ b/blobstamper/stamp_text.cpp @@ -0,0 +1,71 @@ +/****************************************************************************** + * + * Copyright 2021 Nikolay Shaplov (Postgres Professional) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +#include"stamp_text.h" + /* StampTextPulp::ExtractStr: takes the data returned by blob.ChopBlank(*this), overwrites every NUL element with a space in place, and returns all elements appended in order to a std::string. */+std::string +StampTextPulp::ExtractStr(Blob &blob) +{ + + std::vector data = blob.ChopBlank(*this); /* NOTE(review): std::vector is written without an element type here and on the iterator declaration below, so this cannot compile as shown. The comparison with a NUL character and the push_back into a std::string suggest char - confirm against the original commit. */+ + std::vector::iterator the_iterator; + + the_iterator = data.begin(); + std:: string res; + while (the_iterator != data.end()) + { + if (*the_iterator == '\0') { *the_iterator = ' '; } /* NUL becomes a space before the element is copied out */+ res.push_back(*the_iterator++); /* append the current element, then advance the iterator */+ } + + return res; +} + /* StampTextPulpWords::ExtractStr: concatenates the strings returned by ExtractStrVector(blob); a single space is appended before an item only when the result built so far is non-empty. */+std::string StampTextPulpWords::ExtractStr(Blob &blob) +{ + std::vector data = ExtractStrVector(blob); /* NOTE(review): element type is missing here as well; the loop below iterates std::string values - confirm against the original commit. */+ std::string res = ""; + + for(std::string s : data) /* NOTE(review): iterating by value copies each element; a const reference would avoid the copy. */+ { + if (!res.empty()) + { + res+=" "; + } + res+= s; + } + return res; +} + /* StampTextDictWords::ExtractStr: same joining logic as StampTextPulpWords::ExtractStr above - the two bodies are token-for-token identical, so any difference in output must come from what ExtractStrVector returns for each class. NOTE(review): candidate for a shared helper. */+std::string StampTextDictWords::ExtractStr(Blob &blob) +{ + std::vector data = ExtractStrVector(blob); + std::string res = ""; + + for(std::string s : data) + { + if (!res.empty()) + { + res+=" "; + } + res+= s; + } + return res; +} + diff --git a/blobstamper/stamp_text.h b/blobstamper/stamp_text.h new file mode 100644 index 0000000..7651fb8 --- /dev/null +++ b/blobstamper/stamp_text.h @@ -0,0 +1,47 @@ +/****************************************************************************** + * + * Copyright 2021 Nikolay Shaplov (Postgres Professional) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +#ifndef STAMP_TEXT_H +#define STAMP_TEXT_H + + +#include "galley.h" + +#include "stamp_dict.h" + /* StampTextPulp: string stamp derived from StampBaseStr. It reports minSize() == 1 and maxSize() == -1 and overrides ExtractStr(). */+class StampTextPulp: public StampBaseStr +{ + public: + virtual int minSize() override {return 1;} + virtual int maxSize() override {return -1;} /* NOTE(review): -1 presumably means there is no upper size limit - confirm against the base stamp contract. */+ std::string ExtractStr(Blob &blob) override; +}; + /* StampTextPulpWords: overrides ExtractStr() on top of GalleyVectorStrStampBase. NOTE(review): the base class is named without any template arguments, exactly as for StampTextDictWords below, so nothing visible in this header distinguishes the two classes. If GalleyVectorStrStampBase is a template, its arguments were lost from this copy of the patch - confirm against the original commit. */+class StampTextPulpWords: public GalleyVectorStrStampBase +{ + public: + virtual std::string ExtractStr(Blob &blob) override; +}; + /* StampTextDictWords: second GalleyVectorStrStampBase subclass that overrides ExtractStr(). */+class StampTextDictWords: public GalleyVectorStrStampBase +{ + public: + virtual std::string ExtractStr(Blob &blob) override; +}; + +#endif /* STAMP_TEXT_H */ diff --git a/t/150-stamp_text.cpp b/t/150-stamp_text.cpp new file mode 100644 index 0000000..53cadc9 --- /dev/null +++ b/t/150-stamp_text.cpp @@ -0,0 +1,60 @@ +/****************************************************************************** + * + * Copyright 2021 Nikolay Shaplov (Postgres Professional) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +#include /* NOTE(review): this directive and the four other bare #include directives below carry no header name, so the file cannot compile as shown. The header names seem to have been lost from this copy of the patch - restore them from the original commit. */+ +#include +#include +#include +#define WANT_TEST_EXTRAS +#include + +#include "blobstamper/stamp_text.h" + +using namespace TAP; + +int +main() /* Test driver: announces three checks with TEST_START(3). Each scoped block below builds a Blob from a local char array, runs ExtractStr() on one stamp class and compares the returned string with the expected text via is(). */+{ + TEST_START(3); + { /* 1..1 */ + char data[] = "папа\0мама\0бабушка\0дедушка\0братик\0сестричка"; + Blob blob(data, (sizeof data)-1); /* sizeof minus one leaves out the terminating NUL that the string literal adds to the array; the embedded NULs stay in the blob */+ StampTextPulp stamp; + std::string s = stamp.ExtractStr(blob); + is(s, "папа мама бабушка дедушка братик сестричка", "StampTextSimple"); /* expected text is the input with each embedded NUL replaced by a space */+ + } + { /* 2..2 */ + char data[] = "dad\0mam\0granddad\0grandmam\0brother\0sister"; + Blob blob(data, (sizeof data)-1); + StampTextPulpWords stamp; + std::string s = stamp.ExtractStr(blob); + is(s, "d dad gra n dmam broth er siste", "GalleyTextSimple"); + + } + { /* 3..3 */ + char data[] = "abcdef" "abcdef" "ABCDEF" "012345"; /* adjacent literals are concatenated by the compiler into one 24-character array plus the terminating NUL */+ Blob blob(data, (sizeof data)-1); + StampTextDictWords stamp; + std::string s = stamp.ExtractStr(blob); + is(s, "gleam godfather graffiti greened grouping gunshots gleam godfather graffiti greened grouping gunshots dismally dissented divested doorstep dread drunks convertors corpulent counterparts cranking crippled crusades", "GalleyLCAlphaSmall"); + + } + TEST_END; +}