-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.cpp
121 lines (113 loc) · 2.6 KB
/
utils.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include <iostream>
#include <sstream>
#include <string>
#include <map>
#include "utils.h"
using namespace std;
/*
 * Turn a root-relative link into an absolute one.
 *
 * url    - link text to examine
 * prefix - leading text that marks url as already complete
 * host   - text placed in front of a relative link (no separator is added)
 *
 * Returns host + url when url does not begin with prefix, begins with "/"
 * and is longer than one character. In every other case (including a bare
 * "/") url is returned unchanged.
 */
string Utils::reformatUrl(string url, string prefix, string host) {
    const bool hasPrefix = url.compare(0, prefix.size(), prefix) == 0;
    const bool isRootRelative = url.size() > 1 && url.front() == '/';
    if (hasPrefix || !isRootRelative) {
        return url;
    }
    return host + url;
}
/*
 * Normalise a host string.
 * Valid host sample: https://www.ea.com
 *
 * Puts "http://" in front of the text when it does not begin with "http",
 * then removes one trailing "/" if there is one.
 */
string Utils::reformatHost(string url) {
    if (url.compare(0, 4, "http") != 0) {
        url.insert(0, "http://");
    }
    // url holds at least four characters at this point, so back() is safe.
    if (url.back() == '/') {
        url.pop_back();
    }
    return url;
}
/*
 * Check if URL is in a usable format.
 *
 * Returns false for an empty string and for any url whose text begins with
 * a query marker ("?") or fragment marker ("#") placed at the very start,
 * after a single "/", directly after host, or after host + "/".
 * Every other url returns true.
 */
bool Utils::isUrlValid(string url, string host) {
    if (url.empty()) {
        return false;
    }

    // Every leading sequence that disqualifies url.
    const string rejected[] = {
        "#", "?", "/?", "/#",
        host + "?", host + "#", host + "/?", host + "/#"
    };

    for (const string& prefix : rejected) {
        // compare() clamps the length, so a url shorter than prefix never matches.
        if (url.compare(0, prefix.size(), prefix) == 0) {
            return false;
        }
    }
    return true;
}
/*
 * Report whether url is external to host.
 * A url counts as internal only when its text begins with host;
 * anything else is reported as external.
 */
bool Utils::isExternalUrl(string url, string host) {
    const bool startsWithHost = url.compare(0, host.size(), host) == 0;
    return !startsWithHost;
}
/*
 * Count how often each whitespace-separated word occurs in text.
 *
 * Returns a map from word to its number of occurrences; the size of the
 * map is the number of unique words. Words are compared exactly, so case
 * and attached punctuation are significant.
 */
map<string,size_t> Utils::countUniqueWords(string text) {
    map<string,size_t> wordcount;
    stringstream is(text);
    string word;
    while (is >> word) {
        // operator[] value-initialises a missing entry to 0, so a single
        // lookup handles both the first and any repeated occurrence.
        ++wordcount[word];
    }
    return wordcount;
}
/*
 * Merge two word-count maps and return the combined map.
 * For each entry of map2:
 *   - if the key already exists in map1, the two counts are added together;
 *   - otherwise the entry is inserted into map1 with its count.
 * Used for calculating totals, so that a word appearing on several pages
 * is kept as a single key.
 */
map<string,size_t> Utils::countTotalUniqueWord(map<string,size_t> map1, map<string,size_t> map2) {
    for (const auto& entry : map2) {
        const string& word = entry.first;
        const size_t occurrences = entry.second;
        // A key missing from map1 starts at 0 before the addition.
        map1[word] += occurrences;
    }
    return map1;
}