Skip to content

Commit 43f7883

Browse files
author
Pedro Bernardo
committed
Added pairRdd/aggregation/reducebykey/WordCount.py
1 parent 7bcccc9 commit 43f7883

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from pyspark import SparkContext
2+
3+
if __name__ == "__main__":
    # Word-count driver: reads in/word_count.text through a local Spark
    # context, counts occurrences of each word with reduceByKey, and prints
    # one "word : count" line per distinct word.
    sc = SparkContext("local", "wordCounts")
    try:
        # Restrict Spark's log output to errors so results are readable.
        sc.setLogLevel("ERROR")

        lines = sc.textFile("in/word_count.text")

        # split() with no separator splits on any run of whitespace and
        # yields nothing for blank lines. split(" ") emitted empty-string
        # tokens for blank lines and repeated spaces, and those were then
        # counted as a word.
        wordRdd = lines.flatMap(lambda line: line.split())
        wordPairRdd = wordRdd.map(lambda word: (word, 1))

        # Sum the per-occurrence 1s for each distinct word.
        wordCounts = wordPairRdd.reduceByKey(lambda x, y: x + y)

        # collect() returns all (word, count) pairs to the driver as a list.
        for word, count in wordCounts.collect():
            print("{} : {}".format(word, count))
    finally:
        # Always shut the context down, even when the job above fails.
        sc.stop()

0 commit comments

Comments
 (0)