```bash
pip install dpark
```
```yaml
# dpark.yaml
log_level: INFO
num_workers: 4
memory_limit: 4G
bind_ip: "localhost"
port: 7000
```
python
from dpark import DparkContext
# Word-count example: tally how often each space-separated token occurs in
# data.txt and print one "token: count" line per distinct token.
dpark_ctx = DparkContext()
try:
    data = dpark_ctx.textFile('data.txt')
    # The chain is parenthesised rather than joined with backslashes, so a
    # stray trailing space after a line break cannot break the expression.
    # NOTE(review): split(' ') yields empty strings for runs of consecutive
    # spaces, so '' may show up as a counted "word" -- confirm whether
    # split() (any whitespace) is what is actually wanted here.
    result = (
        data.flatMap(lambda line: line.split(' '))
        .map(lambda word: (word, 1))
        .reduceByKey(lambda a, b: a + b)
        .collect()
    )
    for word, count in result:
        print(f'{word}: {count}')
finally:
    # Stop the context even when the job or the printing raises, so the
    # context is not left running after a failure.
    dpark_ctx.stop()