```bash
pip install dpark
```

```yaml
# dpark.yaml
log_level: INFO
num_workers: 4
memory_limit: 4G
bind_ip: "localhost"
port: 7000
```

```python
from dpark import DparkContext

dpark_ctx = DparkContext()
data = dpark_ctx.textFile('data.txt')
result = data.flatMap(lambda line: line.split(' ')) \
    .map(lambda word: (word, 1)) \
    .reduceByKey(lambda a, b: a + b) \
    .collect()
for word, count in result:
    print(f'{word}: {count}')
dpark_ctx.stop()
```


上一篇:
下一篇:
切换中文