# Prerequisite (run in a shell, not in Python): pip install dpark
# Word-filter / reduce pipeline over "data.txt" using dpark.
# NOTE(review): `compute` and `merge` are not defined anywhere in this file —
# they must be imported or defined before this script runs; confirm upstream.
from dpark import DparkContext

# One shared context for the whole script. (The original created a second,
# identical DparkContext mid-script, which was redundant.)
dpark_context = DparkContext()
# Default number of parallel tasks/partitions for subsequent operations.
dpark_context.defaultParallelism = 100

# Load and pre-process: keep only lines containing the keyword, split each
# into comma-separated fields, and cache because the RDD is reused below.
# BUG FIX: Python str has no .contains() method — use the `in` operator.
data = dpark_context.textFile("data.txt")
processed_data = (
    data
    .filter(lambda line: "keyword" in line)
    .map(lambda line: line.split(","))
    .cache()
)

# Per-item compute, then pairwise merge into a single result.
result = processed_data.map(lambda item: compute(item)).reduce(
    lambda x, y: merge(x, y)
)

# BUG FIX: reduce() returns a plain driver-side value, not an RDD, so it has
# no saveAsTextFile(); persist the final value to a local file instead.
with open("result.txt", "w") as out:
    out.write(str(result))