The first example removes duplicate lines from a text file and converts each remaining line to an integer:

```python
from dpark import DparkContext

dpark_context = DparkContext()

# Load the input file as an RDD of lines
data = dpark_context.textFile("data.txt")

# Drop duplicate lines, then convert each line to an integer
cleaned_data = data.distinct()
converted_data = cleaned_data.map(lambda x: int(x))

# collect() materializes the RDD on the driver so it can be printed
print(converted_data.collect())

dpark_context.stop()
```

The second example is the classic word count: split each line into words, map each word to a `(word, 1)` pair, and sum the counts per word with `reduceByKey`:

```python
from dpark import DparkContext

dpark_context = DparkContext()

data = dpark_context.textFile("data.txt")

# Split lines into words, emit (word, 1) pairs, and sum the counts per word
word_count = (data.flatMap(lambda line: line.split())
                  .map(lambda word: (word, 1))
                  .reduceByKey(lambda a, b: a + b))

print(word_count.collect())

dpark_context.stop()
```

The third example combines DPark with scikit-learn: DPark loads the raw data, while a `RandomForestClassifier` does the actual learning. Since scikit-learn operates on in-memory arrays, the features and labels must be materialized on the driver before calling `fit`:

```python
from dpark import DparkContext
from sklearn.ensemble import RandomForestClassifier

dpark_context = DparkContext()

train_data = dpark_context.textFile("train_data.txt")
# ... build train_features and train_labels from train_data (elided in the original)

model = RandomForestClassifier(n_estimators=100)
model.fit(train_features, train_labels)

test_data = dpark_context.textFile("test_data.txt")
# ... build test_features from test_data (elided in the original)

# model.predict returns a NumPy array, so a plain print() is used here
test_predictions = model.predict(test_features)
print(test_predictions)

dpark_context.stop()
```
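The original leaves out how `train_features` and `train_labels` are built from the RDD. As a minimal sketch, assuming each line of `train_data.txt` is a comma-separated row of numeric feature values with the class label in the last column (a hypothetical format, not specified by the original), the DPark side of the preprocessing could look like this:

```python
from dpark import DparkContext
from sklearn.ensemble import RandomForestClassifier

dpark_context = DparkContext()

# Hypothetical line format (an assumption): "f1,f2,...,fn,label"
def parse_line(line):
    parts = [float(x) for x in line.split(",")]
    return parts[:-1], int(parts[-1])

train_data = dpark_context.textFile("train_data.txt")

# Parse the lines in parallel with DPark, then collect to the driver,
# because scikit-learn needs the full dataset in memory
parsed = train_data.map(parse_line).collect()
train_features = [features for features, label in parsed]
train_labels = [label for features, label in parsed]

model = RandomForestClassifier(n_estimators=100)
model.fit(train_features, train_labels)

dpark_context.stop()
```

The same parsing step, minus the label column, would produce `test_features` for the prediction half of the example.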

