// Classic Spark RDD word count: read a text file from HDFS, count word
// occurrences, print the counts on the driver, and persist them back to HDFS.
// Assumes `sc: SparkContext` is already in scope (e.g. spark-shell).

// Step 1: Read the input file from HDFS (one RDD element per line).
val inputData = sc.textFile("/user/training/bgs/input/input.txt")

// Step 2: Split each line into words. Splitting on a single space yields
// empty tokens when lines contain consecutive spaces (or are empty), so
// drop them — otherwise "" would be counted as a word.
val words = inputData.flatMap(line => line.split(" ")).filter(_.nonEmpty)

// Step 3: Map each word to a (word, 1) pair.
val wordPairs = words.map(word => (word, 1))

// Step 4: Sum the counts per word.
val wordCount = wordPairs.reduceByKey(_ + _)

// Cache the result: it is consumed by two actions below (collect + save);
// without caching, the entire lineage would be recomputed for each action.
wordCount.cache()

// Step 5: Collect and print the results on the driver.
// NOTE(review): collect() pulls the full result into driver memory — fine
// for a small vocabulary, but prefer take(n) for large datasets.
val result = wordCount.collect()
result.foreach(println)

// Step 6: Save the result to HDFS.
wordCount.saveAsTextFile("hdfs://namenode:9000/user/hadoop/output/word_count")

// Step 7: Stop the SparkContext (optional in the shell, required in apps).
sc.stop()
// (Removed non-code blog-page residue that was scraped into this listing:
//  "No comments: / Post a Comment" — it is not part of the program.)