7.7.5 普通 WordCount 案例
package chapter07

/** Basic WordCount demo: counts how often each word occurs in a fixed list of
  * sentences and prints the three most frequent words, echoing every
  * intermediate collection along the way.
  */
object TestWordCount__简单版 {

  /** Runs the demo. Each intermediate result is printed, separated by a dashed line.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val separator = "------------------------------------------------------------------------------------------"

    val stringList = List("Hello Scala Hbase kafka", "Hello Scala Hbase", " Hello Scala", "Hello")

    // 1) Flatten the sentences into single words. Splitting on " " turns a leading
    //    space (as in " Hello Scala") into an empty token, so empty tokens are
    //    dropped to keep "" from being counted as a word.
    val wordList: List[String] = stringList.flatMap(_.split(" ").filter(_.nonEmpty))
    println(wordList)
    println(separator)

    // 2) Group identical words together: word -> all of its occurrences.
    val wordSame: Map[String, List[String]] = wordList.groupBy(identity)
    println(wordSame)
    println(separator)

    // 3) Replace every group by its size: word -> number of occurrences.
    val wordToCount: Map[String, Int] = wordSame.map { case (word, occurrences) => (word, occurrences.size) }
    println(wordToCount)
    println(separator)

    // 4) Order by count, highest first.
    val sortList: List[(String, Int)] = wordToCount.toList.sortWith((a, b) => a._2 > b._2)
    println(sortList)
    println(separator)

    // 5) Keep the three most frequent words.
    val resThreeList: List[(String, Int)] = sortList.take(3)
    println(resThreeList)
  }
}
7.7.6 复杂 WordCount 案例
TestWordCount__复杂版__方式01
package chapter07

/** Advanced WordCount demo, approach 1: every sentence comes with a
  * pre-aggregated repeat count. Each sentence is expanded by repeating it
  * that many times, after which the plain WordCount pipeline is applied.
  */
object TestWordCount__复杂版__方式01 {

  /** Runs the demo and prints the three most frequent words with their totals.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val tupleList = List(("Hello Scala Spark World ", 4), ("Hello Scala Spark", 3), (" Hello Scala", 2), ("Hello", 1))

    // 1) Repeat each sentence `times` times; the appended " " keeps the last word
    //    of one repetition from fusing with the first word of the next.
    val stringList: List[String] = tupleList.map { case (sentence, times) => (sentence + " ") * times }

    // 2) Split into words. Leading/trailing spaces in the input plus the padding
    //    added above create runs of spaces, which split(" ") turns into empty
    //    tokens; drop them so "" is not counted as a word.
    val words: List[String] = stringList.flatMap(_.split(" ").filter(_.nonEmpty))

    // 3) Group identical words and count the size of each group.
    val groupMap: Map[String, List[String]] = words.groupBy(identity)
    val wordToCount: Map[String, Int] = groupMap.map { case (word, occurrences) => (word, occurrences.size) }

    // 4) Sort by count (descending) and keep the top three.
    val wordCountList: List[(String, Int)] =
      wordToCount.toList.sortWith((left, right) => left._2 > right._2).take(3)

    println(wordCountList)
  }
}
TestWordCount__复杂版__方式02
package chapter07

/** Advanced WordCount demo, approach 2: every sentence comes with a
  * pre-aggregated count. Instead of repeating the sentence, each word is paired
  * with its sentence's count and the counts are summed per word.
  */
object TestWordCount__复杂版__方式02 {

  /** Runs the demo and prints the total count of every word.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val tuples = List(("Hello Scala Hbase kafka", 4), ("Hello Scala Hbase", 3), (" Hello Scala", 2), ("Hello", 1))

    // 1) Pair every word with the count of the sentence it came from.
    //    Empty tokens (caused by the leading space in " Hello Scala") are dropped
    //    so that "" is not treated as a word.
    val wordToCountList: List[(String, Int)] = tuples.flatMap { case (sentence, count) =>
      sentence.split(" ").filter(_.nonEmpty).map(word => (word, count)).toList
    }

    // 2) Group the (word, count) pairs by word.
    val wordToTupleMap: Map[String, List[(String, Int)]] = wordToCountList.groupBy(_._1)

    // 3) Keep only the counts of each group: word -> List(count, count, ...).
    //    A plain `map` is used rather than `mapValues`, which returns a lazy view
    //    on Scala 2.13 and would not conform to the declared Map type.
    val wordToCountMap: Map[String, List[Int]] =
      wordToTupleMap.map { case (word, pairs) => (word, pairs.map(_._2)) }

    // 4) Sum the counts of every word to obtain its total.
    val wordToTotalCountMap: Map[String, Int] =
      wordToCountMap.map { case (word, counts) => (word, counts.sum) }

    println(wordToTotalCountMap)
  }
}