7.1.1 不可变集合继承图
7.1.2 可变集合继承图
7.2.3 不可变数组与可变数组的转换
7.7.6 复杂 WordCount 案例
1)方式一
/**
 * Word-count demo, approach 1 (not general).
 *
 * Each input tuple is a phrase together with the number of times the phrase
 * occurs. The phrase is expanded into a repeated string, split into words,
 * and the three most frequent words are printed.
 */
object TestWordCount {

  /**
   * Runs the demo and prints the top three `(word, count)` pairs.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Approach 1 (not general): materialise every phrase `count` times.
    val tupleList = List(
      ("Hello Scala Spark World ", 4),
      ("Hello Scala Spark", 3),
      ("Hello Scala", 2),
      ("Hello", 1)
    )

    // Append a separating space to the phrase, then repeat it `count` times.
    val stringList: List[String] = tupleList.map(t => (t._1 + " ") * t._2)

    // Long form: stringList.flatMap(s => s.split(" "))
    // A phrase that already ends in a space produces a double space once the
    // separator is appended, and split(" ") then yields empty tokens; drop
    // them so "" is never counted as a word.
    val words: List[String] = stringList.flatMap(_.split(" ")).filter(_.nonEmpty)

    // When the function just returns its argument, it cannot be shortened to
    // `_`: `words.groupBy(_)` expands to `x => words.groupBy(x)`, not to
    // `words.groupBy(x => x)`.
    val groupMap: Map[String, List[String]] = words.groupBy(word => word)

    // (word, list) => (word, count)
    val wordToCount: Map[String, Int] = groupMap.map(t => (t._1, t._2.size))

    // Sort by count, descending, and keep the three most frequent words.
    val wordCountList: List[(String, Int)] =
      wordToCount.toList.sortWith((left, right) => left._2 > right._2).take(3)

    // The counting steps as a single chain:
    // tupleList.map(t => (t._1 + " ") * t._2).flatMap(_.split(" ")).filter(_.nonEmpty)
    //   .groupBy(word => word).map(t => (t._1, t._2.size))

    println(wordCountList)
  }
}
2)方式二
/**
 * Word-count demo, approach 2.
 *
 * Each input tuple is a phrase together with the number of times the phrase
 * occurs. Instead of repeating the phrase text, every word is paired with the
 * phrase's count and the counts are summed per word.
 */
object TestWordCount {

  /**
   * Runs the demo and prints the map from each word to its total count.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val tuples = List(
      ("Hello Scala Spark World", 4),
      ("Hello Scala Spark", 3),
      ("Hello Scala", 2),
      ("Hello", 1)
    )

    // Pair every word of a phrase with that phrase's count:
    // (Hello,4),(Scala,4),(Spark,4),(World,4)
    // (Hello,3),(Scala,3),(Spark,3)
    // (Hello,2),(Scala,2)
    // (Hello,1)
    val wordToCountList: List[(String, Int)] = tuples.flatMap { t =>
      val strings: Array[String] = t._1.split(" ")
      strings.map(word => (word, t._2))
    }

    // Group the pairs by word:
    // Hello, List((Hello,4), (Hello,3), (Hello,2), (Hello,1))
    // Scala, List((Scala,4), (Scala,3), (Scala,2))
    // Spark, List((Spark,4), (Spark,3))
    // World, List((World,4))
    val wordToTupleMap: Map[String, List[(String, Int)]] =
      wordToCountList.groupBy(t => t._1)

    // Keep only the counts for each word. A strict `map` is used here rather
    // than `mapValues`, so the result is a plain Map.
    val wordToCountMap: Map[String, List[Int]] =
      wordToTupleMap.map(t => (t._1, t._2.map(t1 => t1._2)))

    // Sum the counts per word.
    val wordToTotalCountMap: Map[String, Int] =
      wordToCountMap.map(t => (t._1, t._2.sum))

    println(wordToTotalCountMap)
  }
}