Using Gradle to create a simple Scala Spark application

gradle init --type scala-library --package com.tutorial.spark
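
Running init with the scala-library template scaffolds a project roughly like this (a sketch, assuming Gradle 6.x and a project directory named simple-scala-spark-gradle; init also drops in a sample library class and test, omitted here):

simple-scala-spark-gradle/
    build.gradle
    settings.gradle
    gradlew
    gradlew.bat
    gradle/wrapper/
    src/main/scala/com/tutorial/spark/
    src/test/scala/com/tutorial/spark/

Replace the generated build.gradle with the following:
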
plugins {
    // Apply the scala plugin to add support for Scala
    id 'scala'

    // // Apply the java-library plugin for API and implementation separation.
    // id 'java-library'

    id 'idea'

    id 'application'
}

repositories {
    // Use jcenter for resolving dependencies.
    // You can declare any Maven/Ivy/file repository here.
    jcenter()
}

dependencies {
    // // For running just with Spark Core
    // implementation 'org.apache.spark:spark-core_2.12:3.0.0'

    // Spark SQL subsumes Spark Core
    implementation 'org.apache.spark:spark-sql_2.12:3.0.0'

    // Use ScalaTest (with JUnit 4) for testing our library;
    // the test dependencies use the same Scala 2.12 binary version as Spark
    testImplementation 'junit:junit:4.12'
    testImplementation 'org.scalatest:scalatest_2.12:3.1.2'
    testImplementation 'org.scalatestplus:junit-4-12_2.12:3.1.2.0'

    // Need scala-xml at test runtime
    testRuntimeOnly 'org.scala-lang.modules:scala-xml_2.12:1.2.0'
}

application {
    // Define the main class for the application.
    mainClassName = 'com.tutorial.spark.SimpleSparkJob'
}
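
Two notes on the build file. The jar name used by the spark-submit commands further down comes from the project name, so settings.gradle is assumed to contain something like:

rootProject.name = 'simple-scala-spark-gradle'

Also, JCenter has since been sunset; mavenCentral() is a drop-in replacement for jcenter() here, since the Spark, ScalaTest, and JUnit artifacts are all published to Maven Central.

With the build set up, the first job is a small word count, adapted from the Wikipedia Apache Spark example, saved as src/main/scala/com/tutorial/spark/WordCount.scala:
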
package com.tutorial.spark

import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    // Simplified from the Wikipedia Apache Spark example: https://en.wikipedia.org/wiki/Apache_Spark
    val conf = new SparkConf().setAppName("wiki_test") // Create a Spark config object
    val sc = new SparkContext(conf) // Create a Spark context
    val lines = Array("a b c", "a b")
    val wordFreq = sc.parallelize(lines).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    // Swap word and count so the pairs sort by count, then take the top 10 words
    val top10 = wordFreq.sortBy(s => -s._2).map(x => (x._2, x._1)).top(10)
    println(s"================\ntop 10 words by count: ${top10.mkString(", ")}")
    println(s"================\nword frequency as map: ${wordFreq.collectAsMap().toString()}")
    sc.stop()
  }
}
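
Build the jar with the wrapper that init generated, then submit it. The plain jar under build/libs contains only the project's own classes, which is fine here because spark-submit supplies the Spark runtime:

./gradlew build
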
spark-submit --class com.tutorial.spark.WordCount build/libs/simple-scala-spark-gradle.jar
package com.tutorial.spark

import org.apache.spark.sql.SparkSession

object SimpleSparkJob {
  def main(args: Array[String]): Unit = {
    val logFile = "/Users/yanggao/tools/spark-3.0.0-bin-hadoop2.7/README.md" // Should be some file on your system
    val spark = SparkSession.builder.appName("Simple Application").getOrCreate()
    val logData = spark.read.textFile(logFile).cache()
    val numAs = logData.filter(line => line.contains("a")).count()
    val numBs = logData.filter(line => line.contains("b")).count()
    println(s"Lines with a: $numAs, Lines with b: $numBs")
    spark.stop()
  }
}
spark-submit --class com.tutorial.spark.SimpleSparkJob build/libs/simple-scala-spark-gradle.jar
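
As a side note, because the application plugin's mainClassName points at SimpleSparkJob, the job can also be launched with ./gradlew run instead of spark-submit. Nothing supplies a Spark master in that case, so the session builder needs one set explicitly; a minimal sketch of that local-mode variant (for local development only, not for cluster submission):

val spark = SparkSession.builder
  .appName("Simple Application")
  .master("local[*]") // run inside the Gradle JVM, using all local cores
  .getOrCreate()
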
