// `spark` is my SparkSession; its implicits are imported below to enable `.toDF` on a local Seq.
import spark.implicits._
// Sample data: one row per recorded decision. The same (item_id, location_id)
// pair appears on several rows, each carrying its own decision label.
val df = {
  // Column names, in the same order as the tuple fields below.
  val columnNames = Seq("item_id", "location_id", "decision")

  val sampleRows = Seq(
    (111111, "A", "True"),
    (111111, "A", "False"),
    (111111, "A", "False"),
    (222222, "B", "False"),
    (222222, "B", "False"),
    (333333, "C", "True"),
    (333333, "C", "True"),
    (333333, "C", "Unsure")
  )

  // Tuple fields map positionally onto the column names.
  sampleRows.toDF(columnNames: _*)
}

// Show the column names, types and nullability of the resulting DataFrame.
df.printSchema()
/* Output of df.printSchema():
 * root
 * |-- item_id: integer (nullable = false)
 * |-- location_id: string (nullable = true)
 * |-- decision: string (nullable = true)
 */