import org.apache.spark.mllib.feature.StandardScaler
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Linear regression demo over records of the form "label|f1,f2" read from
 * kimi.txt (see the sample data at the bottom of this file).
 *
 * Trains LinearRegressionWithSGD and prints the learned weights and the
 * training-set mean squared error.
 */
object kimiYang {

  def main(args: Array[String]): Unit = {
    // Build the context inside main (not as an eagerly-initialized object
    // field) and make sure it is stopped even if the job throws.
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("kimiYang")
    val sc = new SparkContext(conf)
    try {
      val data = sc.textFile("kimi.txt")

      // Each line is "label|f1,f2,...": split on '|' for the label, then on
      // ',' for the dense feature vector.
      val parsedData = data.map { line =>
        val parts = line.split('|')
        LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(',').map(_.toDouble)))
      }.cache()

      // The raw features span very different ranges (e.g. 3..9 vs 30..1300).
      // SGD with stepSize 0.1 on unscaled features diverges — the previously
      // recorded run below shows weights ~1e42 and MSE ~1e95. Standardizing
      // to zero mean / unit variance makes the step size well-conditioned.
      val scaler = new StandardScaler(withMean = true, withStd = true)
        .fit(parsedData.map(_.features))
      val scaledData = parsedData
        .map(p => LabeledPoint(p.label, scaler.transform(p.features)))
        .cache()

      // 100 iterations is cheap on this tiny set and lets SGD converge;
      // the original 10 iterations was not enough even after scaling.
      val numIterations = 100
      val stepSize = 0.1
      val model = LinearRegressionWithSGD.train(scaledData, numIterations, stepSize)
      println("model Weight" + model.weights)

      // Evaluate on the (scaled) training data: pair each true label with
      // the model's prediction, then average the squared errors.
      val valuesAndPreds = scaledData.map { point =>
        (point.label, model.predict(point.features))
      }
      val MSE = valuesAndPreds.map { case (v, p) => math.pow(v - p, 2) }.mean()
      println("MSE" + MSE)
    } finally {
      sc.stop()
    }
  }
}
結果:
model Weight[-2.1793201852602822E42,-4.305555405890157E44]
MSE1.4628747951573787E95
數據
65|7,400 90|5,1300 100|4,1100 110|3,1300 60|9,300 100|5,1000 75|7,600 80|6,1200 70|6,500 50|8,30