LinearRegressionCmTemporalModelTest.scala 1.52 KB
Newer Older
1 2
package com.cablelabs.eventgen.algorithm

3
import java.io.{File, FileInputStream}
4
import java.text.SimpleDateFormat
5

6
import com.cablelabs.eventgen.AnalyzerTester
7
import com.cablelabs.eventgen.model.{InputDefinition, LinearRegressionDefinition}
8 9

/**
 * Tests the LinearRegressionModel against a known set of 30k events of CM data.
 *
 * These tests may fail during the build; see https://issues.apache.org/jira/browse/SPARK-2243.
 * Setting the Spark URI to local[1] mitigates the problem, but it slows the tests down.
 */
class LinearRegressionCmTemporalModelTest extends AnalyzerTester {

  // Date pattern and formatter; neither is referenced again inside this class.
  // NOTE(review): "HH" (hour of day, 0-23) is combined with the "a" AM/PM marker; a 12-hour
  // pattern would normally use "hh" - confirm against the event data before changing it.
  val dateFormat = "MM-dd-yyyy HH:mm:ss a"
  val dateFormatter = new SimpleDateFormat(dateFormat)

  // Input definition loaded from the YAML fixture for the CM linear-regression scenario.
  // NOTE(review): the FileInputStream is never closed in this class; whether
  // InputDefinition.inputDefinition closes it is not visible here - confirm.
  val inputDef = InputDefinition.inputDefinition(
    new FileInputStream(new File("testData/cm/definition/cm-linear-regression-input.yaml")))

  // URI of the event fixture file and the field delimiter used with it.
  val eventUri = new File("testData/cm/events/cm_1a.txt").toURI.toString
  val delim = '|'

  analyzerTest("Analyze linear regression model should predict temporal values with minimal error") {
    val trainingSet = analyzer.temporalTrainingSet().collect()

    // Check the fixture before it is used: a null or wrong-sized training set should fail
    // here with a clear message rather than later while the model is being built.
    assert(trainingSet != null && trainingSet.size == 1000,
      "expected a non-null temporal training set of exactly 1000 entries")

    // Resolve the algorithm definition once instead of casting it for every parameter.
    val regressionDef = inputDef.temporal.algoDef match {
      case linear: LinearRegressionDefinition => linear
      case other =>
        throw new IllegalStateException(
          s"Expected a LinearRegressionDefinition for the temporal field but found: $other")
    }

    val temporalModel = new LinearRegressionModel(inputDef.temporal,
      analyzer.data.sparkContext.parallelize(trainingSet),
      inputDef.algoWeights(inputDef.temporal),
      regressionDef.iterations,
      regressionDef.stepSize)

    ModelValidator.validateModel(temporalModel, trainingSet, analyzer, .99, outputValues = false)
  }

}