Commit 70b9beab authored by Steven Pisarski's avatar Steven Pisarski

Initial refactoring to make the code more functional around creating...

Initial refactoring to make the code that creates machine learning feature sets more functional, in support of the work required for tuning the predictions.
parent cbbf7247
......@@ -67,10 +67,6 @@ object AnalyzeData extends App {
System.exit(0)
}
def sparkContext(uri: String, name: String) = {
}
/**
* Takes the prediction metrics gathered from gatherPredictionMetrics() and outputs them to the OutputStream
* of your choice
......
......@@ -5,7 +5,7 @@ import java.util.Date
import _root_.akka.actor.Address
import com.cablelabs.eventgen.akka.{Event, GeneratorActors}
import com.cablelabs.eventgen.algorithm.Model
import com.cablelabs.eventgen.model.{InputDefinition, OutputDefinition}
import com.cablelabs.eventgen.model.{Fact, InputDefinition, OutputDefinition}
import com.typesafe.scalalogging.slf4j.Logger
import org.slf4j.LoggerFactory
......@@ -52,7 +52,10 @@ class Engine(val inputDef: InputDefinition, val temporalAlgo: Model, val factAlg
val out1 = inputDef.temporal.name -> new Date(dateOffset + seedEventDate.getTime)
val out2 = inputDef.dimensionValues(seedEvent).map(p => p._1 -> p._2).toMap
val out3 = factAlgos.map(p => p.field.name -> p.predict(inputDef.factAlgoFeatures(seedEvent, p.field.name))).toMap
val out3 = factAlgos.map(p => p.field match {
case fact: Fact =>
p.field.name -> p.predict(inputDef.factAlgoFeatures(seedEvent, fact))
}).toMap
val out = out2 ++ out3 + out1
logger.debug(s"Generated next event $out")
out
......
......@@ -240,8 +240,7 @@ class SparkAnalyzer(val data: RDD[(String, Date, Map[String, Any])], val inputDe
case algoDef:SupervisedTraining =>
logger.info("Retrieving the training sets for each fact prediction algorithm")
data.flatMap[(LabeledPoint, Any)](p => {
val features = inputDef.algoFeatures(p._3, inputDef.positionalFacts.indexOf(fact), algoDef.polyDegree,
algoDef.flatten)
val features = inputDef.factAlgoFeatures(p._3, fact)
Seq((LabeledPoint(inputDef.fieldMap.get(name).get.mlTrainingValue(p._3), new DenseVector(features.toArray)),
fact.eventValue[Any](p._3)))
})
......
......@@ -167,7 +167,7 @@ class InputDefinition(val temporal: Temporal, val dimensionSet: Set[Dimension],
def temporalAlgoFeatures(event: Map[String, Any]): Seq[Double] = {
temporal.algoDef match {
case algoDef:SupervisedTraining =>
algoFeatures(event, temporal.factIndex, algoDef.polyDegree, algoDef.flatten)
algoFeatures(event, temporal.factIndex, algoDef)
case _ => Seq[Double]()
}
}
......@@ -175,14 +175,13 @@ class InputDefinition(val temporal: Temporal, val dimensionSet: Set[Dimension],
/**
* Return the training features for a given event and fact attribute by name
* @param event - the event to parse
* @param name - the fact name for which the training set will be generated
* @param fact - the fact field for which the training set will be generated
* @return - the features
*/
def factAlgoFeatures(event: Map[String, Any], name: String): Seq[Double] = {
val fact = facts.get(name).get
def factAlgoFeatures(event: Map[String, Any], fact: Fact): Seq[Double] = {
fact.algoDef match {
case algoDef:SupervisedTraining =>
algoFeatures(event, positionalFacts.indexOf(fact), algoDef.polyDegree, algoDef.flatten)
algoFeatures(event, fact.position, algoDef)
case _ => Seq[Double]()
}
}
......@@ -190,18 +189,20 @@ class InputDefinition(val temporal: Temporal, val dimensionSet: Set[Dimension],
/**
* Return the training features for the ML algorithms
* @param event - the event to parse
* @param factIndex - generates the feature set up to the index of the fact requested.
* @param factPosition - limits the facts in the feature set to those positioned before this value.
* When 0, no facts will be included. When < 0, all will be generated
* @return - the feature set
*/
def algoFeatures(event: Map[String, Any], factIndex: Int, polyDegree: Int, flatten: Int): Seq[Double] = {
var featureList = temporal.denormalize(event).map(f => f._2.toDouble).toSeq
// Populate dimension features
positionalDims.foreach(dim => featureList = featureList :+ dim.mlTrainingValue(event))
val factIndexToInclude = if (factIndex < 0) facts.size else factIndex
for (i <- 0 until factIndexToInclude)
featureList = featureList :+ positionalFacts(i).mlTrainingValue(event)
AlgorithmUtil.polynomial(polyDegree, AlgorithmUtil.flatten(featureList.toSeq, flatten))
private def algoFeatures(event: Map[String, Any], factPosition: Int, algo: SupervisedTraining): Seq[Double] = {
  // Temporal features: the numeric value of every denormalized temporal entry
  val temporalValues = temporal.denormalize(event).map(_._2.toDouble).toSeq

  // Dimension features, leaving out any dimension named in the algorithm's omitFields
  val dimValues = positionalDims
    .filterNot(dim => algo.omitFields.contains(dim.name))
    .map(_.mlTrainingValue(event))

  // Fact features: every fact when factPosition is negative, otherwise only the facts
  // whose position is lower than factPosition
  val factValues = positionalFacts
    .filter(fact => factPosition < 0 || fact.position < factPosition)
    .map(_.mlTrainingValue(event))
  // TODO - determine if we should allow for filtering facts too
  // positionalFacts.filter(p => p.position < factPosition && !algo.omitFields.contains(p.name)).map(_.mlTrainingValue(event))

  // `++:` is right-associative, so the combined order is temporal, dimensions, facts
  val rawFeatures = temporalValues ++: dimValues ++: factValues

  // Hand the raw features to AlgorithmUtil.flatten and then AlgorithmUtil.polynomial,
  // using the flatten and polyDegree settings carried by the algorithm definition
  AlgorithmUtil.polynomial(algo.polyDegree, AlgorithmUtil.flatten(rawFeatures.toSeq, algo.flatten))
}
/**
......@@ -211,21 +212,12 @@ class InputDefinition(val temporal: Temporal, val dimensionSet: Set[Dimension],
def temporalAlgoWeights(): Seq[Double] = {
temporal.algoDef match {
case algoDef:SupervisedTraining =>
var weightList = List[Double]()
// TODO - add these weights to the temporal definition
// Populate temporal features
for (i <- 1 to temporal.denormFields.size) weightList = weightList.::(100)
// Populate dimension features
positionalDims.foreach(dim => weightList = weightList.::(2000))
if (temporal.factIndex < 0)
positionalFacts.foreach (fact => weightList = weightList.::(10))
else
for (i <- 0 to temporal.factIndex - 1)
weightList = weightList.::(10)
AlgorithmUtil.polynomialWeights(algoDef.polyDegree, weightList.toSeq)
val weights = temporal.denormFields.map(f => 100d) ++:
positionalDims.map(f => 2000d) ++:
(if (temporal.factIndex < 0) positionalFacts.map(fact => 10d)
else positionalFacts.filter(_.position < temporal.factIndex).map(f => 10d))
AlgorithmUtil.polynomialWeights(algoDef.polyDegree, weights)
case _ =>
Seq[Double]()
}
......@@ -317,6 +309,7 @@ class FactFieldYaml(@JsonProperty("name") name: String,
class AlgoYaml(@JsonProperty("name") name: String,
@JsonProperty("constType") constType: String,
@JsonProperty("constVal") constVal: String,
@JsonProperty("omitFields") _omitFields: Set[String],
@JsonProperty("flatten") flatten: Int,
@JsonProperty("polyDegree") polyDegree: Int,
@JsonProperty("iterations") iterations: Int,
......@@ -324,13 +317,14 @@ class AlgoYaml(@JsonProperty("name") name: String,
@JsonProperty("lambda") lambda: Float) {
require(name != null && (name == "linearRegression" || name == "naiveBayes" || name == "constant")) // Currently only support these
val omitFields = if (_omitFields == null) Set[String]() else _omitFields
// TODO - test me
val algorithm = {
name match {
case "linearRegression" =>
new LinearRegressionDefinition(flatten, polyDegree, iterations, stepSize)
new LinearRegressionDefinition(omitFields, flatten, polyDegree, iterations, stepSize)
case "naiveBayes" =>
new NaiveBayesDefinition(flatten, polyDegree, lambda)
new NaiveBayesDefinition(omitFields, flatten, polyDegree, lambda)
case "constant" =>
constType match {
case "string" => new ConstantStringDefinition(constVal)
......
......@@ -10,6 +10,12 @@ trait AlgorithmDefinition extends Serializable
*/
trait SupervisedTraining extends AlgorithmDefinition {
/**
* The names of the fields to omit from the algorithm's training set
* @return
*/
def omitFields: Set[String]
/**
* Denotes how many times the feature set will be flattened by taking the original value and applying the log10
* value
......@@ -46,14 +52,16 @@ trait ConstantDefinition extends AlgorithmDefinition {
* @param iterations - the number of gradient descent iterations used during the training phase
* @param stepSize - amount to move the point during gradient descent for each step iteration
*/
class LinearRegressionDefinition(override val flatten: Int = 0, override val polyDegree: Int = 1, val iterations: Int,
val stepSize: Double) extends RegressionDefinition
class LinearRegressionDefinition(override val omitFields: Set[String], override val flatten: Int = 0,
override val polyDegree: Int = 1, val iterations: Int, val stepSize: Double)
extends RegressionDefinition
/**
* Attributes required for training a Naive Bayes classification algorithm
* @param lambda - the smoothing parameter used when training the predictive algorithm
*/
class NaiveBayesDefinition(override val flatten: Int = 0, override val polyDegree: Int = 1, val lambda: Double) extends ClassificationDefinition
class NaiveBayesDefinition(override val omitFields: Set[String], override val flatten: Int = 0,
override val polyDegree: Int = 1, val lambda: Double) extends ClassificationDefinition
/**
* Definition of algorithm that always returns the invValue as its predicted value
......
......@@ -12,7 +12,7 @@ class DateFactTest extends UnitSpec {
val dateFormat = "MM/dd/yyyy HH:mm:ss a"
test("DateFact construction all fields positionally should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 1, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 1, .001)
val fact = new DateFact("testFact", "test desc", 10, mlDef, dateFormat)
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -22,7 +22,7 @@ class DateFactTest extends UnitSpec {
}
test("DateFact construction all fields by name in order except description should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 1, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 1, .001)
val fact = new DateFact(algoDef = mlDef, name = "testFact", position = 10, dateFmtStr = dateFormat)
assert("testFact" == fact.name)
assert("" == fact.description)
......@@ -32,7 +32,7 @@ class DateFactTest extends UnitSpec {
}
test("DateFact construction all fields by name out of order should return proper values") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new DateFact(algoDef = mlDef, position = 10, name = "testFact", dateFmtStr = dateFormat, description = "test desc")
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -43,7 +43,7 @@ class DateFactTest extends UnitSpec {
}
test("DateFact parseValue method should return the proper value with a one element map") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new DateFact("testFact", "test desc", 10, mlDef, dateFormat)
val nowDate = new Date
val now = fact.dateFormat.format(nowDate)
......@@ -52,11 +52,13 @@ class DateFactTest extends UnitSpec {
}
test("DateFact parseDate method should return the proper date value with a one element map") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new DateFact("testFact", "test desc", 10, mlDef, dateFormat)
val nowDate = new Date
val map = Map[String, Any]("testFact" -> nowDate)
assert((nowDate.getTime - fact.eventValue[Date](map).getTime).abs < 1000)
}
// TODO - add tests for dimensional filtering
}
......@@ -13,7 +13,7 @@ class DateTemporalTest extends UnitSpec {
val dateFormat = "MM/dd/yyyy HH:mm:ss a"
test("DateTemporal construction all fields positionally should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 3, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 3, .001)
val temporal = new DateTemporal("testTemporal", "test desc", List[String](), mlDef, dateFormat, -1)
assert("testTemporal" == temporal.name)
assert("test desc" == temporal.description)
......@@ -26,7 +26,7 @@ class DateTemporalTest extends UnitSpec {
}
test("DateTemporal construction all fields by name in order except description should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 3, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 3, .001)
val temporal = new DateTemporal(algoDef = mlDef, name = "testTemporal", denormFields = List[String](),
dateFmtStr = dateFormat, factIndex = -1)
assert("testTemporal" == temporal.name)
......@@ -40,7 +40,7 @@ class DateTemporalTest extends UnitSpec {
}
test("DateTemporal construction all fields by name out of order should return proper values") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val temporal = new DateTemporal(algoDef = mlDef, name = "testTemporal", denormFields = List[String](),
dateFmtStr = dateFormat, description = "test desc", factIndex = -1)
assert("testTemporal" == temporal.name)
......@@ -55,7 +55,7 @@ class DateTemporalTest extends UnitSpec {
}
test("DateTemporal parseValue method should return the proper value with a one element map") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val temporal = new DateTemporal("testTemporal", "test desc", List[String](), mlDef, dateFormat, -1)
val nowDate = new Date
val now = temporal.dateFormat.format(nowDate)
......@@ -67,7 +67,7 @@ class DateTemporalTest extends UnitSpec {
val denormFields = List[String]("timestamp", "hour_of_day", "day_of_month", "day_of_week", "day_of_year",
"minute_of_day", "minute_of_hour", "month_of_year", "second_of_day", "second_of_minute", "millis_of_day",
"week_of_weekyear", "year")
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val temporal = new DateTemporal("testTemporal", "test desc", denormFields, mlDef, dateFormat, -1)
denormFields.foreach((field: String) => {
assert(temporal.denormFields.contains(field))
......@@ -113,4 +113,5 @@ class DateTemporalTest extends UnitSpec {
denormMap.foreach((record: (String, Long)) => {validateDenormTime(record._1, record._2)})
}
// TODO - add tests for dimensional filtering
}
......@@ -8,7 +8,7 @@ import com.cablelabs.eventgen.UnitSpec
class FloatFactTest extends UnitSpec {
test("FloatFact construction all fields positionally should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 1, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 1, .001)
val fact = new FloatFact("testFact", "test desc", 10, mlDef)
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -17,7 +17,7 @@ class FloatFactTest extends UnitSpec {
}
test("FloatFact construction all fields by name in order except description should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 1, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 1, .001)
val fact = new FloatFact(algoDef = mlDef, name = "testFact", position = 2)
assert("testFact" == fact.name)
assert("" == fact.description)
......@@ -26,7 +26,7 @@ class FloatFactTest extends UnitSpec {
}
test("FloatFact construction all fields by name out of order should return proper values") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new FloatFact(algoDef = mlDef, position = 10, name = "testFact", description = "test desc")
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -36,10 +36,11 @@ class FloatFactTest extends UnitSpec {
}
test("FloatFact eventValue method should return the proper value with a one element map") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new FloatFact("testFact", "test desc", 10, mlDef)
val map = Map[String, Any]("testFact" -> 1.2)
assert(1.2 == fact.eventValue[Double](map))
}
// TODO - add tests for dimensional filtering
}
......@@ -26,7 +26,7 @@ class InputDefinitionTest extends UnitSpec {
new StringDimension("stringDim", "", 40)
)
def facMlDef = new NaiveBayesDefinition(0, 1, .001)
val facMlDef = new NaiveBayesDefinition(Set(), 0, 1, .001)
val eachFactType = Set[Fact](
new DateFact("dateFact", "", 10, facMlDef, dateFormat),
new FloatFact("floatFact", "", 20, facMlDef),
......@@ -34,7 +34,7 @@ class InputDefinitionTest extends UnitSpec {
new StringFact("stringFact", "", 40, facMlDef)
)
def temporalMlDef = new NaiveBayesDefinition(0, 1, .001)
def temporalMlDef = new NaiveBayesDefinition(Set(), 0, 1, .001)
val temporal = new DateTemporal("temporal", "",
List[String]("hour_of_day", "day_of_month", "day_of_week", "day_of_year"), temporalMlDef, dateFormat, -1)
val inputDef = new InputDefinition(temporal, eachDimType, eachFactType)
......@@ -209,7 +209,7 @@ class InputDefinitionTest extends UnitSpec {
}
test("InputDefinition algoFeatures() method should a return all ML training features except for the lowest priority fact") {
val trainingSet = inputDef.algoFeatures(event.toMap, 3, 1, 0)
val trainingSet = inputDef.factAlgoFeatures(event.toMap, inputDef.facts("stringFact"))
val dt = new DateTime(inputDef.temporal.eventValue[Date](event.toMap))
assert(trainingSet.size == 11)
......@@ -232,7 +232,7 @@ class InputDefinitionTest extends UnitSpec {
}
test("InputDefinition algoFeatures() method should a return all ML training features for predicting stringFact") {
val trainingSet = inputDef.factAlgoFeatures(event.toMap, "stringFact")
val trainingSet = inputDef.factAlgoFeatures(event.toMap, inputDef.facts("stringFact"))
val dt = new DateTime(inputDef.temporal.eventValue[Date](event.toMap))
assert(trainingSet.size == 11)
......@@ -255,7 +255,7 @@ class InputDefinitionTest extends UnitSpec {
}
test("InputDefinition algoFeatures() method should a return all ML training features for predicting dateFact") {
val trainingSet = inputDef.factAlgoFeatures(event.toMap, "dateFact")
val trainingSet = inputDef.factAlgoFeatures(event.toMap, inputDef.facts("dateFact"))
val dt = new DateTime(inputDef.temporal.eventValue[Date](event.toMap))
assert(trainingSet.size == 8)
......@@ -446,4 +446,5 @@ class InputDefinitionTest extends UnitSpec {
}
}
// TODO - add tests for dimensional filtering
}
......@@ -8,7 +8,7 @@ import com.cablelabs.eventgen.UnitSpec
class IntegerFactTest extends UnitSpec {
test("IntegerFact construction all fields positionally should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 1, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 1, .001)
val fact = new IntegerFact("testFact", "test desc", 10, mlDef)
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -17,7 +17,7 @@ class IntegerFactTest extends UnitSpec {
}
test("IntegerFact construction all fields by name in order except description should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 1, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 1, .001)
val fact = new IntegerFact(algoDef = mlDef, name = "testFact", position = 10)
assert("testFact" == fact.name)
assert("" == fact.description)
......@@ -26,7 +26,7 @@ class IntegerFactTest extends UnitSpec {
}
test("IntegerFact construction all fields by name out of order should return proper values") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new IntegerFact(algoDef = mlDef, position = 10, name = "testFact", description = "test desc")
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -36,10 +36,11 @@ class IntegerFactTest extends UnitSpec {
}
test("IntegerFact eventValue method should return the proper value with a one element map") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new IntegerFact("testFact", "test desc", 10, mlDef)
val map = Map[String, Any]("testFact" -> 1)
assert(1 == fact.eventValue[Int](map))
}
// TODO - add tests for dimensional filtering
}
......@@ -8,36 +8,36 @@ import com.cablelabs.eventgen.UnitSpec
class MachineLearningTest extends UnitSpec {
test("LinearRegression constructor with positional parameters should properly populate the object") {
val algo = new LinearRegressionDefinition(0, 1, 20, .001)
val algo = new LinearRegressionDefinition(Set(), 0, 1, 20, .001)
assert(20 == algo.iterations)
assert(.001 == algo.stepSize)
}
test("LinearRegression constructor with named parameters should properly populate the object") {
val algo = new LinearRegressionDefinition(stepSize = .001, iterations = 20)
val algo = new LinearRegressionDefinition(Set(), stepSize = .001, iterations = 20)
assert(20 == algo.iterations)
assert(.001 == algo.stepSize)
}
test("NaiveBayes constructor with positional parameters should properly populate the object") {
val algo = new NaiveBayesDefinition(lambda = .001)
val algo = new NaiveBayesDefinition(Set(), lambda = .001)
assert(.001 == algo.lambda)
}
test("NaiveBayes constructor with named parameters should properly populate the object") {
val algo = new NaiveBayesDefinition(lambda = .001)
val algo = new NaiveBayesDefinition(Set(), lambda = .001)
assert(.001 == algo.lambda)
}
test("TemporalMlDefinition constructor with positional parameters should properly populate the object") {
val definition = new NaiveBayesDefinition(2, 3, .001)
val definition = new NaiveBayesDefinition(Set(), 2, 3, .001)
assert(definition.flatten == 2)
assert(3 == definition.polyDegree)
assert(.001 == definition.lambda)
}
test("TemporalMlDefinition constructor with named parameters should properly populate the object") {
val definition = new LinearRegressionDefinition(2, 3, 20, .001)
val definition = new LinearRegressionDefinition(Set(), 2, 3, 20, .001)
assert(definition.flatten == 2)
assert(3 == definition.polyDegree)
assert(20 == definition.iterations)
......@@ -45,21 +45,21 @@ class MachineLearningTest extends UnitSpec {
}
test("TemporalMlDefinition constructor omitting the polyDegree should properly populate the object") {
val definition = new NaiveBayesDefinition(lambda = .001, flatten = 2)
val definition = new NaiveBayesDefinition(Set(), lambda = .001, flatten = 2)
assert(definition.flatten == 2)
assert(1 == definition.polyDegree)
assert(.001 == definition.lambda)
}
test("FactMlDefinition constructor with positional parameters should properly populate the object") {
val definition = new NaiveBayesDefinition(2, 3, .001)
val definition = new NaiveBayesDefinition(Set(), 2, 3, .001)
assert(definition.flatten == 2)
assert(3 == definition.polyDegree)
assert(.001 == definition.lambda)
}
test("FactMlDefinition constructor with named parameters should properly populate the object") {
val definition = new LinearRegressionDefinition(iterations = 20, stepSize = .001, polyDegree = 3, flatten = 2)
val definition = new LinearRegressionDefinition(Set(), iterations = 20, stepSize = .001, polyDegree = 3, flatten = 2)
assert(definition.flatten == 2)
assert(3 == definition.polyDegree)
assert(20 == definition.iterations)
......@@ -67,7 +67,7 @@ class MachineLearningTest extends UnitSpec {
}
test("FactMlDefinition constructor omitting the polyDegree should properly populate the object") {
val definition = new NaiveBayesDefinition(lambda = .001, flatten = 2)
val definition = new NaiveBayesDefinition(Set(), lambda = .001, flatten = 2)
assert(definition.flatten == 2)
assert(1 == definition.polyDegree)
assert(.001 == definition.lambda)
......@@ -88,4 +88,5 @@ class MachineLearningTest extends UnitSpec {
assert("constant value" == definition.value)
}
// TODO - add tests for dimensional filtering
}
......@@ -8,7 +8,7 @@ import com.cablelabs.eventgen.UnitSpec
class StringFactTest extends UnitSpec {
test("StringFact construction all fields positionally should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 1, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 1, .001)
val fact = new StringFact("testFact", "test desc", 10, mlDef)
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -17,7 +17,7 @@ class StringFactTest extends UnitSpec {
}
test("StringFact construction all fields by name in order except description should return proper values") {
val mlDef = new NaiveBayesDefinition(2, 1, .001)
val mlDef = new NaiveBayesDefinition(Set(), 2, 1, .001)
val fact = new StringFact(algoDef = mlDef, name = "testFact", position = 10)
assert("testFact" == fact.name)
assert("" == fact.description)
......@@ -26,7 +26,7 @@ class StringFactTest extends UnitSpec {
}
test("StringFact construction all fields by name out of order should return proper values") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new StringFact(algoDef = mlDef, position = 10, name = "testFact", description = "test desc")
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -36,10 +36,11 @@ class StringFactTest extends UnitSpec {
}
test("StringFact parseValue method should return the proper value with a one element map") {
val mlDef = new LinearRegressionDefinition(2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val fact = new StringFact("testFact", "test desc", 10, mlDef)
val map = Map[String, Any]("testFact" -> "testFactValue")
assert("testFactValue" == fact.eventValue[String](map))
}
// TODO - add tests for dimensional filtering
}
sparkUri: local[8]
appName: CM-Analyzer-small
schemaUri: testData/cm/definition/cm-constant-input-fact-pred-test.yaml
fileDelim: "|"
eventsUri: testData/cm/events/
factTrainingMetricsUri: /Users/spisarski/tmp/factTraining-large/
numThreads: 10
\ No newline at end of file
sparkUri: local[8]
appName: CM-Analyzer-small
schemaUri: testData/cm/definition/cm-constant-input-fact-pred-test.yaml
fileDelim: "|"
eventsUri: testData/cm/events/cm_1a.txt
factTrainingMetricsUri: /Users/spisarski/tmp/factTraining-small/
numThreads: 10
\ No newline at end of file
temporal:
name: poll_date
description: Date of CM poll
type: date
dateFormat: MM-dd-yyyy-HH:mm:ss
factIndex: -1
denormFields:
- day_of_week
- day_of_month
- day_of_year
- hour_of_day
algo:
name: constant
constType: integer
constVal: 60000
dimensions:
- name: cmts
description: The CMTS name
type: string
position: 10
- name: node
description: The Node name
type: string
position: 20
- name: mac
description: The MAC address
type: string
position: 30
- name: lat
description: The geo latitude
type: float
position: 40
- name: lng
description: The geo longitude
type: float
position: 50
facts:
- name: downstream_receive_power_num
description: fact 1-out
type: float
position: 10
algo:
name: linearRegression
flatten: 2
polyDegree: 2
iterations: 50
stepSize: 0.01
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment