Commit 13882b62 authored by Steven Pisarski's avatar Steven Pisarski

Added the ability to configure the fields not to be used for supervised...

Added the ability to configure the fields not to be used for supervised machine learning and to configure the weights used when training regression prediction algorithms.
parent 1be62cba
......@@ -109,7 +109,7 @@ object Generator extends App {
val labelMap = trainingSet.map(p => p.label -> p.label).collect().toMap
new NaiveBayesModel(inputDef.temporal, trainingSet, labelMap, definition.lambda)
case definition: LinearRegressionDefinition =>
new LinearRegressionModel(inputDef.temporal, trainingSet, inputDef.temporalAlgoWeights(),
new LinearRegressionModel(inputDef.temporal, trainingSet, inputDef.algoWeights(inputDef.temporal),
definition.iterations, definition.stepSize)
}
case definition: ConstantDefinition => definition match {
......
......@@ -5,8 +5,6 @@ import java.util.Date
import org.joda.time.DateTime
import scala.collection.mutable
/**
* Abstract class for all event fields
* @param name - the field name
......@@ -35,8 +33,8 @@ abstract class InputField(override val name: String, override val description: S
*/
abstract class Temporal(override val name: String, override val description: String = "", val denormFields: Seq[String],
val algoDef: AlgorithmDefinition, override val factPosition: Int)
extends InputField(name, description) with TemporalRole {
def denormalize(event: Map[String, Any]): Map[String, Long]
extends InputField(name, description) with TemporalRole with AlgorithmRole {
def denormalize(event: Map[String, Any]): Seq[(String, Long)]
}
/**
......@@ -56,12 +54,12 @@ class DateTemporal(override val name: String, override val description: String =
* @param event - the event to parse
* @return - the map
*/
override def denormalize(event: Map[String, Any]): Map[String, Long] = {
override def denormalize(event: Map[String, Any]): Seq[(String, Long)] = {
val date = eventValue[Date](event)
val dt = new DateTime(date)
// TODO - try and make more functional
var outMap = new mutable.LinkedHashMap[String, Long]()
var out = Seq[(String, Long)]()
/**
* Closure to map a configured field to the associated value on the event
......@@ -70,31 +68,31 @@ class DateTemporal(override val name: String, override val description: String =
*/
def mapField(field: String) = field match {
case "timestamp" =>
outMap += (field -> dt.getMillis)
out = out :+ field -> dt.getMillis
case "hour_of_day" =>
outMap += (field -> dt.hourOfDay.get)
out = out :+ field -> dt.hourOfDay.get.toLong
case "day_of_month" =>
outMap += (field -> dt.dayOfMonth.get)
out = out :+ field -> dt.dayOfMonth.get.toLong
case "day_of_week" =>
outMap += (field -> dt.dayOfWeek.get)
out = out :+ field -> dt.dayOfWeek.get.toLong
case "day_of_year" =>
outMap += (field -> dt.dayOfYear.get)
out = out :+ field -> dt.dayOfYear.get.toLong
case "minute_of_day" =>
outMap += (field -> dt.minuteOfDay.get)
out = out :+ field -> dt.minuteOfDay.get.toLong
case "minute_of_hour" =>
outMap += (field -> dt.minuteOfHour.get)
out = out :+ field -> dt.minuteOfHour.get.toLong
case "month_of_year" =>
outMap += (field -> dt.monthOfYear.get)
out = out :+ field -> dt.monthOfYear.get.toLong
case "second_of_day" =>
outMap += (field -> dt.secondOfDay.get)
out = out :+ field -> dt.secondOfDay.get.toLong
case "second_of_minute" =>
outMap += (field -> dt.secondOfMinute.get)
out = out :+ field -> dt.secondOfMinute.get.toLong
case "millis_of_day" =>
outMap += (field -> dt.millisOfDay.get)
out = out :+ field -> dt.millisOfDay.get.toLong
case "week_of_weekyear" =>
outMap += (field -> dt.weekOfWeekyear.get)
out = out :+ field -> dt.weekOfWeekyear.get.toLong
case "year" =>
outMap += (field -> dt.year.get)
out = out :+ field -> dt.year.get.toLong
case _ =>
// do nothing
}
......@@ -104,7 +102,7 @@ class DateTemporal(override val name: String, override val description: String =
mapField(field)
})
else mapField("timestamp")
outMap.toMap
out
}
}
......@@ -138,8 +136,7 @@ class StringDimension(override val name: String, override val description: Strin
* Class for integer value Dimensions
* see param descriptions in super
*/
// TODO - refactor name to IntDimension to be congruent with the other integer type fields
class IntegerDimension(override val name: String, override val description: String = "", override val position: Int)
class IntDimension(override val name: String, override val description: String = "", override val position: Int)
extends Dimension(name, description, position) with IntegerRole
/**
......@@ -157,7 +154,7 @@ class FloatDimension(override val name: String, override val description: String
*/
abstract class Fact(override val name: String, override val description: String = "",
val position: Int, val algoDef: AlgorithmDefinition)
extends InputField(name, description) with FactRole
extends InputField(name, description) with FactRole with AlgorithmRole
/**
* Class for Date facts
......@@ -182,8 +179,7 @@ class StringFact(override val name: String, override val description: String = "
* Class for integer value facts
* see param descriptions in super
*/
// TODO - refactor name to IntFact to be congruent with the other integer type fields
class IntegerFact(override val name: String, override val description: String = "",
class IntFact(override val name: String, override val description: String = "",
override val position: Int, override val algoDef: AlgorithmDefinition)
extends Fact(name, description, position, algoDef) with IntegerRole
......
......@@ -112,6 +112,17 @@ trait TemporalRole extends GenerationRole {
*/
trait DimensionRole extends PositionalRole
/**
 * Trait mixed into input fields that carry a configured machine-learning
 * AlgorithmDefinition (e.g. Temporal and Fact)
 */
trait AlgorithmRole {
/**
 * The algorithm configuration for this field
 * @return - the configured AlgorithmDefinition
 */
def algoDef: AlgorithmDefinition
}
/**
* The role to be extended for all fact types
*/
......
......@@ -41,8 +41,6 @@ class InputDefinition(val temporal: Temporal, val dimensionSet: Set[Dimension],
require(dimensionSet != null && dimensionSet.size > 0)
require(factSet != null && factSet.size > 0)
// TODO - make functional, this is ugly
/**
* The Dimensions by name
*/
......@@ -187,41 +185,97 @@ class InputDefinition(val temporal: Temporal, val dimensionSet: Set[Dimension],
}
/**
* Return the training features for the ML algorithms
* Return the training features for a SupervisedTraining algorithm
* @param event - the event to parse
* @param factPosition - generates the feature set up to the index of the fact requested.
* When 0, no facts will be included. When < 0, all will be generated
* @return - the feature set
*/
private def algoFeatures(event: Map[String, Any], factPosition: Int, algo: SupervisedTraining): Seq[Double] = {
// Populate temporal features
val features = temporal.denormalize(event).map(f => f._2.toDouble).toSeq ++:
positionalDims.filter(p => !algo.omitFields.contains(p.name)).map(_.mlTrainingValue(event)) ++:
positionalFacts.filter(p =>
if (factPosition < 0) true
else p.position < factPosition).map(_.mlTrainingValue(event))
// TODO - determine if we should allow for filtering facts too
// positionalFacts.filter(p => p.position < factPosition && !algo.omitFields.contains(p.name)).map(_.mlTrainingValue(event))
AlgorithmUtil.polynomial(algo.polyDegree, AlgorithmUtil.flatten(features.toSeq, algo.flatten))
}
private def algoFeatures(event: Map[String, Any], factPosition: Int, algo: SupervisedTraining): Seq[Double] =
AlgorithmUtil.polynomial(algo.polyDegree,
AlgorithmUtil.flatten(
temporalFeatures(event, algo) ++: dimensionFeatures(event, algo) ++: factFeatures(event, factPosition, algo),
algo.flatten))
/**
* Returns the associated weights of each feature for the temporal prediction algorithm
* @return - a list of weights with the polynomial degree applied
* Return the temporal training features for a SupervisedTraining algorithm
* @param event - the event to parse
* @return - the feature set
*/
def temporalAlgoWeights(): Seq[Double] = {
temporal.algoDef match {
case algoDef:SupervisedTraining =>
// TODO - add these weights to the temporal definition
val weights = temporal.denormFields.map(f => 100d) ++:
positionalDims.map(f => 2000d) ++:
(if (temporal.factPosition < 0) positionalFacts.map(fact => 10d)
else positionalFacts.filter(_.position < temporal.factPosition).map(f => 10d))
AlgorithmUtil.polynomialWeights(algoDef.polyDegree, weights)
case _ =>
Seq[Double]()
private def temporalFeatures(event: Map[String, Any], algo: SupervisedTraining): Seq[Double] =
temporal.denormalize(event).filter(p => !algo.omitFields.contains(p._1)).map(f => f._2.toDouble)
/**
 * Builds the dimension portion of the feature vector for a SupervisedTraining algorithm,
 * skipping any dimensions the algorithm was configured to omit
 * @param event - the event to parse
 * @param algo - the supervised training configuration supplying the omitted field names
 * @return - the feature set
 */
private def dimensionFeatures(event: Map[String, Any], algo: SupervisedTraining): Seq[Double] =
  positionalDims
    .filterNot(dim => algo.omitFields.contains(dim.name))
    .map(_.mlTrainingValue(event))
/**
 * Return the fact training features for a SupervisedTraining algorithm
 * @param event - the event to parse
 * @param factPosition - generates the feature set up to the index of the fact requested.
 * When 0, no facts will be included. When < 0, all will be generated
 * @param algo - the supervised training configuration; facts named in its omitFields are excluded
 * @return - the feature set
 */
private def factFeatures(event: Map[String, Any], factPosition: Int, algo: SupervisedTraining): Seq[Double] =
  positionalFacts
    // the omit check applied to both branches of the original condition, so it is hoisted;
    // the position bound only applies when factPosition >= 0
    .filter(p => !algo.omitFields.contains(p.name) && (factPosition < 0 || p.position < factPosition))
    .map(_.mlTrainingValue(event))
/**
 * Returns the machine learning training weights as configured
 * @param field - the field to process
 * @return - the weights, only for Temporal or Fact fields configured with a
 * RegressionDefinition; an empty Seq is returned otherwise
 */
def algoWeights(field: AlgorithmRole): Seq[Double] = {
  // The Temporal and Fact cases are identical apart from the position used to bound
  // the fact weights, so resolve that (definition, position) pair first instead of
  // duplicating the whole computation in both branches
  val defAndPosition = field match {
    case temporal: Temporal => Some((temporal.algoDef, temporal.factPosition))
    case fact: Fact => Some((fact.algoDef, fact.position))
    case _ => None
  }
  defAndPosition match {
    case Some((regression: RegressionDefinition, position)) =>
      AlgorithmUtil.polynomialWeights(regression.polyDegree,
        temporalWeights(regression) ++: dimensionWeights(regression) ++: factWeights(regression, position))
    case _ => Seq()
  }
}
/**
 * Returns the weights for the temporal features
 * @param regression - the regression definition supplying the omitted fields and configured weights
 * @return - the weights; fields without a configured weight default to 0
 */
private def temporalWeights(regression: RegressionDefinition): Seq[Double] =
  temporal.denormFields
    .filterNot(regression.omitFields.contains)
    // getOrElse replaces the non-idiomatic `get(f) == None` check and its double map lookup
    .map(f => regression.weights.getOrElse(f, 0).toDouble)
/**
 * Returns the weights for the dimension features
 * @param regression - the regression definition supplying the omitted fields and configured weights
 * @return - the weights; dimensions without a configured weight default to 0
 */
private def dimensionWeights(regression: RegressionDefinition): Seq[Double] =
  positionalDims
    .filterNot(dim => regression.omitFields.contains(dim.name))
    // getOrElse replaces the non-idiomatic `get(name) == None` check and its double map lookup
    .map(dim => regression.weights.getOrElse(dim.name, 0).toDouble)
/**
 * Returns the weights for the fact features
 * @param regression - the regression definition supplying the omitted fields and configured weights
 * @param factPosition - upper bound on fact positions to include; when < 0 all facts are included
 * @return - the weights; facts without a configured weight default to 0
 */
private def factWeights(regression: RegressionDefinition, factPosition: Int): Seq[Double] =
  positionalFacts
    // the omit check applied to both branches of the original condition, so it is hoisted
    .filter(p => !regression.omitFields.contains(p.name) && (factPosition < 0 || p.position < factPosition))
    // getOrElse replaces the non-idiomatic `get(name) == None` check and its double map lookup
    .map(f => regression.weights.getOrElse(f.name, 0).toDouble)
}
import com.fasterxml.jackson.annotation.JsonProperty
......@@ -275,7 +329,7 @@ class DimensionFieldYaml(@JsonProperty("name") name: String,
val dimField: Dimension = fieldType match {
case "date" => new DateDimension(name, description, position, dateFormat)
case "string" => new StringDimension(name, description, position)
case "integer" => new IntegerDimension(name, description, position)
case "integer" => new IntDimension(name, description, position)
case "float" => new FloatDimension(name, description, position)
}
}
......@@ -298,7 +352,7 @@ class FactFieldYaml(@JsonProperty("name") name: String,
val factField: Fact = fieldType match {
case "date" => new DateFact(name, description, position, algo.algorithm, dateFormat)
case "string" => new StringFact(name, description, position, algo.algorithm)
case "integer" => new IntegerFact(name, description, position, algo.algorithm)
case "integer" => new IntFact(name, description, position, algo.algorithm)
case "float" => new FloatFact(name, description, position, algo.algorithm)
}
}
......@@ -309,20 +363,28 @@ class FactFieldYaml(@JsonProperty("name") name: String,
class AlgoYaml(@JsonProperty("name") name: String,
@JsonProperty("constType") constType: String,
@JsonProperty("constVal") constVal: String,
@JsonProperty("omitFields") _omitFields: Set[String],
@JsonProperty("omitFields") jOmitFields: java.util.Set[String],
@JsonProperty("flatten") flatten: Int,
@JsonProperty("polyDegree") polyDegree: Int,
@JsonProperty("iterations") iterations: Int,
@JsonProperty("stepSize") stepSize: Float,
@JsonProperty("lambda") lambda: Float) {
require(name != null && (name == "linearRegression" || name == "naiveBayes" || name == "constant")) // Currently only support these
@JsonProperty("lambda") lambda: Float,
@JsonProperty("weights") jWeights: java.util.Set[AlgoWeights]) {
// Currently only support these
require(name != null && (name == "linearRegression" || name == "naiveBayes" || name == "constant"))
private val omitFields =
if (jOmitFields == null) Set[String]()
else jOmitFields.asScala.toSet
val omitFields = if (_omitFields == null) Set[String]() else _omitFields
// TODO - test me
val algorithm = {
name match {
case "linearRegression" =>
new LinearRegressionDefinition(omitFields, flatten, polyDegree, iterations, stepSize)
val weights =
if (jWeights == null) Map[String, Int]()
else jWeights.asScala.map(f => f.name -> f.weight).toMap
new LinearRegressionDefinition(omitFields, weights, flatten, polyDegree, iterations, stepSize)
case "naiveBayes" =>
new NaiveBayesDefinition(omitFields, flatten, polyDegree, lambda)
case "constant" =>
......@@ -333,5 +395,11 @@ class AlgoYaml(@JsonProperty("name") name: String,
}
}
}
}
/**
 * Used for adding a weight to a training set feature for supervised algorithm training.
 * Deserialized by Jackson from the YAML "weights" list entries (name/weight pairs)
 * @param name - the field name
 * @param weight - the field's weight
 */
class AlgoWeights(@JsonProperty("name") val name: String, @JsonProperty("weight") val weight: Int)
\ No newline at end of file
......@@ -33,7 +33,15 @@ trait SupervisedTraining extends AlgorithmDefinition {
/**
* Trait for regression algorithms
*/
trait RegressionDefinition extends SupervisedTraining
trait RegressionDefinition extends SupervisedTraining {
/**
 * Configured values used for field weights where the key is the field name. When a field
 * has no configured entry, its weight is treated as 0
 * @return - map of field name to its configured training weight
 */
def weights: Map[String, Int]
}
/**
* Trait for classification algorithms
......@@ -52,8 +60,9 @@ trait ConstantDefinition extends AlgorithmDefinition {
* @param iterations - the number of gradient descent iterations used during the training phase
* @param stepSize - amount to move the point during gradient descent for each step iteration
*/
class LinearRegressionDefinition(override val omitFields: Set[String], override val flatten: Int = 0,
override val polyDegree: Int = 1, val iterations: Int, val stepSize: Double)
class LinearRegressionDefinition(override val omitFields: Set[String], override val weights: Map[String, Int],
override val flatten: Int = 0, override val polyDegree: Int = 1,
val iterations: Int, val stepSize: Double)
extends RegressionDefinition
/**
......@@ -61,7 +70,8 @@ class LinearRegressionDefinition(override val omitFields: Set[String], override
* @param lambda - the smoothing parameter used when training the predictive algorithm
*/
class NaiveBayesDefinition(override val omitFields: Set[String], override val flatten: Int = 0,
override val polyDegree: Int = 1, val lambda: Double) extends ClassificationDefinition
override val polyDegree: Int = 1,val lambda: Double)
extends ClassificationDefinition
/**
* Definition of algorithm that always returns the invValue as its predicted value
......
......@@ -13,6 +13,14 @@ temporal:
polyDegree: 3
iterations: 20
stepSize: 0.001
omitFields:
- string_dim
- int_fact
weights:
- name: hour_of_day
weight: 100
- name: string_dim
weight: 10
dimensions:
- name: int_dim
description: Integer Dimension
......
......@@ -3,7 +3,7 @@ temporal:
description: Temporal 1
type: date
dateFormat: MM/dd/yyyy HH:mm:ss a
factPosition: -1
factPosition: 40
denormFields:
- hour_of_day
- day_of_month
......@@ -12,6 +12,9 @@ temporal:
flatten: 2
polyDegree: 3
lambda: 0.001
omitFields:
- string_dim
- int_fact
dimensions:
- name: string_dim
description: String Dimension
......
......@@ -21,7 +21,7 @@ class ScheduleAndNotifyTest extends SparkTestUtils with BeforeAndAfter {
val dateFormat = new SimpleDateFormat(dateFmtStr)
val od = new OutputDefinition("vm", "localhost", 61616, "topic", "test",
Set[OutputField](
new IntOutput(name = "intOutput", inputField = new IntegerDimension(name = "intDim", position = 4)),
new IntOutput(name = "intOutput", inputField = new IntDimension(name = "intDim", position = 4)),
new DateOutput(name = "dateOutput", inputField = new DateTemporal(name = "dateTemporal",
denormFields = List[String](), algoDef = new ConstantIntDefinition(1000), factPosition = 1, dateFmtStr = dateFmtStr),
dateFmtStr = dateFmtStr)),
......
......@@ -9,19 +9,19 @@ import com.cablelabs.eventgen.model._
class ConstantModelTest extends UnitSpec {
test("ConstantIntModel shouldalways predict the same") {
val model = new ConstantIntModel(new IntegerFact("test", "test", 1, new ConstantIntDefinition(1000)), 1000)
val model = new ConstantIntModel(new IntFact("test", "test", 1, new ConstantIntDefinition(1000)), 1000)
assert(1000 == model.predict(null))
assert(1000 == model.predictRaw(null))
}
test("ConstantFloatModel shouldalways predict the same") {
val model = new ConstantFloatModel(new IntegerFact("test", "test", 1, new ConstantFloatDefinition(1000.01)), 1000.01)
val model = new ConstantFloatModel(new IntFact("test", "test", 1, new ConstantFloatDefinition(1000.01)), 1000.01)
assert(1000.01 == model.predict(null))
assert(1000.01 == model.predictRaw(null))
}
test("ConstantStringModel shouldalways predict the same") {
val model = new ConstantStringModel(new IntegerFact("test", "test", 1, new ConstantStringDefinition("pred value")), "pred value")
val model = new ConstantStringModel(new IntFact("test", "test", 1, new ConstantStringDefinition("pred value")), "pred value")
assert("pred value" == model.predict(null))
assert("pred value".hashCode == model.predictRaw(null))
}
......
......@@ -24,7 +24,7 @@ class LinearRegressionCmModelTest extends AnalyzerTester {
val trainingSet = analyzer.temporalTrainingSet().collect()
val temporalModel = new LinearRegressionModel(inputDef.temporal,
analyzer.data.sparkContext.parallelize(trainingSet),
inputDef.temporalAlgoWeights(),
inputDef.algoWeights(inputDef.temporal),
inputDef.temporal.algoDef.asInstanceOf[LinearRegressionDefinition].iterations,
inputDef.temporal.algoDef.asInstanceOf[LinearRegressionDefinition].stepSize)
assert(trainingSet != null && trainingSet.size == 1000)
......
......@@ -24,7 +24,7 @@ class LinearRegressionIvrModelTest extends AnalyzerTester {
val trainingSet = analyzer.temporalTrainingSet().collect()
val temporalModel = new LinearRegressionModel(inputDef.temporal,
analyzer.data.sparkContext.parallelize(trainingSet),
inputDef.temporalAlgoWeights(),
inputDef.algoWeights(inputDef.temporal),
inputDef.temporal.algoDef.asInstanceOf[LinearRegressionDefinition].iterations,
inputDef.temporal.algoDef.asInstanceOf[LinearRegressionDefinition].stepSize)
assert(trainingSet != null && trainingSet.size == 1000)
......
......@@ -32,7 +32,7 @@ class DateFactTest extends UnitSpec {
}
test("DateFact construction all fields by name out of order should return proper values") {
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), Map(), 2, 1, 20, .001)
val fact = new DateFact(algoDef = mlDef, position = 10, name = "testFact", dateFmtStr = dateFormat, description = "test desc")
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -43,7 +43,7 @@ class DateFactTest extends UnitSpec {
}
test("DateFact parseValue method should return the proper value with a one element map") {
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), Map(), 2, 1, 20, .001)
val fact = new DateFact("testFact", "test desc", 10, mlDef, dateFormat)
val nowDate = new Date
val now = fact.dateFormat.format(nowDate)
......@@ -52,13 +52,13 @@ class DateFactTest extends UnitSpec {
}
test("DateFact parseDate method should return the proper date value with a one element map") {
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), Map(), 2, 1, 20, .001)
val fact = new DateFact("testFact", "test desc", 10, mlDef, dateFormat)
val nowDate = new Date
val map = Map[String, Any]("testFact" -> nowDate)
assert((nowDate.getTime - fact.eventValue[Date](map).getTime).abs < 1000)
}
// TODO - add tests for dimensional filtering
// TODO - add tests for dimensional filtering and weights
}
......@@ -40,7 +40,7 @@ class DateTemporalTest extends UnitSpec {
}
test("DateTemporal construction all fields by name out of order should return proper values") {
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), Map(), 2, 1, 20, .001)
val temporal = new DateTemporal(algoDef = mlDef, name = "testTemporal", denormFields = List[String](),
dateFmtStr = dateFormat, description = "test desc", factPosition = -1)
assert("testTemporal" == temporal.name)
......@@ -55,7 +55,7 @@ class DateTemporalTest extends UnitSpec {
}
test("DateTemporal parseValue method should return the proper value with a one element map") {
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), Map(), 2, 1, 20, .001)
val temporal = new DateTemporal("testTemporal", "test desc", List[String](), mlDef, dateFormat, -1)
val nowDate = new Date
val now = temporal.dateFormat.format(nowDate)
......@@ -67,7 +67,7 @@ class DateTemporalTest extends UnitSpec {
val denormFields = List[String]("timestamp", "hour_of_day", "day_of_month", "day_of_week", "day_of_year",
"minute_of_day", "minute_of_hour", "month_of_year", "second_of_day", "second_of_minute", "millis_of_day",
"week_of_weekyear", "year")
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), Map(), 2, 1, 20, .001)
val temporal = new DateTemporal("testTemporal", "test desc", denormFields, mlDef, dateFormat, -1)
denormFields.foreach((field: String) => {
assert(temporal.denormFields.contains(field))
......@@ -113,5 +113,5 @@ class DateTemporalTest extends UnitSpec {
denormMap.foreach((record: (String, Long)) => {validateDenormTime(record._1, record._2)})
}
// TODO - add tests for dimensional filtering
// TODO - add tests for dimensional filtering and weights
}
......@@ -26,7 +26,7 @@ class FloatFactTest extends UnitSpec {
}
test("FloatFact construction all fields by name out of order should return proper values") {
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), Map(), 2, 1, 20, .001)
val fact = new FloatFact(algoDef = mlDef, position = 10, name = "testFact", description = "test desc")
assert("testFact" == fact.name)
assert("test desc" == fact.description)
......@@ -36,11 +36,11 @@ class FloatFactTest extends UnitSpec {
}
test("FloatFact eventValue method should return the proper value with a one element map") {
val mlDef = new LinearRegressionDefinition(Set(), 2, 1, 20, .001)
val mlDef = new LinearRegressionDefinition(Set(), Map(), 2, 1, 20, .001)
val fact = new FloatFact("testFact", "test desc", 10, mlDef)
val map = Map[String, Any]("testFact" -> 1.2)
assert(1.2 == fact.eventValue[Double](map))
}
// TODO - add tests for dimensional filtering
// TODO - add tests for dimensional filtering and weights
}
......@@ -22,7 +22,7 @@ class InputDefinitionTest extends UnitSpec {
val eachDimType = Set[Dimension](
new DateDimension("dateDim", "", 10, dateFormat),
new FloatDimension("floatDim", "", 20),
new IntegerDimension("intDim", "", 30),
new IntDimension("intDim", "", 30),
new StringDimension("stringDim", "", 40)
)
......@@ -30,7 +30,7 @@ class InputDefinitionTest extends UnitSpec {
val eachFactType = Set[Fact](
new DateFact("dateFact", "", 10, facMlDef, dateFormat),
new FloatFact("floatFact", "", 20, facMlDef),
new IntegerFact("intFact", "", 30, facMlDef),
new IntFact("intFact", "", 30, facMlDef),
new StringFact("stringFact", "", 40, facMlDef)
)
......@@ -52,8 +52,8 @@ class InputDefinitionTest extends UnitSpec {
test("Construction should contain the dims and fact") {
val temporal = new DateTemporal("testTemporal", "test desc", List[String](), temporalMlDef, dateFormat, -1)
val dims = Set[Dimension](new IntegerDimension("testDim", "test desc", 1))
val facts = Set[Fact](new IntegerFact("testFact", "test desc", 10, facMlDef))
val dims = Set[Dimension](new IntDimension("testDim", "test desc", 1))
val facts = Set[Fact](new IntFact("testFact", "test desc", 10, facMlDef))
val inputDef = new InputDefinition(temporal, dims, facts)
assert(temporal == inputDef.temporal)
assert(1 == inputDef.dimensions.size)
......@@ -278,6 +278,12 @@ class InputDefinitionTest extends UnitSpec {
assert(inputDef.temporal != null)
assert(inputDef.temporal.name == "temporal_field")
assert(inputDef.temporal.algoDef.asInstanceOf[LinearRegressionDefinition].flatten == 2)
assert(2 == inputDef.temporal.algoDef.asInstanceOf[SupervisedTraining].omitFields.size)
assert(inputDef.temporal.algoDef.asInstanceOf[SupervisedTraining].omitFields.contains("string_dim"))
assert(inputDef.temporal.algoDef.asInstanceOf[SupervisedTraining].omitFields.contains("int_fact"))
assert(2 == inputDef.temporal.algoDef.asInstanceOf[RegressionDefinition].weights.size)
assert(100 == inputDef.temporal.algoDef.asInstanceOf[RegressionDefinition].weights("hour_of_day"))
assert(10 == inputDef.temporal.algoDef.asInstanceOf[RegressionDefinition].weights("string_dim"))
assert(3 == inputDef.temporal.algoDef.asInstanceOf[LinearRegressionDefinition].polyDegree)
assert(inputDef.temporal.factPosition == -1)
assert(inputDef.temporal.algoDef.isInstanceOf[LinearRegressionDefinition])
......@@ -291,7 +297,7 @@ class InputDefinitionTest extends UnitSpec {
entry._1 match {
case "int_dim" =>
assert(entry._1 == entry._2.name)
assert(entry._2.isInstanceOf[IntegerDimension])
assert(entry._2.isInstanceOf[IntDimension])
case "float_dim" =>
assert(entry._1 == entry._2.name)
assert(entry._2.isInstanceOf[FloatDimension])
......@@ -310,7 +316,7 @@ class InputDefinitionTest extends UnitSpec {