package com.cablelabs.eventgen.model

/**
 * Helpers for expanding a machine learning feature set (and its matching field weights)
 * with polynomial terms.
 */
object MachineLearning {

  /**
   * Clamps the requested polynomial degree so that anything below 1 is treated as 1.
   * @param degree - the requested polynomial degree
   * @return - the degree actually applied (always >= 1)
   */
  private def effectiveDegree(degree: Int): Int = if (degree < 1) 1 else degree

  /**
   * Applies the polynomial expansion to both the feature values and their weights.
   * Derived from https://github.com/adityashah30/multipolyfit/blob/master/src/polynomial/PolynomialRegression.java
   * @param degree - the polynomial degree. if < 1, 1 will be used
   * @param values - the values to convert
   * @param weights - the weights that accompany the values
   * @return - tuple2 of (expanded values, expanded weights). The first has
   *         values.size * degree elements and the second weights.size * degree elements;
   *         when the effective degree is 1 the inputs are returned unchanged
   */
  // TODO - consider moving this method to the SupervisedModel trait
  private[model] def polynomialWithWeights(degree: Int, values: Seq[Double],
                                           weights: Seq[Double]): (Seq[Double], Seq[Double]) =
    // Both helpers return their input untouched for an effective degree of 1, so no
    // separate short-circuit is required here.
    (polynomial(degree, values), polynomialWeights(degree, weights))

  /**
   * Applies a polynomial function to a ML feature set.
   * Each input value v is replaced, in place and in order, by v^1, v^2, ..., v^degree.
   * Derived from https://github.com/adityashah30/multipolyfit/blob/master/src/polynomial/PolynomialRegression.java
   * @param degree - the polynomial degree. if < 1, 1 will be used
   * @param values - the values to which to apply the polynomial function
   * @return - a new sequence whose size is values.size * degree, or the values themselves
   *         when the effective degree is 1
   */
  // TODO - consider moving this method to the SupervisedModel trait
  def polynomial(degree: Int, values: Seq[Double]): Seq[Double] = {
    val thisDegree = effectiveDegree(degree)
    if (thisDegree == 1) values
    else {
      // Single pass over the input: positional access (values(i)) is O(n) on linear
      // sequences such as List and would make the expansion quadratic.
      val out = values.iterator.flatMap { value =>
        Iterator.tabulate(thisDegree)(deg => Math.pow(value, deg + 1))
      }.toArray
      out.toSeq
    }
  }

  /**
   * Derives the field weights based on the polynomial degree parameter.
   * Each weight is repeated once per polynomial term so the result lines up
   * index-for-index with the output of polynomial() for the same degree.
   * @param degree - the polynomial degree. if < 1, 1 will be used
   * @param weights - the weights without any polynomial function applied
   * @return - a new sequence whose size is weights.size * degree, or the weights themselves
   *         when the effective degree is 1
   */
  // TODO - consider moving this method to the SupervisedModel trait
  def polynomialWeights(degree: Int, weights: Seq[Double]): Seq[Double] = {
    val thisDegree = effectiveDegree(degree)
    if (thisDegree == 1) weights
    else {
      // Single pass for the same reason as in polynomial(): avoid indexed Seq access.
      val out = weights.iterator.flatMap(weight => Iterator.fill(thisDegree)(weight)).toArray
      out.toSeq
    }
  }
}

/**
 * Algorithm trait - right now there is nothing to define but keeping for semantic and future extension reasons
 */
trait AlgorithmDefinition extends Serializable

/**
 * Trait for algorithms that require supervised training
 */
trait SupervisedTraining extends AlgorithmDefinition {

  /**
   * The names of the fields to omit from the algorithm's training set
   * @return - the field names to omit
   */
  def omitFields: Set[String]

  /**
   * Function used to flatten out the feature set values
   * @return - the function mapping a feature set to its flattened form
   */
  def flatten: (Seq[Double]) => Seq[Double]

  /**
   * Denotes the polynomial degree to apply to the machine learning feature set
   * @return - the polynomial degree
   */
  def polyDegree: Int
}

/**
 * Trait for regression algorithms
 */
trait RegressionDefinition extends SupervisedTraining {

  /**
   * Configured values used for field weights where the key will be the field name. When not configured, the
   * weight value for the given field will be 0
   * @return - the configured weights keyed by field name
   */
  def weights: Map[String, Int]
}

/**
 * Trait for classification algorithms
 */
trait ClassificationDefinition extends SupervisedTraining

/**
 * Trait to define algorithms that return a constant value when the Algorithm predict() method is called
 */
trait ConstantDefinition extends AlgorithmDefinition {

  /**
   * The constant value held by this definition
   * @return - the constant value
   */
  def value: Any
}

/**
 * Attributes required for training a Linear Regression algorithm
 * @param omitFields - the names of the fields to omit from the training set
 * @param weights - the configured field weights keyed by field name
 * @param flatten - function used to flatten out the feature set values
 * @param polyDegree - the polynomial degree to apply to the feature set (defaults to 1). Because the
 *                   parameters that follow have no defaults, callers must pass them by name to rely
 *                   on this default
 * @param iterations - the number of gradient descent iterations used during the training phase
 * @param stepSize - amount to move the point during gradient descent for each step iteration
 */
class LinearRegressionDefinition(override val omitFields: Set[String],
                                 override val weights: Map[String, Int],
                                 override val flatten: (Seq[Double]) => Seq[Double],
                                 override val polyDegree: Int = 1,
                                 val iterations: Int,
                                 val stepSize: Double)
  extends RegressionDefinition

/**
 * Attributes required for training a Naive Bayes classification algorithm
 * @param omitFields - the names of the fields to omit from the training set
 * @param flatten - function used to flatten out the feature set values
 * @param polyDegree - the polynomial degree to apply to the feature set (defaults to 1). Because the
 *                   parameter that follows has no default, callers must pass it by name to rely
 *                   on this default
 * @param lambda - the smoothing parameter used when training the predictive algorithm
 */
class NaiveBayesDefinition(override val omitFields: Set[String],
                           override val flatten: (Seq[Double]) => Seq[Double],
                           override val polyDegree: Int = 1,
                           val lambda: Double)
  extends ClassificationDefinition

/**
 * Definition of algorithm that always returns the value as its predicted value
 * @param value - the constant value; typed as Any, so the integer nature implied by the class name
 *              is not enforced by the compiler
 */
class ConstantIntDefinition(val value: Any) extends ConstantDefinition

/**
 * Definition of algorithm that always returns the value as its predicted value
 * @param value - the constant value; typed as Any, so the floating point nature implied by the
 *              class name is not enforced by the compiler
 */
class ConstantFloatDefinition(val value: Any) extends ConstantDefinition

/**
 * Definition of algorithm that always returns the value as its predicted value
 * @param value - the constant value; typed as Any, so the string nature implied by the class name
 *              is not enforced by the compiler
 */
class ConstantStringDefinition(val value: Any) extends ConstantDefinition