MachineLearning.scala 5.19 KB
Newer Older
1 2
package com.cablelabs.eventgen.model

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
object MachineLearning {

  /**
   * Expands a feature vector and its weights using the same polynomial degree.
   * Derived from https://github.com/adityashah30/multipolyfit/blob/master/src/polynomial/PolynomialRegression.java
   * @param degree - the polynomial degree. if < 1, 1 will be used
   * @param values - the values to convert
   * @param weights - the weights to expand alongside the values
   * @return - tuple2 where the two Lists (values/weights) have a polynomial applied;
   *           the size of each equals the size of its input * degree
   */
  // TODO - consider moving this method to the SupervisedModel trait
  private[model] def polynomialWithWeights(degree: Int, values: Seq[Double], weights: Seq[Double]): (Seq[Double], Seq[Double]) =
    // Both helpers clamp the degree themselves and hand back their input untouched when it is 1
    (polynomial(degree, values), polynomialWeights(degree, weights))

  /**
   * Applies a polynomial function to a ML feature set. Every input value v is replaced by the
   * consecutive powers v^1, v^2, ... v^degree, keeping the input ordering
   * Derived from https://github.com/adityashah30/multipolyfit/blob/master/src/polynomial/PolynomialRegression.java
   * @param degree - the polynomial degree. if < 1, 1 will be used
   * @param values - the values to which to apply the polynomial function
   * @return - a new list whose size is = values.size * degree (the input itself when the degree is 1)
   */
  // TODO - consider moving this method to the SupervisedModel trait
  def polynomial(degree: Int, values: Seq[Double]): Seq[Double] = {
    val thisDegree = clampDegree(degree)
    if (thisDegree == 1) values
    else {
      // Single pass over the input: no positional values(i) lookups, which are O(n) each on a List
      values.iterator.flatMap { v =>
        Iterator.tabulate(thisDegree)(exp => Math.pow(v, exp + 1))
      }.toVector
    }
  }

  /**
   * Derives the field weights based on the polynomial degree parameter. Every weight is repeated
   * degree times so that it lines up with the terms generated by polynomial()
   * @param degree - the polynomial degree. if < 1, 1 will be used
   * @param weights - the weights without any polynomial function applied
   * @return - the weights with a polynomial function applied (the input itself when the degree is 1)
   */
  // TODO - consider moving this method to the SupervisedModel trait
  def polynomialWeights(degree: Int, weights: Seq[Double]): Seq[Double] = {
    val thisDegree = clampDegree(degree)
    if (thisDegree == 1) weights
    else {
      // Single pass over the input for the same reason as in polynomial()
      weights.iterator.flatMap(w => Iterator.fill(thisDegree)(w)).toVector
    }
  }

  /** Degrees below 1 are treated as 1 */
  private def clampDegree(degree: Int): Int = if (degree < 1) 1 else degree
}

62 63 64
/**
 * Algorithm trait - right now there is nothing to define but keeping for semantic and future extension reasons.
 * All algorithm definitions are Serializable through this trait
 */
trait AlgorithmDefinition extends Serializable
66

67 68 69
/**
 * Trait for algorithms that require supervised training
 */
trait SupervisedTraining extends AlgorithmDefinition {

  /**
   * The names of the fields to omit from the algorithm's training set
   * @return - the set of field names to omit
   */
  def omitFields: Set[String]

  /**
   * Function to flatten out the feature set values. The declared type is a plain function from one
   * sequence of doubles to another (not a scala PartialFunction)
   * @return - the flattening function
   */
  def flatten: (Seq[Double]) => Seq[Double]

  /**
   * Denotes the polynomial degree to apply to the machine learning feature set
   * @return - the polynomial degree
   */
  def polyDegree: Int
}
90

91 92 93
/**
 * Trait for regression algorithms
 */
trait RegressionDefinition extends SupervisedTraining {

  /**
   * Configured values used for field weights where the key will be the field name. When not configured, the
   * weight value for the given field will be 0
   * @return - the configured weights keyed by field name
   */
  def weights: Map[String, Int]
}
103 104 105 106

/**
 * Trait for classification algorithms. Adds no members of its own beyond those of SupervisedTraining
 */
trait ClassificationDefinition extends SupervisedTraining

/**
 * Trait to define algorithms that return a constant value when the Algorithm predict() method is called
 */
trait ConstantDefinition extends AlgorithmDefinition {

  /**
   * The constant held by this definition. Declared as Any, so the concrete type is not enforced here
   * @return - the constant value
   */
  def value: Any
}
115 116 117 118 119 120

/**
 * Attributes required for training a Linear Regression algorithm
 * @param omitFields - the names of the fields to omit from the algorithm's training set
 * @param weights - the configured field weights keyed by field name
 * @param flatten - function to flatten out the feature set values
 * @param polyDegree - the polynomial degree to apply to the feature set. Defaults to 1; as the parameters
 *                     that follow have no defaults, it can only be left out when they are passed by name
 * @param iterations - the number of gradient descent iterations used during the training phase
 * @param stepSize - amount to move the point during gradient descent for each step iteration
 */
class LinearRegressionDefinition(override val omitFields: Set[String], override val weights: Map[String, Int],
                                 override val flatten: (Seq[Double]) => Seq[Double],
                                 override val polyDegree: Int = 1, val iterations: Int,
                                 val stepSize: Double)
  extends RegressionDefinition
126 127 128 129 130

/**
 * Attributes required for training a Naive Bayes classification algorithm
 * @param omitFields - the names of the fields to omit from the algorithm's training set
 * @param flatten - function to flatten out the feature set values
 * @param polyDegree - the polynomial degree to apply to the feature set. Defaults to 1; as lambda
 *                     has no default, it can only be left out when lambda is passed by name
 * @param lambda - the smoothing parameter used when training the predictive algorithm
 */
class NaiveBayesDefinition(override val omitFields: Set[String],
                           override val flatten: (Seq[Double]) => Seq[Double],
                           override val polyDegree: Int = 1, val lambda: Double)
  extends ClassificationDefinition
135

136 137 138
/**
 * Definition of algorithm that always returns value as its predicted value
 * @param value - the constant to return. Presumably an integer given the class name, but it is
 *                declared as Any so this is not enforced - TODO confirm against callers
 */
class ConstantIntDefinition(val value: Any) extends ConstantDefinition
140 141 142
/**
 * Definition of algorithm that always returns value as its predicted value
 * @param value - the constant to return. Presumably a floating point number given the class name, but
 *                it is declared as Any so this is not enforced - TODO confirm against callers
 */
class ConstantFloatDefinition(val value: Any) extends ConstantDefinition
144 145 146 147

/**
 * Definition of algorithm that always returns value as its predicted value
 * @param value - the constant to return. Presumably a String given the class name, but it is
 *                declared as Any so this is not enforced - TODO confirm against callers
 */
class ConstantStringDefinition(val value: Any) extends ConstantDefinition