Commit 31978dd8 by Steven Pisarski

### Making code more functional

parent 41eace3c
 ... ... @@ -15,14 +15,9 @@ object AlgorithmUtil { * their sizes will equal values.size * degree */ def polynomialWithWeights(degree: Int, values: List[Double], weights: List[Double]): (List[Double], List[Double]) = { var thisDegree = degree if (degree < 1) thisDegree = 1 if (degree != 1) { (polynomial(thisDegree, values), polynomialWeights(thisDegree, weights)) } else { (values, weights) } val thisDegree = if (degree < 1) 1 else degree if (thisDegree != 1) (polynomial(thisDegree, values), polynomialWeights(thisDegree, weights)) else (values, weights) } /** ... ... @@ -33,10 +28,8 @@ object AlgorithmUtil { * @return - a new list who's size is = values.size * degree */ def polynomial(degree: Int, values: List[Double]): List[Double] = { var thisDegree = degree if (degree < 1) thisDegree = 1 if (degree != 1) { val thisDegree = if (degree < 1) 1 else degree if (thisDegree != 1) { val out = Array.fill[Double](values.size * thisDegree)(0d) for (col <- 0 to values.size - 1) { for (deg <- 0 to thisDegree - 1) { ... ... @@ -45,9 +38,7 @@ object AlgorithmUtil { } } out.toList } else { values } } else values } /** ... ... @@ -57,8 +48,7 @@ object AlgorithmUtil { * @return - the weights with a polynomial function applied */ def polynomialWeights(degree: Int, weights: List[Double]): List[Double] = { var thisDegree = degree if (degree < 1) thisDegree = 1 val thisDegree = if (degree < 1) 1 else degree if (thisDegree != 1) { val out = Array.fill[Double](weights.size * thisDegree)(0d) for (col <- 0 to weights.size - 1) { ... ... @@ -68,9 +58,7 @@ object AlgorithmUtil { } } out.toList } else { weights } } else weights } /** ... ... 
@@ -80,17 +68,14 @@ object AlgorithmUtil { * @param numIter - the number of times to perform the flattening function * @return - a new list of flattened features */ def flatten(features: List[Double], numIter: Int): List[Double] = { def flatten(features: List[Double], numIter: Int): List[Double] = if (numIter > 0) { val flattened = features.map(num => { if (num == 0) 0 else Math.log(Math.abs(num)) }) flatten(flattened, numIter - 1) } else { features } } } else features /** * Returns a tuple containing the list of sorted dates corresponding, a list of durations ... ... @@ -102,6 +87,7 @@ object AlgorithmUtil { val sorted = dates.sortBy(x => x.getTime) val durations = new ArrayBuffer[Double]() var prevDate = new Date(0) // TODO - try to achieve same with a higher order function or recursion sorted.foreach(p => { if (prevDate.getTime == 0) { durations += 0d ... ... @@ -110,11 +96,7 @@ object AlgorithmUtil { } prevDate = p }) var avg = 0d if (durations.size > 1) { avg = durations.sum / (durations.size - 1) } val avg = if (durations.size > 1) durations.sum / (durations.size - 1) else 0d assert(sorted.size == durations.size) (sorted, durations.toList, avg) } ... ...
 ... ... @@ -85,38 +85,26 @@ trait ClassificationModel extends SupervisedModel { val pred = model.predict(new DenseVector(features.toArray)) val key = closestKey(pred, labelKeys.size / 2, 0) val label = labelMap.get(key) if (label == None) { throw new RuntimeException("Predicted value not contained in the label map") } else { label.get } if (label == None) throw new RuntimeException("Predicted value not contained in the label map") else label.get } /** * Returns the closest key from the predicted to the list holding the classification values * TODO - write unit test for this private method */ private[this] def closestKey(value: Double, index: Int, lastIndex: Int): Double = { if(index == lastIndex) { if (index < labelKeys.size && index >= 0 && labelKeys.size > 0) { labelKeys(index) } else { 0 } } else { var newIndex = 0 if(value > labelKeys(index)) { newIndex = index / 2 } else { newIndex = (labelKeys.size - index) / 2 + index } private[this] def closestKey(value: Double, index: Int, lastIndex: Int): Double = if(index == lastIndex) if (index < labelKeys.size && index >= 0 && labelKeys.size > 0) labelKeys(index) else 0 else { val newIndex = if(value > labelKeys(index)) index / 2 else(labelKeys.size - index) / 2 + index closestKey(value, newIndex, index) } } override def predictRaw(features: List[Double]): Double = { model.predict(new DenseVector(features.toArray)) } override def predictRaw(features: List[Double]): Double = model.predict(new DenseVector(features.toArray)) } /** ... ... @@ -135,9 +123,7 @@ trait RegressionModel extends SupervisedModel { field.convert(predictRaw(features)) } override def predictRaw(features: List[Double]): Double = { model.predict(new DenseVector(features.toArray)) } override def predictRaw(features: List[Double]): Double = model.predict(new DenseVector(features.toArray)) } /** ... ... 
@@ -160,15 +146,15 @@ class NaiveBayesModel(override val field: Field, override val trainingSet: RDD[L * @param numIterations - the number of times gradient descent will run during training * @param stepSize - the size of each step taken during gradient descent */ class LinearRegressionModel(override val field: Field, override val trainingSet: RDD[LabeledPoint], val weights: List[Double] = List[Double](), val numIterations: Int, val stepSize: Double) extends RegressionModel { private[algorithm] var model:SparkRegressionModel = null class LinearRegressionModel(override val field: Field, override val trainingSet: RDD[LabeledPoint], val weights: List[Double] = List[Double](),val numIterations: Int, val stepSize: Double) extends RegressionModel { val featuresSize = trainingSet.first().features.size if (weights.isEmpty) { model = LinearRegressionWithSGD.train(trainingSet, numIterations, stepSize, 1.0) } else { private[algorithm] val model = if (weights.isEmpty) LinearRegressionWithSGD.train(trainingSet, numIterations, stepSize, 1.0) else { require(weights.size == featuresSize) model = LinearRegressionWithSGD.train(trainingSet, numIterations, stepSize, 1.0, new DenseVector(weights.toArray)) LinearRegressionWithSGD.train(trainingSet, numIterations, stepSize, 1.0, new DenseVector(weights.toArray)) } } ... ...
 ... ... @@ -90,6 +90,7 @@ object SparkAnalyzer { val fileSystem = FileSystem.get(new URI(fileUri), new Configuration()) val fileEntries = fileSystem.listStatus(new Path(fileUri)) logger.info(s"Searching ${fileEntries.length} files for the proper header with the delimiter '$delim'") // TODO - try and make more functional var hdrs: Array[String] = null var headerLine = "" var ctr = 0 ... ... @@ -137,6 +138,7 @@ class SparkAnalyzer(val data: RDD[(String, Date, Map[String, Any])], val inputDe data.groupBy(_._1).flatMap[(String, Map[String, Any])](p => { var outVals = ("", Map[String, Any]()) var date = new Date(0) // TODO - try and make more functional p._2.foreach(e => { if (e._2.getTime > date.getTime) { date = e._2 ... ... @@ -276,6 +278,7 @@ class SparkAnalyzer(val data: RDD[(String, Date, Map[String, Any])], val inputDe var list = new ArrayBuffer[Double]() var earliest = new Date() var latest = new Date(0) // TODO - try and make more functional durations.foreach(duration => { duration._2._2.filter(_ != 0).foreach(d => { total += d ... ...
 ... ... @@ -36,7 +36,7 @@ abstract class InputField(override val name: String, override val description: S abstract class Temporal(override val name: String, override val description: String = "", val denormFields: List[String], val algoDef: AlgorithmDefinition, override val factIndex: Int) extends InputField(name, description) with TemporalRole { def denormalize(event: Map[String, Any]): mutable.LinkedHashMap[String, Long] def denormalize(event: Map[String, Any]): Map[String, Long] } /** ... ... @@ -56,9 +56,11 @@ class DateTemporal(override val name: String, override val description: String = * @param event - the event to parse * @return - the map */ override def denormalize(event: Map[String, Any]): mutable.LinkedHashMap[String, Long] = { override def denormalize(event: Map[String, Any]): Map[String, Long] = { val date = eventValue[Date](event) val dt = new DateTime(date) // TODO - try and make more functional var outMap = new mutable.LinkedHashMap[String, Long]() /** ... ... @@ -97,14 +99,12 @@ class DateTemporal(override val name: String, override val description: String = // do nothing } if (denormFields.size > 0) { if (denormFields.size > 0) denormFields.foreach((field: String) => { mapField(field) }) } else { mapField("timestamp") } outMap else mapField("timestamp") outMap.toMap } } ... ...
 ... ... @@ -22,7 +22,7 @@ object EventUtil { * @param input - the JsonInput * @return - returns an InputDefinition */ // TODO - use YAML instead and maybe support a single input/output definition // TODO - use YAML instead and maybe support a single input/output definition and it will be much more functional def inputDefinition(input: JsonInput): InputDefinition = { val values = parse(input).values.asInstanceOf[Map[String, Map[String, Any]]] ... ...