Commit 88cba7cd authored by Steven Pisarski's avatar Steven Pisarski

Cleanup and replacement of configuration and historical files required for automated testing.

parent 8cf6d3d2
......@@ -245,7 +245,7 @@ class FormatAndRoute(val outputDefs: Set[OutputDefinition]) extends Actor {
val actor = outDef.protocol match {
case "stomp" =>
context.actorOf(
Props(classOf[StompRouter], outDef.host, outDef.port, outDef.name, outDef.routeType),
Props(classOf[StompRouter], outDef.host, outDef.port, outDef.name, outDef.routeType, outDef.user, outDef.pass),
s"StompRouter-${outDef.name}")
case "rabbitMq" =>
context.actorOf(
......
......@@ -36,7 +36,7 @@ class AnalyzeDataYamlTest extends UnitSpec {
assert("/tmp/cm/analysis/dimEventsCount.txt" == config.dimEventsCountUri)
assert("/tmp/cm/analysis/factValues.txt" == config.factValuesUri)
assert("/tmp/cm/analysis/temporalTrainingMetrics.txt" == config.temporalTrainingMetricsUri)
assert("/tmp/cm/analysis/factTrainingMetrics.txt" == config.factTrainingMetricsUri)
assert("/tmp/cm/analysis/factTrainingMetrics" == config.factTrainingMetricsUri)
assert(9 == config.numThreads)
}
......
......@@ -14,10 +14,10 @@ class EngineTest extends EngineTester {
val dateFormat = "MM-dd-yyyy HH:mm:ss a"
val dateFormatter = new SimpleDateFormat(dateFormat)
var inputDef = InputDefinition.inputDefinition(
new FileInputStream(new File("testData/generator/definition/cm-constant-short-input.yaml")))
new FileInputStream(new File("testData/cm/definition/cm-constant-short-input.yaml")))
val outputDefs = OutputDefinition.outputDefinitions(
new FileInputStream(new File("testData/cm/definition/cm-out.yaml")), inputDef)
val eventUri = new File("testData/generator/events/cm.txt").toURI.toString
val eventUri = new File("testData/cm/events/cm_1a.txt").toURI.toString
val delim = '|'
val outDef = outputDefs.iterator.next()
......
......@@ -14,7 +14,7 @@ class GeneratorCmConstantTest extends EngineTester {
val dateFormat = "MM-dd-yyyy HH:mm:ss a"
val dateFormatter = new SimpleDateFormat(dateFormat)
val inputDef = InputDefinition.inputDefinition(
new FileInputStream(new File("testData/cm/definition/cm-constant-input.yaml")))
new FileInputStream(new File("testData/cm/definition/cm-constant-short-input.yaml")))
val outputDefs = OutputDefinition.outputDefinitions(
new FileInputStream(new File("testData/cm/definition/cm-out.yaml")), inputDef)
val eventUri = new File("testData/cm/events/cm_1a.txt").toURI.toString
......
......@@ -14,7 +14,7 @@ class GeneratorCmLinearRegressionTest extends EngineTester {
val dateFormat = "MM-dd-yyyy HH:mm:ss a"
val dateFormatter = new SimpleDateFormat(dateFormat)
val inputDef = InputDefinition.inputDefinition(
new FileInputStream(new File("testData/cm/definition/cm-linear-regression-input.yaml")))
new FileInputStream(new File("testData/cm/definition/cm-constant-short-input.yaml")))
val outputDefs = OutputDefinition.outputDefinitions(
new FileInputStream(new File("testData/cm/definition/cm-out.yaml")), inputDef)
val eventUri = new File("testData/cm/events/cm_1a.txt").toURI.toString
......
......@@ -13,7 +13,7 @@ class CmFactPredictionsTest extends FactPredictionsTester {
val dateFormat = "MM-dd-yyyy HH:mm:ss a"
val dateFormatter = new SimpleDateFormat(dateFormat)
val inputDef = InputDefinition.inputDefinition(
new FileInputStream(new File("testData/cm/definition/cm-fact-pred-input.yaml")))
new FileInputStream(new File("testData/cm/definition/cm-constant-short-input.yaml")))
val eventUri = new File("testData/cm/events/cm_1a.txt").toURI.toString
val delim = '|'
......
......@@ -12,7 +12,7 @@ abstract class FactPredictionsTester extends AnalyzerTester {
// minimal training set
var ignoreFacts = Set[String]()
analyzerTest("Analyze fact preditions to ensure the average prediction is within 95% of the average label") {
analyzerTest("Analyze fact preditions to ensure the average prediction is within 90% of the average label") {
inputDef.positionalFacts.filter(f => !ignoreFacts.contains(f.name))foreach(fact => {
val trainingSet = analyzer.factTrainingSet(fact.name)
assert(trainingSet != null && trainingSet.count() == 1000)
......@@ -26,7 +26,7 @@ abstract class FactPredictionsTester extends AnalyzerTester {
}
println(s"Validating model for fact with name - ${fact.name}")
ModelValidator.validateModel(model, trainingSet.collect(), analyzer, 0.95, outputValues = false)
ModelValidator.validateModel(model, trainingSet.collect(), analyzer, 0.90, outputValues = false)
})
}
}
1.0,2.0
1.5,2.5
2.0,3.0
2.5,3.5
3.0,4.0
\ No newline at end of file
sparkUri: local[8]
appName: CM-Analyzer-small
schemaUri: testData/cm/definition/cm-constant-input-fact-pred-test.yaml
fileDelim: "|"
eventsUri: testData/cm/events/
factTrainingMetricsUri: /Users/spisarski/tmp/factTraining-large/
numThreads: 10
\ No newline at end of file
sparkUri: local[8]
appName: CM-Analyzer-small
schemaUri: testData/cm/definition/cm-constant-input-fact-pred-test.yaml
fileDelim: "|"
eventsUri: testData/cm/events/cm_1a.txt
factTrainingMetricsUri: /Users/spisarski/tmp/factTraining-small/
numThreads: 10
\ No newline at end of file
......@@ -11,5 +11,5 @@ dimCountUri: /tmp/cm/analysis/dimCount.txt
dimEventsCountUri: /tmp/cm/analysis/dimEventsCount.txt
factValuesUri: /tmp/cm/analysis/factValues.txt
temporalTrainingMetricsUri: /tmp/cm/analysis/temporalTrainingMetrics.txt
factTrainingMetricsUri: /tmp/cm/analysis/factTrainingMetrics.txt
factTrainingMetricsUri: /tmp/cm/analysis/factTrainingMetrics
numThreads: 9
\ No newline at end of file
temporal:
name: poll_date
description: Date of CM poll
type: date
dateFormat: MM-dd-yyyy-HH:mm:ss
factPosition: -1
denormFields:
- day_of_week
- day_of_month
- day_of_year
- hour_of_day
algo:
name: constant
constType: integer
constVal: 60000
dimensions:
- name: cmts
description: The CMTS name
type: string
position: 10
- name: node
description: The Node name
type: string
position: 20
- name: mac
description: The MAC address
type: string
position: 30
- name: lat
description: The geo latitude
type: float
position: 40
- name: lng
description: The geo longitude
type: float
position: 50
facts:
- name: downstream_receive_power_num
description: fact 1-out
type: float
position: 10
algo:
name: linearRegression
flatten: 2
polyDegree: 2
iterations: 50
stepSize: 0.01
temporal:
name: poll_date
description: Date of CM poll
type: date
dateFormat: MM-dd-yyyy-HH:mm:ss
factPosition: -1
denormFields:
- day_of_week
- day_of_month
- day_of_year
- hour_of_day
algo:
name: constant
constType: integer
constVal: 28800000
dimensions:
- name: cmts
description: The CMTS name
type: string
position: 10
- name: node
description: The Node name
type: string
position: 20
- name: mac
description: The MAC address
type: string
position: 30
- name: lat
description: The geo latitude
type: float
position: 40
- name: lng
description: The geo longitude
type: float
position: 50
facts:
- name: downstream_receive_power_num
description: fact 1-out
type: float
position: 100
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: upstream_transmit_power_num
description: fact 2-out
type: float
position: 110
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: downstream_snr_rt
description: fact 3-out
type: float
position: 130
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: t3_timeouts_cnt
description: fact 4
type: integer
position: 40
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: t4_timeouts_cnt
description: fact 4b
type: integer
position: 41
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: lost_syncs_cnt
description: fact 5
type: integer
position: 50
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: resets_cnt
description: fact 6
type: integer
position: 60
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: ds_fec_corrected_cnt
description: fact 7
type: integer
position: 70
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: ds_fec_uncorrected_cnt
description: fact 8
type: integer
position: 80
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
- name: ds_fec_unerrored_cnt
description: fact 9
type: integer
position: 90
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
......@@ -12,7 +12,7 @@ temporal:
algo:
name: constant
constType: integer
constVal: 60000
constVal: 1000
dimensions:
- name: cmts
description: The CMTS name
......@@ -38,90 +38,360 @@ facts:
- name: downstream_receive_power_num
description: fact 1-out
type: float
position: 10
position: 140
algo:
name: linearRegression
flatten:
mode: log
base: 2
iterations: 3
polyDegree: 3
iterations: 20
stepSize: 0.001
iterations: 75
stepSize: 0.002
weights:
- name: day_of_week
weight: 10
- name: day_of_month
weight: 10
- name: day_of_year
weight: 10
- name: hour_of_day
weight: 50
- name: month_of_year
weight: 25
- name: year
weight: 10
- name: cmts
weight: 150
- name: node
weight: 200
- name: mac
weight: 250
- name: lat
weight: 250
- name: lng
weight: 250
- name: upstream_transmit_power_num
description: fact 2-out
type: float
position: 20
position: 110
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
flatten:
mode: log
base: 5
iterations: 2
polyDegree: 2
iterations: 75
stepSize: 0.04
weights:
- name: day_of_week
weight: 10
- name: day_of_month
weight: 10
- name: day_of_year
weight: 10
- name: hour_of_day
weight: 100
- name: month_of_year
weight: 50
- name: year
weight: 10
- name: cmts
weight: 100
- name: node
weight: 100
- name: mac
weight: 150
- name: lat
weight: 150
- name: lng
weight: 150
- name: downstream_snr_rt
description: fact 3-out
type: float
position: 30
position: 130
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
flatten:
mode: log
base: 5
iterations: 2
polyDegree: 2
iterations: 75
stepSize: 0.05
weights:
- name: day_of_week
weight: 10
- name: day_of_month
weight: 10
- name: day_of_year
weight: 10
- name: hour_of_day
weight: 100
- name: month_of_year
weight: 50
- name: year
weight: 10
- name: cmts
weight: 100
- name: node
weight: 100
- name: mac
weight: 150
- name: lat
weight: 150
- name: lng
weight: 150
- name: t3_timeouts_cnt
description: fact 4
type: integer
position: 40
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
flatten:
mode: log
base: 5
iterations: 2
polyDegree: 4
iterations: 30
stepSize: 0.02
weights:
- name: day_of_week
weight: 10
- name: day_of_month
weight: 10
- name: day_of_year
weight: 10
- name: hour_of_day
weight: 100
- name: month_of_year
weight: 50
- name: year
weight: 10
- name: cmts
weight: 100
- name: node
weight: 100
- name: mac
weight: 150
- name: lat
weight: 150
- name: lng
weight: 150
- name: t4_timeouts_cnt
description: fact 4b
type: integer
position: 40
position: 41
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
flatten:
mode: log
base: 5
iterations: 2
polyDegree: 4
iterations: 30
stepSize: 0.02
weights:
- name: day_of_week
weight: 10
- name: day_of_month
weight: 10
- name: day_of_year
weight: 10
- name: hour_of_day
weight: 100
- name: month_of_year
weight: 50
- name: year
weight: 10
- name: cmts
weight: 100
- name: node
weight: 100
- name: mac
weight: 150
- name: lat
weight: 150
- name: lng
weight: 150
- name: lost_syncs_cnt
description: fact 5
type: integer
position: 50
algo:
name: linearRegression
flatten:
mode: log
base: 5
iterations: 3
polyDegree: 3
iterations: 20
stepSize: 0.001
iterations: 50
stepSize: 0.02
weights:
- name: day_of_week
weight: 10
- name: day_of_month
weight: 10
- name: day_of_year
weight: 10
- name: hour_of_day
weight: 100
- name: month_of_year
weight: 50
- name: year
weight: 10
- name: cmts
weight: 100
- name: node
weight: 100
- name: mac
weight: 150
- name: lat
weight: 150
- name: lng
weight: 150
- name: resets_cnt
description: fact 6
type: integer
position: 60
algo:
name: linearRegression
polyDegree: 3
iterations: 20
stepSize: 0.001
flatten:
mode: log
base: 5
iterations: 2
polyDegree: 2
iterations: 70
stepSize: 0.025
weights:
- name: day_of_week
weight: 10
- name: day_of_month
weight: 10
- name: day_of_year
weight: 10
- name: hour_of_day
weight: 100
- name: month_of_year
weight: 50
- name: year
weight: 10
- name: cmts
weight: 100
- name: node
weight: 100
- name: mac
weight: 150
- name: lat
weight: 150
- name: lng
weight: 150