Commit 3281f443 authored by abarth@webkit.org's avatar abarth@webkit.org

2010-04-05 Adam Barth <abarth@webkit.org>

        Unreviewed.

        Tighten SheriffBot's flaky test detector
        https://bugs.webkit.org/show_bug.cgi?id=37063

        Instead of just looking for two sequential red builds, look for two
        sequential failures of the same test.  This should reduce sheriffbot
        false positive substantially.

        I'm landing this change unreviewed because I've noticed SheriffBot
        triggering a lot more false positives now that we've expanded the set
        of core builders.  I've tried to take Eric's comments on Bug 37063 into
        account.  I'm happy to iterate on this patch tomorrow once Eric wakes
        up.

        * Scripts/webkitpy/common/net/buildbot.py:
        * Scripts/webkitpy/common/net/buildbot_unittest.py:
        * Scripts/webkitpy/tool/commands/queries.py:

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@57065 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 32dee7a4
2010-04-05 Adam Barth <abarth@webkit.org>
Unreviewed.
Tighten SheriffBot's flaky test detector
https://bugs.webkit.org/show_bug.cgi?id=37063
Instead of just looking for two sequential red builds, look for two
sequential failures of the same test. This should reduce sheriffbot
false positive substantially.
I'm landing this change unreviewed because I've noticed SheriffBot
triggering a lot more false positives now that we've expanded the set
of core builders. I've tried to take Eric's comments on Bug 37063 into
account. I'm happy to iterate on this patch tomorrow once Eric wakes
up.
* Scripts/webkitpy/common/net/buildbot.py:
* Scripts/webkitpy/common/net/buildbot_unittest.py:
* Scripts/webkitpy/tool/commands/queries.py:
2010-04-04 John Gregg <johnnyg@google.com>
Unreviewed, add myself to the committers list.
......
......@@ -136,44 +136,82 @@ class Builder(object):
)
return build
# FIXME: We should not have to pass a red_build_number, but rather Builder should
# know what its "latest_build" is.
def find_green_to_red_transition(self, red_build_number, look_back_limit=30):
# walk backwards until we find a green build
red_build = self.build(red_build_number)
green_build = None
def find_failure_transition(self, red_build, look_back_limit=30):
if not red_build or red_build.is_green():
return (None, None)
common_failures = None
current_build = red_build
build_after_current_build = None
look_back_count = 0
while red_build:
if look_back_count >= look_back_limit:
break
# Use a previous_build() method to avoid assuming build numbers are sequential.
before_red_build = red_build.previous_build()
if before_red_build and before_red_build.is_green():
green_build = before_red_build
while current_build:
if current_build.is_green():
# current_build can't possibly have any failures in common
# with red_build because it's green.
break
red_build = before_red_build
results = current_build.layout_test_results()
# We treat a lack of results as if all the test failed.
# This occurs, for example, when we can't compile at all.
if results:
failures = set(results.failing_tests())
if common_failures == None:
common_failures = failures
common_failures = common_failures.intersection(failures)
if not common_failures:
# current_build doesn't have any failures in common with
# the red build we're worried about. We assume that any
# failures in current_build were due to flakiness.
break
look_back_count += 1
return (green_build, red_build)
def suspect_revisions_for_green_to_red_transition(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
(last_green_build, first_red_build) = self.find_green_to_red_transition(red_build_number, look_back_limit)
if not last_green_build:
if look_back_count > look_back_limit:
return (None, current_build)
build_after_current_build = current_build
current_build = current_build.previous_build()
# We must iterate at least once because red_build is red.
assert(build_after_current_build)
# Current build must either be green or have no failures in common
# with red build, so we've found our failure transition.
return (current_build, build_after_current_build)
def blameworthy_revisions(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
red_build = self.build(red_build_number)
(last_good_build, first_bad_build) = \
self.find_failure_transition(red_build, look_back_limit)
if not last_good_build:
return [] # We ran off the limit of our search
# if avoid_flakey_tests, require at least 2 red builds before we suspect a green to red transition.
if avoid_flakey_tests and first_red_build == self.build(red_build_number):
# If avoid_flakey_tests, require at least 2 bad builds before we
# suspect a real failure transition.
if avoid_flakey_tests and first_bad_build == red_build:
return []
suspect_revisions = range(first_red_build.revision(), last_green_build.revision(), -1)
suspect_revisions = range(first_bad_build.revision(),
last_good_build.revision(),
-1)
suspect_revisions.reverse()
return suspect_revisions
class LayoutTestResults(object):
stderr_key = u'Tests that had stderr output:'
fail_key = u'Tests where results did not match expected results:'
timeout_key = u'Tests that timed out:'
crash_key = u'Tests that caused the DumpRenderTree tool to crash:'
missing_key = u'Tests that had no expected results (probably new):'
expected_keys = [
stderr_key,
fail_key,
crash_key,
timeout_key,
missing_key,
]
@classmethod
def _parse_results_html(cls, page):
parsed_results = {}
tables = BeautifulSoup(page).findAll("table")
for table in tables:
table_title = table.findPreviousSibling("p").string
if table_title not in cls.expected_keys:
raise "Unhandled title: %s" % str(table_title)
# We might want to translate table titles into identifiers at some point.
parsed_results[table_title] = [row.find("a").string for row in table.findAll("tr")]
......@@ -204,6 +242,10 @@ class LayoutTestResults(object):
def parsed_results(self):
return self._parsed_results
def failing_tests(self):
failing_keys = [self.fail_key, self.crash_key, self.timeout_key]
return sorted(sum([tests for key, tests in self._parsed_results.items() if key in failing_keys], []))
class Build(object):
def __init__(self, builder, build_number, revision, is_green):
......@@ -391,7 +433,7 @@ class BuildBot(object):
if builder_status["is_green"]:
continue
builder = self.builder_with_name(builder_status["name"])
revisions = builder.suspect_revisions_for_green_to_red_transition(builder_status["build_number"])
revisions = builder.blameworthy_revisions(builder_status["build_number"])
for revision in revisions:
failing_bots = revision_to_failing_bots.get(revision, [])
failing_bots.append(builder)
......
......@@ -34,42 +34,73 @@ from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
class BuilderTest(unittest.TestCase):
def setUp(self):
self.buildbot = BuildBot()
self.builder = Builder("Test Builder", self.buildbot)
def _install_fetch_build(self, failure):
def _mock_fetch_build(build_number):
return Build(
build = Build(
builder=self.builder,
build_number=build_number,
revision=build_number + 1000,
is_green=build_number < 4
)
build._layout_test_results = LayoutTestResults(
"http://buildbot.example.com/foo", {
LayoutTestResults.fail_key: failure(build_number),
})
return build
self.builder._fetch_build = _mock_fetch_build
def test_find_green_to_red_transition(self):
(green_build, red_build) = self.builder.find_green_to_red_transition(10)
def setUp(self):
self.buildbot = BuildBot()
self.builder = Builder("Test Builder", self.buildbot)
self._install_fetch_build(lambda build_number: ["test1", "test2"])
def test_find_failure_transition(self):
(green_build, red_build) = self.builder.find_failure_transition(self.builder.build(10))
self.assertEqual(green_build.revision(), 1003)
self.assertEqual(red_build.revision(), 1004)
(green_build, red_build) = self.builder.find_green_to_red_transition(10, look_back_limit=2)
(green_build, red_build) = self.builder.find_failure_transition(self.builder.build(10), look_back_limit=2)
self.assertEqual(green_build, None)
self.assertEqual(red_build.revision(), 1008)
def test_none_build(self):
self.builder._fetch_build = lambda build_number: None
(green_build, red_build) = self.builder.find_green_to_red_transition(10)
(green_build, red_build) = self.builder.find_failure_transition(self.builder.build(10))
self.assertEqual(green_build, None)
self.assertEqual(red_build, None)
def test_suspect_revisions_for_green_to_red_transition(self):
self.assertEqual(self.builder.suspect_revisions_for_green_to_red_transition(10), [1004])
self.assertEqual(self.builder.suspect_revisions_for_green_to_red_transition(10, look_back_limit=2), [])
def test_flaky_tests(self):
self._install_fetch_build(lambda build_number: ["test1"] if build_number % 2 else ["test2"])
(green_build, red_build) = self.builder.find_failure_transition(self.builder.build(10))
self.assertEqual(green_build.revision(), 1009)
self.assertEqual(red_build.revision(), 1010)
def test_failure_and_flaky(self):
self._install_fetch_build(lambda build_number: ["test1", "test2"] if build_number % 2 else ["test2"])
(green_build, red_build) = self.builder.find_failure_transition(self.builder.build(10))
self.assertEqual(green_build.revision(), 1003)
self.assertEqual(red_build.revision(), 1004)
def test_no_results(self):
self._install_fetch_build(lambda build_number: ["test1", "test2"] if build_number % 2 else ["test2"])
(green_build, red_build) = self.builder.find_failure_transition(self.builder.build(10))
self.assertEqual(green_build.revision(), 1003)
self.assertEqual(red_build.revision(), 1004)
def test_failure_after_flaky(self):
self._install_fetch_build(lambda build_number: ["test1", "test2"] if build_number > 6 else ["test3"])
(green_build, red_build) = self.builder.find_failure_transition(self.builder.build(10))
self.assertEqual(green_build.revision(), 1006)
self.assertEqual(red_build.revision(), 1007)
def test_blameworthy_revisions(self):
self.assertEqual(self.builder.blameworthy_revisions(10), [1004])
self.assertEqual(self.builder.blameworthy_revisions(10, look_back_limit=2), [])
# Flakey test avoidance requires at least 2 red builds:
self.assertEqual(self.builder.suspect_revisions_for_green_to_red_transition(4), [])
self.assertEqual(self.builder.suspect_revisions_for_green_to_red_transition(4, avoid_flakey_tests=False), [1004])
self.assertEqual(self.builder.blameworthy_revisions(4), [])
self.assertEqual(self.builder.blameworthy_revisions(4, avoid_flakey_tests=False), [1004])
# Green builder:
self.assertEqual(self.builder.suspect_revisions_for_green_to_red_transition(3), [])
self.assertEqual(self.builder.blameworthy_revisions(3), [])
def test_build_caching(self):
self.assertEqual(self.builder.build(10), self.builder.build(10))
......
......@@ -130,11 +130,12 @@ class WhatBroke(AbstractDeclarativeCommand):
print " Reviewer: %s" % (commit_info.reviewer() or commit_info.reviewer_text())
print " Committer: %s" % commit_info.committer()
# FIXME: This is slightly different from Builder.suspect_revisions_for_green_to_red_transition
# FIXME: This is slightly different from Builder.blameworthy_revisions
# due to needing to detect the "hit the limit" case an print a special message.
def _print_blame_information_for_builder(self, builder_status, name_width, avoid_flakey_tests=True):
builder = self.tool.buildbot.builder_with_name(builder_status["name"])
(last_green_build, first_red_build) = builder.find_green_to_red_transition(builder_status["build_number"])
red_build = builder.build(builder_status["build_number"])
(last_green_build, first_red_build) = builder.find_failure_transition(red_build)
if not first_red_build:
self._print_builder_line(builder.name(), name_width, "FAIL (error loading build information)")
return
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment