#!/usr/bin/env python3
import sys
import argparse
import signal
from collections import Counter
from hunterpy.definitions import bots, NOERROR, UNKNOWN
from hunterpy.jsonresults import ResultsInterpreter, ResultsParser
from hunterpy.utils import OptionalColorText, sigterm_this_process
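# wktesthunter: for a given layout test and bot, print the per-revision history
# of archived results (run-length compressed into revision intervals), plus an
# optional summary of how often each result and unmatched expectation occurred.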
extrahelp = '''
About the colors:
 - A red color means that the test result and the expectation didn't match.
 - A green color means that the test result was expected.
 - If 'FAIL' is the expectation, then the test matches the expectation
   if the result is any of 'FAIL', 'TEXT', 'IMAGE+TEXT' or 'AUDIO'.
 - No color means UNKNOWN (no data).

The meaning of the possible results for a test on a revision is:

"UNKNOWN" means that we have no data for that revision. This means that
the bot didn't run the test for this revision. This could have been caused
by a compilation failure on that revision, or the bot may have skipped that
revision because of the size of the build queue at that moment, or the bot
may have been offline, etc.

"NOERROR" means that we don't know the exact result of this test on that revision,
but we know that the complete set of layout tests finished on that revision and no
specific problem was reported for this test.
This usually means one of these four options:
 - The test executed without failure (if the test was not flagged on TestExpectations).
 - The test gave the failure expected on TestExpectations.
 - The test didn't exist at that revision.
 - The test was marked to be skipped.

"IMAGE" means ImageOnlyFailure.
"MISSING" means missing results.
"PASS" means the test passed. This can be bad (if the color is red, it means that
the test was not expected to pass according to TestExpectations).
"TEXT" means text failure.
"CRASH" means the test crashed.
"AUDIO" means audio failure.
"FAIL" means failure.

Note for lazy people:
 * As an extra goodie from python's argparse, the parameters can be shortened to any
   length as long as there is no possible confusion between them. For example, you can
   use any of "--onlyprinterr", "--only" or "--o"; or "--mergeunknown", "--merge" or "--m".
'''
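# Illustrative invocations (the test path below is a hypothetical example):
#   wktesthunter imported/w3c/example-test.html
#   wktesthunter --bot gtk-release --depth 8000 imported/w3c/example-test.html
#   wktesthunter --merge imported/w3c/example-test.html   # '--merge' abbreviates '--mergeunknown'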
def interval_print(firstrev, lastrev, result, startcolor):
    """Print one revision (or revision interval) and its result, optionally in an ANSI color."""
    if lastrev is None:
        startstr = "%d@main" % firstrev
    else:
        startstr = "[%d@main-%d@main]" % (firstrev, lastrev)
    if startcolor is not None:
        endcolor = '\033[0m'
    else:
        endcolor = ''
        startcolor = ''
    print('%s%s%s%s%s' % (startcolor, startstr, " " * (30 - len(startstr)), result, endcolor))
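# For example, interval_print(100, 200, 'PASS', None) prints the label
# "[100@main-200@main]" padded to column 30, followed by "PASS", with no color.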
def get_percent_str(part, total):
    percent_part = 100.0 * part / total
    return "%d%%" % int(percent_part) if percent_part.is_integer() else "%.1f%%" % percent_part
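# e.g. get_percent_str(1, 4) == "25%" and get_percent_str(1, 3) == "33.3%"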
# TODO:
# - Take advantage of the new flakyhunter code and reuse as much of it as possible (or merge both search paths somehow).
# - Also check the git history so the waterfall shows when a revision changed something (not only when the test was added).
def print_history_for_test(test, debug, detail, mergeunknown, ignoreunknown, showexpected, usecolor, printprogress, botkey, processes, limit_to_last_n_results, reportsummary):
start_at_revision = 1
maybe_color = OptionalColorText(usecolor)
results_parser = ResultsParser(botkey, processes, limit_to_last_n_results, printprogress, debug)
startrev, minrev, maxrev, resultsfortests = results_parser.get_results_for_tests([test])
    print('INFO: The revisions processed for the bot %s are on the interval %s' %
          (maybe_color.bold(botkey), maybe_color.bold('[%d@main-%d@main]' % (startrev, maxrev))))
lastprinted = UNKNOWN
if mergeunknown:
lastprinted = "%s/%s" %(NOERROR, UNKNOWN)
    lastrevprinted = startrev - 1
    # Trick: append a sentinel result past maxrev so the loop below also prints the last interval.
    resultsfortests[test][maxrev+1] = {}
    resultsfortests[test][maxrev+1]['result'] = 'END'
    print("\nResults for bot %s and test: %s\n" % (maybe_color.bold(botkey), maybe_color.bold(test)))
failed_expectations = []
test_results = []
number_of_completed_runs = 0
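    # Walk the revisions in ascending order, run-length compressing consecutive
    # identical results: an interval is printed only when the result changes
    # (the 'END' sentinel added above forces the final interval out).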
for revision in range(startrev,maxrev+2):
if revision in resultsfortests[test]:
toprint = resultsfortests[test][revision]['result']
if mergeunknown and (toprint == NOERROR or toprint == UNKNOWN):
toprint = "%s/%s" %(NOERROR, UNKNOWN)
if showexpected and 'expected' in resultsfortests[test][revision]:
toprint = "%s (Expected: %s)" %(toprint,resultsfortests[test][revision]['expected'])
if reportsummary:
                # Only count revisions where the bot finished the run (result != UNKNOWN).
if resultsfortests[test][revision]['result'] != UNKNOWN and resultsfortests[test][revision]['result'] != 'END':
number_of_completed_runs += 1
if ResultsInterpreter.is_result_for_test_unexpected(resultsfortests[test][revision]):
failed_expectations.append(toprint)
test_results.append(resultsfortests[test][revision]['result'])
else:
if mergeunknown:
toprint = "%s/%s" %(NOERROR, UNKNOWN)
else:
toprint = UNKNOWN
if lastprinted == toprint:
continue
else:
            # The printing loop prints the interval that ended at the previous iteration (revision-1).
if detail or (lastprinted != UNKNOWN and lastprinted != NOERROR):
color = None
if usecolor:
# We are going to print. Choose the color
if UNKNOWN not in lastprinted:
color = '\033[0;32m' # green
if revision-1 in resultsfortests[test]:
if ResultsInterpreter.is_result_for_test_unexpected(resultsfortests[test][revision-1]):
color = '\033[0;31m' # red
                            if debug:
                                print('INFO: Marking test %s as red on rev %d@main, because actual result "%s" does not match the expectation "%s"'
                                      % (test, revision-1, resultsfortests[test][revision-1]['result'], resultsfortests[test][revision-1]['expected']))
if lastprinted != UNKNOWN or not ignoreunknown:
if revision > startrev:
if revision-lastrevprinted > 2:
interval_print (lastrevprinted+1, revision-1, lastprinted, color)
else:
interval_print (revision-1, None, lastprinted, color)
lastrevprinted = revision-1
lastprinted = toprint
print("\n")
    if reportsummary:
        print("The bot %s completed %s runs on the interval %s" % (
              maybe_color.bold(botkey),
              maybe_color.bold(number_of_completed_runs),
              maybe_color.bold("[%d@main-%d@main]" % (minrev, maxrev))))
        number_of_failed_runs = len(failed_expectations)
        print("Frequency of results for test: %s" % (maybe_color.bold(test)))
counted_test_results = Counter(test_results)
for test_result in counted_test_results:
print(maybe_color.bold(" {:<22} -> {}".format("%d times [%s]" % (counted_test_results[test_result],get_percent_str(counted_test_results[test_result], number_of_completed_runs)), test_result)))
print ("")
        if number_of_completed_runs == 0:
            print(maybe_color.red("The bot didn't finish any run on the interval. Try raising the number of results checked with --depth 8000 (or more)"))
        elif number_of_failed_runs == 0:
            print(maybe_color.green("All runs on the interval were successful (either the expectation was matched or the test passed)"))
else:
print("Of those %s runs it didn't match the expectation %s times [%s]" % (maybe_color.bold(number_of_completed_runs), maybe_color.bold(number_of_failed_runs), maybe_color.bold(get_percent_str(number_of_failed_runs, number_of_completed_runs))))
print("Details of the type and frequency of unmatched expectations:")
counted_failed_expectations = Counter(failed_expectations)
for failed_expectation in counted_failed_expectations:
print(maybe_color.red(" {:<22} -> {}".format("%d times [%s]" % (counted_failed_expectations[failed_expectation],get_percent_str(counted_failed_expectations[failed_expectation], number_of_completed_runs)), failed_expectation)))
return 0
if __name__ == '__main__':
import multiprocessing as mp
parser = argparse.ArgumentParser(epilog=extrahelp, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("--debug", help="Print debug messages.", action="store_true")
parser.add_argument("--onlyprinterr", help="Don't print NOERROR or UNKNOWN results.", action="store_true")
parser.add_argument("--mergeunknown", help="If set, will join the UNKNOWN results (no data for this revision)\n"
"with the NOERROR results (we have data, but no error/result was reported).\n", action="store_true")
parser.add_argument("--ignoreunknown", help="If set, will not print UNKNOWN (no data for this revision)\n"
"This option disregards --mergeunknown.\n", action="store_true")
parser.add_argument("--noshowexpected", help="Print only the test results without the expected result.", action="store_true")
parser.add_argument("--nocolor", help="Don't print colors", action="store_true")
parser.add_argument("--noprintprogress", help="Don't print percentage progress of computations..", action="store_true")
parser.add_argument("--noreportsummary", help="Don't print the summary at the end with the result and expectations frequencies.", action="store_true")
parser.add_argument("--bot", help="Check the test results for a specific bot. Default is: %(default)s", choices=list(bots.keys()), default="gtk-release")
parser.add_argument("-j", type=int, help="Number of processes to use", default=mp.cpu_count())
parser.add_argument("test_name", type=str, help="Name for the test (as specified on run-webkit-tests).")
number_revisions = parser.add_mutually_exclusive_group()
    number_revisions.add_argument('--all', action='store_true', help='Search in all the revisions known for the bot.')
    number_revisions.add_argument('--depth', help='Search only in the last X revisions known for the bot. Default is 4000.', default=4000)
args = parser.parse_args()
    args_should_printprogress = sys.stderr.isatty() and not args.noprintprogress
    args_limit_to_last_n_results = None if args.all else int(args.depth)
    # Exit immediately on CTRL+C or broken-pipe situations.
signal.signal(signal.SIGINT, sigterm_this_process)
signal.signal(signal.SIGPIPE, sigterm_this_process)
    print_history_for_test(args.test_name, args.debug, not args.onlyprinterr, args.mergeunknown,
                           args.ignoreunknown, not args.noshowexpected, not args.nocolor,
                           args_should_printprogress, args.bot, args.j, args_limit_to_last_n_results, not args.noreportsummary)