Skip to content

Commit

Permalink
WIP: measure branches with ast instead of bytecode
Browse files Browse the repository at this point in the history
--HG--
branch : ast-branch
  • Loading branch information
nedbat committed Dec 24, 2015
1 parent ae91d45 commit b17f276
Show file tree
Hide file tree
Showing 6 changed files with 431 additions and 12 deletions.
230 changes: 228 additions & 2 deletions coverage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

"""Code parsing for coverage.py."""

import ast
import collections
import dis
import re
Expand Down Expand Up @@ -260,6 +261,18 @@ def arcs(self):
self._all_arcs.add((fl1, fl2))
return self._all_arcs

def ast_arcs(self):
    """Get the arcs found by analyzing the AST of `self.text`.

    Both ends of every arc reported by `AstArcAnalyzer` are passed through
    `self.first_line`.  Arcs whose two ends map to the same line are
    dropped.

    Returns a set of `(from_line, to_line)` pairs.

    """
    analyzer = AstArcAnalyzer(self.text)
    normalized = set()
    for start, end in analyzer.collect_arcs():
        first_start = self.first_line(start)
        first_end = self.first_line(end)
        if first_start == first_end:
            # Both ends collapse onto one line: not an arc worth keeping.
            continue
        normalized.add((first_start, first_end))
    return normalized

def exit_counts(self):
"""Get a count of exits from that each line.
Expand Down Expand Up @@ -288,6 +301,168 @@ def exit_counts(self):
return exit_counts


class AstArcAnalyzer(object):
    """Collect the possible line-to-line arcs of a program by walking its AST.

    An arc is a pair `(from_line, to_line)`.  Entering a code object is
    recorded as an arc from -1, and leaving it is recorded as an arc to the
    negative of the code object's starting line.

    Each `handle_*` method adds the arcs found inside one kind of node to
    `self.arcs`, and returns the set of line numbers from which execution
    continues on to whatever follows that node.

    """

    def __init__(self, text):
        """Parse `text` (Python source) and prepare for arc collection."""
        self.root_node = ast.parse(text)
        # Work-in-progress debugging aid: print the parsed tree.
        ast_dump(self.root_node)

        self.arcs = None
        # References to the nearest enclosing thing of its kind.
        self.function_start = None
        self.loop_start = None

        # Break-exits from a loop
        self.break_exits = None

    def line_for_node(self, node):
        """What is the right line number to use for this node?"""
        node_name = node.__class__.__name__
        if node_name == "Assign":
            return node.value.lineno
        elif node_name == "comprehension":
            # TODO: is this how to get the line number for a comprehension?
            return node.target.lineno
        else:
            return node.lineno

    def collect_arcs(self):
        """Analyze the whole tree, and return the set of arcs found."""
        self.arcs = set()
        self.add_arcs_for_code_objects(self.root_node)
        return self.arcs

    def add_arcs(self, node):
        """Add the arcs for `node`.

        Dispatches to `handle_<NodeClassName>` if this class has one,
        otherwise to `handle_default`.

        Return a set of line numbers, exits from this node to the next.

        """
        node_name = node.__class__.__name__
        handler = getattr(self, "handle_" + node_name, self.handle_default)
        return handler(node)

    def add_body_arcs(self, body, from_line):
        """Add the arcs for a list of statements entered from `from_line`.

        Each statement is linked from every exit of the statement before it.
        Returns the exits of the last statement, or `set([from_line])` if
        `body` is empty.

        """
        prev_lines = set([from_line])
        for body_node in body:
            lineno = self.line_for_node(body_node)
            for prev_lineno in prev_lines:
                self.arcs.add((prev_lineno, lineno))
            prev_lines = self.add_arcs(body_node)
        return prev_lines

    def is_constant_expr(self, node):
        """Is this a compile-time constant?"""
        node_name = node.__class__.__name__
        # "Constant" is the node class newer Pythons produce for literals
        # that older versions reported as "Num" or "NameConstant".
        return node_name in ["NameConstant", "Num", "Constant"]

    # tests to write:
    # TODO: while EXPR:
    # TODO: while False:
    # TODO: multi-target assignment with computed targets
    # TODO: listcomps hidden deep in other expressions
    # TODO: listcomps hidden in lists: x = [[i for i in range(10)]]
    # TODO: multi-line listcomps
    # TODO: nested function definitions

    def handle_Break(self, node):
        """A `break` leaves the enclosing loop, and has no normal exits."""
        here = self.line_for_node(node)
        # With no enclosing loop there is nowhere to record the exit, so
        # skip it rather than fail on None.
        if self.break_exits is not None:
            self.break_exits.add(here)
        return set([])

    def handle_Continue(self, node):
        """A `continue` jumps to the loop start, and has no normal exits."""
        here = self.line_for_node(node)
        # With no enclosing loop there is no target line to jump to.
        if self.loop_start is not None:
            self.arcs.add((here, self.loop_start))
        return set([])

    def handle_For(self, node):
        """Add the arcs for a `for` loop, including its `else` clause."""
        start = self.line_for_node(node.iter)
        # Save the enclosing loop's state, to restore it after the body.
        loop_state = self.loop_start, self.break_exits
        self.loop_start = start
        self.break_exits = set()
        body_exits = self.add_body_arcs(node.body, from_line=start)
        for exit_line in body_exits:
            # The end of the body goes back to the top of the loop.
            self.arcs.add((exit_line, start))
        exits = self.break_exits
        self.loop_start, self.break_exits = loop_state
        if node.orelse:
            # The loop line flows into the else clause; add_body_arcs adds
            # the arc from `start` to the first else statement.
            exits |= self.add_body_arcs(node.orelse, from_line=start)
        else:
            # no else clause: exit from the for line.
            exits.add(start)
        return exits

    def handle_FunctionDef(self, node):
        """A `def` statement just flows through to the next statement."""
        start = self.line_for_node(node)
        # the body is handled in add_arcs_for_code_objects.
        exits = set([start])
        return exits

    def handle_If(self, node):
        """Add the arcs for an `if` statement and its `else` clause."""
        start = self.line_for_node(node.test)
        exits = self.add_body_arcs(node.body, from_line=start)
        # With no else clause, add_body_arcs returns set([start]), so the
        # `if` line itself becomes an exit.
        exits |= self.add_body_arcs(node.orelse, from_line=start)
        return exits

    def handle_Module(self, node):
        """Modules are handled by add_arcs_for_code_objects, never here."""
        raise Exception("TODO: this shouldn't happen")

    def handle_Return(self, node):
        """A `return` exits the enclosing function, with no normal exits."""
        here = self.line_for_node(node)
        # With no enclosing function there is no exit line to negate.
        if self.function_start is not None:
            self.arcs.add((here, -self.function_start))
        return set([])

    def handle_While(self, node):
        """Add the arcs for a `while` loop.

        For a constant test, the body loops back to its own first statement
        instead of to the `while` line, and the loop can only be left with
        `break`.  Otherwise the loop can also be left from the `while` line:
        into the `else` clause if there is one, or on to the next statement.

        """
        constant_test = self.is_constant_expr(node.test)
        start = to_top = self.line_for_node(node.test)
        if constant_test:
            to_top = self.line_for_node(node.body[0])
        # Save the enclosing loop's state, to restore it after the body.
        loop_state = self.loop_start, self.break_exits
        self.loop_start = start
        self.break_exits = set()
        body_exits = self.add_body_arcs(node.body, from_line=start)
        for exit_line in body_exits:
            self.arcs.add((exit_line, to_top))
        exits = self.break_exits
        self.loop_start, self.break_exits = loop_state
        if not constant_test:
            if node.orelse:
                # A false test flows into the else clause.
                exits |= self.add_body_arcs(node.orelse, from_line=start)
            else:
                # no else clause: a false test exits from the while line.
                exits.add(start)
        # TODO: orelse on a loop with a constant test.
        return exits

    def handle_default(self, node):
        """Treat `node` as a simple statement that flows to the next one."""
        node_name = node.__class__.__name__
        if node_name not in ["Assign", "Assert", "AugAssign", "Expr"]:
            print("*** Unhandled: {}".format(node))
        return set([self.line_for_node(node)])

    def add_arcs_for_code_objects(self, root_node):
        """Add the arcs for every code object found under `root_node`.

        Modules, function definitions, and comprehensions each get their own
        entry and exit arcs.

        """
        for node in ast.walk(root_node):
            node_name = node.__class__.__name__
            if node_name == "Module":
                if not node.body:
                    # An empty module has no statements, so no arcs.
                    continue
                start = self.line_for_node(node.body[0])
                exits = self.add_body_arcs(node.body, from_line=-1)
                for exit_line in exits:
                    self.arcs.add((exit_line, -start))
            elif node_name == "FunctionDef":
                start = self.line_for_node(node)
                self.function_start = start
                func_exits = self.add_body_arcs(node.body, from_line=-1)
                for exit_line in func_exits:
                    self.arcs.add((exit_line, -start))
                self.function_start = None
            elif node_name == "comprehension":
                start = self.line_for_node(node)
                self.arcs.add((-1, start))
                self.arcs.add((start, -start))
                # TODO: guaranteed this won't work for multi-line comps.




## Opcodes that guide the ByteParser.

def _opcode(name):
Expand Down Expand Up @@ -321,7 +496,7 @@ def _opcode_set(*names):

# Opcodes that push a block on the block stack.
OPS_PUSH_BLOCK = _opcode_set(
'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH', 'SETUP_ASYNC_WITH',
)

# Block types for exception handling.
Expand All @@ -330,6 +505,8 @@ def _opcode_set(*names):
# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

OPS_GET_AITER = _opcode_set('GET_AITER')

# Opcodes that have a jump destination, but aren't really a jump.
OPS_NO_JUMP = OPS_PUSH_BLOCK

Expand Down Expand Up @@ -449,6 +626,8 @@ def _split_into_chunks(self):
# is a count of how many ignores are left.
ignore_branch = 0

ignore_pop_block = 0

# We have to handle the last two bytecodes specially.
ult = penult = None

Expand Down Expand Up @@ -507,7 +686,10 @@ def _split_into_chunks(self):
block_stack.append((bc.op, bc.jump_to))
if bc.op in OPS_POP_BLOCK:
# The opcode pops a block from the block stack.
block_stack.pop()
if ignore_pop_block:
ignore_pop_block -= 1
else:
block_stack.pop()
if bc.op in OPS_CHUNK_END:
# This opcode forces the end of the chunk.
if bc.op == OP_BREAK_LOOP:
Expand All @@ -527,6 +709,15 @@ def _split_into_chunks(self):
# branch, so that except's don't count as branches.
ignore_branch += 1

if bc.op in OPS_GET_AITER:
# GET_AITER is weird: First, it seems to generate one more
# POP_BLOCK than SETUP_*, so we have to prepare to ignore one
# of the POP_BLOCKS. Second, we don't have a clear branch to
# the exit of the loop, so we peek into the block stack to find
# it.
ignore_pop_block += 1
chunk.exits.add(block_stack[-1][1])

penult = ult
ult = bc

Expand Down Expand Up @@ -686,3 +877,38 @@ def __repr__(self):
"v" if self.entrance else "",
list(self.exits),
)


# Field names that ast_dump leaves out of its output.
SKIP_FIELDS = ["ctx"]


def ast_dump(node, depth=0):
    """Print a nested, human-readable dump of the AST rooted at `node`.

    `depth` is the number of spaces to indent the opening and closing
    markers for `node`.  Child nodes are dumped recursively.  Any field
    value (or list element) that is not an AST node, such as a string,
    number, or bytes constant, is printed with its repr.

    Fields named in `SKIP_FIELDS` are omitted.

    """
    indent = " " * depth
    lineno = getattr(node, "lineno", None)
    if lineno is not None:
        linemark = " @ {0}".format(lineno)
    else:
        linemark = ""
    print("{0}<{1}{2}".format(indent, node.__class__.__name__, linemark))

    indent += " "
    for field_name, value in ast.iter_fields(node):
        if field_name in SKIP_FIELDS:
            continue
        prefix = "{0}{1}:".format(indent, field_name)
        if value is None:
            print("{0} None".format(prefix))
        elif isinstance(value, list):
            if value == []:
                print("{0} []".format(prefix))
            else:
                print("{0} [".format(prefix))
                for item in value:
                    if isinstance(item, ast.AST):
                        ast_dump(item, depth + 8)
                    else:
                        # Some lists hold plain values (names, None), which
                        # have no fields to recurse into.
                        print("{0}{1!r}".format(" " * (depth + 8), item))
                print("{0}]".format(indent))
        elif isinstance(value, ast.AST):
            print(prefix)
            ast_dump(value, depth + 8)
        else:
            # Any scalar: str, int, float, bytes, complex, and so on.
            print("{0} {1!r}".format(prefix, value))

    print("{0}>".format(" " * depth))
4 changes: 4 additions & 0 deletions coverage/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ def no_branch_lines(self):
def arcs(self):
return self.parser.arcs()

@expensive
def ast_arcs(self):
    """Return whatever the parser's `ast_arcs()` reports for this file."""
    parser = self.parser
    return parser.ast_arcs()

@expensive
def exit_counts(self):
return self.parser.exit_counts()
Expand Down
5 changes: 5 additions & 0 deletions coverage/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, data, file_reporter):

if self.data.has_arcs():
self._arc_possibilities = sorted(self.file_reporter.arcs())
self._ast_arc_possibilities = sorted(self.file_reporter.ast_arcs())
self.exit_counts = self.file_reporter.exit_counts()
self.no_branch = self.file_reporter.no_branch_lines()
n_branches = self.total_branches()
Expand All @@ -36,6 +37,7 @@ def __init__(self, data, file_reporter):
n_missing_branches = sum(len(v) for k,v in iitems(mba))
else:
self._arc_possibilities = []
self._ast_arc_possibilities = []
self.exit_counts = {}
self.no_branch = set()
n_branches = n_partial_branches = n_missing_branches = 0
Expand Down Expand Up @@ -66,6 +68,9 @@ def arc_possibilities(self):
"""Returns a sorted list of the arcs in the code."""
return self._arc_possibilities

def ast_arc_possibilities(self):
    """Returns the stored list of arcs found by the AST-based analysis."""
    possibilities = self._ast_arc_possibilities
    return possibilities

def arcs_executed(self):
"""Returns a sorted list of the arcs actually executed in the code."""
executed = self.data.arcs(self.filename) or []
Expand Down
25 changes: 15 additions & 10 deletions lab/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def one_file(self, options, filename):

if options.dis:
print("Main code:")
self.disassemble(bp, histogram=options.histogram)
self.disassemble(bp, chunks=options.chunks, histogram=options.histogram)

arcs = bp._all_arcs()
if options.chunks:
Expand Down Expand Up @@ -123,15 +123,20 @@ def one_file(self, options, filename):
m2 = 'C'
if lineno in cp.raw_excluded:
m3 = 'x'
a = arc_chars[lineno].ljust(arc_width)

if arc_chars:
a = arc_chars[lineno].ljust(arc_width)
else:
a = ""

print("%4d %s%s%s%s%s %s" % (lineno, m0, m1, m2, m3, a, ltext))

def disassemble(self, byte_parser, histogram=False):
def disassemble(self, byte_parser, chunks=False, histogram=False):
"""Disassemble code, for ad-hoc experimenting."""

for bp in byte_parser.child_parsers():
chunks = bp._split_into_chunks()
chunkd = dict((chunk.byte, chunk) for chunk in chunks)
if chunks:
chunkd = dict((chunk.byte, chunk) for chunk in bp._split_into_chunks())
if bp.text:
srclines = bp.text.splitlines()
else:
Expand All @@ -151,11 +156,11 @@ def disassemble(self, byte_parser, histogram=False):
elif disline.offset > 0:
print("")
line = disgen.format_dis_line(disline)
chunk = chunkd.get(disline.offset)
if chunk:
chunkstr = ":: %r" % chunk
else:
chunkstr = ""
chunkstr = ""
if chunks:
chunk = chunkd.get(disline.offset)
if chunk:
chunkstr = ":: %r" % chunk
print("%-70s%s" % (line, chunkstr))

print("")
Expand Down
1 change: 1 addition & 0 deletions tests/coveragetest.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def check_coverage(

if arcs is not None:
self.assert_equal_args(analysis.arc_possibilities(), arcs, "Possible arcs differ")
self.assert_equal_args(analysis.ast_arc_possibilities(), arcs, "Possible ast arcs differ")

if arcs_missing is not None:
self.assert_equal_args(
Expand Down
Loading

0 comments on commit b17f276

Please sign in to comment.