mirror of
https://github.com/syssec-utd/pylingual.git
synced 2026-05-10 18:39:03 -07:00
Merge pull request #75 from XinlongCS/cflow-refactor
While Loops/Breaks and Continues + Misc improvements
This commit is contained in:
@@ -7,7 +7,7 @@ from pylingual.editable_bytecode import Inst
|
||||
import networkx as nx
|
||||
|
||||
from ..cft import ControlFlowTemplate, EdgeKind, SourceContext, SourceLine, register_template, EdgeCategory, out_edge_dict, MetaTemplate, indent_str
|
||||
from ..utils import E, N, T, defer_source_to, remove_nodes, versions_from, without_instructions, has_no_lines, exact_instructions, make_try_match
|
||||
from ..utils import E, N, T, defer_source_to, no_self_edges, remove_nodes, versions_from, without_instructions, has_no_lines, exact_instructions, make_try_match
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pylingual.control_flow_reconstruction.cfg import CFG
|
||||
@@ -48,7 +48,7 @@ class RemoveUnreachable(ControlFlowTemplate):
|
||||
return None
|
||||
|
||||
valid = list(nx.dfs_preorder_nodes(cfg, source=cfg.start))
|
||||
invalid = [n for n in cfg.nodes if n not in valid]
|
||||
invalid = [n for n in cfg.nodes if n not in valid and has_no_lines(cfg, n)]
|
||||
if invalid:
|
||||
cfg.remove_nodes_from(invalid)
|
||||
return node
|
||||
@@ -59,6 +59,7 @@ class JumpTemplate(ControlFlowTemplate):
|
||||
template = T(
|
||||
body=~N("jump", None).with_cond(without_instructions("CLEANUP_THROW")),
|
||||
jump=N("tail", "block?")
|
||||
.with_cond(no_self_edges)
|
||||
.with_in_deg(1)
|
||||
.with_cond(
|
||||
exact_instructions("JUMP_BACKWARD_NO_INTERRUPT"),
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
from ..cft import ControlFlowTemplate, EdgeKind, register_template
|
||||
from ..utils import T, N, defer_source_to, run_is, has_no_lines, with_instructions, has_instval, starting_instructions, to_indented_source, make_try_match, without_top_level_instructions
|
||||
from .Loop import BreakTemplate
|
||||
from .Loop import BreakTemplate, ContinueTemplate
|
||||
|
||||
|
||||
@register_template(1, 40)
|
||||
class IfElse(ControlFlowTemplate):
|
||||
template = T(
|
||||
if_header=~N("if_body", "else_body").with_cond(without_top_level_instructions("WITH_EXCEPT_START", "CHECK_EXC_MATCH", "FOR_ITER")),
|
||||
if_body=N.tail().with_in_deg(1).of_type(BreakTemplate) | ~N("tail.").with_in_deg(1),
|
||||
else_body=N.tail().with_in_deg(1).of_type(BreakTemplate) | ~N("tail.").with_cond(without_top_level_instructions("RERAISE", "END_FINALLY")).with_in_deg(1),
|
||||
if_body=N(None).with_in_deg(1).of_type(BreakTemplate, ContinueTemplate) | ~N("tail.").with_in_deg(1),
|
||||
else_body=N("tail.").with_in_deg(1).of_type(BreakTemplate, ContinueTemplate) | ~N("tail.").with_cond(without_top_level_instructions("RERAISE", "END_FINALLY")).with_in_deg(1),
|
||||
tail=N.tail(),
|
||||
)
|
||||
|
||||
|
||||
@@ -292,14 +292,14 @@ class TryFinally3_11(ControlFlowTemplate):
|
||||
try_header=N("try_body"),
|
||||
try_body=N("finally_body", None, "fail_body"),
|
||||
finally_body=~N("tail.").with_in_deg(1).with_cond(no_back_edges),
|
||||
fail_body=N(E.exc("reraise")).with_cond(ending_instructions("POP_TOP", "RERAISE"), ending_instructions("DELETE_SUBSCR", "RERAISE")),
|
||||
fail_body=N(E.exc("reraise")).with_cond(without_top_level_instructions("DELETE_FAST")),
|
||||
reraise=reraise,
|
||||
tail=N.tail(),
|
||||
)
|
||||
template2 = T(
|
||||
try_except=N("finally_body", None, "fail_body").of_type(Try3_11, TryElse3_11, Try3_12, TryElse3_12),
|
||||
finally_body=~N("tail.").with_in_deg(1).with_cond(no_back_edges),
|
||||
fail_body=N(E.exc("reraise")).with_cond(ending_instructions("POP_TOP", "RERAISE")),
|
||||
fail_body=N(E.exc("reraise")).with_cond(without_top_level_instructions("DELETE_FAST")),
|
||||
reraise=reraise,
|
||||
tail=N.tail(),
|
||||
)
|
||||
@@ -531,14 +531,14 @@ class TryFinally3_9(ControlFlowTemplate):
|
||||
try_header=N("try_body"),
|
||||
try_body=N("finally_body", None, "fail_body"),
|
||||
finally_body=~N("tail.").with_in_deg(1).with_cond(no_back_edges),
|
||||
fail_body=N("tail.").with_cond(ending_instructions("POP_TOP", "RERAISE"), ending_instructions("DELETE_SUBSCR", "RERAISE")),
|
||||
fail_body=N("tail.").with_cond(without_top_level_instructions("DELETE_FAST")),
|
||||
tail=N.tail(),
|
||||
)
|
||||
template2 = T(
|
||||
try_except=N("finally_tail", None, "fail_body").of_type(TryElse3_9, Try3_9),
|
||||
finally_tail=N("finally_body", None, "fail_body"),
|
||||
finally_body=~N("tail.").with_in_deg(1).with_cond(no_back_edges),
|
||||
fail_body=N("tail.").with_cond(ending_instructions("POP_TOP", "RERAISE")),
|
||||
fail_body=N("tail.").with_cond(without_top_level_instructions("DELETE_FAST")),
|
||||
tail=N.tail(),
|
||||
)
|
||||
|
||||
@@ -790,14 +790,14 @@ class TryFinally3_6(ControlFlowTemplate):
|
||||
try_header=N("try_body"),
|
||||
try_body=N("finally_body", None, "fail_body"),
|
||||
finally_body=~N("fail_body").with_in_deg(1).with_cond(no_back_edges),
|
||||
fail_body=N("tail.").with_cond(with_instructions("POP_TOP", "END_FINALLY"), with_instructions("LOAD_CONST", "RETURN_VALUE"), with_instructions("DELETE_SUBSCR", "END_FINALLY")),
|
||||
fail_body=N("tail.").with_cond(without_top_level_instructions("DELETE_FAST")),
|
||||
tail=N.tail(),
|
||||
)
|
||||
template2 = T(
|
||||
try_except=N("finally_tail", None, "fail_body").of_type(TryElse3_6, Try3_6, ReturnFinally3_6),
|
||||
finally_tail=N("finally_body", None, "fail_body"),
|
||||
finally_body=~N("fail_body").with_in_deg(1).with_cond(no_back_edges),
|
||||
fail_body=N("tail.").with_cond(with_instructions("POP_TOP", "END_FINALLY"), with_instructions("LOAD_CONST", "RETURN_VALUE")),
|
||||
fail_body=N("tail.").with_cond(without_top_level_instructions("DELETE_FAST")),
|
||||
tail=N.tail(),
|
||||
)
|
||||
|
||||
|
||||
@@ -1,17 +1,27 @@
|
||||
from __future__ import annotations
|
||||
from itertools import chain
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from pylingual.control_flow_reconstruction.source import SourceContext, SourceLine
|
||||
|
||||
from ..cft import ControlFlowTemplate, EdgeKind, register_template
|
||||
from ..utils import (
|
||||
T,
|
||||
N,
|
||||
no_back_edges,
|
||||
versions_below,
|
||||
versions_from,
|
||||
with_instructions,
|
||||
exact_instructions,
|
||||
has_no_lines,
|
||||
has_some_lines,
|
||||
condense_mapping,
|
||||
defer_source_to,
|
||||
starting_instructions,
|
||||
to_indented_source,
|
||||
make_try_match,
|
||||
with_top_level_instructions,
|
||||
without_top_level_instructions,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -36,9 +46,11 @@ class ForLoop(ControlFlowTemplate):
|
||||
"""
|
||||
|
||||
|
||||
@register_template(0, 2)
|
||||
class SelfLoop(ControlFlowTemplate):
|
||||
template = T(loop_body=~N("loop_body", None))
|
||||
@register_template(0, 2, *versions_below(3, 10))
|
||||
class SelfLoop3_6(ControlFlowTemplate):
|
||||
template = T(
|
||||
loop_body=~N("loop_body", None)
|
||||
)
|
||||
|
||||
try_match = make_try_match({}, "loop_body")
|
||||
|
||||
@@ -50,6 +62,26 @@ class SelfLoop(ControlFlowTemplate):
|
||||
"""
|
||||
|
||||
|
||||
# Loop template registered for Python 3.10 and later: a header node followed by
# a body node that lists itself as a successor.
@register_template(0, 2, *versions_from(3, 10))
class SelfLoop3_10(ControlFlowTemplate):
    template = T(
        # Header names "loop_body" and an optional "RET_CONST" node and must
        # satisfy no_back_edges.
        # NOTE(review): presumably the positional arguments of N(...) are the
        # successor slots of the node -- confirm against the N definition.
        loop_header=~N("loop_body", "RET_CONST?").with_cond(no_back_edges),
        # Body names itself as its first successor; the second slot is None.
        loop_body=~N("loop_body", None),
        RET_CONST=N.tail(),
    )

    try_match = make_try_match({}, "loop_header", "loop_body", "RET_CONST")

    def to_indented_source(self, source: SourceContext) -> list[SourceLine]:
        # Render the header followed by the body, inserting a synthetic
        # "while True:" line when the header does not already carry a "while".
        header = source[self.loop_header]
        # NOTE(review): the second subscript (1) looks like an indent level for
        # the body -- confirm against SourceContext.__getitem__.
        body = source[self.loop_body, 1]
        # The result of this lookup is not used below.
        # NOTE(review): confirm whether the lookup is needed for a side effect
        # before removing it.
        RET_CONST = source[self.RET_CONST]
        # Look at every source line on which a header instruction starts; if
        # none of them begins with "while ", the loop keyword has to be added.
        if not any(source.lines[i.starts_line - 1].strip().startswith("while ") for i in self.loop_header.get_instructions() if i.starts_line is not None):
            return list(chain(header, self.line("while True:"), body))
        else:
            return list(chain(header, body))
|
||||
|
||||
|
||||
@register_template(0, 2)
|
||||
class TrueSelfLoop(ControlFlowTemplate):
|
||||
template = T(loop_body=~N("tail.", "loop_body"), tail=N.tail())
|
||||
@@ -68,6 +100,47 @@ class TrueSelfLoop(ControlFlowTemplate):
|
||||
"""
|
||||
|
||||
|
||||
@register_template(0, 1, *versions_from(3, 12))
|
||||
class AsyncForLoop3_12(ControlFlowTemplate):
|
||||
template = T(
|
||||
for_iter=N("for_body", None, "tail"),
|
||||
for_body=~N("for_iter").with_in_deg(1),
|
||||
tail=N.tail(),
|
||||
)
|
||||
|
||||
try_match = make_try_match({}, "tail", "for_iter", "for_body")
|
||||
|
||||
@to_indented_source
|
||||
def to_indented_source():
|
||||
"""
|
||||
{for_iter}
|
||||
{for_body}
|
||||
{tail}
|
||||
"""
|
||||
|
||||
|
||||
@register_template(1, 39)
|
||||
class WhileIfElseLoop(ControlFlowTemplate):
|
||||
template = T(
|
||||
if_header=~N("if_body", "else_body").with_cond(without_top_level_instructions("WITH_EXCEPT_START", "CHECK_EXC_MATCH", "FOR_ITER")),
|
||||
else_body=~N("if_header").with_in_deg(1),
|
||||
if_body=~N("tail.").with_cond(without_top_level_instructions("RERAISE", "END_FINALLY")).with_in_deg(1),
|
||||
tail=N.tail(),
|
||||
)
|
||||
|
||||
try_match = make_try_match({EdgeKind.Fall: "tail"}, "if_header", "if_body", "else_body")
|
||||
|
||||
@to_indented_source
|
||||
def to_indented_source():
|
||||
"""
|
||||
while True:
|
||||
{if_header}
|
||||
{if_body}
|
||||
{else_body?else:}
|
||||
{else_body}
|
||||
"""
|
||||
|
||||
|
||||
@register_template(0, 3)
|
||||
class InlinedComprehensionTemplate(ControlFlowTemplate):
|
||||
template = T(
|
||||
@@ -90,9 +163,21 @@ class InlinedComprehensionTemplate(ControlFlowTemplate):
|
||||
class BreakTemplate(ControlFlowTemplate):
    # Wraps a single node whose trailing instructions look like a `break` and
    # renders the wrapped node followed by a literal "break" line.

    @classmethod
    def try_match(cls, cfg, node):
        """Wrap *node* as a break statement, or return None if it does not qualify.

        The node qualifies when it contains at least one of the break-like
        opcodes at top level, has source lines, and -- scanning its
        instructions from last to first -- a break-like instruction that
        starts a source line is reached before any other kind of instruction.
        """
        break_ops = (
            "POP_TOP",
            "LOAD_CONST",
            "RETURN_VALUE",
            "RETURN_CONST",
            "JUMP_ABSOLUTE",
            "JUMP_FORWARD",
            "JUMP_BACKWARD",
            "BREAK_LOOP",
        )
        if not with_top_level_instructions(*break_ops)(cfg, node) or has_no_lines(cfg, node):
            return None

        # Walk backwards over the trailing run of break-like instructions; the
        # first one that begins a source line decides the match.
        for inst in reversed(node.get_instructions()):
            if inst.opname not in break_ops:
                return None
            if inst.starts_line is not None:
                return condense_mapping(cls, cfg, {"child": node}, "child")
        return None

    def to_indented_source(self, source):
        # Emit the wrapped node's own source, then the `break` keyword.
        return self.child.to_indented_source(source) + self.line("break")
|
||||
@@ -101,11 +186,20 @@ class BreakTemplate(ControlFlowTemplate):
|
||||
class ContinueTemplate(ControlFlowTemplate):
|
||||
@classmethod
def try_match(cls, cfg, node):
    """Wrap *node* as a continue statement, or return None if it does not qualify.

    The node qualifies when it contains at least one of the continue-like
    opcodes at top level, has source lines, and -- scanning its instructions
    from last to first -- a continue-like instruction that starts a source
    line is reached before any other kind of instruction.
    """
    continue_ops = ("JUMP_ABSOLUTE", "JUMP_BACKWARD", "CONTINUE_LOOP", "POP_EXCEPT")
    if not with_top_level_instructions(*continue_ops)(cfg, node) or has_no_lines(cfg, node):
        return None

    # Walk backwards over the trailing run of continue-like instructions; the
    # first one that begins a source line decides the match.
    for inst in reversed(node.get_instructions()):
        if inst.opname not in continue_ops:
            return None
        if inst.starts_line is not None:
            return condense_mapping(cls, cfg, {"child": node}, "child")
    return None
|
||||
|
||||
def to_indented_source(self, source):
|
||||
@@ -132,8 +226,8 @@ class FixLoop(ControlFlowTemplate):
|
||||
# A back edge exists if the predecessor is reachable from the node (node dominates predecessor)
|
||||
if cfg.dominates(node, predecessor):
|
||||
back_edges.append(predecessor)
|
||||
|
||||
if not back_edges:
|
||||
|
||||
if not back_edges or with_top_level_instructions("SEND")(cfg, node):
|
||||
return None
|
||||
|
||||
# Get all nodes encompassed by the loop excluding source node and initial false jump
|
||||
@@ -151,30 +245,52 @@ class FixLoop(ControlFlowTemplate):
|
||||
# Find the candidate end that break connects to
|
||||
candidate_end = None
|
||||
for succ in cfg.successors(node):
|
||||
if cfg.get_edge_data(node, succ).get("kind") == EdgeKind.FalseJump and cfg.out_degree(succ) <= 1:
|
||||
if cfg.get_edge_data(node, succ).get("kind") == EdgeKind.FalseJump and not any(n == node for n in cfg.successors(succ)):
|
||||
candidate_end = succ
|
||||
|
||||
# Candidate end is a buffer node
|
||||
if cfg.in_degree(candidate_end) == 1 and all(x.opname in {"POP_TOP", "POP_BLOCK", "END_FOR", "RETURN_CONST", "LOAD_CONST", "RETURN_VALUE", "JUMP_BACKWARD"} for x in candidate_end.get_instructions()):
|
||||
if cfg.in_degree(candidate_end) == 1:
|
||||
for ss in cfg.successors(candidate_end):
|
||||
if cfg.get_edge_data(candidate_end, ss).get("kind") != EdgeKind.Exception:
|
||||
candidate_end = ss
|
||||
break
|
||||
|
||||
if encompassed_nodes is not None:
|
||||
for succ in encompassed_nodes:
|
||||
if cfg.get_edge_data(succ, candidate_end) != None:
|
||||
edges_to_remove.append((succ, candidate_end))
|
||||
if candidate_end == None:
|
||||
# While loops
|
||||
for candidate in back_edges:
|
||||
cont_node = ContinueTemplate.try_match(cfg, candidate)
|
||||
if cont_node is not None and not cfg.has_edge(node, cont_node):
|
||||
cfg.remove_edge(cont_node, node)
|
||||
|
||||
dfs_edges = cfg.dfs_labeled_edges_no_loop(source=node)
|
||||
candidates = [v for u, v, d in dfs_edges if d == "forward"][1:]
|
||||
|
||||
for pred, succ in edges_to_remove:
|
||||
break_node = BreakTemplate.try_match(cfg, pred)
|
||||
if break_node is not None:
|
||||
cfg.remove_edge(break_node, succ)
|
||||
for n in candidates:
|
||||
for s in cfg.successors(n):
|
||||
if cfg.get_edge_data(n, s).get("kind") != EdgeKind.Exception and not all(cfg.get_edge_data(p, n).get("kind") == EdgeKind.Exception for p in cfg.predecessors(n)):
|
||||
edges_to_remove.append((n, s))
|
||||
|
||||
for pred, succ in edges_to_remove:
|
||||
break_node = BreakTemplate.try_match(cfg, pred)
|
||||
if break_node is not None and cfg.in_degree(succ) > 2:
|
||||
cfg.remove_edge(break_node, succ)
|
||||
|
||||
for candidate in back_edges:
|
||||
cont_node = ContinueTemplate.try_match(cfg, candidate)
|
||||
if cont_node is not None and cfg.in_degree(node) > 2:
|
||||
cfg.remove_edge(cont_node, node)
|
||||
else:
|
||||
# For loops
|
||||
if encompassed_nodes is not None:
|
||||
for succ in encompassed_nodes:
|
||||
if cfg.get_edge_data(succ, candidate_end) != None:
|
||||
edges_to_remove.append((succ, candidate_end))
|
||||
|
||||
for candidate in back_edges:
|
||||
cont_node = ContinueTemplate.try_match(cfg, candidate)
|
||||
if cont_node is not None and cfg.in_degree(node) > 2:
|
||||
cfg.remove_edge(cont_node, node)
|
||||
|
||||
for pred, succ in edges_to_remove:
|
||||
break_node = BreakTemplate.try_match(cfg, pred)
|
||||
if break_node is not None:
|
||||
cfg.remove_edge(break_node, succ)
|
||||
|
||||
cfg.iterate()
|
||||
return
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from ..cft import ControlFlowTemplate, EdgeKind, register_template
|
||||
from ..utils import T, N, exact_instructions, starting_instructions, without_instructions, to_indented_source, make_try_match, versions_from
|
||||
from ..utils import T, N, exact_instructions, starting_instructions, without_top_level_instructions, to_indented_source, make_try_match, versions_from
|
||||
|
||||
|
||||
class WithCleanup3_11(ControlFlowTemplate):
|
||||
@@ -83,7 +83,7 @@ class With3_9(ControlFlowTemplate):
|
||||
@register_template(0, 10, (3, 6), (3, 7), (3, 8))
|
||||
class With3_6(ControlFlowTemplate):
|
||||
template = T(
|
||||
setup_with=~N("with_body", None).with_cond(without_instructions("SETUP_FINALLY")),
|
||||
setup_with=~N("with_body", None).with_cond(without_top_level_instructions("SETUP_FINALLY", "SETUP_EXCEPT")),
|
||||
with_body=N("buffer_block.", None, "normal_cleanup").with_in_deg(1),
|
||||
buffer_block=~N("normal_cleanup.", None).with_in_deg(1),
|
||||
normal_cleanup=~N.tail(),
|
||||
|
||||
@@ -96,6 +96,19 @@ def without_top_level_instructions(*opnames: str):
|
||||
return check_instructions
|
||||
|
||||
|
||||
def with_top_level_instructions(*opnames: str):
    """Build a node predicate that is true when a top-level opcode matches.

    The returned ``check_instructions(cfg, node)`` is true when *node* is a
    ``BlockTemplate`` with at least one direct ``InstTemplate`` member whose
    opname is in *opnames*, or when *node* is itself an ``InstTemplate`` with
    such an opname. Any other node (including ``None``) yields ``False``.
    """
    # Imported here rather than at module level, as in the original.
    from .templates.Block import BlockTemplate

    def check_instructions(cfg: CFG, node: ControlFlowTemplate | None) -> bool:
        if isinstance(node, BlockTemplate):
            # Only direct members are inspected; nested templates are skipped.
            for member in node.members:
                if isinstance(member, InstTemplate) and member.inst.opname in opnames:
                    return True
            return False
        if not isinstance(node, InstTemplate):
            return False
        return node.inst.opname in opnames

    return check_instructions
|
||||
|
||||
|
||||
def has_type(*template_type: type[ControlFlowTemplate]):
|
||||
def check_type(cfg: CFG, node: ControlFlowTemplate | None) -> bool:
|
||||
return isinstance(node, template_type)
|
||||
@@ -106,6 +119,8 @@ def has_type(*template_type: type[ControlFlowTemplate]):
|
||||
def no_back_edges(cfg: CFG, node: ControlFlowTemplate | None) -> bool:
    """Return True when *node* is None or none of its successors dominates it."""
    if node is None:
        return True
    # An edge node -> succ is treated as a back edge when succ dominates node.
    return all(not cfg.dominates(succ, node) for succ in cfg.successors(node))
|
||||
|
||||
def no_self_edges(cfg: CFG, node: ControlFlowTemplate | None) -> bool:
    """Return True when *node* is None or no successor has an edge back to it.

    Despite the name, this rejects more than literal self-loops: any
    successor ``succ`` with an edge ``succ -> node`` fails the check, which
    covers both ``node -> node`` and two-node cycles ``node <-> succ``.
    """
    if node is None:
        return True
    # Every succ yielded by successors(node) already has the edge node -> succ,
    # so only the reverse direction needs to be looked up.
    return not any(cfg.has_edge(succ, node) for succ in cfg.successors(node))
|
||||
|
||||
def has_incoming_edge_of_categories(*categories: str):
|
||||
def check(cfg: CFG, node: ControlFlowTemplate | None) -> bool:
|
||||
|
||||
Reference in New Issue
Block a user