diff --git a/pylingual/control_flow_reconstruction/templates/Block.py b/pylingual/control_flow_reconstruction/templates/Block.py index 1788a43..18a6b8f 100644 --- a/pylingual/control_flow_reconstruction/templates/Block.py +++ b/pylingual/control_flow_reconstruction/templates/Block.py @@ -7,7 +7,7 @@ from pylingual.editable_bytecode import Inst import networkx as nx from ..cft import ControlFlowTemplate, EdgeKind, SourceContext, SourceLine, register_template, EdgeCategory, out_edge_dict, MetaTemplate, indent_str -from ..utils import E, N, T, defer_source_to, remove_nodes, versions_from, without_instructions, has_no_lines, exact_instructions, make_try_match +from ..utils import E, N, T, defer_source_to, no_self_edges, remove_nodes, versions_from, without_instructions, has_no_lines, exact_instructions, make_try_match if TYPE_CHECKING: from pylingual.control_flow_reconstruction.cfg import CFG @@ -48,7 +48,7 @@ class RemoveUnreachable(ControlFlowTemplate): return None valid = list(nx.dfs_preorder_nodes(cfg, source=cfg.start)) - invalid = [n for n in cfg.nodes if n not in valid] + invalid = [n for n in cfg.nodes if n not in valid and has_no_lines(cfg, n)] if invalid: cfg.remove_nodes_from(invalid) return node @@ -59,6 +59,7 @@ class JumpTemplate(ControlFlowTemplate): template = T( body=~N("jump", None).with_cond(without_instructions("CLEANUP_THROW")), jump=N("tail", "block?") + .with_cond(no_self_edges) .with_in_deg(1) .with_cond( exact_instructions("JUMP_BACKWARD_NO_INTERRUPT"), diff --git a/pylingual/control_flow_reconstruction/templates/Conditional.py b/pylingual/control_flow_reconstruction/templates/Conditional.py index 41989cc..d76bef0 100644 --- a/pylingual/control_flow_reconstruction/templates/Conditional.py +++ b/pylingual/control_flow_reconstruction/templates/Conditional.py @@ -1,14 +1,14 @@ from ..cft import ControlFlowTemplate, EdgeKind, register_template from ..utils import T, N, defer_source_to, run_is, has_no_lines, with_instructions, has_instval, starting_instructions, to_indented_source, make_try_match, without_top_level_instructions -from .Loop import BreakTemplate +from .Loop import BreakTemplate, ContinueTemplate @register_template(1, 40) class IfElse(ControlFlowTemplate): template = T( if_header=~N("if_body", "else_body").with_cond(without_top_level_instructions("WITH_EXCEPT_START", "CHECK_EXC_MATCH", "FOR_ITER")), - if_body=N.tail().with_in_deg(1).of_type(BreakTemplate) | ~N("tail.").with_in_deg(1), - else_body=N.tail().with_in_deg(1).of_type(BreakTemplate) | ~N("tail.").with_cond(without_top_level_instructions("RERAISE", "END_FINALLY")).with_in_deg(1), + if_body=N(None).with_in_deg(1).of_type(BreakTemplate, ContinueTemplate) | ~N("tail.").with_in_deg(1), + else_body=N("tail.").with_in_deg(1).of_type(BreakTemplate, ContinueTemplate) | ~N("tail.").with_cond(without_top_level_instructions("RERAISE", "END_FINALLY")).with_in_deg(1), tail=N.tail(), ) diff --git a/pylingual/control_flow_reconstruction/templates/Exception.py b/pylingual/control_flow_reconstruction/templates/Exception.py index 7aa5619..c75462e 100644 --- a/pylingual/control_flow_reconstruction/templates/Exception.py +++ b/pylingual/control_flow_reconstruction/templates/Exception.py @@ -292,14 +292,14 @@ class TryFinally3_11(ControlFlowTemplate): try_header=N("try_body"), try_body=N("finally_body", None, "fail_body"), finally_body=~N("tail.").with_in_deg(1).with_cond(no_back_edges), - fail_body=N(E.exc("reraise")).with_cond(ending_instructions("POP_TOP", "RERAISE"), ending_instructions("DELETE_SUBSCR", "RERAISE")), + fail_body=N(E.exc("reraise")).with_cond(without_top_level_instructions("DELETE_FAST")), reraise=reraise, tail=N.tail(), ) template2 = T( try_except=N("finally_body", None, "fail_body").of_type(Try3_11, TryElse3_11, Try3_12, TryElse3_12), finally_body=~N("tail.").with_in_deg(1).with_cond(no_back_edges), - fail_body=N(E.exc("reraise")).with_cond(ending_instructions("POP_TOP", "RERAISE")), + fail_body=N(E.exc("reraise")).with_cond(without_top_level_instructions("DELETE_FAST")), reraise=reraise, tail=N.tail(), ) @@ -531,14 +531,14 @@ class TryFinally3_9(ControlFlowTemplate): try_header=N("try_body"), try_body=N("finally_body", None, "fail_body"), finally_body=~N("tail.").with_in_deg(1).with_cond(no_back_edges), - fail_body=N("tail.").with_cond(ending_instructions("POP_TOP", "RERAISE"), ending_instructions("DELETE_SUBSCR", "RERAISE")), + fail_body=N("tail.").with_cond(without_top_level_instructions("DELETE_FAST")), tail=N.tail(), ) template2 = T( try_except=N("finally_tail", None, "fail_body").of_type(TryElse3_9, Try3_9), finally_tail=N("finally_body", None, "fail_body"), finally_body=~N("tail.").with_in_deg(1).with_cond(no_back_edges), - fail_body=N("tail.").with_cond(ending_instructions("POP_TOP", "RERAISE")), + fail_body=N("tail.").with_cond(without_top_level_instructions("DELETE_FAST")), tail=N.tail(), ) @@ -790,14 +790,14 @@ class TryFinally3_6(ControlFlowTemplate): try_header=N("try_body"), try_body=N("finally_body", None, "fail_body"), finally_body=~N("fail_body").with_in_deg(1).with_cond(no_back_edges), - fail_body=N("tail.").with_cond(with_instructions("POP_TOP", "END_FINALLY"), with_instructions("LOAD_CONST", "RETURN_VALUE"), with_instructions("DELETE_SUBSCR", "END_FINALLY")), + fail_body=N("tail.").with_cond(without_top_level_instructions("DELETE_FAST")), tail=N.tail(), ) template2 = T( try_except=N("finally_tail", None, "fail_body").of_type(TryElse3_6, Try3_6, ReturnFinally3_6), finally_tail=N("finally_body", None, "fail_body"), finally_body=~N("fail_body").with_in_deg(1).with_cond(no_back_edges), - fail_body=N("tail.").with_cond(with_instructions("POP_TOP", "END_FINALLY"), with_instructions("LOAD_CONST", "RETURN_VALUE")), + fail_body=N("tail.").with_cond(without_top_level_instructions("DELETE_FAST")), tail=N.tail(), ) diff --git a/pylingual/control_flow_reconstruction/templates/Loop.py b/pylingual/control_flow_reconstruction/templates/Loop.py index fbdf78c..c5ec983 100644 --- a/pylingual/control_flow_reconstruction/templates/Loop.py +++ b/pylingual/control_flow_reconstruction/templates/Loop.py @@ -1,17 +1,27 @@ from __future__ import annotations +from itertools import chain from typing import TYPE_CHECKING + +from pylingual.control_flow_reconstruction.source import SourceContext, SourceLine + from ..cft import ControlFlowTemplate, EdgeKind, register_template from ..utils import ( T, N, + no_back_edges, + versions_below, + versions_from, with_instructions, exact_instructions, has_no_lines, + has_some_lines, condense_mapping, defer_source_to, starting_instructions, to_indented_source, make_try_match, + with_top_level_instructions, + without_top_level_instructions, ) if TYPE_CHECKING: @@ -36,9 +46,11 @@ class ForLoop(ControlFlowTemplate): """ -@register_template(0, 2) -class SelfLoop(ControlFlowTemplate): - template = T(loop_body=~N("loop_body", None)) +@register_template(0, 2, *versions_below(3, 10)) +class SelfLoop3_6(ControlFlowTemplate): + template = T( + loop_body=~N("loop_body", None) + ) try_match = make_try_match({}, "loop_body") @@ -50,6 +62,26 @@ class SelfLoop(ControlFlowTemplate): """ +@register_template(0, 2, *versions_from(3, 10)) +class SelfLoop3_10(ControlFlowTemplate): + template = T( + loop_header=~N("loop_body", "RET_CONST?").with_cond(no_back_edges), + loop_body=~N("loop_body", None), + RET_CONST=N.tail(), + ) + + try_match = make_try_match({}, "loop_header", "loop_body", "RET_CONST") + + def to_indented_source(self, source: SourceContext) -> list[SourceLine]: + header = source[self.loop_header] + body = source[self.loop_body, 1] + RET_CONST = source[self.RET_CONST] + if not any(source.lines[i.starts_line - 1].strip().startswith("while ") for i in self.loop_header.get_instructions() if i.starts_line is not None): + return list(chain(header, self.line("while True:"), body)) + else: + return list(chain(header, body)) + + @register_template(0, 2) class TrueSelfLoop(ControlFlowTemplate): template = T(loop_body=~N("tail.", "loop_body"), tail=N.tail()) @@ -68,6 +100,47 @@ class TrueSelfLoop(ControlFlowTemplate): """ +@register_template(0, 1, *versions_from(3, 12)) +class AsyncForLoop3_12(ControlFlowTemplate): + template = T( + for_iter=N("for_body", None, "tail"), + for_body=~N("for_iter").with_in_deg(1), + tail=N.tail(), + ) + + try_match = make_try_match({}, "tail", "for_iter", "for_body") + + @to_indented_source + def to_indented_source(): + """ + {for_iter} + {for_body} + {tail} + """ + + +@register_template(1, 39) +class WhileIfElseLoop(ControlFlowTemplate): + template = T( + if_header=~N("if_body", "else_body").with_cond(without_top_level_instructions("WITH_EXCEPT_START", "CHECK_EXC_MATCH", "FOR_ITER")), + else_body=~N("if_header").with_in_deg(1), + if_body=~N("tail.").with_cond(without_top_level_instructions("RERAISE", "END_FINALLY")).with_in_deg(1), + tail=N.tail(), + ) + + try_match = make_try_match({EdgeKind.Fall: "tail"}, "if_header", "if_body", "else_body") + + @to_indented_source + def to_indented_source(): + """ + while True: + {if_header} + {if_body} + {else_body?else:} + {else_body} + """ + + @register_template(0, 3) class InlinedComprehensionTemplate(ControlFlowTemplate): template = T( @@ -90,9 +163,21 @@ class InlinedComprehensionTemplate(ControlFlowTemplate): class BreakTemplate(ControlFlowTemplate): @classmethod def try_match(cls, cfg, node): - if isinstance(node, BreakTemplate) or has_no_lines(cfg, node) or with_instructions("RAISE_VARARGS")(cfg, node): + if not with_top_level_instructions("POP_TOP", "LOAD_CONST", "RETURN_VALUE", "RETURN_CONST", "JUMP_ABSOLUTE", "JUMP_FORWARD", "JUMP_BACKWARD", "BREAK_LOOP")(cfg, node) or has_no_lines(cfg, node): return None - return condense_mapping(cls, cfg, {"child": node}, "child") + + i = len(node.get_instructions()) - 1 + while i >= 0: + instruction = node.get_instructions()[i].opname + if instruction in {"POP_TOP", "LOAD_CONST", "RETURN_VALUE", "RETURN_CONST", "JUMP_ABSOLUTE", "JUMP_FORWARD", "JUMP_BACKWARD", "BREAK_LOOP"}: + if node.get_instructions()[i].starts_line is not None: + return condense_mapping(cls, cfg, {"child": node}, "child") + else: + i -= 1 + continue + else: + return None + return None def to_indented_source(self, source): return self.child.to_indented_source(source) + self.line("break") @@ -101,11 +186,20 @@ class BreakTemplate(ControlFlowTemplate): class ContinueTemplate(ControlFlowTemplate): @classmethod def try_match(cls, cfg, node): - if isinstance(node, ContinueTemplate) or has_no_lines(cfg, node): + if not with_top_level_instructions("JUMP_ABSOLUTE", "JUMP_BACKWARD", "CONTINUE_LOOP", "POP_EXCEPT")(cfg, node) or has_no_lines(cfg, node): return None - instruction = node.get_instructions()[-1].opname - if instruction in {"JUMP_ABSOLUTE", "JUMP_BACKWARD", "CONTINUE_LOOP"} and (node.get_instructions()[-1].starts_line is not None or node.get_instructions()[-2].starts_line is not None): - return condense_mapping(cls, cfg, {"child": node}, "child") + + i = len(node.get_instructions()) - 1 + while i >= 0: + instruction = node.get_instructions()[i].opname + if instruction in {"JUMP_ABSOLUTE", "JUMP_BACKWARD", "CONTINUE_LOOP", "POP_EXCEPT"}: + if node.get_instructions()[i].starts_line is not None: + return condense_mapping(cls, cfg, {"child": node}, "child") + else: + i -= 1 + continue + else: + return None return None def to_indented_source(self, source): @@ -132,8 +226,8 @@ class FixLoop(ControlFlowTemplate): # A back edge exists if the predecessor is reachable from the node (node dominates predecessor) if cfg.dominates(node, predecessor): back_edges.append(predecessor) - - if not back_edges: + + if not back_edges or with_top_level_instructions("SEND")(cfg, node): return None # Get all nodes encompassed by the loop excluding source node and initial false jump @@ -151,30 +245,52 @@ class FixLoop(ControlFlowTemplate): # Find the candidate end that break connects to candidate_end = None for succ in cfg.successors(node): - if cfg.get_edge_data(node, succ).get("kind") == EdgeKind.FalseJump and cfg.out_degree(succ) <= 1: + if cfg.get_edge_data(node, succ).get("kind") == EdgeKind.FalseJump and not any(n == node for n in cfg.successors(succ)): candidate_end = succ # Candidate end is a buffer node - if cfg.in_degree(candidate_end) == 1 and all(x.opname in {"POP_TOP", "POP_BLOCK", "END_FOR", "RETURN_CONST", "LOAD_CONST", "RETURN_VALUE", "JUMP_BACKWARD"} for x in candidate_end.get_instructions()): + if cfg.in_degree(candidate_end) == 1: for ss in cfg.successors(candidate_end): if cfg.get_edge_data(candidate_end, ss).get("kind") != EdgeKind.Exception: candidate_end = ss break - if encompassed_nodes is not None: - for succ in encompassed_nodes: - if cfg.get_edge_data(succ, candidate_end) != None: - edges_to_remove.append((succ, candidate_end)) + if candidate_end == None: + # While loops + for candidate in back_edges: + cont_node = ContinueTemplate.try_match(cfg, candidate) + if cont_node is not None and not cfg.has_edge(node, cont_node): + cfg.remove_edge(cont_node, node) + + dfs_edges = cfg.dfs_labeled_edges_no_loop(source=node) + candidates = [v for u, v, d in dfs_edges if d == "forward"][1:] - for pred, succ in edges_to_remove: - break_node = BreakTemplate.try_match(cfg, pred) - if break_node is not None: - cfg.remove_edge(break_node, succ) + for n in candidates: + for s in cfg.successors(n): + if cfg.get_edge_data(n, s).get("kind") != EdgeKind.Exception and not all(cfg.get_edge_data(p, n).get("kind") == EdgeKind.Exception for p in cfg.predecessors(n)): + edges_to_remove.append((n, s)) + + for pred, succ in edges_to_remove: + break_node = BreakTemplate.try_match(cfg, pred) + if break_node is not None and cfg.in_degree(succ) > 2: + cfg.remove_edge(break_node, succ) - for candidate in back_edges: - cont_node = ContinueTemplate.try_match(cfg, candidate) - if cont_node is not None and cfg.in_degree(node) > 2: - cfg.remove_edge(cont_node, node) + else: + # For loops + if encompassed_nodes is not None: + for succ in encompassed_nodes: + if cfg.get_edge_data(succ, candidate_end) != None: + edges_to_remove.append((succ, candidate_end)) + + for candidate in back_edges: + cont_node = ContinueTemplate.try_match(cfg, candidate) + if cont_node is not None and cfg.in_degree(node) > 2: + cfg.remove_edge(cont_node, node) + + for pred, succ in edges_to_remove: + break_node = BreakTemplate.try_match(cfg, pred) + if break_node is not None: + cfg.remove_edge(break_node, succ) cfg.iterate() return diff --git a/pylingual/control_flow_reconstruction/templates/With.py b/pylingual/control_flow_reconstruction/templates/With.py index 65bcf7c..c24a33b 100644 --- a/pylingual/control_flow_reconstruction/templates/With.py +++ b/pylingual/control_flow_reconstruction/templates/With.py @@ -1,5 +1,5 @@ from ..cft import ControlFlowTemplate, EdgeKind, register_template -from ..utils import T, N, exact_instructions, starting_instructions, without_instructions, to_indented_source, make_try_match, versions_from +from ..utils import T, N, exact_instructions, starting_instructions, without_top_level_instructions, to_indented_source, make_try_match, versions_from class WithCleanup3_11(ControlFlowTemplate): @@ -83,7 +83,7 @@ class With3_9(ControlFlowTemplate): @register_template(0, 10, (3, 6), (3, 7), (3, 8)) class With3_6(ControlFlowTemplate): template = T( - setup_with=~N("with_body", None).with_cond(without_instructions("SETUP_FINALLY")), + setup_with=~N("with_body", None).with_cond(without_top_level_instructions("SETUP_FINALLY", "SETUP_EXCEPT")), with_body=N("buffer_block.", None, "normal_cleanup").with_in_deg(1), buffer_block=~N("normal_cleanup.", None).with_in_deg(1), normal_cleanup=~N.tail(), diff --git a/pylingual/control_flow_reconstruction/utils.py b/pylingual/control_flow_reconstruction/utils.py index 66367ea..16edadb 100644 --- a/pylingual/control_flow_reconstruction/utils.py +++ b/pylingual/control_flow_reconstruction/utils.py @@ -96,6 +96,19 @@ def without_top_level_instructions(*opnames: str): return check_instructions +def with_top_level_instructions(*opnames: str): + from .templates.Block import BlockTemplate + + def check_instructions(cfg: CFG, node: ControlFlowTemplate | None) -> bool: + if isinstance(node, BlockTemplate): + return any(x.inst.opname in opnames for x in node.members if isinstance(x, InstTemplate)) + if isinstance(node, InstTemplate): + return node.inst.opname in opnames + return False + + return check_instructions + + def has_type(*template_type: type[ControlFlowTemplate]): def check_type(cfg: CFG, node: ControlFlowTemplate | None) -> bool: return isinstance(node, template_type) @@ -106,6 +119,8 @@ def has_type(*template_type: type[ControlFlowTemplate]): def no_back_edges(cfg: CFG, node: ControlFlowTemplate | None) -> bool: return node is None or not any(cfg.dominates(succ, node) for succ in cfg.successors(node)) +def no_self_edges(cfg: CFG, node: ControlFlowTemplate | None) -> bool: + return node is None or not any(cfg.has_edge(succ, node) and cfg.has_edge(node, succ) for succ in cfg.successors(node)) def has_incoming_edge_of_categories(*categories: str): def check(cfg: CFG, node: ControlFlowTemplate | None) -> bool: