diff --git a/dev_scripts/cflow.py b/dev_scripts/cflow.py index 7f9ddc8..c702e15 100644 --- a/dev_scripts/cflow.py +++ b/dev_scripts/cflow.py @@ -18,7 +18,7 @@ from rich.progress import Progress, BarColumn, TextColumn, TimeRemainingColumn, from pylingual.control_flow_reconstruction.cfg import CFG from pylingual.control_flow_reconstruction.structure import bc_to_cft from pylingual.main import print_result -from pylingual.control_flow_reconstruction.source import SourceContext +from pylingual.control_flow_reconstruction.source import SourceContext, sanitize_lines from pylingual.editable_bytecode import PYCFile from pylingual.equivalence_check import TestResult, compare_pyc from pylingual.utils.version import PythonVersion @@ -64,7 +64,7 @@ def run(file: Path, out_dir: Path, version: PythonVersion, print=False): file = next(file.iterdir()) in_src = normalize_source(file.read_text(), replace_docstrings=True) - src_lines = in_src.split("\n") + src_lines = sanitize_lines(in_src.split("\n")) in_path = out_dir / "a.py" in_path.write_text(in_src, encoding="utf-8") in_pyc = out_dir / "a.pyc" diff --git a/pylingual/control_flow_reconstruction/cfg.py b/pylingual/control_flow_reconstruction/cfg.py index 6b406cc..e57133f 100644 --- a/pylingual/control_flow_reconstruction/cfg.py +++ b/pylingual/control_flow_reconstruction/cfg.py @@ -199,5 +199,6 @@ class CFG(DiGraph_CFT): cdg = nx.create_empty_copy(self) cdg.add_edges_from((B, A, {"kind": EdgeKind.Fall}) for A, B in itertools.product(self.nodes, self.nodes) if A != B and control_dependent(A, B)) cdg.remove_node(self.end) - cdg.add_edges_from(((self.start, n) for n in cdg.nodes if cdg.in_degree(n) == 0 and n != self.start), kind=EdgeKind.Fall) + start_nodes = [n for n in cdg.nodes if cdg.in_degree(n) == 0 and n != self.start] + cdg.add_edges_from(((self.start, n) for n in start_nodes), kind=EdgeKind.Fall) return cdg diff --git a/pylingual/control_flow_reconstruction/templates/Block.py b/pylingual/control_flow_reconstruction/templates/Block.py index 18a6b8f..7f1ae49 100644 --- a/pylingual/control_flow_reconstruction/templates/Block.py +++ b/pylingual/control_flow_reconstruction/templates/Block.py @@ -54,11 +54,12 @@ class RemoveUnreachable(ControlFlowTemplate): return node -@register_template(0, 0, (3, 13)) +@register_template(0, 0, (3, 12), (3, 13)) class JumpTemplate(ControlFlowTemplate): template = T( body=~N("jump", None).with_cond(without_instructions("CLEANUP_THROW")), jump=N("tail", "block?") + .with_cond(has_no_lines) .with_cond(no_self_edges) .with_in_deg(1) .with_cond( @@ -71,7 +72,7 @@ class JumpTemplate(ControlFlowTemplate): exact_instructions("POP_JUMP_IF_FALSE"), ), block=N.tail(), - tail=N.tail(), + tail=N.tail().with_cond(without_instructions("NOP")), ) try_match = make_try_match( diff --git a/pylingual/control_flow_reconstruction/templates/Conditional.py b/pylingual/control_flow_reconstruction/templates/Conditional.py index 8d42ceb..7b174ac 100644 --- a/pylingual/control_flow_reconstruction/templates/Conditional.py +++ b/pylingual/control_flow_reconstruction/templates/Conditional.py @@ -50,8 +50,8 @@ class IfElseLoop(ControlFlowTemplate): @register_template(2, 41) class IfThen(ControlFlowTemplate): template = T( - if_header=~N("if_body", "tail").with_cond(without_top_level_instructions("WITH_EXCEPT_START", "CHECK_EXC_MATCH", "FOR_ITER")), - if_body=~N("tail").with_in_deg(1) | ~N("tail.").with_in_deg(1).with_cond(run_is(2)) | ~N.tail().with_in_deg(1).with_cond(exact_instructions("LOAD_CONST","RETURN_VALUE")), + if_header=~N("if_body", "tail").with_cond(without_top_level_instructions("WITH_EXCEPT_START", "CHECK_EXC_MATCH", "FOR_ITER", "JUMP_IF_NOT_EXC_MATCH")), + if_body=~N(None).with_in_deg(1).of_type(BreakTemplate, ContinueTemplate) | ~N("tail").with_in_deg(1) | ~N("tail.").with_in_deg(1).with_cond(run_is(2)) | ~N.tail().with_in_deg(1).with_cond(exact_instructions("LOAD_CONST","RETURN_VALUE"), exact_instructions("POP_TOP", "LOAD_CONST","RETURN_VALUE")), tail=N.tail(), ) diff --git a/pylingual/control_flow_reconstruction/templates/Loop.py b/pylingual/control_flow_reconstruction/templates/Loop.py index db44d0e..21735a3 100644 --- a/pylingual/control_flow_reconstruction/templates/Loop.py +++ b/pylingual/control_flow_reconstruction/templates/Loop.py @@ -15,7 +15,6 @@ from ..utils import ( exact_instructions, ending_instructions, has_no_lines, - has_some_lines, condense_mapping, defer_source_to, starting_instructions, @@ -180,14 +179,14 @@ class InlinedComprehensionTemplate(ControlFlowTemplate): class BreakTemplate(ControlFlowTemplate): @classmethod def try_match(cls, cfg, node): - if not with_top_level_instructions("POP_TOP", "LOAD_CONST", "RETURN_VALUE", "RETURN_CONST", "JUMP_ABSOLUTE", "JUMP_FORWARD", "JUMP_BACKWARD", "BREAK_LOOP")(cfg, node) or has_no_lines(cfg, node): + if not with_top_level_instructions("POP_TOP", "LOAD_FAST", "LOAD_CONST", "RETURN_VALUE", "RETURN_CONST", "JUMP_ABSOLUTE", "JUMP_FORWARD", "JUMP_BACKWARD", "BREAK_LOOP", "POP_BLOCK")(cfg, node) or has_no_lines(cfg, node): return None i = len(node.get_instructions()) - 1 while i >= 0: instruction = node.get_instructions()[i].opname - if instruction in {"POP_TOP", "LOAD_CONST", "RETURN_VALUE", "RETURN_CONST", "JUMP_ABSOLUTE", "JUMP_FORWARD", "JUMP_BACKWARD", "BREAK_LOOP"}: - if node.get_instructions()[i].starts_line is not None: + if instruction in {"POP_TOP", "LOAD_FAST", "LOAD_CONST", "RETURN_VALUE", "RETURN_CONST", "JUMP_ABSOLUTE", "JUMP_FORWARD", "JUMP_BACKWARD", "BREAK_LOOP", "POP_BLOCK"}: + if node.get_instructions()[i].starts_line is not None and not any(node.get_instructions()[i].source_line.strip().startswith(word) for word in {"pass", "...", "return"}): return condense_mapping(cls, cfg, {"child": node}, "child") else: i -= 1 @@ -203,14 +202,14 @@ class BreakTemplate(ControlFlowTemplate): class ContinueTemplate(ControlFlowTemplate): @classmethod def try_match(cls, cfg, node): - if not with_top_level_instructions("JUMP_ABSOLUTE", "JUMP_BACKWARD", "CONTINUE_LOOP", "POP_EXCEPT")(cfg, node) or has_no_lines(cfg, node): + if not with_top_level_instructions("JUMP_ABSOLUTE", "JUMP_BACKWARD", "CONTINUE_LOOP", "POP_EXCEPT", "POP_BLOCK")(cfg, node) or has_no_lines(cfg, node): return None i = len(node.get_instructions()) - 1 while i >= 0: instruction = node.get_instructions()[i].opname - if instruction in {"JUMP_ABSOLUTE", "JUMP_BACKWARD", "CONTINUE_LOOP", "POP_EXCEPT"}: - if node.get_instructions()[i].starts_line is not None: + if instruction in {"JUMP_ABSOLUTE", "JUMP_BACKWARD", "CONTINUE_LOOP", "POP_EXCEPT", "POP_BLOCK"}: + if node.get_instructions()[i].starts_line is not None and not any(node.get_instructions()[i].source_line.strip().startswith(word) for word in {"pass", "...", "return"}): return condense_mapping(cls, cfg, {"child": node}, "child") else: i -= 1 @@ -244,7 +243,7 @@ class FixLoop(ControlFlowTemplate): if cfg.dominates(node, predecessor): back_edges.append(predecessor) - if not back_edges or with_top_level_instructions("SEND")(cfg, node): + if not back_edges or all(n == node for n in back_edges) or with_top_level_instructions("SEND")(cfg, node): return None # Get all nodes encompassed by the loop excluding source node and initial false jump