diff --git a/dev_scripts/cflow.py b/dev_scripts/cflow.py index 72e84ad..7f9ddc8 100644 --- a/dev_scripts/cflow.py +++ b/dev_scripts/cflow.py @@ -73,7 +73,7 @@ def run(file: Path, out_dir: Path, version: PythonVersion, print=False): pyc = PYCFile(in_pyc) edit_pyc_lines(pyc, src_lines) - cfts = {bc.codeobj: bc_to_cft(bc) for bc in pyc.iter_bytecodes()} + cfts = {bc.codeobj: bc_to_cft(bc, src_lines) for bc in pyc.iter_bytecodes()} out_src = str(SourceContext(pyc, src_lines, cfts)) out_path = out_dir / "b.py" @@ -98,7 +98,7 @@ class NoPool: def print_results(a: Path, b: Path, result: Result, results: list[tuple[bool, str]] | Exception): a_text = a.read_text() b_text = b.read_text() - console = rich.console.Console(highlight=False) + console = rich.console.Console(highlight=False, markup=False) console.print("=== original file ===", style="green bold") console.print(a_text) console.print("\n=== reconstructed file ===", style="green bold") diff --git a/pylingual/control_flow_reconstruction/cfg.py b/pylingual/control_flow_reconstruction/cfg.py index 2ae8494..e5759be 100644 --- a/pylingual/control_flow_reconstruction/cfg.py +++ b/pylingual/control_flow_reconstruction/cfg.py @@ -35,7 +35,7 @@ class CFG(DiGraph_CFT): CFG.graph_format = fmt @staticmethod - def from_graph(cfg: nx.DiGraph, bytecode: EditableBytecode, iterate=True) -> CFG: + def from_graph(cfg: nx.DiGraph, bytecode: EditableBytecode, iterate=True, source=None) -> CFG: self = CFG(cfg) self.bytecode = bytecode @@ -46,6 +46,11 @@ class CFG(DiGraph_CFT): self.run = 0 InstTemplate.match_all(self) + for n in self.nodes: + if source is not None and n.inst.starts_line is not None: + n.inst.source_line = source[n.inst.starts_line - 1] + else: + n.inst.source_line = '' for _a, _b, _p in self.edges(data=True): self[_a][_b]["kind"] = EdgeKind(_p["type"]) diff --git a/pylingual/control_flow_reconstruction/structure.py b/pylingual/control_flow_reconstruction/structure.py index 98a26eb..c65e81c 100644 --- a/pylingual/control_flow_reconstruction/structure.py +++ b/pylingual/control_flow_reconstruction/structure.py @@ -17,16 +17,16 @@ def iteration(cfg: CFG, runs: list[list[type[ControlFlowTemplate]]]): return False -def bc_to_cft(bc: EditableBytecode): - return structure_control_flow(bytecode_to_control_flow_graph(bc), bc) +def bc_to_cft(bc: EditableBytecode, source: list[str]): + return structure_control_flow(bytecode_to_control_flow_graph(bc), bc, source) -def structure_control_flow(cfg: nx.DiGraph, bytecode: EditableBytecode) -> ControlFlowTemplate: - cfg = CFG.from_graph(cfg, bytecode) +def structure_control_flow(cfg: nx.DiGraph, bytecode: EditableBytecode, source: list[str]) -> ControlFlowTemplate: + cfg = CFG.from_graph(cfg, bytecode, source=source) runs = get_template_runs(bytecode.version[:2]) while len(cfg) > 1: if not iteration(cfg, runs): - return MetaTemplate("\x1b[31mirreducible cflow\x1b[0m", bytecode.codeobj) + return MetaTemplate("irreducible cflow", bytecode.codeobj) return next(iter(cfg.nodes)) diff --git a/pylingual/decompiler.py b/pylingual/decompiler.py index bc9f366..534566c 100644 --- a/pylingual/decompiler.py +++ b/pylingual/decompiler.py @@ -306,7 +306,7 @@ class Decompiler: def run_cflow_reconstruction(self): logger.info(f"Reconstructing control flow for {self.name}...") try: - cfts = {bc.codeobj: bc_to_cft(bc) for bc in TrackedList(CFLOW_STEP, self.ordered_bytecodes)} + cfts = {bc.codeobj: bc_to_cft(bc, self.source_lines) for bc in TrackedList(CFLOW_STEP, self.ordered_bytecodes)} self.source_context = SourceContext(self.pyc, self.source_lines, cfts) version = magicint2version.get(self.pyc.magic, "?") time = datetime.datetime.fromtimestamp(self.pyc.timestamp, datetime.UTC).strftime("%Y-%m-%d %H:%M:%S UTC") diff --git a/pylingual/equivalence_check.py b/pylingual/equivalence_check.py index b574b66..1aa3b9b 100644 --- a/pylingual/equivalence_check.py +++ b/pylingual/equivalence_check.py @@ -207,8 +207,8 @@ def compare_pyc(pyc_a: PYCFile | Path, pyc_b: PYCFile | Path) -> list[TestResult continue cfg_a = bytecode_to_control_flow_graph(bytecode_a) cfg_b = bytecode_to_control_flow_graph(bytecode_b) - block_graph_a = CFG.from_graph(cfg_a, bytecode_a, False) - block_graph_b = CFG.from_graph(cfg_b, bytecode_b, False) + block_graph_a = CFG.from_graph(cfg_a, bytecode_a, iterate=False) + block_graph_b = CFG.from_graph(cfg_b, bytecode_b, iterate=False) if not is_control_flow_equivalent(block_graph_a, block_graph_b): test_result = TestResult(False, "Different control flow", bytecode_a, bytecode_b) results.append(test_result)