From ff010d2f42c6df22aa505f195eb5ae1e605b0e58 Mon Sep 17 00:00:00 2001 From: Josh Wiedemeier Date: Tue, 9 Dec 2025 10:17:01 -0600 Subject: [PATCH] make dataset generation pylingual dependency editable --- dev_scripts/cflow.py | 2 +- dev_scripts/dataset_generation/bytecode2csv.py | 2 +- dev_scripts/dataset_generation/create_code_dataset.py | 2 +- dev_scripts/prepare_dataset.py | 2 +- dev_scripts/segmentation/tokenize_seg.py | 2 +- dev_scripts/segmentation/train_mlm.py | 2 +- dev_scripts/train_models.py | 11 +++++++---- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/dev_scripts/cflow.py b/dev_scripts/cflow.py index 40dafb1..6c3cbb8 100644 --- a/dev_scripts/cflow.py +++ b/dev_scripts/cflow.py @@ -4,7 +4,7 @@ # "pylingual", # ] # [tool.uv.sources] -# pylingual = { path = "../" } +# pylingual = { path = "../", editable = true } # /// import contextlib diff --git a/dev_scripts/dataset_generation/bytecode2csv.py b/dev_scripts/dataset_generation/bytecode2csv.py index 8a2288e..ee5b98b 100644 --- a/dev_scripts/dataset_generation/bytecode2csv.py +++ b/dev_scripts/dataset_generation/bytecode2csv.py @@ -4,7 +4,7 @@ # "pylingual", # ] # [tool.uv.sources] -# pylingual = { path = "../../" } +# pylingual = { path = "../../", editable = true } # /// import csv diff --git a/dev_scripts/dataset_generation/create_code_dataset.py b/dev_scripts/dataset_generation/create_code_dataset.py index cae61bb..b6aad4b 100644 --- a/dev_scripts/dataset_generation/create_code_dataset.py +++ b/dev_scripts/dataset_generation/create_code_dataset.py @@ -4,7 +4,7 @@ # "pylingual", # ] # [tool.uv.sources] -# pylingual = { path = "../../" } +# pylingual = { path = "../../", editable = true } # /// import itertools diff --git a/dev_scripts/prepare_dataset.py b/dev_scripts/prepare_dataset.py index 85e11c2..4dc3fb9 100644 --- a/dev_scripts/prepare_dataset.py +++ b/dev_scripts/prepare_dataset.py @@ -4,7 +4,7 @@ # "pylingual", # ] # [tool.uv.sources] -# pylingual = { path = "../" } +# pylingual = { path = "../", editable = true } # /// import json diff --git a/dev_scripts/segmentation/tokenize_seg.py b/dev_scripts/segmentation/tokenize_seg.py index 7679727..cf51022 100644 --- a/dev_scripts/segmentation/tokenize_seg.py +++ b/dev_scripts/segmentation/tokenize_seg.py @@ -4,7 +4,7 @@ # "pylingual", # ] # [tool.uv.sources] -# pylingual = { path = "../../" } +# pylingual = { path = "../../", editable = true } # /// import ast diff --git a/dev_scripts/segmentation/train_mlm.py b/dev_scripts/segmentation/train_mlm.py index 3c03d05..653c37b 100644 --- a/dev_scripts/segmentation/train_mlm.py +++ b/dev_scripts/segmentation/train_mlm.py @@ -4,7 +4,7 @@ # "pylingual", # ] # [tool.uv.sources] -# pylingual = { path = "../../" } +# pylingual = { path = "../../", editable = true } # /// import logging diff --git a/dev_scripts/train_models.py b/dev_scripts/train_models.py index 019e563..4b649f9 100644 --- a/dev_scripts/train_models.py +++ b/dev_scripts/train_models.py @@ -4,7 +4,7 @@ # "pylingual", # ] # [tool.uv.sources] -# pylingual = { path = "../" } +# pylingual = { path = "../", editable = true } # /// import logging @@ -27,7 +27,8 @@ def train_segmentation(segmentation_config_path: pathlib.Path, logger: logging.L logger.info("training masked language model...") subprocess.run( [ - "uv", "run", + "uv", + "run", "torchrun", f"--nnodes={nnodes}", f"--nproc-per-node={nproc_per_node}", @@ -47,7 +48,8 @@ def train_segmentation(segmentation_config_path: pathlib.Path, logger: logging.L logger.info("training segmentation model...") subprocess.run( [ - "uv", "run", + "uv", + "run", "torchrun", f"--nnodes={nnodes}", f"--nproc-per-node={nproc_per_node}", @@ -74,7 +76,8 @@ def train_statement(statement_config_path: pathlib.Path, logger: logging.Logger, logger.info("training statement model...") subprocess.run( [ - "uv", "run", + "uv", + "run", "torchrun", f"--nnodes={nnodes}", f"--nproc-per-node={nproc_per_node}",