From 8f787c6aaff7d298b836bff78aadfd56b7a7c19e Mon Sep 17 00:00:00 2001
From: markus583
Date: Tue, 18 Jun 2024 13:15:11 +0000
Subject: [PATCH] git rm

---
 calc_compression_rate.py | 31 ---------------
 commands.txt             | 24 ------------
 run.sh                   |  2 -
 run_adapter.sh           |  1 -
 run_eval.sh              | 18 ---------
 run_eval_kmer.sh         | 22 -----------
 tpu_START.sh             |  9 -----
 tpu_starter.sh           |  4 --
 xla_spawn.py             | 83 ----------------------------------------
 9 files changed, 194 deletions(-)
 delete mode 100644 calc_compression_rate.py
 delete mode 100644 commands.txt
 delete mode 100755 run.sh
 delete mode 100755 run_adapter.sh
 delete mode 100755 run_eval.sh
 delete mode 100755 run_eval_kmer.sh
 delete mode 100755 tpu_START.sh
 delete mode 100755 tpu_starter.sh
 delete mode 100644 xla_spawn.py

diff --git a/calc_compression_rate.py b/calc_compression_rate.py
deleted file mode 100644
index 9acc5c1a..00000000
--- a/calc_compression_rate.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from datasets import load_dataset
-from transformers import XLMRobertaTokenizer
-
-def calculate_compression_rate(dataset_name):
-    # Load the dataset
-    dataset = load_dataset(dataset_name, split='train')
-
-    # Initialize the tokenizer
-    tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
-
-    total_chars = 0
-    total_tokens = 0
-
-    # Iterate over the dataset
-    for sample in dataset:
-        text = sample['text']
-        total_chars += len(text)
-
-        # Tokenize the text
-        tokens = tokenizer.tokenize(text)
-        total_tokens += len(tokens)
-
-    # Calculate the average compression rate
-    avg_compression_rate = total_chars / total_tokens if total_tokens > 0 else 0
-
-    return avg_compression_rate
-
-# Example dataset
-dataset_name = "markus583/mC4-TEST"
-compression_rate = calculate_compression_rate(dataset_name)
-print(compression_rate)
diff --git a/commands.txt b/commands.txt
deleted file mode 100644
index 3c702478..00000000
--- a/commands.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-# .bashrc
-export PATH=$PATH:~/.local/bin
-
-export XRT_TPU_CONFIG="localservice;0;localhost:51011"
-
-export XLA_USE_BF16=0
-
-export TPU_NUM_DEVICES=8
-
-export HF_DATASETS_CACHE=/dev/shm/cache
-
-# data
-gcloud auth login
-gsutil -m cp -r gs://trc-transfer-data/sentence/data/eval.pth data/
-
-# cleanup
-pkill -e python3
-(until no more)
-or
-watch -n1 pkill -e python3
-
-# for debugging:
-
-os.environ["PJRT_DEVICE"] = "None"
diff --git a/run.sh b/run.sh
deleted file mode 100755
index d3ba4a89..00000000
--- a/run.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-# TODO: cleanup in case of no .arrow files but cache-* files available.
-python3 ~/wtpsplit/xla_spawn.py --num_cores ${TPU_NUM_DEVICES} wtpsplit/train/train.py $1
\ No newline at end of file
diff --git a/run_adapter.sh b/run_adapter.sh
deleted file mode 100755
index e0d58697..00000000
--- a/run_adapter.sh
+++ /dev/null
@@ -1 +0,0 @@
-python3 ~/wtpsplit/xla_spawn.py --num_cores ${TPU_NUM_DEVICES} wtpsplit/train/train_adapter_parallel.py $1
\ No newline at end of file
diff --git a/run_eval.sh b/run_eval.sh
deleted file mode 100755
index 208e8187..00000000
--- a/run_eval.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-# Check if sufficient arguments are provided
-if [[ $# -lt 2 ]]; then
-    echo "Usage: $0 MODEL_PATH 'threshold_list'"
-    echo "Example: $0 /path/to/model '0.1 0.2 0.3'"
-    exit 1
-fi
-
-# Assign arguments to variables
-MODEL_PATH="$1"
-threshold_list=($2)
-
-# Loop over threshold_list
-for threshold in "${threshold_list[@]}"; do
-    # Execute the Python script
-    python3 wtpsplit/evaluation/intrinsic.py --model_path "$MODEL_PATH" --threshold "$threshold" --keep_logits
-done
\ No newline at end of file
diff --git a/run_eval_kmer.sh b/run_eval_kmer.sh
deleted file mode 100755
index 3fbc473f..00000000
--- a/run_eval_kmer.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-# Check if sufficient arguments are provided
-if [[ $# -lt 3 ]]; then
-    echo "Usage: $0 MODEL_PATH 'k_list' 'threshold_list'"
-    echo "Example: $0 /path/to/model '1 2 3' '0.1 0.2 0.3'"
-    exit 1
-fi
-
-# Assign arguments to variables
-MODEL_PATH="$1"
-k_list=($2)
-threshold_list=($3)
-
-# Loop over k_list
-for k in "${k_list[@]}"; do
-    # Loop over threshold_list
-    for threshold in "${threshold_list[@]}"; do
-        # Execute the Python script
-        python3 wtpsplit/evaluation/intrinsic_pairwise.py --model_path "$MODEL_PATH" --k "$k" --threshold "$threshold" --keep_logits
-    done
-done
\ No newline at end of file
diff --git a/tpu_START.sh b/tpu_START.sh
deleted file mode 100755
index cc04404b..00000000
--- a/tpu_START.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-TPU_VM_NAME="v3-8_4-1.13" # Name of the TPU VM
-ZONE="europe-west4-a" # Zone
-
-# Create the TPU VM, retry if it fails
-until gcloud compute tpus tpu-vm start "$TPU_VM_NAME" --zone="$ZONE"; do
-    sleep 1
-done
\ No newline at end of file
diff --git a/tpu_starter.sh b/tpu_starter.sh
deleted file mode 100755
index f8d5ba55..00000000
--- a/tpu_starter.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-for var in "$@"
-do
-    until gcloud compute tpus tpu-vm create $var --zone=europe-west4-a --accelerator-type=v3-8 --version=tpu-vm-pt-1.13; do sleep 3; done
-done
\ No newline at end of file
diff --git a/xla_spawn.py b/xla_spawn.py
deleted file mode 100644
index 5df6bfa2..00000000
--- a/xla_spawn.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-""" -A simple launcher script for TPU training - -Inspired by https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py - -:: - >>> python xla_spawn.py --num_cores=NUM_CORES_YOU_HAVE - YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other - arguments of your training script) - -""" - - -import importlib -import sys -from argparse import REMAINDER, ArgumentParser -from pathlib import Path - -import torch_xla.distributed.xla_multiprocessing as xmp - - -def parse_args(): - """ - Helper function parsing the command line options - @retval ArgumentParser - """ - parser = ArgumentParser( - description=( - "PyTorch TPU distributed training launch helper utility that will spawn up multiple distributed processes" - ) - ) - - # Optional arguments for the launch helper - parser.add_argument("--num_cores", type=int, default=1, help="Number of TPU cores to use (1 or 8).") - - # positional - parser.add_argument( - "training_script", - type=str, - help=( - "The full path to the single TPU training " - "program/script to be launched in parallel, " - "followed by all the arguments for the " - "training script" - ), - ) - - # rest from the training program - parser.add_argument("training_script_args", nargs=REMAINDER) - - return parser.parse_args() - - -def main(): - args = parse_args() - - # Import training_script as a module. - script_fpath = Path(args.training_script) - sys.path.append(str(script_fpath.parent.resolve())) - mod_name = script_fpath.stem - mod = importlib.import_module(mod_name) - - # Patch sys.argv - sys.argv = [args.training_script] + args.training_script_args + ["--tpu_num_cores", str(args.num_cores)] - - xmp.spawn(mod._mp_fn, args=(), nprocs=args.num_cores) - - -if __name__ == "__main__": - main()