"""Pseudolabel video shards with YOLO bounding boxes.

For every ``*.tar`` shard found in the input directory, each ``.mp4`` member
is sampled every ``--nth_frame`` frames, the sampled frames are run through a
YOLO detector, and the detections are stored as ``<video>.jsonl`` (one JSON
line per sampled frame, each line a list of boxes) inside an output tar that
carries the same name as the input shard.

Example:
    python pseudolabel_video_det.py -I /data/filtered_raw/hdvila/train
"""
import argparse
import os
import tarfile
import tempfile
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Sequence

# Heavy third-party packages (cv2, ultralytics, jsonlines) are imported inside
# the functions that use them, so `--help` and argument errors are reported
# before the detector stack is loaded.

_TRUE_STRINGS = frozenset({"1", "true", "t", "yes", "y", "on"})
_FALSE_STRINGS = frozenset({"0", "false", "f", "no", "n", "off", ""})


def parse_bool(value: str) -> bool:
    """Convert a command-line string such as ``"true"`` or ``"0"`` to a bool.

    ``argparse`` with ``type=bool`` treats every non-empty string (including
    ``"False"``) as ``True``; this parser is used instead.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognised boolean.
    """
    normalized = str(value).strip().lower()
    if normalized in _TRUE_STRINGS:
        return True
    if normalized in _FALSE_STRINGS:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def positive_int(value: str) -> int:
    """Convert a command-line string to an integer that must be >= 1.

    Raises:
        argparse.ArgumentTypeError: if *value* is not an integer or is < 1.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected an integer, got {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"expected an integer >= 1, got {number}")
    return number


def build_arg_parser() -> argparse.ArgumentParser:
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser(description='Process video frames with YOLO')
    parser.add_argument(
        "-I", '--input_dir', type=str, required=True,
        help='Path to the source dir containing tar files of video shards. '
             'Should be a subdir of `filtered_raw/`.',
    )
    parser.add_argument(
        "-O", '--output_dir', type=str, default=None,
        help='Path to the target dir to save the bounding box outputs. '
             'Default None means it will be inferred.',
    )
    parser.add_argument(
        "-M", '--yolo_path', type=str,
        default="/store/swissai/a08/pseudolabelers/yolov8n.pt",
        help='Path to the YOLO model',
    )
    # nth_frame is used as a modulus, so 0 would raise ZeroDivisionError.
    parser.add_argument(
        '--nth_frame', type=positive_int, default=30,
        help='Select every nth frame (default: 30)',
    )
    parser.add_argument(
        '--max_frames', type=int, default=None,
        help='Maximum number of frames to process (default: None, process all)',
    )
    # Accepts `--save_frames`, `--save_frames true` and `--save_frames False`.
    parser.add_argument(
        '--save_frames', type=parse_bool, nargs="?", const=True, default=False,
        help='Whether to save frames',
    )
    return parser


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse and validate the command line.

    Args:
        argv: argument list; ``None`` means ``sys.argv[1:]``.

    Raises:
        ValueError: if ``--input_dir`` does not contain ``filtered_raw``.
    """
    args = build_arg_parser().parse_args(argv)
    if "filtered_raw" not in args.input_dir:
        raise ValueError(
            f"Expected input dir to be a subdir of `filtered_raw/`, instead received {args.input_dir}."
        )
    return args


def resolve_output_dir(input_dir: str, output_dir: Optional[str] = None) -> str:
    """Return the directory that receives the output tars.

    An explicit *output_dir* wins. Otherwise the path is derived from
    *input_dir* by replacing ``filtered_raw`` with ``4m`` and appending
    ``video_det``.
    """
    if output_dir is not None:
        return output_dir
    return os.path.join(input_dir.replace("filtered_raw", "4m"), "video_det")


def extract_videos(shard_path: str, dest_dir: str) -> None:
    """Extract only the regular ``.mp4`` members of *shard_path* into *dest_dir*.

    Other members are never read by this script, so skipping them saves
    scratch space.
    """
    with tarfile.open(shard_path, "r") as tar:
        members = [
            member for member in tar.getmembers()
            if member.isfile() and member.name.endswith(".mp4")
        ]
        extract_kwargs: Dict[str, Any] = {"numeric_owner": True}
        # Extraction filters only exist on newer Python releases; when
        # available, "data" rejects members that would escape dest_dir.
        if hasattr(tarfile, "data_filter"):
            extract_kwargs["filter"] = "data"
        tar.extractall(path=dest_dir, members=members, **extract_kwargs)


def find_videos(root: str) -> Iterator[str]:
    """Yield the ``.mp4`` files below *root* in a deterministic, sorted order."""
    for dirpath, dirnames, filenames in os.walk(root):
        dirnames.sort()  # in-place sort steers os.walk's traversal order
        for filename in sorted(filenames):
            if filename.endswith(".mp4"):
                yield os.path.join(dirpath, filename)


def sample_frames(
    video_path: str,
    frames_dir: str,
    nth_frame: int,
    max_frames: Optional[int] = None,
) -> List[str]:
    """Write every *nth_frame*-th frame of *video_path* as a JPEG.

    Args:
        video_path: video file to decode.
        frames_dir: existing directory that receives the JPEG files.
        nth_frame: sampling stride (frame 0 is always sampled).
        max_frames: stop after this many sampled frames; ``None`` or ``0``
            means no limit.

    Returns:
        Paths of the written frames, in temporal order. Empty if the video
        cannot be decoded.

    Raises:
        OSError: if a frame cannot be written to disk.
    """
    import cv2

    stem = Path(video_path).stem
    frame_paths: List[str] = []
    capture = cv2.VideoCapture(video_path)
    try:
        frame_index = 0
        while True:
            success, frame = capture.read()
            if not success:
                break
            if frame_index % nth_frame == 0:
                frame_path = os.path.join(frames_dir, f"{stem}_frame_{frame_index}.jpg")
                if not cv2.imwrite(frame_path, frame):
                    raise OSError(f"could not write frame to {frame_path}")
                frame_paths.append(frame_path)
            frame_index += 1
            if max_frames and len(frame_paths) >= max_frames:
                break
    finally:
        # Release the decoder even when writing a frame fails.
        capture.release()
    return frame_paths


def detections_for_frame(result: Any) -> List[Dict[str, Any]]:
    """Convert one YOLO result into a list of JSON-serialisable box records.

    Each record holds ``bbox`` (the box's ``xyxy`` coordinates as a list),
    ``confidence``, ``class`` (integer id) and ``class_name``.
    """
    records = []
    for box in result.boxes:
        class_id = int(box.cls.item())
        records.append({
            "bbox": box.xyxy[0].tolist(),
            "confidence": box.conf.item(),
            "class": class_id,
            "class_name": result.names[class_id],
        })
    return records


def label_video(
    model: Any,
    video_path: str,
    work_dir: str,
    nth_frame: int,
    max_frames: Optional[int] = None,
    save_frames: bool = False,
) -> List[List[Dict[str, Any]]]:
    """Run the detector on the sampled frames of one video.

    Args:
        model: loaded YOLO model (callable on a list of image paths).
        video_path: video file to label.
        work_dir: scratch directory; sampled frames live there only while
            this function runs.
        nth_frame: sampling stride.
        max_frames: optional cap on the number of sampled frames.
        save_frames: also save an annotated image per sampled frame.

    Returns:
        One list of box records per sampled frame; empty if no frame could
        be read.
    """
    stem = Path(video_path).stem
    with tempfile.TemporaryDirectory(dir=work_dir) as frames_dir:
        frame_paths = sample_frames(video_path, frames_dir, nth_frame, max_frames)
        if not frame_paths:
            # The detector is not called with an empty source list.
            print(f"warning: no frames could be read from {video_path}; writing empty labels")
            return []

        # stream=True yields results one at a time instead of keeping every
        # frame's result in memory. project/name point at the scratch dir so
        # that anything the predictor might write stays out of the cwd.
        results = model(
            frame_paths,
            stream=True,
            project=os.path.join(work_dir, "labeled_frames"),
            name=stem,
        )
        per_frame = []
        for i, result in enumerate(results):
            if save_frames:
                # NOTE(review): as before, this path is relative to the
                # current working directory — confirm that is intended.
                result.save(filename=f'{stem}_labeled_frame_{i}.jpg')
            per_frame.append(detections_for_frame(result))
        return per_frame


def write_jsonl_to_tar(
    output_tar: tarfile.TarFile,
    arcname: str,
    rows: List[Any],
    work_dir: str,
) -> None:
    """Write *rows* as a JSONL file and add it to *output_tar* as *arcname*.

    The intermediate file is created in *work_dir* (scratch space) rather than
    next to the output tar, so a crash cannot leave stray ``.jsonl`` files in
    the output directory.
    """
    import jsonlines

    jsonl_path = os.path.join(work_dir, arcname)
    with jsonlines.open(jsonl_path, mode='w') as writer:
        writer.write_all(rows)
    output_tar.add(jsonl_path, arcname=arcname)
    os.remove(jsonl_path)


def process_shard(
    model: Any,
    shard_path: str,
    output_tar_path: str,
    nth_frame: int,
    max_frames: Optional[int] = None,
    save_frames: bool = False,
) -> None:
    """Label every video in *shard_path* and write the results to *output_tar_path*."""
    with tempfile.TemporaryDirectory() as work_dir:
        print('created temporary directory', work_dir)
        # Videos get their own subdirectory so that scratch files created
        # while labelling are never picked up by the directory walk.
        videos_dir = os.path.join(work_dir, "videos")
        os.makedirs(videos_dir)
        extract_videos(shard_path, videos_dir)

        with tarfile.open(output_tar_path, "w") as output_tar:
            for video_path in find_videos(videos_dir):
                rows = label_video(
                    model, video_path, work_dir, nth_frame, max_frames, save_frames
                )
                write_jsonl_to_tar(
                    output_tar, f"{Path(video_path).stem}.jsonl", rows, work_dir
                )


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Entry point: label every ``*.tar`` shard in the input directory."""
    args = parse_args(argv)
    output_dir = resolve_output_dir(args.input_dir, args.output_dir)
    os.makedirs(output_dir, exist_ok=True)

    from ultralytics import YOLO

    model = YOLO(args.yolo_path)  # pretrained YOLOv8n model by default

    for shard_name in sorted(os.listdir(args.input_dir)):
        print(shard_name)
        if not shard_name.endswith(".tar"):
            continue
        # The output tar keeps the shard's base name (its shard number).
        shard_number = os.path.splitext(shard_name)[0]
        process_shard(
            model,
            os.path.join(args.input_dir, shard_name),
            os.path.join(output_dir, f"{shard_number}.tar"),
            args.nth_frame,
            args.max_frames,
            args.save_frames,
        )

    print("Frame extraction, pseudolabeling, and JSONL export complete.")


if __name__ == "__main__":
    main()