123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377 |
- import os
- import json
- import shutil
- import random
- import click
- import yaml
- from shapely.geometry import Polygon
- from shapely.affinity import rotate
- from ImageElement import ImageElement
- def preprocessing_for_yolov8_obb_model(coco_json: str, lang_ru=False):
- """
- Checks for Oriented Bounding Boxes in COCO format. If found,
- replaces the bbox and rotation of each object with the coordinates of four points in the segmentation section.
-
- Args:
- - coco_json (str): Path to the file containing COCO data in JSON format.
- - lang_ru (bool): If True, all comments will be in Russian (otherwise in English).
- """
- # Loading COCO data from file
- with open(coco_json, 'r') as f:
- coco_data = json.load(f)
- # Getting the list of annotations from COCO
- annotations = coco_data['annotations']
- changes = 0
- # Iterating through the annotations
- for annotation in annotations:
- segmentation = annotation['segmentation']
- # If segmentation is empty and bbox contains information, perform the operation
- if not segmentation and annotation['bbox']:
- bbox = annotation['bbox']
- rotation_angle = annotation['attributes']['rotation'] # Assumes rotation information is available
- # Converting bbox to x, y, width, height format
- x, y, width, height = bbox
- # Creating a rotated rectangle
- rectangle = Polygon([(x, y), (x + width, y), (x + width, y + height), (x, y + height)])
- # Rotating the rectangle
- rotated_rectangle = rotate(rectangle, rotation_angle, origin='center')
- # Getting the coordinates of the vertices of the rotated rectangle
- new_segmentation = list(rotated_rectangle.exterior.coords)
- # Keeping only the vertex coordinates (first 4 elements)
- new_segmentation = new_segmentation[:4]
- # Converting the list of vertices into the desired format
- flattened_segmentation = [coord for point in new_segmentation for coord in point]
- # Updating the value in the annotation
- annotation['segmentation'] = [flattened_segmentation]
- changes += 1
- if changes > 0:
- if lang_ru:
- print(f'Было обнаружено {changes} Oriented Bounding Boxes в файле {coco_json}')
- else:
- print(f'Found {changes} Oriented Bounding Boxes in the file {coco_json}')
- # Saving the updated data to the file
- with open(coco_json, 'w') as f:
- json.dump(coco_data, f)
- @click.command()
- @click.option(
- "--coco_dataset",
- default="COCO_dataset",
- help="Folder with COCO 1.0 format dataset (can be exported from CVAT). Default is COCO_dataset",
- type=str,
- )
- @click.option(
- "--yolo_dataset",
- default="YOLO_dataset",
- help="Folder with the resulting YOLOv8 format dataset. Default is YOLO_dataset",
- type=str,
- )
- @click.option(
- "--print_info",
- default=False,
- help="Enable/Disable processing log output mode. Default is disabled",
- type=bool,
- )
- @click.option(
- "--autosplit",
- help="Enable/Disable automatic split into train/val. Default is disabled (uses the CVAT annotations)",
- default=False,
- type=bool,
- )
- @click.option(
- "--percent_val",
- help="Percentage of data for validation when using autosplit=True. Default is 25%",
- default=25,
- type=float,
- )
- @click.option(
- "--lang_ru",
- help="Sets the Russian language of comments, if selected value is True. English by default",
- default=False,
- type=bool,
- )
- def main(**kwargs):
- # ------------------ ARG parse ------------------
- coco_dataset_path = kwargs["coco_dataset"]
- yolo_dataset_path = kwargs["yolo_dataset"]
- print_info = kwargs["print_info"]
- autosplit = kwargs["autosplit"]
- percent_val = kwargs["percent_val"]
- lang_ru = kwargs["lang_ru"]
- coco_annotations_path = os.path.join(coco_dataset_path, 'annotations')
- coco_images_path = os.path.join(coco_dataset_path, 'images')
- # Check the presence of the dataset
- if not os.path.exists(coco_dataset_path):
- if lang_ru:
- raise FileNotFoundError(f"Папка с COCO датасетом '{coco_images_path}' не найдена.")
- else:
- raise FileNotFoundError(f"The COCO dataset folder '{coco_images_path}' was not found.")
- # Check the presence of the images folder
- if not os.path.exists(coco_images_path):
- if lang_ru:
- raise FileNotFoundError(f"Папка с изображениями '{coco_images_path}' не найдена. "
- f"Убедитесь, что вы загрузили разметку COCO так, чтобы имелась папка со всеми изображениями.")
- else:
- raise FileNotFoundError(f"The images folder '{coco_images_path}' was not found. "
- f"Make sure you have uploaded COCO annotations so that there is a folder with all images.")
- # Check if the annotations folder exists
- if not os.path.exists(coco_annotations_path):
- if lang_ru:
- raise FileNotFoundError(f"The folder with json files '{coco_annotations_path}' was not found.")
- else:
- raise FileNotFoundError(f"Папка с json файлами '{coco_annotations_path}' не найдена.")
- list_of_image_elements = []
- list_of_images_path = []
- # Get a list of all files in the annotations folder
- annotation_files = os.listdir(coco_annotations_path)
- shutil.rmtree(yolo_dataset_path, ignore_errors=True) # Clear old data in the folder
- if autosplit:
- for folder_path in ['images', 'labels']:
- for type in ['validation', 'train']:
- path_create=os.path.join(yolo_dataset_path, type, folder_path)
- os.makedirs(path_create, exist_ok=True)
- ### Check for duplicates in different subsets ###
- # Create a dictionary to store files and their corresponding JSON files
- file_json_mapping = {}
- # Iterate through annotation files
- for annotation_file in annotation_files:
- json_file_path = os.path.join(coco_annotations_path, annotation_file)
- with open(json_file_path, 'r') as f:
- coco_data = json.load(f)
- # Get the list of images from JSON
- images = coco_data['images']
- # Iterate through images and update the file_json_mapping dictionary
- for image in images:
- file_name = image['file_name']
- if file_name not in file_json_mapping:
- file_json_mapping[file_name] = [annotation_file]
- else:
- file_json_mapping[file_name].append(annotation_file)
- # Check if any file has more than one occurrence
- for file_name, json_files in file_json_mapping.items():
- if len(json_files) > 1:
- if lang_ru:
- print(f"Файл {file_name} встречается в следующих JSON файлах: {json_files}")
- print(f'В каком-либо из JSON файлов удалите в разделе "images" словарь ' \
- f'с описанием этой фотографии, иначе будет ошибка при выполнении кода')
- raise SystemExit
- else:
- print(f"The file {file_name} appears in the following JSON files: {json_files}")
- print(f"Remove the dictionary describing this photo from the 'images' section in " \
- f"one of the JSON files, otherwise there will be an error when running the code.")
- raise SystemExit
- ### Run the main code: ###
-
- # Iterate through annotation files
- for annotation_file in annotation_files:
- # Parse the image file name from the annotation file
- type_data = os.path.splitext(annotation_file)[0].split('_')[-1]
- json_file_path = os.path.join(coco_annotations_path, annotation_file) # path to the json file
- # Preprocessing for YOLOv8-obb
- preprocessing_for_yolov8_obb_model(coco_json=json_file_path, lang_ru=lang_ru)
- # Create folder if it doesn't exist
- if not autosplit:
- for folder_path in ['images', 'labels']:
- path_create=os.path.join(yolo_dataset_path, type_data.lower(), folder_path)
- os.makedirs(path_create, exist_ok=True)
- # Open coco json
- with open(json_file_path, 'r') as f:
- coco_data = json.load(f)
- # Get the list of images from JSON
- images = coco_data['images']
- # Create a dictionary with class information
- coco_categories = coco_data['categories']
- categories_dict = {category['id']-1: category['name'] for category in coco_categories}
- # Print information
- if print_info:
- if lang_ru:
- print(f'Осуществляется обработка {annotation_file}')
- print(f'Имеющиеся классы: {categories_dict}')
- else:
- print(f'Processing {annotation_file}')
- print(f'Available classes: {categories_dict}')
- print('-----------------\n')
- #### Additional check for the presence of all image files
- # Get the list of image files with annotations in COCO
- annotated_images = set([entry['file_name'] for entry in coco_data['images']])
- # Get the list of files in the images folder
- all_images = set(os.listdir(coco_images_path))
- # Check that all images from COCO are annotated
- if not annotated_images.issubset(all_images):
- missing_images = annotated_images - all_images
- if lang_ru:
- raise FileNotFoundError(f"Некоторые изображения, для которых есть разметка в {json_file_path}, отсутствуют в папке с изображениями. "
- f"Отсутствующие изображения: {missing_images}")
- else:
- raise FileNotFoundError(f"Some images annotated in {json_file_path} are missing from the images folder. "
- f"Missing images: {missing_images}")
-
- # Iterate through images and read annotations
- for image in images:
- image_id = image['id']
- file_name = image['file_name']
- path_image_initial = os.path.join(coco_images_path, file_name)
-
- # Find corresponding annotations for the image
- list_of_lists_annotations = [ann['segmentation'] for ann in coco_data['annotations'] if ann['image_id'] == image_id]
- try:
- annotations = [sublist[0] for sublist in list_of_lists_annotations]
- except:
- if lang_ru:
- print(f"В разметке фотографии {file_name} имеются объекты, не являющиеся полигонами. "\
- f"\nНеобходимо, чтобы все объекты для обучения YOLOv8-seg были размечены как полигоны! "\
- f"\nИсправьте это и заново выгрузите датасет.")
- else:
- print(f"The annotations for the image {file_name} contain objects that are not polygons. "\
- f"\nAll objects for training YOLOv8-seg must be annotated as polygons! "\
- f"\nPlease correct this and reload the dataset.")
- raise SystemExit
-
- classes = [ann['category_id']-1 for ann in coco_data['annotations'] if ann['image_id'] == image_id]
-
- if autosplit:
- # Generate a random number from 1 to 100
- random_number = random.randint(1, 100)
- # If the random number <= percent_val, then type_dataset = "validation", otherwise "train"
- type_dataset = "validation" if random_number <= percent_val else "train"
- else:
- type_dataset = type_data.lower()
- # Create an instance of the ImageElement class:
- element = ImageElement(
- path_image_initial=path_image_initial,
- path_label_initial=json_file_path,
- img_width=image['width'],
- img_height=image['height'],
- image_id=image_id,
- type_data=type_dataset,
- path_label_final=os.path.join(yolo_dataset_path, type_dataset,
- 'labels', os.path.splitext(file_name)[0]+'.txt'),
- path_image_final=os.path.join(yolo_dataset_path, type_dataset,
- 'images', file_name),
- classes_names=[categories_dict[cl] for cl in classes],
- classes_ids=classes,
- point_list=annotations,
- )
- list_of_image_elements.append(element)
- list_of_images_path.append(file_name)
- # Print information about ImageElement if necessary
- if print_info:
- print(element)
- ### Check for the presence of all images in the images folder
- # Get the list of files in the folder
- files_in_folder = set(os.listdir(coco_images_path))
- # Check that all files from the list are present in the folder
- missing_files = set(list_of_images_path) - files_in_folder
- extra_files = files_in_folder - set(list_of_images_path)
- # Display notification
- if missing_files:
- if lang_ru:
- print(f"Отсутствующие файлы в папке {coco_images_path}: {missing_files}")
- else:
- print(f"Missing files in the folder {coco_images_path}: {missing_files}")
- if extra_files:
- if lang_ru:
- print(f"Лишние файлы в папке {coco_images_path}: {extra_files}")
- else:
- print(f"Extra files in the folder {coco_images_path}: {extra_files}")
- # Creating data.yaml configuration:
- # Create a data structure for writing to data.yaml
- data_dict = {
- 'names': list(categories_dict.values()),
- 'nc': len(categories_dict),
- 'test': 'test/images',
- 'train': 'train/images',
- 'val': 'validation/images'
- }
- if autosplit:
- data_dict['test'] = 'validation/images'
- # Path to the data.yaml file
- data_yaml_path = f"{yolo_dataset_path}/data.yaml"
- # Write data to the data.yaml file
- with open(data_yaml_path, 'w') as file:
- yaml.dump(data_dict, file, default_flow_style=False)
- # Creating labels and copying images to folders:
- for element in list_of_image_elements:
- # Copying the image
- shutil.copy(element.path_image_initial, element.path_image_final)
- # Creating a YOLO annotation file
- with open(element.path_label_final, 'w') as yolo_label_file:
- for i in range(len(element.classes_ids)):
- class_id = element.classes_ids[i]
- class_name = element.classes_names[i]
- points = element.point_list[i]
- output_string = f'{class_id}'
- for i, point in enumerate(points):
- if i % 2 == 0:
- result = round(point / element.img_width, 9)
- else:
- result = round(point / element.img_height, 9)
- output_string += f' {result:.6f}'
- # Writing data to the file
- yolo_label_file.write(output_string+'\n')
-
- if lang_ru:
- print(f"Итоговая разметка в формате YOLOv8 расположена в папке - {yolo_dataset_path}.")
- else:
- print(f"The final YOLOv8 format annotations are located in the folder - {yolo_dataset_path}.")
- if __name__ == "__main__":
- main()
|