唐山网站关键词优化,网络营销的形式网站营销,郑州网站建设亻汉狮网络,hui怎么做网站

在学习工作的过程中，有时会需要自己新建数据集，向训练数据中添加新的数据：先使用已有模型对新数据进行检测，得到yolov5对应的txt文件，之后转成xml，使用标注工具对数据进行校正。后续再将xml转成yolov5训练使用的txt格式。

以下是使用多进程和多线程两种方式实现的txt和xml互转的代码。
xml -> txt
# -*- coding: UTF-8 -*-
"""
@Project : yolov5_relu_fire_smoke_v1.3
@IDE     : PyCharm
@Author  : mufeng
@Date    : 2024/2/22 15:58

Convert VOC xml annotations into the txt format used for YOLO training.
The xml files store boxes as [x1, y1, x2, y2].
YOLO training uses normalized [xn, yn, wn, hn].

data_root
|----annotations
|----images
|----labels
"""
import os
import multiprocessing
from concurrent import futures
from typing import List, Tuple
from copy import deepcopy

import xml.etree.ElementTree as ET
import cv2
import numpy as np


def decodeVocAnnotation(voc_xml_path, class_index_dict):
    """Parse one Pascal VOC xml file into a list of labelled boxes.

    :param voc_xml_path: path of the xml file; must end with ``.xml``
    :param class_index_dict: mapping ``class name -> class index``
    :return: list of ``(class_index, xmin, ymin, xmax, ymax)`` tuples with
        integer corner coordinates. An empty list means the file contains no
        ``object`` element (a background image).
    :raises ValueError: if ``voc_xml_path`` does not end with ``.xml``
    :raises KeyError: if an object name is missing from ``class_index_dict``
    """
    if not voc_xml_path.endswith(".xml"):
        raise ValueError("voc_xml_path must endswith .xml")

    # The context manager closes the file even when parsing raises.
    with open(voc_xml_path, "r", encoding="utf-8") as xml_file:
        root = ET.ElementTree().parse(xml_file)

    information = []
    for obj in root.iter("object"):
        name = obj.find("name").text
        box = obj.find("bndbox")
        # Some annotation tools write "12.0" instead of "12"; going through
        # float() accepts both spellings.
        corners = [
            int(float(box.find(tag).text))
            for tag in ("xmin", "ymin", "xmax", "ymax")
        ]
        information.append((class_index_dict[name], *corners))
    return information


def xyxy2xywh(matrix):
    """Convert corner boxes to centre/size boxes.

    :param matrix: numpy array of shape ``(n, 4)`` holding ``x1, y1, x2, y2``
    :return: numpy array of shape ``(n, 4)`` holding
        ``center_x, center_y, width, height``
    :raises ValueError: if ``matrix`` is not of shape ``(n, 4)``
    """
    if matrix.ndim != 2 or matrix.shape[1] != 4:
        raise ValueError("Input matrix must have shape (n, 4)")

    center_x = (matrix[:, 0] + matrix[:, 2]) / 2
    center_y = (matrix[:, 1] + matrix[:, 3]) / 2
    # abs() keeps the size positive even if the corners are swapped.
    width = np.abs(matrix[:, 2] - matrix[:, 0])
    height = np.abs(matrix[:, 3] - matrix[:, 1])
    return np.column_stack((center_x, center_y, width, height))


def _label_sibling_path(image_path, target_dir, target_suffix):
    """Map ``.../images/sub/a.jpg`` to ``.../<target_dir>/sub/a<target_suffix>``."""
    stem = os.path.splitext(image_path)[0]
    marker = f"{os.sep}images{os.sep}"
    head, found, tail = stem.rpartition(marker)
    if not found:
        # No standalone "images" directory component: fall back to plain
        # textual substitution.
        return stem.replace("images", target_dir) + target_suffix
    # Only the last "images" directory is swapped, so names such as
    # "my_images" or an extension repeated inside the file name stay intact.
    return f"{head}{os.sep}{target_dir}{os.sep}{tail}{target_suffix}"


def _convert_xml_to_txt(root, file, class_index_dict):
    """Convert the xml annotation of one image into a YOLO txt label file."""
    image_path = os.path.join(root, file)
    try:
        xml_path = _label_sibling_path(image_path, "annotations", ".xml")
        txt_path = _label_sibling_path(image_path, "labels", ".txt")
        if not os.path.exists(xml_path):
            return

        # Rows are: cls_index, x1, y1, x2, y2
        bbox = np.array(decodeVocAnnotation(xml_path, class_index_dict),
                        dtype=np.float32)
        if len(bbox) == 0:
            # Background image: no label file is written.
            return

        image = cv2.imread(image_path)
        if image is None:
            print(f"\n\033[31m{image_path} is None\033[0m")
            return
        print(f"\r\033[32m{image_path}\033[0m", end="")
        imh, imw = image.shape[:2]

        # xyxy -> xywh, then normalise by the image size.
        bbox[:, 1:] = xyxy2xywh(bbox[:, 1:])
        bbox[:, [1, 3]] /= imw
        bbox[:, [2, 4]] /= imh

        os.makedirs(os.path.dirname(txt_path), exist_ok=True)
        with open(txt_path, "w", encoding="utf-8") as wFile:
            for cls_id, x, y, w, h in bbox:
                wFile.write(f"{int(cls_id)} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")
    except Exception as e:
        # Best effort: one broken file must not abort the rest of the batch,
        # but the failure is reported instead of vanishing inside the pool.
        print(f"\n\033[31m{image_path} \n{e!r}\033[0m\n")


def run_thread(root, file, class_index_dict):
    """Thread-pool task: convert the annotation of a single image.

    :param root: directory that contains the image
    :param file: image file name
    :param class_index_dict: mapping ``class name -> class index``
    :return: None
    """
    _convert_xml_to_txt(root, file, class_index_dict)


def run_process(root_file, class_index_dict):
    """Process-pool task: convert the annotations of a batch of images.

    Creating a process is expensive, so each task handles many images.

    :param root_file: ``[(root, file), ...]``
    :param class_index_dict: mapping ``class name -> class index``
    :return: None
    """
    for root, file in root_file:
        _convert_xml_to_txt(root, file, class_index_dict)


def _iter_image_files(image_dir, exclude_dirs):
    """Yield ``(root, file)`` for every file below ``image_dir``."""
    for root, _, files in os.walk(image_dir):
        # Skip any directory whose path contains an excluded name.
        if any(exclude_dir in root for exclude_dir in exclude_dirs):
            continue
        for file in files:
            yield root, file


def _report_worker_error(exc):
    """Print an exception raised while dispatching or running a pool task."""
    print(f"\n\033[31mworker failed: {exc!r}\033[0m")


def main():
    """Convert every xml below ``data_root/annotations`` into a YOLO txt."""
    # Target names and their class indices must be known in advance.
    class_index_dict = {
        "fire": 0,
        "smoke": 1,
    }
    data_root = os.path.abspath(r"E:\CodeFiles\pycharm\YOLO\yolov5\my_test\data")
    # Directories to skip.
    exclude_dirs = [r"background", ]

    # NOTE: pools are hard to debug; set max_workers to 1 when debugging.
    max_workers = 6
    # run_type = "thread"   # multi-threading
    run_type = "process"    # multi-processing
    print(f"running use run_type={run_type}, max_workers:{max_workers}")

    image_dir = os.path.join(data_root, "images")
    if run_type == "thread":
        # Tasks are submitted one image at a time.
        with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            for root, file in _iter_image_files(image_dir, exclude_dirs):
                executor.submit(run_thread, root, file, class_index_dict)
        print("\nFinish ...")
    elif run_type == "process":
        # Number of images handled by one task.
        max_file_num = 1000
        pool = multiprocessing.Pool(processes=max_workers)
        batch = []
        for item in _iter_image_files(image_dir, exclude_dirs):
            batch.append(item)
            if len(batch) >= max_file_num:
                pool.apply_async(run_process, (batch, class_index_dict),
                                 error_callback=_report_worker_error)
                # Start a fresh list instead of clearing the submitted one,
                # so the queued task never sees its arguments change.
                batch = []
        if batch:
            # Flush the remainder that did not fill a whole batch.
            pool.apply_async(run_process, (batch, class_index_dict),
                             error_callback=_report_worker_error)
        pool.close()
        # Wait for all worker processes to finish.
        pool.join()
        print("\nFinish ...")
    else:
        print("run_type should be thread or process.")


if __name__ == "__main__":
    main()
txt -> xml
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Project : TestCode
@IDE     : PyCharm
@Author  : mufeng
@Date    : 2023/7/21 17:15

Convert yolov5 detection results into VOC xml files.
The xml files store boxes as [x1, y1, x2, y2].
The yolo detection results store boxes as normalized [xn, yn, wn, hn].
If the txt was saved with confidences, each row is
[class_index, xn, yn, wn, hn, conf].
Implemented with a thread pool / process pool.

data_root
|----annotations
|----images
|----labels
"""
import os
import multiprocessing
from concurrent import futures
from typing import List, Tuple
from copy import deepcopy

import cv2
import numpy as np
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom


def create_voc_xml(image_folder, image_filename, width: int, height: int, labels,
                   save_path, class_name_dict, conf_thresh_dict=None):
    """Write one Pascal VOC xml annotation file.

    :param image_folder: relative folder of the image (stored in ``<folder>``)
    :param image_filename: image file name, e.g. ``000001.jpg``
    :param width: image width
    :param height: image height
    :param labels: rows of ``[class_index, xmin, ymin, xmax, ymax]``, optionally
        followed by ``conf``
    :param save_path: directory the xml is written to
    :param class_name_dict: ``cls_index -> cls_name``
    :param conf_thresh_dict: ``cls_index -> conf_thresh``; rows whose confidence
        is below the threshold of their class are dropped. ``None`` disables
        filtering (a confidence column, if present, is ignored).
    :return: None
    :raises ValueError: if filtering is requested but a row has no confidence
    """
    root = ET.Element("annotation")
    ET.SubElement(root, "folder").text = str(image_folder)
    ET.SubElement(root, "filename").text = str(image_filename)

    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(width)
    ET.SubElement(size, "height").text = str(height)
    ET.SubElement(size, "depth").text = "3"  # number of channels

    for label in labels:
        label = np.asarray(label)
        # Only the first five columns describe the box, so a txt saved with
        # confidences also works when no threshold is given.
        class_index, x1, y1, x2, y2 = (int(value) for value in label[:5])
        if conf_thresh_dict is not None:
            if label.shape[0] < 6:
                raise ValueError(
                    "conf_thresh_dict was given but the label row has no conf column")
            if label[5] < conf_thresh_dict[class_index]:
                continue

        obj = ET.SubElement(root, "object")
        ET.SubElement(obj, "name").text = class_name_dict[class_index]
        ET.SubElement(obj, "pose").text = "Unspecified"
        ET.SubElement(obj, "truncated").text = "0"
        ET.SubElement(obj, "difficult").text = "0"
        bndbox = ET.SubElement(obj, "bndbox")
        for tag, value in (("xmin", x1), ("ymin", y1), ("xmax", x2), ("ymax", y2)):
            ET.SubElement(bndbox, tag).text = str(value)

    # Indent with four spaces to keep the xml readable.
    document = minidom.parseString(ET.tostring(root, encoding="utf-8"))
    pretty_xml = document.toprettyxml(indent=" " * 4)

    save_path = os.path.join(save_path, f"{os.path.splitext(image_filename)[0]}.xml")
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    # The generated xml declaration names no encoding, so readers assume
    # UTF-8; write UTF-8 explicitly rather than the platform default.
    with open(save_path, "w", encoding="utf-8") as xmlFile:
        xmlFile.write(pretty_xml)


def _convert_txt_to_xml(root, image_file, save_root, image_root, txt_root,
                        class_name_dict, conf_thresh_dict=None):
    """Convert the YOLO txt label of one image into a VOC xml file."""
    image_path = os.path.join(root, image_file)
    # Catch everything so one broken file does not stop the whole run.
    try:
        # Mirror the image's position below image_root under txt_root.
        rel_dir = os.path.relpath(root, image_root)
        stem = os.path.splitext(image_file)[0]
        txt_file = os.path.normpath(os.path.join(txt_root, rel_dir, stem + ".txt"))
        # Check for the label first; decoding an image is far more expensive.
        if not os.path.exists(txt_file):
            return

        image = cv2.imread(image_path)
        if image is None:
            print(f"\n\033[31mError {image_path}\033[0m")
            return
        imh, imw = image.shape[:2]

        # Rows are: class_index xn yn wn hn [conf]
        labels = np.loadtxt(txt_file, dtype=np.float32)
        if labels.size == 0:
            return
        # A single row is loaded as 1-D; make everything 2-D.
        labels = np.atleast_2d(labels)

        # xywhn -> xywh
        labels[:, [1, 3]] = labels[:, [1, 3]] * imw
        labels[:, [2, 4]] = labels[:, [2, 4]] * imh
        center = labels[:, 1:5].copy()

        # xywh -> xyxy
        corner = np.zeros_like(center)
        corner[:, 0] = center[:, 0] - center[:, 2] / 2  # xmin = x - w / 2
        corner[:, 1] = center[:, 1] - center[:, 3] / 2  # ymin = y - h / 2
        corner[:, 2] = center[:, 0] + center[:, 2] / 2  # xmax = x + w / 2
        corner[:, 3] = center[:, 1] + center[:, 3] / 2  # ymax = y + h / 2
        # Keep boxes inside the image.
        corner[:, [0, 2]] = np.clip(corner[:, [0, 2]], 0, imw)
        corner[:, [1, 3]] = np.clip(corner[:, [1, 3]], 0, imh)
        labels[:, 1:5] = corner

        create_voc_xml(root.replace(image_root + os.sep, ""),  # Z:\FireData\images\train -> train
                       image_filename=image_file,
                       width=imw,
                       height=imh,
                       labels=labels,
                       save_path=os.path.normpath(os.path.join(save_root, rel_dir)),
                       class_name_dict=class_name_dict,
                       conf_thresh_dict=conf_thresh_dict)
        # Progress output, otherwise there is no sign of how far the run is.
        print(f"\r{image_path}", end="")
    except Exception as e:
        print(f"{image_path} \n{e}\n\n")


def run_thread(root, image_file, save_root, image_root, txt_root, class_name_dict, conf_thresh_dict=None):
    """Thread-pool task: convert the label of a single image.

    :param root: directory of the image, e.g. ``..\\images\\train``
    :param image_file: image file name, e.g. ``0000000.jpg``
    :param save_root: root directory for the xml files
    :param image_root: root directory of the images
    :param txt_root: root directory of the txt labels
    :param class_name_dict: ``cls_index -> cls_name``
    :param conf_thresh_dict: per-class thresholds; omit when the txt files
        were saved without confidences
    :return: None
    """
    _convert_txt_to_xml(root, image_file, save_root, image_root, txt_root,
                        class_name_dict, conf_thresh_dict)


def run_process(root_file, save_root, image_root, txt_root, class_name_dict, conf_thresh_dict=None):
    """Process-pool task: convert the labels of a batch of images.

    :param root_file: ``[(root, image_file), ...]``
    :param save_root: root directory for the xml files
    :param image_root: root directory of the images
    :param txt_root: root directory of the txt labels
    :param class_name_dict: ``cls_index -> cls_name``
    :param conf_thresh_dict: per-class thresholds; omit when the txt files
        were saved without confidences
    :return: None
    """
    for root, image_file in root_file:
        _convert_txt_to_xml(root, image_file, save_root, image_root, txt_root,
                            class_name_dict, conf_thresh_dict)


def _iter_image_files(image_dir, exclude_dirs):
    """Yield ``(root, file)`` for every file below ``image_dir``."""
    for root, _, files in os.walk(image_dir):
        # Skip any directory whose path contains an excluded name.
        if any(exclude_dir in root for exclude_dir in exclude_dirs):
            continue
        for file in files:
            yield root, file


def _report_worker_error(exc):
    """Print an exception raised while dispatching or running a pool task."""
    print(f"\n\033[31mworker failed: {exc!r}\033[0m")


def main():
    """Convert every txt below ``data_root/labels`` into a VOC xml file."""
    class_name_dict = {
        0: "fire",
        1: "smoke",
    }
    # Per-class confidence thresholds; None means the txt has no conf column.
    CONF_THRESH_DICT = None
    # CONF_THRESH_DICT = {
    #     0: 0.2,
    #     1: 0.2
    # }
    if CONF_THRESH_DICT is not None and class_name_dict.keys() != CONF_THRESH_DICT.keys():
        raise ValueError("class_name_dict.keys() != CONF_THRESH_DICT.keys().")

    data_root = os.path.abspath(r"E:\CodeFiles\pycharm\YOLO\yolov5\my_test\data")
    # Optional sub directory, e.g. r"\train\fire_smoke".
    sub_dir = r""
    # A leading separator would make os.path.join discard data_root.
    sub_dir = sub_dir.strip("\\/")

    # The three trees must share the same directory structure.
    save_root = os.path.join(data_root, "annotations")
    txt_root = os.path.join(data_root, "labels")
    image_root = os.path.join(data_root, "images")
    if sub_dir:
        save_root = os.path.join(save_root, sub_dir)
        txt_root = os.path.join(txt_root, sub_dir)
        image_root = os.path.join(image_root, sub_dir)

    # Directories to skip.
    exclude_dirs = [r"background", ]

    # NOTE: pools are hard to debug; set max_workers to 1 when debugging.
    max_workers = 6
    run_type = "thread"       # multi-threading
    # run_type = "process"    # multi-processing
    print(f"running use run_type={run_type}, max_workers:{max_workers}")

    shared_args = (save_root, image_root, txt_root, class_name_dict, CONF_THRESH_DICT)
    if run_type == "thread":
        # Tasks are submitted one image at a time.
        with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            for root, file in _iter_image_files(image_root, exclude_dirs):
                executor.submit(run_thread, root, file, *shared_args)
        print("\nFinish ...")
    elif run_type == "process":
        # Number of images handled by one task.
        max_file_num = 1000
        pool = multiprocessing.Pool(processes=max_workers)
        batch = []
        for item in _iter_image_files(image_root, exclude_dirs):
            batch.append(item)
            if len(batch) >= max_file_num:
                pool.apply_async(run_process, (batch, *shared_args),
                                 error_callback=_report_worker_error)
                # Start a fresh list instead of clearing the submitted one,
                # so the queued task never sees its arguments change.
                batch = []
        if batch:
            # Flush the remainder that did not fill a whole batch.
            pool.apply_async(run_process, (batch, *shared_args),
                             error_callback=_report_worker_error)
        pool.close()
        # Wait for all worker processes to finish.
        pool.join()
        print("\nFinish ...")
    else:
        print("run_type should be thread or process.")


if __name__ == "__main__":
    main()