自动调整图片方向并划窗剪裁

2021-02-16 07:23 358人阅读评论(0)

❝
最近在“2021广东工业智造创新大赛智能算法赛：瓷砖表面瑕疵质检” 中遇到一些图片，有不同角度偏差。类似卫星图，分辨率特别大，目标却特别小，这就需要对原始图片自动调整角度，划窗剪裁和相应的坐标映射。
❞

读取图片

对于大图片来说，直接使用cv2.imread会比PIL再转numpy array慢 30% 左右，这里推荐使用Image.open读取。


   
    
     
      
     
     
      
       import cv2
       import numpy as np
       from PIL import Image

       # Decode with PIL instead of cv2.imread: the article measured cv2.imread as
       # roughly 30% slower on very large images.
       # Alternative: org_img = cv2.imread(BASE_DIR + img_file)
       with Image.open(BASE_DIR + img_file) as pil_img:
           # convert("RGB") decodes the pixels while the file is still open and
           # guarantees three channels for grayscale / palette / RGBA inputs.
           rgb = np.asarray(pil_img.convert("RGB"))
       # OpenCV works on BGR channel order.
       org_img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

检测外框

1. 转灰度图


   
    
     
      
     
     
      
       # Convert the BGR image to a single-channel grayscale image.
       greyPic = cv2.cvtColor(org_img, cv2.COLOR_BGR2GRAY)

2. 对图像进行二值化操作

这里阈值采用平均像素值，可满足大多数场景，特殊场合下可以自己调整。


   
    
     
      
     
     
      
       # threshold(src, thresh, maxval, type, dst=None)
       # src is the input array, thresh is the threshold value, and maxval is the
       # value used when type is THRESH_BINARY or THRESH_BINARY_INV.
       # type has 5 variants; 0 (THRESH_BINARY) is used here: a pixel above the
       # threshold becomes maxval (the previous argument), otherwise it becomes 0.
       # The first return value is the threshold, the second the thresholded image.
       # The mean gray level of the image is used as the threshold.
       ret, binPic = cv2.threshold(greyPic, greyPic.mean(), 255, cv2.THRESH_BINARY)

3. 中值滤波

# Median-blur the binarized image (kernel size argument 5) before contour detection.
median = cv2.medianBlur(binPic, 5)

4. 找出轮廓


   
    
     
      
     
     
      
       # findContours() takes three arguments: the input image, the contour
       # retrieval mode and the contour approximation method.
       # Older OpenCV releases modify the input image, so pass img.copy() when the
       # binary image is still needed afterwards.
       # The retrieval mode controls the returned hierarchy: cv2.RETR_TREE builds the
       # full nesting tree that relates contours to each other, cv2.RETR_EXTERNAL
       # keeps only the outermost contours, and cv2.RETR_CCOMP is the mode used here.
       # OpenCV 3.x returns (image, contours, hierarchy) whereas OpenCV 2.x / 4.x
       # return (contours, hierarchy); taking the last two items works on all of them.
       contours, hierarchy = cv2.findContours(
           median, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

5. 获取最小外接矩形


   
    
     
      
     
     
      
       if len(contours) == 0:
           raise ValueError("no contours found in the binarized image")

       # Index of the contour with the largest area (the first one wins on ties).
       # Each contour's area is computed once.
       maxArea = max(range(len(contours)),
                     key=lambda idx: cv2.contourArea(contours[idx]))
       hull = cv2.convexHull(contours[maxArea])
       # Drop the singleton middle axis; reshape keeps the result two-dimensional
       # even when the hull consists of a single point.
       hull = hull.reshape(-1, 2)

       # Minimum-area bounding rectangle: (centre (x, y), (width, height), angle).
       rect = cv2.minAreaRect(hull)
       # Integer corner points of that rectangle, usable for drawing the box.
       # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
       box = cv2.boxPoints(rect).astype(np.intp)

调整图片角度

获取角度偏差，计算仿射矩阵，将外接矩形box的坐标进行变换。


   
    
     
      
     
     
      
       # Rotation centre and angle both come from the minimum-area rectangle.
       center, angle = rect[0], rect[2]
       # Angles above 45 degrees are folded back by 90 degrees.
       angle = angle - 90 if angle > 45 else angle

       # Affine matrix rotating about `center` by `angle` with scale 1.
       M = cv2.getRotationMatrix2D(center, angle, 1)
       h, w, c = org_img.shape
       # Rotate the image onto a canvas of the original size.
       dst = cv2.warpAffine(org_img, M, (w, h))

       # Apply the same affine transform to the rectangle corners, column-wise.
       corner_x, corner_y = box[:, 0], box[:, 1]
       poly_r = np.stack(
           (M[0][0] * corner_x + M[0][1] * corner_y + M[0][2],
            M[1][0] * corner_x + M[1][1] * corner_y + M[1][2]),
           axis=1)

裁剪图片


   
    
     
      
     
     
      
       # Integer bounding box of the rotated rectangle (floats truncated toward zero).
       # np.int0 was removed in NumPy 2.0; astype(np.intp) is the equivalent cast.
       x_s, y_s = poly_r.min(axis=0).astype(np.intp)
       x_e, y_e = poly_r.max(axis=0).astype(np.intp)
       # Margin kept around the detected rectangle.
       border = 100
       # Grow the box by the margin, clamped to the image bounds.
       x_s = int(max(x_s - border, 0))
       y_s = int(max(y_s - border, 0))
       x_e = int(min(x_e + border, w))
       y_e = int(min(y_e + border, h))
       # Crop the rotated image.
       cut_img = dst[y_s:y_e, x_s:x_e, :]

划窗分割

图片已经扶正后，就可以根据需要进行划窗分割。指定划窗大小，重叠比例和输出目录后，就能获取一堆小图片了。


   
    
     
      
     
     
      
       def slice(img, img_file, window_l=1024, overlap=0.2, out_dir=""):
           """Cut ``img`` into overlapping square tiles and write each one to disk.

           Tiles are ``window_l`` x ``window_l`` pixels and are written as
           ``<img_file without its last 4 characters>_<col>_<row>.jpg`` (indices
           zero-padded to three digits), e.g. ``xxx_000_000.jpg``. A tile that
           would cross the right or bottom edge is shifted back so that it ends
           exactly on that edge.

           Args:
               img: Image array whose first two dimensions are (height, width).
               img_file: Source file name; its last four characters (such as
                   ``.jpg``) are dropped to build the tile names.
               window_l: Side length of a tile in pixels.
               overlap: Fraction of ``window_l`` shared by neighbouring tiles.
               out_dir: String prepended to every tile name; it is concatenated
                   as-is, so include the trailing path separator.

           Raises:
               ValueError: If the stride is not positive or the image is smaller
                   than a single tile.
               OSError: If a tile could not be written.
           """
           h, w = img.shape[:2]

           step_l = int(window_l - window_l * overlap)  # stride between tiles
           # Validate once, before any tile is computed or written. Plain
           # exceptions are used because assert statements vanish under -O.
           if step_l <= 0:
               raise ValueError(
                   "window_l=%r with overlap=%r gives a non-positive stride"
                   % (window_l, overlap))
           if w < window_l or h < window_l:
               raise ValueError(
                   "image of size %dx%d is smaller than the %d px window"
                   % (w, h, window_l))

           x_num = int(np.ceil(max((w - window_l) / step_l, 0))) + 1
           y_num = int(np.ceil(max((h - window_l) / step_l, 0))) + 1

           stem = img_file[:-4]
           for i in range(x_num):
               # Clamp so the last column ends on the right edge.
               x_s = min(i * step_l, w - window_l)
               for j in range(y_num):
                   # Clamp so the last row ends on the bottom edge.
                   y_s = min(j * step_l, h - window_l)
                   new_img_file = '%s_%03d_%03d.jpg' % (stem, i, j)
                   im = img[y_s:y_s + window_l, x_s:x_s + window_l]
                   # cv2.imwrite signals failure through its return value.
                   if not cv2.imwrite(out_dir + new_img_file, im):
                       raise OSError(
                           "failed to write tile %s" % (out_dir + new_img_file))

批量化处理

封装一下函数，对整个目录扫描，并保存和原图的对应关系配置文件，为之后还原坐标做准备。


   
    
     
      
     
     
      
       def adjust_angle(org_img, img_file, border=100):
           """Normalise scale, straighten the image and crop it with a margin.

           Images whose width or height is below 4000 pixels are first enlarged
           by a factor of two. The image is then passed to ``getCornerPoint``
           (defined elsewhere; presumably it returns the bounding box of the
           detected object in the rotated image, the min-area rectangle and the
           rotated image -- confirm against its definition). The returned box is
           grown by ``border`` pixels on every side, clamped to the image size,
           and used to crop the rotated image.

           Args:
               org_img: Image array with shape (height, width, channels).
               img_file: File name stored in the returned record.
               border: Margin in pixels added around the detected box.

           Returns:
               A ``(record, cropped_image)`` tuple. ``record`` is a dict with the
               keys ``name``, ``xyxy`` (crop box), ``rect``, ``border`` and
               ``scale``.
           """
           height, width, _channels = org_img.shape

           # Unify the scale: anything with a side below 4000 px is doubled.
           needs_upscale = width < 4000 or height < 4000
           scale = 2 if needs_upscale else 1
           if needs_upscale:
               width, height = int(width * scale), int(height * scale)
               org_img = cv2.resize(org_img, (width, height),
                                    interpolation=cv2.INTER_LINEAR)

           left, top, right, bottom, rect, new_img = getCornerPoint(org_img)

           # Keep a margin around the detected box without leaving the image.
           left = int(max(left - border, 0))
           top = int(max(top - border, 0))
           right = int(min(right + border, width))
           bottom = int(min(bottom + border, height))

           record = {
               'name': img_file,
               'xyxy': [left, top, right, bottom],
               'rect': rect,
               'border': border,
               'scale': scale,
           }
           return record, new_img[top:bottom, left:right, :]

设置BASE_DIR为原图目录，OUT_ADJUST为角度调整后目录，adjust.json为配置文件。


   
    
     
      
     
     
      
       result_json = []
       # Sorted so that repeated runs process the files in the same order
       # (os.listdir returns entries in arbitrary order).
       img_list = sorted(os.listdir(BASE_DIR))
       for img_file in tqdm(img_list):
           # Close each file handle promptly instead of leaking one per image;
           # convert("RGB") decodes the pixels while the file is still open and
           # guarantees the three channels the following steps expect.
           with Image.open(BASE_DIR + img_file) as pil_img:
               rgb = np.asarray(pil_img.convert("RGB"))
           org_img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
           data, img = adjust_angle(org_img, img_file, border=100)
           result_json.append(data)
           cv2.imwrite(OUT_ADJUST + img_file, img)

           slice(img, img_file, TARGET, overlap=OVERLAP, out_dir=OUT_SLICE)

       # NOTE(review): the file is written with the platform default encoding and
       # ensure_ascii=False, so non-ASCII file names depend on that encoding;
       # whatever reads adjust.json back must open it the same way.
       with open(OUT_DIR + 'adjust.json', 'w') as fp:
           json.dump(result_json, fp, indent=4, ensure_ascii=False)

坐标还原

1. 读取切片图片列表


   
    
     
      
     
     
      
       # Load the 'images' list of the test annotation file and map every
       # file name to its position in that list.
       with open("instances_test2017_1024.json", 'r') as f:
           test_imgs = json.load(f)['images']
       test_imgs_dict = {
           entry['file_name']: position
           for position, entry in enumerate(test_imgs)
       }

2. 读取原始文件信息


   
    
     
      
     
     
      
       # Load the per-image records written during angle adjustment and map every
       # original file name to its position in that list.
       with open(OUT_DIR + 'adjust.json', 'r') as fp:
           img_info = json.load(fp)
       img_info_dict = {
           entry['name']: position
           for position, entry in enumerate(img_info)
       }

3. 读取推理结果文件

将一堆子图的推理结果放到一起，可以充分利用mmdetection的多线程DataLoader和大显存的batch size来加速推理过程。


   
    
     
      
     
     
      
       # Load the pickled inference results for all tiles in one go.
       # NOTE(review): pickle.load can execute arbitrary code while loading; only
       # open result files produced by a trusted inference run.
       with open("result_1024-20.pkl", 'rb') as f:
           pred_set = pickle.load(f)

4. 合并坐标到角度调整图

获取到图片长宽，根据相同的划窗参数，可以还原出每个子图的基准坐标x_s和y_s。

其中test_imgs_dict中保存了子图的文件名字典，pred_set保存了预测结果列表。通过形如XXX_000_000.jpg的文件名，经过两级映射后就能获取到对应的推理结果集了。


   
    
     
      
     
     
      
       def merge_result(info, pred_set, test_imgs_dict, img_file, window_l=1024,
                        overlap=0.2, num_classes=6):
           """Merge per-tile detections back into the angle-adjusted image.

           The tile grid is rebuilt from the crop box stored in ``info`` using the
           same window parameters that produced the tiles, so no image has to be
           read. Every tile's boxes are shifted by that tile's origin and stacked
           per class. The arrays inside ``pred_set`` are left untouched, so the
           function can be called repeatedly on the same predictions.

           Args:
               info: Record of the source image; ``info['name']`` must equal
                   ``img_file`` and ``info['xyxy']`` is the crop box
                   ``[x1, y1, x2, y2]`` whose size defines the tile grid.
               pred_set: Sequence of per-tile predictions; each prediction is a
                   sequence with one ``(k, 5)`` array per class whose first four
                   columns are x1, y1, x2, y2 (the fifth is carried through
                   unchanged; presumably the score).
               test_imgs_dict: Maps a tile file name to its index in ``pred_set``.
               img_file: Source file name; tile names are derived from it by
                   dropping its last four characters.
               window_l: Tile side length in pixels.
               overlap: Fraction of ``window_l`` shared by neighbouring tiles.
               num_classes: Number of per-class arrays in the result.

           Returns:
               A list of ``num_classes`` arrays, one per class, each of shape
               ``(n, 5)`` in coordinates of the angle-adjusted image.

           Raises:
               ValueError: If ``info`` does not belong to ``img_file``, the
                   stride is not positive, or the image is smaller than a tile.
           """
           if info['name'] != img_file:
               raise ValueError(
                   "info is for %r, not %r" % (info['name'], img_file))

           # Only the image size is needed; take it from the stored crop box
           # instead of reading the image again.
           x1, y1, x2, y2 = info['xyxy']
           w, h = x2 - x1, y2 - y1

           step_l = int(window_l - window_l * overlap)  # stride between tiles
           # Validate once up front; assert statements vanish under -O.
           if step_l <= 0:
               raise ValueError(
                   "window_l=%r with overlap=%r gives a non-positive stride"
                   % (window_l, overlap))
           if w < window_l or h < window_l:
               raise ValueError(
                   "image of size %dx%d is smaller than the %d px window"
                   % (w, h, window_l))

           x_num = int(np.ceil(max((w - window_l) / step_l, 0))) + 1
           y_num = int(np.ceil(max((h - window_l) / step_l, 0))) + 1

           # Each class starts with an empty (0, 5) float32 array so that a class
           # without detections still yields an array of the right shape.
           per_class = [[np.zeros((0, 5), dtype=np.float32)]
                        for _ in range(num_classes)]

           stem = img_file[:-4]
           for i in range(x_num):
               # Same edge clamping as used when the tiles were cut.
               x_s = min(i * step_l, w - window_l)
               for j in range(y_num):
                   y_s = min(j * step_l, h - window_l)
                   new_img_file = '%s_%03d_%03d.jpg' % (stem, i, j)
                   pred = pred_set[test_imgs_dict[new_img_file]]

                   for label_id, bboxes in enumerate(pred):
                       # Work on a copy: shifting in place would corrupt
                       # pred_set for any later use.
                       shifted = np.array(bboxes, copy=True)
                       shifted[:, [0, 2]] += x_s
                       shifted[:, [1, 3]] += y_s
                       per_class[label_id].append(shifted)

           # One stack per class instead of re-stacking after every tile.
           return [np.vstack(parts) for parts in per_class]

5. 坐标映射到原图

首先获取到原图信息info，获取外接矩形参数、旋转角度、缩放比例、边框大小等等，构建一个逆仿射矩阵M，对所有检测框进行坐标变换。


   
    
     
      
     
     
      
       def generate_json(pred, info, img_file, score_threshold=0.05, out_dir="", vis=False):
           """Map merged detections back to the original image and build result records.

           For every class the boxes are de-duplicated with ``nms`` (IoU threshold
           0.5), shifted by the crop origin stored in ``info['xyxy']``, and their
           four corners are rotated back with the inverse of the rotation derived
           from ``info['rect']``. Each rotated polygon is handed to ``poly2ann``
           together with ``info['scale']`` (``nms`` and ``poly2ann`` are defined
           elsewhere; presumably ``poly2ann`` undoes the up-scaling and returns a
           box -- confirm against its definition).

           Args:
               pred: Sequence with one ``(k, 5)`` array per class; columns are
                   x1, y1, x2, y2, score.
               info: Record of the source image with the keys ``xyxy``, ``rect``
                   and ``scale``.
               img_file: File name stored in every result record.
               score_threshold: Detections scoring below this are dropped.
               out_dir: Not used by this function.
               vis: Not used by this function.

           Returns:
               A list of dicts with the keys ``name``, ``category`` (class index
               plus one), ``bbox`` (four floats) and ``score``.
           """
           base_x, base_y, _, _ = info['xyxy']
           rect = info['rect']
           scale = info['scale']

           center = tuple(rect[0])
           angle = rect[2]
           # Same angle folding as used when the image was straightened.
           if angle > 45:
               angle = angle - 90

           # Inverse rotation: same centre, negated angle.
           M = cv2.getRotationMatrix2D(center, -angle, 1)

           json_results = []
           for label_id, bboxes in enumerate(pred):
               # A class without detections contributes nothing; skip it rather
               # than pushing an empty array through nms.
               if len(bboxes) == 0:
                   continue

               # Tiles overlap, so the same object may have been detected twice.
               bboxes = nms(np.array(bboxes[:, :4]), np.array(bboxes[:, 4]),
                            iou_threshold=0.5)[0]

               # Move from crop coordinates to the coordinates of the rotated image.
               bboxes[:, 0] = bboxes[:, 0] + base_x
               bboxes[:, 1] = bboxes[:, 1] + base_y
               bboxes[:, 2] = bboxes[:, 2] + base_x
               bboxes[:, 3] = bboxes[:, 3] + base_y

               for x1, y1, x2, y2, score in bboxes:
                   if score < score_threshold:
                       continue

                   # Rotate the four corners back into the un-rotated image.
                   corners = [(x1, y1), (x1, y2), (x2, y1), (x2, y2)]
                   poly_r = np.asarray([
                       (M[0][0] * x + M[0][1] * y + M[0][2],
                        M[1][0] * x + M[1][1] * y + M[1][2])
                       for x, y in corners
                   ])

                   # Undo the scaling applied to small images.
                   ann = poly2ann(poly_r, score, scale=scale)

                   json_results.append({
                       'name': img_file,
                       'category': label_id + 1,
                       'bbox': [float(ann[0]), float(ann[1]),
                                float(ann[2]), float(ann[3])],
                       'score': float(score),
                   })

           return json_results

最后经过nms和一系列后处理之后，映射到原图上即可。

完美收工！

转载：https://blog.csdn.net/weixin_47479625/article/details/113449495

查看评论

小言_互联网的博客