❝最近在“2021广东工业智造创新大赛 智能算法赛:瓷砖表面瑕疵质检” 中遇到一些图片,有不同角度偏差。类似卫星图,分辨率特别大,目标却特别小,这就需要对原始图片自动调整角度,划窗剪裁和相应的坐标映射。
❞
读取图片:对于大图片来说,直接使用 cv2.imread 会比用 PIL 读取后再转成 numpy array 的方式慢 30% 左右,因此这里推荐使用 Image.open 读取。
-
import numpy as np
import cv2
from PIL import Image

# Reading with PIL and converting to a numpy array is ~30% faster than
# cv2.imread for very large images, so the direct cv2 read is disabled.
# org_img = cv2.imread(BASE_DIR + img_file)
org_img = Image.open(BASE_DIR + img_file)
# PIL yields RGB; OpenCV works in BGR, so convert once up front.
org_img = cv2.cvtColor(np.asarray(org_img), cv2.COLOR_RGB2BGR)
# --- Detect the outer frame of the tile ---

# 1. Convert to grayscale.
greyPic = cv2.cvtColor(org_img, cv2.COLOR_BGR2GRAY)

# 2. Binarize.
# threshold(src, thresh, maxval, type, dst=None):
#   src    - input array
#   thresh - threshold value; the mean pixel value works for most scenes,
#            tune it for special cases
#   maxval - value assigned when type is THRESH_BINARY / THRESH_BINARY_INV
#   type   - one of 5 modes; THRESH_BINARY sets pixels above the threshold
#            to maxval and everything else to 0
# Returns (threshold actually used, thresholded image).
ret, binPic = cv2.threshold(greyPic, greyPic.mean(), 255, cv2.THRESH_BINARY)

# 3. Median filter to suppress speckle noise in the binary mask.
median = cv2.medianBlur(binPic, 5)

# 4. Find contours.
# findContours(image, mode, method):
#   - may modify the input image; pass img.copy() if the input is needed later
#   - cv2.RETR_TREE yields the full contour hierarchy; cv2.RETR_EXTERNAL
#     keeps only the outermost contours
# NOTE(review): OpenCV 4.x returns (contours, hierarchy); 3.x returned
# (image, contours, hierarchy) — this unpacking assumes 4.x.
contours, hierarchy = cv2.findContours(median, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
-
# 5. Get the minimum-area bounding rectangle of the largest contour.

# Index of the contour with the largest area.  max() keeps the first
# maximum, matching the original strict-greater scan; the original also
# recomputed the current best's area on every iteration.
# Assumes at least one contour was found.
maxArea = max(range(len(contours)), key=lambda i: cv2.contourArea(contours[i]))

# Convex hull of the largest contour, squeezed to an (N, 2) point array.
hull = np.squeeze(cv2.convexHull(contours[maxArea]))

# Minimum-area rotated rectangle: ((cx, cy), (w, h), rotation angle).
rect = cv2.minAreaRect(hull)

# Integer corner points of the rotated rectangle.
# np.intp replaces np.int0, which was removed in NumPy 2.0.
box = np.intp(cv2.boxPoints(rect))
# --- Deskew the image ---
# Build an affine rotation from the rotated-rect angle, rotate the whole
# image, and map the rectangle corners into the rotated frame.
center = rect[0]
angle = rect[2]
# Fold the minAreaRect angle into (-45, 45] so we rotate by the smaller
# equivalent amount.
if angle > 45:
    angle -= 90

# 2x3 rotation matrix about the rectangle centre, no scaling.
M = cv2.getRotationMatrix2D(center, angle, 1)
h, w, c = org_img.shape
# Rotate the full image.
dst = cv2.warpAffine(org_img, M, (w, h))
# Apply the same affine transform to all 4 corners at once: p' = A·p + t.
poly_r = box @ M[:, :2].T + M[:, 2]
# --- Crop the deskewed image ---

# Axis-aligned bounds of the transformed rectangle corners.
# np.intp replaces np.int0, which was removed in NumPy 2.0.
x_s, y_s = np.intp(poly_r.min(axis=0))
x_e, y_e = np.intp(poly_r.max(axis=0))

# Keep a safety margin around the detected tile, clamped to the image.
border = 100
x_s = int(max((x_s - border), 0))
y_s = int(max((y_s - border), 0))
x_e = int(min((x_e + border), w))
y_e = int(min((y_e + border), h))

# Crop (numpy slicing: this is a view into dst, not a copy).
cut_img = dst[y_s:y_e, x_s:x_e, :]
划窗分割
图片已经扶正后,就可以根据需要进行划窗分割。指定划窗大小,重叠比例和输出目录后,就能获取一堆小图片了。
-
def slice(img, img_file, window_l=1024, overlap=0.2, out_dir=""):
    """Cut `img` into overlapping square windows and write them to disk.

    Output files are named ``<stem>_<col>_<row>.jpg`` (e.g. xxx_000_000.jpg).

    Args:
        img: image array of shape (h, w, c).
        img_file: original file name; its last 4 characters are stripped
            to build slice names — assumes a 3-letter extension such as
            ".jpg".  TODO confirm for other extensions.
        window_l: side length of the square sliding window.
        overlap: fractional overlap between neighbouring windows.
        out_dir: directory prefix for the output files.

    NOTE(review): this function shadows the builtin `slice`; renaming it
    would break existing callers, so the name is kept.
    """
    h, w, c = img.shape
    # The image must hold at least one window.  Hoisted out of the loop:
    # the condition does not depend on the loop variables.
    assert w >= window_l
    assert h >= window_l

    step_l = int(window_l - window_l * overlap)  # stride between windows
    # Windows per axis; the last window is clamped to the image border.
    x_num = int(np.ceil(max((w - window_l) / step_l, 0))) + 1
    y_num = int(np.ceil(max((h - window_l) / step_l, 0))) + 1
    for i in range(x_num):
        for j in range(y_num):
            x_s, x_e = i * step_l, i * step_l + window_l
            y_s, y_e = j * step_l, j * step_l + window_l
            # Clamp windows that run past the right/bottom edge.
            if x_e > w:
                x_s, x_e = w - window_l, w
            if y_e > h:
                y_s, y_e = h - window_l, h

            new_img_file = img_file[:-4] + '_%03d_%03d.jpg' % (i, j)
            im = img[y_s:y_e, x_s:x_e, :]
            cv2.imwrite(out_dir + new_img_file, im)
批量化处理
封装一下函数,对整个目录扫描,并保存和原图的对应关系配置文件,为之后还原坐标做准备。
-
def adjust_angle(org_img, img_file, border=100):
    """Deskew one image and crop it to the detected tile.

    Args:
        org_img: BGR image array of shape (h, w, c).
        img_file: file name recorded in the returned mapping.
        border: safety margin (pixels) kept around the detected tile.

    Returns:
        (data, img): `data` holds everything needed to map detections
        back to the original image (crop box, rotated rect, border,
        scale); `img` is the cropped, angle-corrected picture.
    """
    h, w, c = org_img.shape
    # Normalise scale: images smaller than 4000px on either side are
    # doubled so the downstream sliding window behaves consistently.
    # (The original resized unconditionally, which was a full-image
    # no-op copy when scale == 1; skip it instead.)
    scale = 1
    if w < 4000 or h < 4000:
        scale = 2
        w = int(w * scale)
        h = int(h * scale)
        org_img = cv2.resize(org_img, (w, h), interpolation=cv2.INTER_LINEAR)

    # getCornerPoint is defined elsewhere; presumably it deskews the
    # image and returns the tile bounding box plus the rotated rect —
    # verify against its definition.
    x_s, y_s, x_e, y_e, rect, new_img = getCornerPoint(org_img)

    # Expand by the safety margin, clamped to the image.
    x_s = int(max((x_s - border), 0))
    y_s = int(max((y_s - border), 0))
    x_e = int(min((x_e + border), w))
    y_e = int(min((y_e + border), h))

    img = new_img[y_s:y_e, x_s:x_e, :]

    data = dict()
    data['name'] = img_file
    data['xyxy'] = [x_s, y_s, x_e, y_e]
    data['rect'] = rect
    data['border'] = border
    data['scale'] = scale

    return data, img
设置BASE_DIR
为原图目录,OUT_ADJUST
为角度调整后目录,adjust.json
为配置文件。
-
# Batch processing: scan BASE_DIR, deskew every image, save the adjusted
# image to OUT_ADJUST, slice it into windows, and record the mapping
# needed to restore coordinates later into adjust.json.
result_json = []
img_list = os.listdir(BASE_DIR)
for img_file in tqdm(img_list):
    # PIL read + RGB->BGR conversion (faster than cv2.imread, see above).
    org_img = Image.open(BASE_DIR + img_file)
    org_img = cv2.cvtColor(np.asarray(org_img), cv2.COLOR_RGB2BGR)
    data, img = adjust_angle(org_img, img_file, border=100)
    result_json.append(data)
    cv2.imwrite(OUT_ADJUST + img_file, img)

    # Sliding-window split of the adjusted image (TARGET is the window
    # side length; presumably defined alongside the other config globals).
    slice(img, img_file, TARGET, overlap=OVERLAP, out_dir=OUT_SLICE)

# Persist the per-image mapping for the later coordinate restoration.
with open(OUT_DIR + 'adjust.json', 'w') as fp:
    json.dump(result_json, fp, indent=4, ensure_ascii=False)
坐标还原
1. 读取切片图片列表
-
# 1. Load the slice image list and index file name -> position in the list.
with open("instances_test2017_1024.json", 'r') as f:
    test_imgs = json.load(f)['images']
test_imgs_dict = {obj['file_name']: i for i, obj in enumerate(test_imgs)}

# 2. Load the per-original-image adjustment records and index by name.
with open(OUT_DIR + 'adjust.json', 'r') as fp:
    img_info = json.load(fp)
img_info_dict = {obj['name']: i for i, obj in enumerate(img_info)}
3. 读取推理结果文件
将一堆子图的推理结果放到一起,可以充分利用mmdetection
的多线程DataLoader
和大显存的batch size
来加速推理过程。
-
# 3. Load the inference results for all slices (one pickle produced by the
# detector's test run).
# NOTE(review): pickle.load executes arbitrary code while unpickling —
# only ever load result files you produced yourself.
with open("result_1024-20.pkl", 'rb') as f:
    pred_set = pickle.load(f)
4. 合并坐标到角度调整图
获取到图片长宽,根据相同的划窗参数,可以还原出每个子图的基准坐标x_s
和y_s
。
其中test_imgs_dict
中保存了子图的文件名字典,pred_set
保存了预测结果列表。通过形如XXX_000_000.jpg
的文件名,经过两级映射后就能获取到对应的推理结果集了。
-
def merge_result(info, pred_set, test_imgs_dict, img_file,
                 window_l=1024, overlap=0.2, num_classes=6):
    """Merge per-slice detections back onto the angle-adjusted image.

    Re-derives the same sliding-window grid as slice() from the crop size
    stored in `info` (no image read needed), looks up each slice's
    predictions, shifts the boxes by the window origin, and stacks them
    per class.

    Args:
        info: adjust.json record for this image; 'xyxy' is the crop box.
        pred_set: list of per-slice predictions; each entry is a list of
            (k, 5) float32 arrays [x1, y1, x2, y2, score], one per class.
        test_imgs_dict: slice file name -> index into pred_set.
        img_file: original file name (must match info['name']).
        window_l, overlap: must match the values used by slice().
        num_classes: number of detection classes (default 6, as before).

    Returns:
        List of num_classes arrays of shape (n, 5) in adjusted-image
        coordinates.  pred_set is NOT modified.
    """
    assert info['name'] == img_file
    # Only the crop size is needed; reading it from the config avoids
    # opening the (very large) image.
    x1, y1, x2, y2 = info['xyxy']
    w = x2 - x1
    h = y2 - y1
    # Same window arithmetic as slice(); the two must stay in sync.
    assert w >= window_l
    assert h >= window_l
    step_l = int(window_l - window_l * overlap)  # stride between windows
    x_num = int(np.ceil(max((w - window_l) / step_l, 0))) + 1
    y_num = int(np.ceil(max((h - window_l) / step_l, 0))) + 1

    # One empty (0, 5) array per class.  The original built this as
    # [np.array([[],]*5).T...]*6, which made all six slots share ONE
    # array object — harmless only because slots were rebound, never
    # mutated.  A comprehension gives independent arrays.
    result = [np.zeros((0, 5), dtype=np.float32) for _ in range(num_classes)]
    for i in range(x_num):
        for j in range(y_num):
            x_s, x_e = i * step_l, i * step_l + window_l
            y_s, y_e = j * step_l, j * step_l + window_l
            # Clamp the last window to the image border, as slice() does.
            if x_e > w:
                x_s, x_e = w - window_l, w
            if y_e > h:
                y_s, y_e = h - window_l, h

            new_img_file = img_file[:-4] + '_%03d_%03d.jpg' % (i, j)
            pred = pred_set[test_imgs_dict[new_img_file]]  # slice predictions

            # Shift (x1, y1, x2, y2) by the window origin; score unchanged.
            offset = np.array([x_s, y_s, x_s, y_s, 0], dtype=np.float32)
            for label_id, bboxes in enumerate(pred):
                # `bboxes + offset` copies.  The original added in place
                # and silently corrupted pred_set on repeated calls.
                result[label_id] = np.vstack((result[label_id], bboxes + offset))

    return result
5. 坐标映射到原图
首先获取到原图信息info
,获取外接矩形参数、旋转角度、缩放比例、边框大小等等,构建一个逆仿射矩阵M
,对所有检测框进行坐标变换。
-
def generate_json(pred, info, img_file, score_threshold=0.05, out_dir="", vis=False):
    """Map merged detections back onto the ORIGINAL image and emit JSON rows.

    Runs per-class NMS, shifts boxes from crop to adjusted-image
    coordinates, rotates each box's corners back through the inverse of
    the deskew rotation, and undoes the preprocessing resize.

    Args:
        pred: list of per-class (n, 5) arrays [x1, y1, x2, y2, score]
            from merge_result().
        info: adjust.json record (xyxy crop box, rect, scale, border).
        img_file: original file name stored in each output row.
        score_threshold: boxes below this score are dropped.
        out_dir, vis: kept for interface compatibility; unused here.

    Returns:
        List of dicts {'name', 'category', 'bbox', 'score'} (1-based
        category ids).
    """
    base_x, base_y, x2, y2 = info['xyxy']
    rect = info['rect']      # ((cx, cy), (w, h), angle) of the tile rect
    scale = info['scale']    # resize factor applied in adjust_angle
    # NOTE(review): the original also built an (unused) `poly` rectangle
    # from info['border'] here, rebinding x1..y2 in the process; that
    # dead code is removed.

    center = tuple(rect[0])
    angle = rect[2]
    # Same angle folding as the deskew step, so -angle undoes it exactly.
    if angle > 45:
        angle = angle - 90

    # Inverse rotation: maps adjusted-image points back to the original.
    M = cv2.getRotationMatrix2D(center, -angle, 1)

    json_results = []
    for label_id, bboxes in enumerate(pred):  # one entry per class
        # mmcv-style nms: returns (dets(k, 5), inds); keep the dets.
        bboxes = nms(np.array(bboxes[:, :4]), np.array(bboxes[:, 4]), iou_threshold=0.5)[0]
        # Shift from crop coordinates to adjusted-image coordinates.
        bboxes[:, 0] = bboxes[:, 0] + base_x
        bboxes[:, 1] = bboxes[:, 1] + base_y
        bboxes[:, 2] = bboxes[:, 2] + base_x
        bboxes[:, 3] = bboxes[:, 3] + base_y

        for ann in bboxes:
            x1, y1, x2, y2, score = ann
            if score < score_threshold:
                continue

            # Rotate the 4 corners back into the original frame:
            # p' = A @ p + t for each corner.
            poly_r = np.asarray([(M[0][0] * x + M[0][1] * y + M[0][2],
                                  M[1][0] * x + M[1][1] * y + M[1][2])
                                 for (x, y) in
                                 [(x1, y1), (x1, y2), (x2, y1), (x2, y2)]])

            # Undo the preprocessing resize (poly2ann defined elsewhere;
            # presumably returns an axis-aligned [x1, y1, x2, y2]).
            ann = poly2ann(poly_r, score, scale=scale)

            data = dict()
            data['name'] = img_file
            data['category'] = label_id + 1  # dataset classes are 1-based
            data['bbox'] = [float(ann[0]), float(ann[1]), float(ann[2]), float(ann[3])]
            data['score'] = float(score)

            json_results.append(data)

    return json_results
最后经过nms
和一系列后处理之后,映射到原图上即可。
完美收工!
转载:https://blog.csdn.net/weixin_47479625/article/details/113449495