###for icdar2015####
"""Training datasets for CRAFT-style text detection.

The module provides image augmentation helpers (scale, pad, crop, flip,
rotate), a common dataset base class that turns character boxes into
region/affinity score maps, and concrete datasets for character-annotated
synthetic data (``Synth80k``) and word-annotated ICDAR-style data
(``ICDAR2013``, ``ICDAR2015``) whose character boxes are estimated with an
interim network.
"""
import torch
import torch.utils.data as data
import scipy.io as scio
from gaussian import GaussianTransformer
from watershed import watershed
import re
import itertools
# NOTE(review): `cv2` and `imgproc` are not imported explicitly in this file;
# they are presumably re-exported by this star import -- confirm.
from file_utils import *
from mep import mep
import random
from PIL import Image
import torchvision.transforms as transforms
import craft_utils
import Polygon as plg
import time
import numpy
import os
import numpy as np


def ratio_area(h, w, box):
    """Return ``(ratio, area)`` for an image of size ``h`` x ``w``.

    ``ratio`` is the largest polygon area among ``box`` divided by the image
    area ``h * w`` (0 when ``box`` is empty); ``area`` is ``h * w``.
    """
    area = h * w
    ratio = 0
    for quad in box:
        ratio = max(ratio, plg.Polygon(quad).area() / area)
    return ratio, area


def rescale_img(img, box, h, w, size=768):
    """Resize ``img`` so its longer side is ``size`` and paste it top-left.

    :param img: source image (3 channels expected by the canvas).
    :param box: numeric array of coordinates; scaled IN PLACE.
    :param h: image height used to compute the scale.
    :param w: image width used to compute the scale.
    :param size: side of the square output canvas (768 is the train size).
    :return: ``size`` x ``size`` x 3 ``uint8`` canvas containing the image.
    """
    canvas = np.zeros((size, size, 3), dtype=np.uint8)
    scale = size / max(h, w)
    img = cv2.resize(img, dsize=None, fx=scale, fy=scale)
    canvas[:img.shape[0], :img.shape[1]] = img
    box *= scale
    return canvas


def random_scale(img, bboxes, min_size):
    """Randomly rescale ``img`` and scale ``bboxes`` IN PLACE to match.

    The longer side is first limited to 1280 px, then a factor is drawn from
    {0.5, 1.0, 1.5, 2.0}.  If the shorter side would end up at or below
    ``min_size`` the scale is replaced by the one that makes the shorter side
    ``min_size + 10``.

    :param img: image array (height and width in the first two axes).
    :param bboxes: float array of coordinates, multiplied in place.
    :param min_size: minimum allowed length of the shorter side.
    :return: the resized image.
    """
    h, w = img.shape[0:2]
    scale = 1.0
    if max(h, w) > 1280:
        scale = 1280.0 / max(h, w)
    factor = np.random.choice(np.array([0.5, 1.0, 1.5, 2.0]))
    if min(h, w) * scale * factor <= min_size:
        scale = (min_size + 10) * 1.0 / min(h, w)
    else:
        scale = scale * factor
    bboxes *= scale
    img = cv2.resize(img, dsize=None, fx=scale, fy=scale)
    return img


def padding_image(image, imgsize):
    """Fit ``image`` into a zero-filled ``imgsize`` x ``imgsize`` canvas.

    The image is resized so that its longer side equals ``imgsize`` and is
    pasted in the top-left corner.

    :param image: 2-D map or 3-D (H, W, C) image.
    :param imgsize: side length of the square output.
    :return: padded array with the same dtype and channel count as ``image``.
    """
    length = max(image.shape[0:2])
    # Keep the input dtype: a uint8 canvas would truncate float maps such as
    # the confidence mask (values in [0, 1]) to 0/1.
    if len(image.shape) == 3:
        # The channel count is shape[2], not the number of dimensions.
        canvas = np.zeros((imgsize, imgsize, image.shape[2]), dtype=image.dtype)
    else:
        canvas = np.zeros((imgsize, imgsize), dtype=image.dtype)
    scale = imgsize / length
    image = cv2.resize(image, dsize=None, fx=scale, fy=scale)
    if len(image.shape) == 3:
        canvas[:image.shape[0], :image.shape[1], :] = image
    else:
        canvas[:image.shape[0], :image.shape[1]] = image
    return canvas


def random_crop(imgs, img_size, character_bboxes):
    """Crop every array in ``imgs`` with one shared, randomly chosen window.

    With probability 0.4 (and if there are boxes) the window is chosen so
    that it tends to contain one randomly selected word; otherwise the whole
    image is kept and shrunk/padded to the target size.

    :param imgs: list of arrays sharing the same height/width; the list is
        modified in place and also returned.
    :param img_size: ``(target_height, target_width)``.
    :param character_bboxes: per-word arrays indexed as ``[:, :, 0]`` (x) and
        ``[:, :, 1]`` (y).
    :return: ``imgs`` with every entry cropped (and padded when the crop is
        larger than the target).
    """
    h, w = imgs[0].shape[0:2]
    th, tw = img_size
    crop_h, crop_w = img_size
    if w == tw and h == th:
        return imgs

    # Axis-aligned [min corner, max corner] per word.
    word_bboxes = []
    if len(character_bboxes) > 0:
        for bboxes in character_bboxes:
            word_bboxes.append(
                [[bboxes[:, :, 0].min(), bboxes[:, :, 1].min()],
                 [bboxes[:, :, 0].max(), bboxes[:, :, 1].max()]])
    word_bboxes = np.array(word_bboxes, np.int32)

    #### IC15 for 0.6, MLT for 0.35 #####
    if random.random() > 0.6 and len(word_bboxes) > 0:
        sample = word_bboxes[random.randint(0, len(word_bboxes) - 1)]
        x_min, y_min = int(sample[0, 0]), int(sample[0, 1])
        x_max, y_max = int(sample[1, 0]), int(sample[1, 1])
        # Smallest offsets that still keep the word's far corner in the window
        # (the horizontal bound uses the target WIDTH, the vertical the height).
        left = max(x_max - tw, 0)
        top = max(y_max - th, 0)
        if min(y_min, h - th) < top or min(x_min, w - tw) < left:
            # The word cannot fit in one window: fall back to any position.
            i = random.randint(0, h - th)
            j = random.randint(0, w - tw)
        else:
            i = random.randint(top, min(y_min, h - th))
            j = random.randint(left, min(x_min, w - tw))
        crop_h = y_max if th < y_max - i else th
        crop_w = x_max if tw < x_max - j else tw
    else:
        # Keep the full image (MLT setting); the IC15 setting used a random
        # (i, j) inside the image instead.  The +1 makes crop_h/crop_w exceed
        # the target so the padding/resize branch below always runs.
        i, j = 0, 0
        crop_h, crop_w = h + 1, w + 1

    for idx in range(len(imgs)):
        if len(imgs[idx].shape) == 3:
            imgs[idx] = imgs[idx][i:i + crop_h, j:j + crop_w, :]
        else:
            imgs[idx] = imgs[idx][i:i + crop_h, j:j + crop_w]
        if crop_w > tw or crop_h > th:
            imgs[idx] = padding_image(imgs[idx], tw)
    return imgs


def random_horizontal_flip(imgs):
    """Flip all arrays in ``imgs`` left-right together with probability 0.5."""
    if random.random() < 0.5:
        for i in range(len(imgs)):
            imgs[i] = np.flip(imgs[i], axis=1).copy()
    return imgs


def random_rotate(imgs):
    """Rotate all arrays in ``imgs`` by one shared angle in [-10, 10) degrees."""
    max_angle = 10
    angle = random.random() * 2 * max_angle - max_angle
    for i in range(len(imgs)):
        img = imgs[i]
        rows, cols = img.shape[:2]
        # OpenCV takes the centre as (x, y) and the output size as (w, h).
        rotation_matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
        imgs[i] = cv2.warpAffine(img, rotation_matrix, (cols, rows))
    return imgs


def _output_path(filename):
    """Return ``<module dir>/output/<filename>``, creating the directory."""
    # abspath: a bare relative __file__ would otherwise resolve to "/output".
    module_dir = os.path.dirname(os.path.abspath(__file__))
    outpath = os.path.join(module_dir, 'output', filename)
    os.makedirs(os.path.dirname(outpath), exist_ok=True)
    return outpath


def _fit_word_quad(box, gt_path):
    """Return the ``mep`` quadrilateral of ``box``, starting near ``box[0]``.

    The four points returned by ``mep`` are rotated cyclically so that the
    first one is the vertex closest to the first annotated corner.  If
    ``mep`` raises, the error is printed with ``gt_path`` and the annotated
    ``box`` itself is returned so that parsing can continue.
    """
    try:
        area, p0, p3, p2, p1, _, _ = mep(box)
    except Exception as e:
        print(e, gt_path)
        return box
    bbox = np.array([p0, p1, p2, p3])
    nearest = int(np.argmin([np.linalg.norm(box[0] - corner) for corner in bbox]))
    return np.roll(bbox, -nearest, axis=0)


def _read_word_annotations(gt_path, text_start, skip_blank_words):
    """Parse a comma-separated word-level ground-truth file.

    Each non-empty line holds eight integer coordinates followed by further
    fields; the transcription is fields ``text_start`` onwards re-joined with
    commas.  A UTF-8 byte-order mark at the start of a line is ignored.

    :param gt_path: path of the annotation file.
    :param text_start: index of the first transcription field.
    :param skip_blank_words: drop entries whose transcription is blank.
    :return: ``(bboxes, words)``; ``'###'`` entries keep the raw annotated box.
    """
    bboxes = []
    words = []
    with open(gt_path, encoding='utf-8') as gt_file:
        lines = gt_file.readlines()
    for line in lines:
        line = line.strip().lstrip('\ufeff')
        if not line:
            continue
        fields = line.split(',')
        box = np.array([int(fields[j]) for j in range(8)], np.int32).reshape(4, 2)
        word = ','.join(fields[text_start:])
        if word == '###':
            words.append('###')
            bboxes.append(box)
            continue
        if skip_blank_words and len(word.strip()) == 0:
            continue
        bboxes.append(_fit_word_quad(box, gt_path))
        words.append(word)
    return bboxes, words


class craft_base_dataset(data.Dataset):
    """Common sample pipeline; subclasses supply images and character boxes."""

    def __init__(self, target_size=768, viz=False, debug=False):
        """
        :param target_size: side length of the square training crop.
        :param viz: when true, ``pull_item`` writes visualisations to disk.
        :param debug: stored on the instance; not read by this class.
        """
        self.target_size = target_size
        self.viz = viz
        self.debug = debug
        self.gaussianTransformer = GaussianTransformer(imgSize=1024, region_threshold=0.35,
                                                       affinity_threshold=0.15)

    def load_image_gt_and_confidencemask(self, index):
        '''
        Return the image, character boxes, text contents, confidence mask and
        per-word confidences for the given index (overridden by subclasses).
        :param index: sample index
        :return: a 5-tuple; this base implementation returns five ``None``
        '''
        return None, None, None, None, None

    def crop_image_by_bbox(self, image, box):
        """Warp the quadrilateral ``box`` out of ``image`` into a rectangle.

        Boxes whose second edge is more than 1.5x the first are mapped with
        their corners shifted by one position, so the output width is the
        longer edge.

        :return: ``(warped, M)`` with ``M`` the 3x3 perspective matrix.
        """
        w = int(np.linalg.norm(box[0] - box[1]))
        h = int(np.linalg.norm(box[0] - box[3]))
        width, height = w, h
        if h > w * 1.5:
            width, height = h, w
            corners = [[width, 0], [width, height], [0, height], [0, 0]]
        else:
            corners = [[0, 0], [width, 0], [width, height], [0, height]]
        M = cv2.getPerspectiveTransform(np.float32(box), np.float32(np.array(corners)))
        warped = cv2.warpPerspective(image, M, (width, height))
        return warped, M

    def get_confidence(self, real_len, pursedo_len):
        """Score agreement between true and predicted character counts.

        Returns ``(real_len - min(real_len, |real_len - pursedo_len|)) /
        real_len``, or 0 when either count is 0.
        """
        if pursedo_len == 0 or real_len == 0:
            return 0.
        return (real_len - min(real_len, abs(real_len - pursedo_len))) / real_len

    def inference_pursedo_bboxes(self, net, image, word_bbox, word, viz=False, gt_path=None):
        """Estimate character boxes for one annotated word.

        The word is cropped, resized to a height of 64 px and passed through
        ``net``; candidate boxes come from ``watershed`` on the region score.
        If the resulting confidence is at most 0.5 the crop is instead split
        uniformly into one box per non-whitespace character and the
        confidence is set to 0.5.  Boxes are finally mapped back into
        ``image`` coordinates and clipped to the image.

        :param net: model returning ``(scores, _)``; input is moved to CUDA.
        :param image: full image the word box refers to.
        :param word_bbox: 4x2 quadrilateral of the word.
        :param word: transcription of the word.
        :param viz: accepted for compatibility; currently unused.
        :param gt_path: optional annotation path, used only in error output.
        :return: ``(bboxes, region_scores, confidence)``.
        """
        word_image, MM = self.crop_image_by_bbox(image, word_bbox)

        # str.replace('\s', '') only removed a literal backslash-s; a regex is
        # needed to drop whitespace from the character count.
        real_char_nums = len(re.sub(r'\s', '', word))
        crop = word_image.copy()
        scale = 64.0 / crop.shape[0]
        crop = cv2.resize(crop, None, fx=scale, fy=scale)

        img_torch = torch.from_numpy(imgproc.normalizeMeanVariance(crop, mean=(0.485, 0.456, 0.406),
                                                                   variance=(0.229, 0.224, 0.225)))
        img_torch = img_torch.permute(2, 0, 1).unsqueeze(0)
        img_torch = img_torch.type(torch.FloatTensor).cuda()
        # Only the score values are used, so no autograd graph is needed.
        with torch.no_grad():
            scores, _ = net(img_torch)
        region_scores = scores[0, :, :, 0].cpu().data.numpy()
        region_scores = np.uint8(np.clip(region_scores, 0, 1) * 255)
        bgr_region_scores = cv2.resize(region_scores, (crop.shape[1], crop.shape[0]))
        bgr_region_scores = cv2.cvtColor(bgr_region_scores, cv2.COLOR_GRAY2BGR)
        pursedo_bboxes = watershed(crop, bgr_region_scores, False)

        # Discard boxes whose mean coordinate is at most 2.
        kept = []
        for candidate in pursedo_bboxes:
            if np.mean(candidate.ravel()) > 2:
                kept.append(candidate)
            else:
                print("filter bboxes", candidate)
        pursedo_bboxes = np.array(kept, np.float32)
        if pursedo_bboxes.shape[0] > 1:
            # Order boxes by the x coordinate of their first vertex.
            pursedo_bboxes = pursedo_bboxes[np.argsort(pursedo_bboxes[:, 0, 0])]

        confidence = self.get_confidence(real_char_nums, len(pursedo_bboxes))

        if confidence <= 0.5:
            # Fallback: split the crop evenly, one slot per character of the
            # transcription, emitting boxes for non-whitespace characters only.
            width = crop.shape[1]
            height = crop.shape[0]
            width_per_char = width / len(word)
            bboxes = []
            for i, char in enumerate(word):
                if char.isspace():
                    continue
                left = i * width_per_char
                right = (i + 1) * width_per_char
                bboxes.append(np.array([[left, 0], [right, 0], [right, height], [left, height]]))
            bboxes = np.array(bboxes, np.float32)
            confidence = 0.5
        else:
            bboxes = pursedo_bboxes

        # Undo the resize, then undo the perspective warp.
        bboxes /= scale
        try:
            inverse = np.linalg.inv(MM)
            for j in range(len(bboxes)):
                homogeneous = np.concatenate([bboxes[j], np.ones((4, 1))], axis=-1)
                projected = np.matmul(inverse, homogeneous.T).T
                # A perspective map needs the division by the third coordinate.
                bboxes[j] = projected[:, :2] / projected[:, 2:3]
        except Exception as e:
            print(e, gt_path)

        bboxes[:, :, 1] = np.clip(bboxes[:, :, 1], 0., image.shape[0] - 1)
        bboxes[:, :, 0] = np.clip(bboxes[:, :, 0], 0., image.shape[1] - 1)

        return bboxes, region_scores, confidence

    def resizeGt(self, gtmask):
        """Resize a target map to half of ``target_size`` in each dimension."""
        half = self.target_size // 2
        return cv2.resize(gtmask, (half, half))

    def get_imagename(self, index):
        """Return the image name for ``index`` (``None`` in the base class)."""
        return None

    def saveInput(self, imagename, image, region_scores, affinity_scores, confidence_mask):
        """Write ``output/<imagename>_input.jpg`` showing a training sample.

        The picture contains the input image with boxes returned by
        ``craft_utils.getDetBoxes`` on the target maps, next to the region
        and affinity heat maps and the confidence mask.
        """
        boxes, polys = craft_utils.getDetBoxes(region_scores / 255, affinity_scores / 255, 0.7, 0.4, 0.4, False)
        # Target maps are half the image size, hence the factor 2.
        boxes = np.array(boxes, np.int32) * 2
        # Draw on a copy so the caller's training image is left untouched.
        canvas = image.copy()
        if len(boxes) > 0:
            # np.clip returns a new array; the result has to be stored.
            boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, image.shape[1])
            boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, image.shape[0])
            for box in boxes:
                cv2.polylines(canvas, [np.reshape(box, (-1, 1, 2))], True, (0, 0, 255))
        target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores / 255)
        target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores / 255)
        confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)
        gt_scores = np.hstack([target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color])
        confidence_mask_gray = np.hstack([np.zeros_like(confidence_mask_gray), confidence_mask_gray])
        output = np.concatenate([gt_scores, confidence_mask_gray], axis=0)
        output = np.hstack([canvas, output])
        cv2.imwrite(_output_path("%s_input.jpg" % imagename), output)

    def saveImage(self, imagename, image, bboxes, affinity_bboxes, region_scores, affinity_scores, confidence_mask):
        """Write ``output/<imagename>`` with boxes, heat maps and confidence.

        Affinity boxes are drawn with colour (255, 0, 0) and character boxes
        with (0, 0, 255) on a BGR copy of ``image``.
        """
        output_image = np.uint8(image.copy())
        output_image = cv2.cvtColor(output_image, cv2.COLOR_RGB2BGR)
        if len(bboxes) > 0:
            for quad in np.int32(affinity_bboxes):
                cv2.polylines(output_image, [np.reshape(quad, (-1, 1, 2))], True, (255, 0, 0))
            for word_boxes in bboxes:
                for quad in np.int32(word_boxes):
                    cv2.polylines(output_image, [np.reshape(quad, (-1, 1, 2))], True, (0, 0, 255))

        target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores / 255)
        target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores / 255)
        heat_map = np.concatenate([target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color], axis=1)
        confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)
        output = np.concatenate([output_image, heat_map, confidence_mask_gray], axis=1)
        cv2.imwrite(_output_path(imagename), output)

    def pull_item(self, index):
        """Build one training sample.

        :return: ``(image, region_scores, affinity_scores, confidence_mask,
            confidences)`` where the first four are float tensors (image is
            channel-first, score maps are divided by 255) and ``confidences``
            is the mean word confidence (1.0 when there are no words).
        """
        image, character_bboxes, words, confidence_mask, confidences = self.load_image_gt_and_confidencemask(index)
        if len(confidences) == 0:
            confidences = 1.0
        else:
            confidences = np.array(confidences).mean()

        region_scores = np.zeros((image.shape[0], image.shape[1]), dtype=np.float32)
        affinity_scores = np.zeros((image.shape[0], image.shape[1]), dtype=np.float32)
        affinity_bboxes = []
        if len(character_bboxes) > 0:
            region_scores = self.gaussianTransformer.generate_region(region_scores.shape, character_bboxes)
            affinity_scores, affinity_bboxes = self.gaussianTransformer.generate_affinity(region_scores.shape,
                                                                                          character_bboxes,
                                                                                          words)
        if self.viz:
            self.saveImage(self.get_imagename(index), image.copy(), character_bboxes, affinity_bboxes,
                           region_scores, affinity_scores, confidence_mask)

        # Image and all target maps must go through the same crop.
        random_transforms = [image, region_scores, affinity_scores, confidence_mask]
        random_transforms = random_crop(random_transforms, (self.target_size, self.target_size), character_bboxes)
        # random_horizontal_flip / random_rotate are available but disabled.
        cvimage, region_scores, affinity_scores, confidence_mask = random_transforms

        region_scores = self.resizeGt(region_scores)
        affinity_scores = self.resizeGt(affinity_scores)
        confidence_mask = self.resizeGt(confidence_mask)

        if self.viz:
            self.saveInput(self.get_imagename(index), cvimage, region_scores, affinity_scores, confidence_mask)

        image = Image.fromarray(cvimage)
        image = image.convert('RGB')
        image = transforms.ColorJitter(brightness=32.0 / 255, saturation=0.5)(image)
        image = imgproc.normalizeMeanVariance(np.array(image), mean=(0.485, 0.456, 0.406),
                                              variance=(0.229, 0.224, 0.225))
        image = torch.from_numpy(image).float().permute(2, 0, 1)
        region_scores_torch = torch.from_numpy(region_scores / 255).float()
        affinity_scores_torch = torch.from_numpy(affinity_scores / 255).float()
        confidence_mask_torch = torch.from_numpy(confidence_mask).float()
        return image, region_scores_torch, affinity_scores_torch, confidence_mask_torch, confidences


class Synth80k(craft_base_dataset):
    """Character-annotated dataset read from ``<folder>/img`` and ``<folder>/label``."""

    def __init__(self, synthtext_folder, target_size=768, viz=False, debug=False):
        """Read every label file in ``<synthtext_folder>/label/``.

        Each label line is split on commas: the first field is stored as the
        line's direction and the remaining fields are character box
        coordinates, eight numbers per character.  Transcriptions are not
        read; each line gets a placeholder word of ``'c'`` repeated once per
        character.  An earlier loader based on ``gt.mat`` was replaced by
        this per-image text format.
        """
        super(Synth80k, self).__init__(target_size, viz, debug)
        self.synthtext_folder = synthtext_folder + '/img'
        label_dir = synthtext_folder + '/label/'
        labels = os.listdir(label_dir)

        charbox = []
        imgtxt = []
        linedirection = []
        image = []
        for txt in labels:
            # Swap only the extension; str.replace('txt', 'jpg') would also
            # rewrite any "txt" occurring inside the file name itself.
            image.append(os.path.splitext(txt)[0] + '.jpg')
            with open(label_dir + txt) as f:
                content = [x.strip() for x in f.readlines()]

            page = []
            charBB = []
            direction = []
            for line in content:
                tmp = line.split(',')
                num = len(tmp) - 1
                charBB += tmp[1:]
                direction.append(tmp[0])
                page.append('c' * (num // 8))

            num = len(charBB)
            charBB = list(map(float, charBB))
            # Stored as (2, 4, N); transposed back to (N, 4, 2) when loaded.
            charBB = numpy.reshape(charBB, (num // 8, 4, 2)).transpose((2, 1, 0))
            charbox.append(charBB)
            imgtxt.append(page)
            linedirection.append(direction)

        self.charbox = charbox
        self.image = numpy.reshape(image, (len(image), 1))
        self.imgtxt = imgtxt
        self.direction = linedirection

    def __getitem__(self, index):
        return self.pull_item(index)

    def __len__(self):
        return len(self.imgtxt)

    def get_imagename(self, index):
        """Return the image file name of sample ``index``."""
        return self.image[index][0]

    def load_image_gt_and_confidencemask(self, index):
        '''
        Load the ground truth for the given index.
        :param index: sample index
        :return: image (RGB, randomly blurred and rescaled), per-word
                 character boxes, words, an all-ones confidence mask and a
                 confidence of 1.0 per word
        '''
        img_path = os.path.join(self.synthtext_folder, self.image[index][0])
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if np.random.randint(low=1, high=100) > 80:
            image = cv2.blur(image, (10, 10))
        # Work on a copy: random_scale multiplies the boxes in place.
        _charbox = numpy.copy(self.charbox[index]).transpose((2, 1, 0))
        image = random_scale(image, _charbox, self.target_size)

        words = [re.split(' \n|\n |\n| ', t.strip()) for t in self.imgtxt[index]]
        words = list(itertools.chain(*words))
        words = [t for t in words if len(t) > 0]

        character_bboxes = []
        total = 0
        confidences = []
        for word in words:
            # Consecutive character boxes belong to consecutive words.
            bboxes = _charbox[total:total + len(word)]
            assert (len(bboxes) == len(word))
            total += len(word)
            character_bboxes.append(np.array(bboxes))
            confidences.append(1.0)

        return image, character_bboxes, words, np.ones((image.shape[0], image.shape[1]), np.float32), confidences


class _WordAnnotatedDataset(craft_base_dataset):
    """Shared logic for datasets that only have word-level annotations.

    Subclasses set the image/annotation folders and implement ``load_gt``.
    Character boxes are estimated per word with ``net``.
    """

    def __init__(self, net, img_folder, gt_folder, target_size=768, viz=False, debug=False):
        super(_WordAnnotatedDataset, self).__init__(target_size, viz, debug)
        self.net = net
        self.net.eval()
        self.img_folder = img_folder
        self.gt_folder = gt_folder
        self.images_path = list(os.listdir(self.img_folder))

    def __getitem__(self, index):
        return self.pull_item(index)

    def __len__(self):
        return len(self.images_path)

    def get_imagename(self, index):
        """Return the image file name of sample ``index``."""
        return self.images_path[index]

    def load_image_gt_and_confidencemask(self, index):
        '''
        Load the ground truth for the given index.
        :param index: sample index
        :return: image, estimated character boxes per word, the kept words,
                 the confidence mask and the per-word confidences
        '''
        imagename = self.images_path[index]
        gt_path = os.path.join(self.gt_folder, "gt_%s.txt" % os.path.splitext(imagename)[0])
        word_bboxes, words = self.load_gt(gt_path)
        word_bboxes = np.float32(word_bboxes)

        image_path = os.path.join(self.img_folder, imagename)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = random_scale(image, word_bboxes, self.target_size)

        confidence_mask = np.ones((image.shape[0], image.shape[1]), np.float32)

        character_bboxes = []
        new_words = []
        confidences = []
        if len(word_bboxes) > 0:
            # First pass: zero the mask over ignored ('###' or blank) words.
            for i in range(len(word_bboxes)):
                if words[i] == '###' or len(words[i].strip()) == 0:
                    cv2.fillPoly(confidence_mask, [np.int32(word_bboxes[i])], (0))
            # Second pass: fill every remaining word with its confidence.
            for i in range(len(word_bboxes)):
                if words[i] == '###' or len(words[i].strip()) == 0:
                    continue
                pursedo_bboxes, bbox_region_scores, confidence = self.inference_pursedo_bboxes(
                    self.net, image, word_bboxes[i], words[i], viz=self.viz, gt_path=gt_path)
                confidences.append(confidence)
                cv2.fillPoly(confidence_mask, [np.int32(word_bboxes[i])], (confidence))
                new_words.append(words[i])
                character_bboxes.append(pursedo_bboxes)
        return image, character_bboxes, new_words, confidence_mask, confidences


class ICDAR2013(_WordAnnotatedDataset):
    """Word-annotated dataset under ``images/ch8_training_images`` and ``gt``."""

    def __init__(self, net, icdar2013_folder, target_size=768, viz=False, debug=False):
        super(ICDAR2013, self).__init__(net,
                                        os.path.join(icdar2013_folder, 'images/ch8_training_images'),
                                        os.path.join(icdar2013_folder, 'gt'),
                                        target_size, viz, debug)

    def load_gt(self, gt_path):
        """Parse ``gt_path``; the transcription starts at field 9 and entries
        with a blank transcription are dropped.

        :return: ``(bboxes, words)``.
        """
        return _read_word_annotations(gt_path, text_start=9, skip_blank_words=True)


class ICDAR2015(_WordAnnotatedDataset):
    """Word-annotated dataset under ``ch4_training_images`` and
    ``ch4_training_localization_transcription_gt``."""

    def __init__(self, net, icdar2015_folder, target_size=768, viz=False, debug=False):
        super(ICDAR2015, self).__init__(net,
                                        os.path.join(icdar2015_folder, 'ch4_training_images'),
                                        os.path.join(icdar2015_folder,
                                                     'ch4_training_localization_transcription_gt'),
                                        target_size, viz, debug)

    def load_gt(self, gt_path):
        """Parse ``gt_path``; the transcription starts at field 8.

        :return: ``(bboxes, words)``.
        """
        return _read_word_annotations(gt_path, text_start=8, skip_blank_words=False)


if __name__ == '__main__':
    # Smoke run: iterate the ICDAR2015 loader once (writing visualisations)
    # and report the mean per-image confidence.
    from craft import CRAFT
    from torchutil import copyStateDict

    net = CRAFT(freeze=True)
    net.load_state_dict(
        copyStateDict(torch.load('/data/CRAFT-pytorch/1-7.pth')))
    net = net.cuda()
    net = torch.nn.DataParallel(net)
    net.eval()
    dataloader = ICDAR2015(net, '/data/CRAFT-pytorch/icdar2015', target_size=768, viz=True)
    train_loader = torch.utils.data.DataLoader(
        dataloader,
        batch_size=1,
        shuffle=False,
        num_workers=0,
        drop_last=True,
        pin_memory=True)
    total = 0
    total_sum = 0
    for index, (opimage, region_scores, affinity_scores, confidence_mask, confidences_mean) in enumerate(train_loader):
        total += 1
        total_sum += float(confidences_mean.mean())
    if total:
        print("mean=", total_sum / total)
    else:
        print("mean= n/a (no samples)")