add code for both GPU and CPU

DucAnh 2020-07-22 11:01:12 +09:00
parent 7ff9246af5
commit 31e96013c5
8 changed files with 50 additions and 38 deletions

File diff suppressed because one or more lines are too long

Binary file not shown (image changed; 4.0 MiB before and after).

View File

@@ -6,6 +6,7 @@ import torch.nn as nn
 class Gru_cond_layer(nn.Module):
     def __init__(self, params):
         super(Gru_cond_layer, self).__init__()
+        self.cuda = params['cuda']
         # attention
         self.conv_Ua = nn.Conv2d(params['D'], params['dim_attention'], kernel_size=1)
         self.fc_Wa = nn.Linear(params['n'], params['dim_attention'], bias=False)
@@ -44,16 +45,24 @@ class Gru_cond_layer(nn.Module):
         if one_step:
             if mask is None:
-                mask = torch.ones(embedding.shape[0]).cuda()
+                mask = torch.ones(embedding.shape[0])
+                if self.cuda:
+                    mask.cuda()
             h2ts, cts, alphas, alpha_pasts = self._step_slice(mask, state_below_r, state_below_z, state_below_h,
                                                               init_state, context, context_mask, alpha_past, Ua_ctx)
         else:
-            alpha_past = torch.zeros(n_samples, context.shape[2], context.shape[3]).cuda()
+            alpha_past = torch.zeros(n_samples, context.shape[2], context.shape[3])
             h2t = init_state
-            h2ts = torch.zeros(n_steps, n_samples, params['n']).cuda()
-            cts = torch.zeros(n_steps, n_samples, params['D']).cuda()
-            alphas = (torch.zeros(n_steps, n_samples, context.shape[2], context.shape[3])).cuda()
-            alpha_pasts = torch.zeros(n_steps, n_samples, context.shape[2], context.shape[3]).cuda()
+            h2ts = torch.zeros(n_steps, n_samples, params['n'])
+            cts = torch.zeros(n_steps, n_samples, params['D'])
+            alphas = (torch.zeros(n_steps, n_samples, context.shape[2], context.shape[3]))
+            alpha_pasts = torch.zeros(n_steps, n_samples, context.shape[2], context.shape[3])
+            if self.cuda:
+                alpha_past.cuda()
+                h2ts.cuda()
+                cts.cuda()
+                alphas.cuda()
+                alpha_pasts.cuda()
             for i in range(n_steps):
                 h2t, ct, alpha, alpha_past = self._step_slice(mask[i], state_below_r[i], state_below_z[i],
                                                               state_below_h[i], h2t, context, context_mask, alpha_past,
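A note on the pattern above: torch.Tensor.cuda() is not an in-place operation; it returns a new tensor on the GPU, so bare calls such as mask.cuda() or h2ts.cuda() leave the original CPU tensors unchanged. A minimal device-safe sketch of the same allocations (hypothetical, not part of this commit; it reuses the boolean self.cuda flag introduced here):

    # Sketch only: allocate on the target device instead of moving afterwards.
    device = torch.device('cuda' if self.cuda else 'cpu')
    mask = torch.ones(embedding.shape[0], device=device)
    alpha_past = torch.zeros(n_samples, context.shape[2], context.shape[3], device=device)
    h2ts = torch.zeros(n_steps, n_samples, params['n'], device=device)
    # ...likewise for cts, alphas and alpha_pasts, or reassign explicitly:
    # mask = mask.cuda() if self.cuda else mask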

View File

@@ -21,14 +21,19 @@ class My_Embedding(nn.Module):
     def __init__(self, params):
         super(My_Embedding, self).__init__()
         self.embedding = nn.Embedding(params['K'], params['m'])
+        self.cuda = params['cuda']
     def forward(self, params, y):
         if y.sum() < 0.:
-            emb = torch.zeros(1, params['m']).cuda()
+            emb = torch.zeros(1, params['m'])
+            if self.cuda:
+                emb.cuda()
         else:
             emb = self.embedding(y)
             if len(emb.shape) == 3: # only for training stage
-                emb_shifted = torch.zeros([emb.shape[0], emb.shape[1], params['m']], dtype=torch.float32).cuda()
+                emb_shifted = torch.zeros([emb.shape[0], emb.shape[1], params['m']], dtype=torch.float32)
+                if self.cuda:
+                    emb_shifted.cuda()
                 emb_shifted[1:] = emb[:-1]
                 emb = emb_shifted
         return emb
@@ -43,6 +48,7 @@ class Encoder_Decoder(nn.Module):
         self.emb_model = My_Embedding(params)
         self.gru_model = Gru_cond_layer(params)
         self.gru_prob_model = Gru_prob(params)
+        self.cuda = params['cuda']
     def forward(self, params, x, x_mask, y, y_mask, one_step=False):
         # recover permute
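One more caution about this flag: self.cuda = params['cuda'] stores a boolean under the same name as nn.Module's built-in cuda() method, so the method is shadowed on these instances and the later OCR = OCR.cuda() call in test.py would raise "'bool' object is not callable". A hypothetical alternative (the names use_cuda and device are assumptions, not from this commit):

    # Sketch only: keep the flag under a name that does not shadow nn.Module.cuda().
    self.use_cuda = params['cuda']
    self.device = torch.device('cuda' if self.use_cuda else 'cpu')
    # tensors created later in forward() can then pass device=self.device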

View File

@@ -41,7 +41,7 @@ def saveResult(img_file, img, boxes, dirname='./result/', verticals=None, texts=
         None
     """
     img = np.array(img)
-    print (np.shape(img))
+    #print (np.shape(img))
     result = np.zeros((np.shape(img)[0], np.shape(img)[1]))
     # make result file list
     filename, file_ext = os.path.splitext(os.path.basename(img_file))

View File

@@ -1,7 +1,9 @@
Python==3.7.7
torch==1.4.0
torchvision==0.2.1
opencv-python==3.4.2.17
scikit-image==0.14.2
scipy==1.1.0
Polygon3
pillow==4.3.0

test.py
View File

@@ -187,6 +187,7 @@ def test(text_detection_modelpara, ocr_modelpara, dictionary_target):
     params['bottleneck'] = True
     params['use_dropout'] = True
     params['input_channels'] = 3
+    params['cuda'] = args.cuda
     # load model
     OCR = Encoder_Decoder(params)
@@ -194,9 +195,8 @@ def test(text_detection_modelpara, ocr_modelpara, dictionary_target):
        OCR.load_state_dict(copyStateDict(torch.load(ocr_modelpara)))
    else:
        OCR.load_state_dict(copyStateDict(torch.load(ocr_modelpara, map_location='cpu')))
    if args.cuda:
        OCR = OCR.cuda()
    #OCR = OCR.cuda()
    OCR = torch.nn.DataParallel(OCR)
    cudnn.benchmark = False
@@ -288,9 +288,11 @@ def test(text_detection_modelpara, ocr_modelpara, dictionary_target):
             mat[0,:,:] = 0.299* input_img[:, :, 0] + 0.587 * input_img[:, :, 1] + 0.114 * input_img[:, :, 2]
             xx_pad = mat.astype(np.float32) / 255.
-            xx_pad = torch.from_numpy(xx_pad[None, :, :, :]).cuda() # (1,1,H,W)
+            xx_pad = torch.from_numpy(xx_pad[None, :, :, :]) # (1,1,H,W)
+            if args.cuda:
+                xx_pad.cuda()
             with torch.no_grad():
-                sample, score, alpha_past_list = gen_sample(OCR, xx_pad, params, True, k=10, maxlen=600)
+                sample, score, alpha_past_list = gen_sample(OCR, xx_pad, params, args.cuda, k=10, maxlen=600)
             score = score / np.array([len(s) for s in sample])
             ss = sample[score.argmin()]
             alpha_past = alpha_past_list[score.argmin()]
@@ -327,7 +329,7 @@ def test(text_detection_modelpara, ocr_modelpara, dictionary_target):
            image = cv2_putText_1(img = image, text = result, org = (min_x, max_x, min_y, max_y), fontFace = fontPIL, fontScale = size, color = colorBGR)
            print('save image')
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
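In the test driver above, the same reassignment issue applies: xx_pad.cuda() on its own does not move xx_pad, so with args.cuda set the input would stay on the CPU while the model sits on the GPU. A hedged sketch of the conventional setup (the device variable is an assumption; the other calls mirror the committed code):

    # Sketch only: choose the device once and move model and inputs explicitly.
    device = torch.device('cuda' if args.cuda else 'cpu')
    OCR = Encoder_Decoder(params).to(device)
    OCR = torch.nn.DataParallel(OCR)  # with no visible GPUs, DataParallel just calls the wrapped module
    # ... per-image preprocessing as above ...
    xx_pad = torch.from_numpy(xx_pad[None, :, :, :]).to(device)  # (1,1,H,W)
    sample, score, alpha_past_list = gen_sample(OCR, xx_pad, params, args.cuda, k=10, maxlen=600)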

View File

@@ -114,23 +114,7 @@ def load_mapping(dictFile):
 # create batch
 def prepare_data(options, images_x, seqs_y, prev_x = None):
-    '''if prev_x!= None and len(images_x) == len(prev_x) and np.random.random_sample() > 0.7:
-        for i in range(len(images_x)):
-            #print(np.shape(images_x[i]))
-            images_x[i] = images_x[i]*0.7 + prev_x[i] * 0.3'''
-    '''if np.random.random_sample() > 0.7:
-        for i in range(len(images_x)):
-            #print(np.shape(images_x[i][0]))
-            if np.shape(images_x[i][0])[0] <= 100 or np.shape(images_x[i][0])[1] <= 100: continue
-            img = Image.fromarray(images_x[i][0])
-            img = sk.perform_operation([img])
-            img = br.perform_operation([img[0]])
-            #img[0].save(str(i) + '.jpg')
-            img = np.asarray(img[0])
-            images_x[i][0] = img
-            #print(np.shape(images_x[i][0]))'''
     heights_x = [s.shape[1] for s in images_x]
     widths_x = [s.shape[2] for s in images_x]
     lengths_y = [len(s) for s in seqs_y]
@@ -167,14 +151,23 @@ def gen_sample(model, x, params, gpu_flag, k=1, maxlen=30):
     else:
         next_state, ctx0 = model.f_init(x)
     next_w = -1 * np.ones((1,)).astype(np.int64)
-    next_w = torch.from_numpy(next_w).cuda()
-    next_alpha_past = torch.zeros(1, ctx0.shape[2], ctx0.shape[3]).cuda()
+    next_w = torch.from_numpy(next_w)
+    next_alpha_past = torch.zeros(1, ctx0.shape[2], ctx0.shape[3])
     ctx0 = ctx0.cpu().numpy()
+    if gpu_flag:
+        next_w.cuda()
+        next_alpha_past.cuda()
     for ii in range(maxlen):
         ctx = np.tile(ctx0, [live_k, 1, 1, 1])
-        ctx = torch.from_numpy(ctx).cuda()
+        ctx = torch.from_numpy(ctx)
         if gpu_flag:
+            ctx.cuda()
+            next_w.cuda()
+            next_state.cuda()
+            next_alpha_past.cuda()
             next_p, next_state, next_alpha_past, alpha = model.module.f_next(params, next_w, None, ctx, None, next_state,
                                                                              next_alpha_past, True)
         else:
@@ -235,9 +228,9 @@ def gen_sample(model, x, params, gpu_flag, k=1, maxlen=30):
         #next_alpha_past = np.array(hyp_alpha_past)
         next_alpha_past = np.array([w[-1] for w in hyp_alpha_past])
         #print (np.shape(next_alpha_past))
-        next_w = torch.from_numpy(next_w).cuda()
-        next_state = torch.from_numpy(next_state).cuda()
-        next_alpha_past = torch.from_numpy(next_alpha_past).cuda()
+        next_w = torch.from_numpy(next_w)
+        next_state = torch.from_numpy(next_state)
+        next_alpha_past = torch.from_numpy(next_alpha_past)
     return sample, sample_score, sample_alpha
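The bare next_w.cuda() and ctx.cuda() calls in gen_sample have the same no-op problem, and in the hunks shown above the tensors rebuilt from NumPy at the end of each decoding step are left on the CPU even when gpu_flag is set. A minimal device-aware sketch of the affected lines (a hypothetical rewrite, not the committed code):

    # Sketch only: derive a device from gpu_flag and keep every tensor on it.
    device = torch.device('cuda' if gpu_flag else 'cpu')
    next_w = torch.from_numpy(next_w).to(device)
    next_alpha_past = torch.zeros(1, ctx0.shape[2], ctx0.shape[3], device=device)
    ctx = torch.from_numpy(np.tile(ctx0, [live_k, 1, 1, 1])).to(device)
    # ...and inside the loop, after re-ranking the hypotheses:
    next_w = torch.from_numpy(next_w).to(device)
    next_state = torch.from_numpy(next_state).to(device)
    next_alpha_past = torch.from_numpy(next_alpha_past).to(device)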