Kindai-OCR/decoder.py

import torch
import torch.nn as nn
# two layers of GRU
class Gru_cond_layer(nn.Module):
    def __init__(self, params):
        super(Gru_cond_layer, self).__init__()
        self.cuda = params['cuda']  # flag from params; note this shadows nn.Module.cuda()

        # attention (coverage-based): conv_Q turns the accumulated attention map
        # into a coverage feature so already-attended positions can be down-weighted
        self.conv_Ua = nn.Conv2d(params['D'], params['dim_attention'], kernel_size=1)
        self.fc_Wa = nn.Linear(params['n'], params['dim_attention'], bias=False)
        self.conv_Q = nn.Conv2d(1, 512, kernel_size=11, bias=False, padding=5)
        self.fc_Uf = nn.Linear(512, params['dim_attention'])
        self.fc_va = nn.Linear(params['dim_attention'], 1)

        # the first GRU layer (input: embedding of the previous symbol, size m)
        self.fc_Wyz = nn.Linear(params['m'], params['n'])
        self.fc_Wyr = nn.Linear(params['m'], params['n'])
        self.fc_Wyh = nn.Linear(params['m'], params['n'])
        self.fc_Uhz = nn.Linear(params['n'], params['n'], bias=False)
        self.fc_Uhr = nn.Linear(params['n'], params['n'], bias=False)
        self.fc_Uhh = nn.Linear(params['n'], params['n'], bias=False)

        # the second GRU layer (input: the attention context vector, size D)
        self.fc_Wcz = nn.Linear(params['D'], params['n'], bias=False)
        self.fc_Wcr = nn.Linear(params['D'], params['n'], bias=False)
        self.fc_Wch = nn.Linear(params['D'], params['n'], bias=False)
        self.fc_Uhz2 = nn.Linear(params['n'], params['n'])
        self.fc_Uhr2 = nn.Linear(params['n'], params['n'])
        self.fc_Uhh2 = nn.Linear(params['n'], params['n'])

    def forward(self, params, embedding, mask=None, context=None, context_mask=None, one_step=False,
                init_state=None, alpha_past=None):
        n_steps = embedding.shape[0]
        n_samples = embedding.shape[1]

        # project the encoder feature map once for the attention term U_a * a
        Ua_ctx = self.conv_Ua(context)
        Ua_ctx = Ua_ctx.permute(2, 3, 0, 1)

        # project the previous-symbol embeddings for the gates of the first GRU
        state_below_z = self.fc_Wyz(embedding)
        state_below_r = self.fc_Wyr(embedding)
        state_below_h = self.fc_Wyh(embedding)
        if one_step:
            # single decoding step; if no mask is given, every sample in the batch is active
            if mask is None:
                mask = torch.ones(embedding.shape[0])
                if self.cuda:
                    mask = mask.cuda()
            h2ts, cts, alphas, alpha_pasts = self._step_slice(mask, state_below_r, state_below_z, state_below_h,
                                                              init_state, context, context_mask, alpha_past, Ua_ctx)
        else:
            # full pass over all n_steps time steps
            alpha_past = torch.zeros(n_samples, context.shape[2], context.shape[3])
            h2t = init_state
            h2ts = torch.zeros(n_steps, n_samples, params['n'])
            cts = torch.zeros(n_steps, n_samples, params['D'])
            alphas = torch.zeros(n_steps, n_samples, context.shape[2], context.shape[3])
            alpha_pasts = torch.zeros(n_steps, n_samples, context.shape[2], context.shape[3])
            if self.cuda:
                # .cuda() is not in-place, so the results must be assigned back
                alpha_past = alpha_past.cuda()
                h2ts = h2ts.cuda()
                cts = cts.cuda()
                alphas = alphas.cuda()
                alpha_pasts = alpha_pasts.cuda()
            for i in range(n_steps):
                h2t, ct, alpha, alpha_past = self._step_slice(mask[i], state_below_r[i], state_below_z[i],
                                                              state_below_h[i], h2t, context, context_mask,
                                                              alpha_past, Ua_ctx)
                h2ts[i] = h2t
                cts[i] = ct
                alphas[i] = alpha
                alpha_pasts[i] = alpha_past

        return h2ts, cts, alphas, alpha_pasts

    # one step of the two GRU layers, with coverage attention in between
    def _step_slice(self, mask, state_below_r, state_below_z, state_below_h, h, ctx, ctx_mask, alpha_past, Ua_ctx):
        # the first GRU layer: update the state from the previous symbol's embedding
        z1 = torch.sigmoid(self.fc_Uhz(h) + state_below_z)
        r1 = torch.sigmoid(self.fc_Uhr(h) + state_below_r)
        h1_p = torch.tanh(self.fc_Uhh(h) * r1 + state_below_h)
        h1 = z1 * h + (1. - z1) * h1_p
        h1 = mask[:, None] * h1 + (1. - mask)[:, None] * h

        # coverage attention: the accumulated attention map alpha_past is passed
        # through conv_Q so the model can avoid re-attending already covered regions
        Wa_h1 = self.fc_Wa(h1)
        alpha_past_ = alpha_past[:, None, :, :]
        cover_F = self.conv_Q(alpha_past_).permute(2, 3, 0, 1)
        cover_vector = self.fc_Uf(cover_F)
        attention_score = torch.tanh(Ua_ctx + Wa_h1[None, None, :, :] + cover_vector)
        alpha = self.fc_va(attention_score)
        alpha = alpha.view(alpha.shape[0], alpha.shape[1], alpha.shape[2])
        alpha = torch.exp(alpha)
        if ctx_mask is not None:
            alpha = alpha * ctx_mask.permute(1, 2, 0)
        # normalize over all spatial positions, then pool the context vector ct
        alpha = alpha / alpha.sum(1).sum(0)[None, None, :]
        alpha_past = alpha_past + alpha.permute(2, 0, 1)
        ct = (ctx * alpha.permute(2, 0, 1)[:, None, :, :]).sum(3).sum(2)

        # the second GRU layer: refine the state with the attention context
        z2 = torch.sigmoid(self.fc_Wcz(ct) + self.fc_Uhz2(h1))
        r2 = torch.sigmoid(self.fc_Wcr(ct) + self.fc_Uhr2(h1))
        h2_p = torch.tanh(self.fc_Wch(ct) + self.fc_Uhh2(h1) * r2)
        h2 = z2 * h1 + (1. - z2) * h2_p
        h2 = mask[:, None] * h2 + (1. - mask)[:, None] * h1
        return h2, ct, alpha.permute(2, 0, 1), alpha_past
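
# Shape conventions for Gru_cond_layer.forward with one_step=False, inferred
# from how the tensors are indexed above (H x W is the spatial size of the
# encoder feature map):
#   embedding:    (n_steps, n_samples, m)
#   mask:         (n_steps, n_samples), 1.0 for real tokens, 0.0 for padding
#   context:      (n_samples, D, H, W)
#   context_mask: (n_samples, H, W)
#   init_state:   (n_samples, n)
# It returns h2ts (n_steps, n_samples, n), cts (n_steps, n_samples, D) and
# alphas / alpha_pasts (n_steps, n_samples, H, W).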

# calculate probabilities
class Gru_prob(nn.Module):
    def __init__(self, params):
        super(Gru_prob, self).__init__()
        self.fc_Wct = nn.Linear(params['D'], params['m'])
        self.fc_Wht = nn.Linear(params['n'], params['m'])
        self.fc_Wyt = nn.Linear(params['m'], params['m'])
        self.dropout = nn.Dropout(p=0.2)
        # maxout halves the feature dimension, hence m / 2 input features here
        self.fc_W0 = nn.Linear(int(params['m'] / 2), params['K'])

    def forward(self, cts, hts, emb, use_dropout):
        logit = self.fc_Wct(cts) + self.fc_Wht(hts) + self.fc_Wyt(emb)
        # maxout: split the m features into m/2 pairs and keep the max of each pair
        shape = logit.shape
        shape2 = int(shape[2] / 2)
        shape3 = 2
        logit = logit.view(shape[0], shape[1], shape2, shape3)
        logit = logit.max(3)[0]
        if use_dropout:
            logit = self.dropout(logit)
        out = self.fc_W0(logit)
        return out
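

# Minimal shape-check sketch for the two modules above. Only the params keys
# ('cuda', 'n', 'm', 'dim_attention', 'D', 'K') come from the code in this file;
# the concrete values and tensor sizes below are illustrative placeholders, not
# Kindai-OCR's actual configuration.
if __name__ == '__main__':
    params = {'cuda': False, 'n': 256, 'm': 256, 'dim_attention': 512, 'D': 128, 'K': 100}
    n_steps, n_samples, H, W = 5, 2, 8, 16

    decoder = Gru_cond_layer(params)
    classifier = Gru_prob(params)

    embedding = torch.randn(n_steps, n_samples, params['m'])   # previous-symbol embeddings
    mask = torch.ones(n_steps, n_samples)                      # no padding in this toy batch
    context = torch.randn(n_samples, params['D'], H, W)        # encoder feature map
    context_mask = torch.ones(n_samples, H, W)
    init_state = torch.zeros(n_samples, params['n'])

    h2ts, cts, alphas, alpha_pasts = decoder(params, embedding, mask=mask, context=context,
                                             context_mask=context_mask, one_step=False,
                                             init_state=init_state)
    scores = classifier(cts, h2ts, embedding, use_dropout=False)
    # expected shapes: (5, 2, 256), (5, 2, 128), (5, 2, 8, 16), (5, 2, 100)
    print(h2ts.shape, cts.shape, alphas.shape, scores.shape)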