mirror of https://github.com/perkeep/perkeep.git
vendor: add rsc.io/pdf
at rev 1d34785eb915fd1ea1c437ad41621c9066642030 Change-Id: I38d3044444f81607c6fa39fdcd79e9f2a987af1e
This commit is contained in:
parent
89591fed0e
commit
2938caa52e
|
@ -0,0 +1,27 @@
|
|||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,3 @@
|
|||
go get rsc.io/pdf
|
||||
|
||||
http://godoc.org/rsc.io/pdf
|
|
@ -0,0 +1,529 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Reading of PDF tokens and objects from a raw byte stream.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// token is any lexical element read from a PDF input stream.
// Its dynamic type is one of:
//
//	bool     a PDF boolean
//	int64    a PDF integer
//	float64  a PDF real
//	string   a PDF string literal
//	keyword  a PDF keyword
//	name     a PDF name, stored without its leading slash
type token interface{}

type (
	// name is a PDF name with the leading slash removed.
	name string

	// keyword is a PDF keyword. The delimiters that structure
	// higher-level syntax ("<<", ">>", "[", "]", "{", "}") are
	// represented as keywords as well.
	keyword string
)
|
||||
|
||||
// A buffer holds buffered input bytes from the PDF file.
|
||||
type buffer struct {
|
||||
r io.Reader // source of data
|
||||
buf []byte // buffered data
|
||||
pos int // read index in buf
|
||||
offset int64 // offset at end of buf; aka offset of next read
|
||||
tmp []byte // scratch space for accumulating token
|
||||
unread []token // queue of read but then unread tokens
|
||||
allowEOF bool
|
||||
allowObjptr bool
|
||||
allowStream bool
|
||||
eof bool
|
||||
key []byte
|
||||
useAES bool
|
||||
objptr objptr
|
||||
}
|
||||
|
||||
// newBuffer returns a new buffer reading from r at the given offset.
|
||||
func newBuffer(r io.Reader, offset int64) *buffer {
|
||||
return &buffer{
|
||||
r: r,
|
||||
offset: offset,
|
||||
buf: make([]byte, 0, 4096),
|
||||
allowObjptr: true,
|
||||
allowStream: true,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) seek(offset int64) {
|
||||
b.offset = offset
|
||||
b.buf = b.buf[:0]
|
||||
b.pos = 0
|
||||
b.unread = b.unread[:0]
|
||||
}
|
||||
|
||||
func (b *buffer) readByte() byte {
|
||||
if b.pos >= len(b.buf) {
|
||||
b.reload()
|
||||
if b.pos >= len(b.buf) {
|
||||
return '\n'
|
||||
}
|
||||
}
|
||||
c := b.buf[b.pos]
|
||||
b.pos++
|
||||
return c
|
||||
}
|
||||
|
||||
func (b *buffer) errorf(format string, args ...interface{}) {
|
||||
panic(fmt.Errorf(format, args...))
|
||||
}
|
||||
|
||||
func (b *buffer) reload() bool {
|
||||
n := cap(b.buf) - int(b.offset%int64(cap(b.buf)))
|
||||
n, err := b.r.Read(b.buf[:n])
|
||||
if n == 0 && err != nil {
|
||||
b.buf = b.buf[:0]
|
||||
b.pos = 0
|
||||
if b.allowEOF && err == io.EOF {
|
||||
b.eof = true
|
||||
return false
|
||||
}
|
||||
b.errorf("malformed PDF: reading at offset %d: %v", b.offset, err)
|
||||
return false
|
||||
}
|
||||
b.offset += int64(n)
|
||||
b.buf = b.buf[:n]
|
||||
b.pos = 0
|
||||
return true
|
||||
}
|
||||
|
||||
func (b *buffer) seekForward(offset int64) {
|
||||
for b.offset < offset {
|
||||
if !b.reload() {
|
||||
return
|
||||
}
|
||||
}
|
||||
b.pos = len(b.buf) - int(b.offset-offset)
|
||||
}
|
||||
|
||||
func (b *buffer) readOffset() int64 {
|
||||
return b.offset - int64(len(b.buf)) + int64(b.pos)
|
||||
}
|
||||
|
||||
func (b *buffer) unreadByte() {
|
||||
if b.pos > 0 {
|
||||
b.pos--
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) unreadToken(t token) {
|
||||
b.unread = append(b.unread, t)
|
||||
}
|
||||
|
||||
func (b *buffer) readToken() token {
|
||||
if n := len(b.unread); n > 0 {
|
||||
t := b.unread[n-1]
|
||||
b.unread = b.unread[:n-1]
|
||||
return t
|
||||
}
|
||||
|
||||
// Find first non-space, non-comment byte.
|
||||
c := b.readByte()
|
||||
for {
|
||||
if isSpace(c) {
|
||||
if b.eof {
|
||||
return io.EOF
|
||||
}
|
||||
c = b.readByte()
|
||||
} else if c == '%' {
|
||||
for c != '\r' && c != '\n' {
|
||||
c = b.readByte()
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '<':
|
||||
if b.readByte() == '<' {
|
||||
return keyword("<<")
|
||||
}
|
||||
b.unreadByte()
|
||||
return b.readHexString()
|
||||
|
||||
case '(':
|
||||
return b.readLiteralString()
|
||||
|
||||
case '[', ']', '{', '}':
|
||||
return keyword(string(c))
|
||||
|
||||
case '/':
|
||||
return b.readName()
|
||||
|
||||
case '>':
|
||||
if b.readByte() == '>' {
|
||||
return keyword(">>")
|
||||
}
|
||||
b.unreadByte()
|
||||
fallthrough
|
||||
|
||||
default:
|
||||
if isDelim(c) {
|
||||
b.errorf("unexpected delimiter %#q", rune(c))
|
||||
return nil
|
||||
}
|
||||
b.unreadByte()
|
||||
return b.readKeyword()
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) readHexString() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
Loop:
|
||||
c := b.readByte()
|
||||
if c == '>' {
|
||||
break
|
||||
}
|
||||
if isSpace(c) {
|
||||
goto Loop
|
||||
}
|
||||
Loop2:
|
||||
c2 := b.readByte()
|
||||
if isSpace(c2) {
|
||||
goto Loop2
|
||||
}
|
||||
x := unhex(c)<<4 | unhex(c2)
|
||||
if x < 0 {
|
||||
b.errorf("malformed hex string %c %c %s", c, c2, b.buf[b.pos:])
|
||||
break
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
}
|
||||
b.tmp = tmp
|
||||
return string(tmp)
|
||||
}
|
||||
|
||||
// unhex returns the value (0-15) of the hexadecimal digit b, accepting
// both upper and lower case, or -1 if b is not a hex digit.
func unhex(b byte) int {
	if b >= '0' && b <= '9' {
		return int(b - '0')
	}
	if b >= 'a' && b <= 'f' {
		return int(b-'a') + 10
	}
	if b >= 'A' && b <= 'F' {
		return int(b-'A') + 10
	}
	return -1
}
|
||||
|
||||
func (b *buffer) readLiteralString() token {
|
||||
tmp := b.tmp[:0]
|
||||
depth := 1
|
||||
Loop:
|
||||
for {
|
||||
c := b.readByte()
|
||||
switch c {
|
||||
default:
|
||||
tmp = append(tmp, c)
|
||||
case '(':
|
||||
depth++
|
||||
tmp = append(tmp, c)
|
||||
case ')':
|
||||
if depth--; depth == 0 {
|
||||
break Loop
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
case '\\':
|
||||
switch c = b.readByte(); c {
|
||||
default:
|
||||
b.errorf("invalid escape sequence \\%c", c)
|
||||
tmp = append(tmp, '\\', c)
|
||||
case 'n':
|
||||
tmp = append(tmp, '\n')
|
||||
case 'r':
|
||||
tmp = append(tmp, '\r')
|
||||
case 'b':
|
||||
tmp = append(tmp, '\b')
|
||||
case 't':
|
||||
tmp = append(tmp, '\t')
|
||||
case 'f':
|
||||
tmp = append(tmp, '\f')
|
||||
case '(', ')', '\\':
|
||||
tmp = append(tmp, c)
|
||||
case '\r':
|
||||
if b.readByte() != '\n' {
|
||||
b.unreadByte()
|
||||
}
|
||||
fallthrough
|
||||
case '\n':
|
||||
// no append
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
x := int(c - '0')
|
||||
for i := 0; i < 2; i++ {
|
||||
c = b.readByte()
|
||||
if c < '0' || c > '7' {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
x = x*8 + int(c-'0')
|
||||
}
|
||||
if x > 255 {
|
||||
b.errorf("invalid octal escape \\%03o", x)
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
}
|
||||
}
|
||||
}
|
||||
b.tmp = tmp
|
||||
return string(tmp)
|
||||
}
|
||||
|
||||
func (b *buffer) readName() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
c := b.readByte()
|
||||
if isDelim(c) || isSpace(c) {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
if c == '#' {
|
||||
x := unhex(b.readByte())<<4 | unhex(b.readByte())
|
||||
if x < 0 {
|
||||
b.errorf("malformed name")
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
continue
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
}
|
||||
b.tmp = tmp
|
||||
return name(string(tmp))
|
||||
}
|
||||
|
||||
func (b *buffer) readKeyword() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
c := b.readByte()
|
||||
if isDelim(c) || isSpace(c) {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
}
|
||||
b.tmp = tmp
|
||||
s := string(tmp)
|
||||
switch {
|
||||
case s == "true":
|
||||
return true
|
||||
case s == "false":
|
||||
return false
|
||||
case isInteger(s):
|
||||
x, err := strconv.ParseInt(s, 10, 64)
|
||||
if err != nil {
|
||||
b.errorf("invalid integer %s", s)
|
||||
}
|
||||
return x
|
||||
case isReal(s):
|
||||
x, err := strconv.ParseFloat(s, 64)
|
||||
if err != nil {
|
||||
b.errorf("invalid real %s", s)
|
||||
}
|
||||
return x
|
||||
}
|
||||
return keyword(string(tmp))
|
||||
}
|
||||
|
||||
// isInteger reports whether s is an optional '+' or '-' sign followed
// by one or more ASCII decimal digits.
func isInteger(s string) bool {
	if s != "" {
		if sign := s[0]; sign == '+' || sign == '-' {
			s = s[1:]
		}
	}
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		if d := s[i]; d < '0' || d > '9' {
			return false
		}
	}
	return true
}
|
||||
|
||||
// isReal reports whether s has the syntax of a PDF real number: an
// optional '+' or '-' sign followed by ASCII decimal digits containing
// exactly one '.', with at least one digit present. A leading or
// trailing dot (".5", "3.") is accepted.
//
// A bare "." (optionally signed) is not a real: it has no digits and
// strconv.ParseFloat rejects it, so readKeyword must not try to parse
// it as a number.
func isReal(s string) bool {
	if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
		s = s[1:]
	}
	ndot, ndigit := 0, 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '.':
			ndot++
		case '0' <= c && c <= '9':
			ndigit++
		default:
			return false
		}
	}
	return ndot == 1 && ndigit > 0
}
|
||||
|
||||
// An object is a PDF syntax object, one of the following Go types:
|
||||
//
|
||||
// bool, a PDF boolean
|
||||
// int64, a PDF integer
|
||||
// float64, a PDF real
|
||||
// string, a PDF string literal
|
||||
// name, a PDF name without the leading slash
|
||||
// dict, a PDF dictionary
|
||||
// array, a PDF array
|
||||
// stream, a PDF stream
|
||||
// objptr, a PDF object reference
|
||||
// objdef, a PDF object definition
|
||||
//
|
||||
// An object may also be nil, to represent the PDF null.
|
||||
type object interface{}
|
||||
|
||||
type dict map[name]object
|
||||
|
||||
type array []object
|
||||
|
||||
type stream struct {
|
||||
hdr dict
|
||||
ptr objptr
|
||||
offset int64
|
||||
}
|
||||
|
||||
type objptr struct {
|
||||
id uint32
|
||||
gen uint16
|
||||
}
|
||||
|
||||
type objdef struct {
|
||||
ptr objptr
|
||||
obj object
|
||||
}
|
||||
|
||||
func (b *buffer) readObject() object {
|
||||
tok := b.readToken()
|
||||
if kw, ok := tok.(keyword); ok {
|
||||
switch kw {
|
||||
case "null":
|
||||
return nil
|
||||
case "<<":
|
||||
return b.readDict()
|
||||
case "[":
|
||||
return b.readArray()
|
||||
}
|
||||
b.errorf("unexpected keyword %q parsing object", kw)
|
||||
return nil
|
||||
}
|
||||
|
||||
if str, ok := tok.(string); ok && b.key != nil && b.objptr.id != 0 {
|
||||
tok = decryptString(b.key, b.useAES, b.objptr, str)
|
||||
}
|
||||
|
||||
if !b.allowObjptr {
|
||||
return tok
|
||||
}
|
||||
|
||||
if t1, ok := tok.(int64); ok && int64(uint32(t1)) == t1 {
|
||||
tok2 := b.readToken()
|
||||
if t2, ok := tok2.(int64); ok && int64(uint16(t2)) == t2 {
|
||||
tok3 := b.readToken()
|
||||
switch tok3 {
|
||||
case keyword("R"):
|
||||
return objptr{uint32(t1), uint16(t2)}
|
||||
case keyword("obj"):
|
||||
old := b.objptr
|
||||
b.objptr = objptr{uint32(t1), uint16(t2)}
|
||||
obj := b.readObject()
|
||||
if _, ok := obj.(stream); !ok {
|
||||
tok4 := b.readToken()
|
||||
if tok4 != keyword("endobj") {
|
||||
b.errorf("missing endobj after indirect object definition")
|
||||
b.unreadToken(tok4)
|
||||
}
|
||||
}
|
||||
b.objptr = old
|
||||
return objdef{objptr{uint32(t1), uint16(t2)}, obj}
|
||||
}
|
||||
b.unreadToken(tok3)
|
||||
}
|
||||
b.unreadToken(tok2)
|
||||
}
|
||||
return tok
|
||||
}
|
||||
|
||||
func (b *buffer) readArray() object {
|
||||
var x array
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == nil || tok == keyword("]") {
|
||||
break
|
||||
}
|
||||
b.unreadToken(tok)
|
||||
x = append(x, b.readObject())
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func (b *buffer) readDict() object {
|
||||
x := make(dict)
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == nil || tok == keyword(">>") {
|
||||
break
|
||||
}
|
||||
n, ok := tok.(name)
|
||||
if !ok {
|
||||
b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok)
|
||||
continue
|
||||
}
|
||||
x[n] = b.readObject()
|
||||
}
|
||||
|
||||
if !b.allowStream {
|
||||
return x
|
||||
}
|
||||
|
||||
tok := b.readToken()
|
||||
if tok != keyword("stream") {
|
||||
b.unreadToken(tok)
|
||||
return x
|
||||
}
|
||||
|
||||
switch b.readByte() {
|
||||
case '\r':
|
||||
if b.readByte() != '\n' {
|
||||
b.unreadByte()
|
||||
}
|
||||
case '\n':
|
||||
// ok
|
||||
default:
|
||||
b.errorf("stream keyword not followed by newline")
|
||||
}
|
||||
|
||||
return stream{x, b.objptr, b.readOffset()}
|
||||
}
|
||||
|
||||
// isSpace reports whether b is one of the bytes treated as white space
// by the lexer: NUL, tab, line feed, form feed, carriage return, space.
func isSpace(b byte) bool {
	return b == '\x00' || b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
|
||||
|
||||
// isDelim reports whether b is one of the delimiter bytes
// < > ( ) [ ] { } / % that end a name or keyword.
func isDelim(b byte) bool {
	return b == '<' || b == '>' ||
		b == '(' || b == ')' ||
		b == '[' || b == ']' ||
		b == '{' || b == '}' ||
		b == '/' || b == '%'
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,666 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Page represent a single page in a PDF file.
|
||||
// The methods interpret a Page dictionary stored in V.
|
||||
type Page struct {
|
||||
V Value
|
||||
}
|
||||
|
||||
// Page returns the page for the given page number.
|
||||
// Page numbers are indexed starting at 1, not 0.
|
||||
// If the page is not found, Page returns a Page with p.V.IsNull().
|
||||
func (r *Reader) Page(num int) Page {
|
||||
num-- // now 0-indexed
|
||||
page := r.Trailer().Key("Root").Key("Pages")
|
||||
Search:
|
||||
for page.Key("Type").Name() == "Pages" {
|
||||
count := int(page.Key("Count").Int64())
|
||||
if count < num {
|
||||
return Page{}
|
||||
}
|
||||
kids := page.Key("Kids")
|
||||
for i := 0; i < kids.Len(); i++ {
|
||||
kid := kids.Index(i)
|
||||
if kid.Key("Type").Name() == "Pages" {
|
||||
c := int(kid.Key("Count").Int64())
|
||||
if num < c {
|
||||
page = kid
|
||||
continue Search
|
||||
}
|
||||
num -= c
|
||||
continue
|
||||
}
|
||||
if kid.Key("Type").Name() == "Page" {
|
||||
if num == 0 {
|
||||
return Page{kid}
|
||||
}
|
||||
num--
|
||||
}
|
||||
}
|
||||
}
|
||||
return Page{}
|
||||
}
|
||||
|
||||
// NumPage returns the number of pages in the PDF file.
|
||||
func (r *Reader) NumPage() int {
|
||||
return int(r.Trailer().Key("Root").Key("Pages").Key("Count").Int64())
|
||||
}
|
||||
|
||||
func (p Page) findInherited(key string) Value {
|
||||
for v := p.V; !v.IsNull(); v = v.Key("Parent") {
|
||||
if r := v.Key(key); !r.IsNull() {
|
||||
return r
|
||||
}
|
||||
}
|
||||
return Value{}
|
||||
}
|
||||
|
||||
/*
|
||||
func (p Page) MediaBox() Value {
|
||||
return p.findInherited("MediaBox")
|
||||
}
|
||||
|
||||
func (p Page) CropBox() Value {
|
||||
return p.findInherited("CropBox")
|
||||
}
|
||||
*/
|
||||
|
||||
// Resources returns the resources dictionary associated with the page.
|
||||
func (p Page) Resources() Value {
|
||||
return p.findInherited("Resources")
|
||||
}
|
||||
|
||||
// Fonts returns a list of the fonts associated with the page.
|
||||
func (p Page) Fonts() []string {
|
||||
return p.Resources().Key("Font").Keys()
|
||||
}
|
||||
|
||||
// Font returns the font with the given name associated with the page.
|
||||
func (p Page) Font(name string) Font {
|
||||
return Font{p.Resources().Key("Font").Key(name)}
|
||||
}
|
||||
|
||||
// A Font represent a font in a PDF file.
|
||||
// The methods interpret a Font dictionary stored in V.
|
||||
type Font struct {
|
||||
V Value
|
||||
}
|
||||
|
||||
// BaseFont returns the font's name (BaseFont property).
|
||||
func (f Font) BaseFont() string {
|
||||
return f.V.Key("BaseFont").Name()
|
||||
}
|
||||
|
||||
// FirstChar returns the code point of the first character in the font.
|
||||
func (f Font) FirstChar() int {
|
||||
return int(f.V.Key("FirstChar").Int64())
|
||||
}
|
||||
|
||||
// LastChar returns the code point of the last character in the font.
|
||||
func (f Font) LastChar() int {
|
||||
return int(f.V.Key("LastChar").Int64())
|
||||
}
|
||||
|
||||
// Widths returns the widths of the glyphs in the font.
|
||||
// In a well-formed PDF, len(f.Widths()) == f.LastChar()+1 - f.FirstChar().
|
||||
func (f Font) Widths() []float64 {
|
||||
x := f.V.Key("Widths")
|
||||
var out []float64
|
||||
for i := 0; i < x.Len(); i++ {
|
||||
out = append(out, x.Index(i).Float64())
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Width returns the width of the given code point.
|
||||
func (f Font) Width(code int) float64 {
|
||||
first := f.FirstChar()
|
||||
last := f.LastChar()
|
||||
if code < first || last < code {
|
||||
return 0
|
||||
}
|
||||
return f.V.Key("Widths").Index(code - first).Float64()
|
||||
}
|
||||
|
||||
// Encoder returns the encoding between font code point sequences and UTF-8.
|
||||
func (f Font) Encoder() TextEncoding {
|
||||
enc := f.V.Key("Encoding")
|
||||
switch enc.Kind() {
|
||||
case Name:
|
||||
switch enc.Name() {
|
||||
case "WinAnsiEncoding":
|
||||
return &byteEncoder{&winAnsiEncoding}
|
||||
case "MacRomanEncoding":
|
||||
return &byteEncoder{&macRomanEncoding}
|
||||
case "Identity-H":
|
||||
// TODO: Should be big-endian UCS-2 decoder
|
||||
return &nopEncoder{}
|
||||
default:
|
||||
println("unknown encoding", enc.Name())
|
||||
return &nopEncoder{}
|
||||
}
|
||||
case Dict:
|
||||
return &dictEncoder{enc.Key("Differences")}
|
||||
case Null:
|
||||
// ok, try ToUnicode
|
||||
default:
|
||||
println("unexpected encoding", enc.String())
|
||||
return &nopEncoder{}
|
||||
}
|
||||
|
||||
toUnicode := f.V.Key("ToUnicode")
|
||||
if toUnicode.Kind() == Dict {
|
||||
m := readCmap(toUnicode)
|
||||
if m == nil {
|
||||
return &nopEncoder{}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
return &byteEncoder{&pdfDocEncoding}
|
||||
}
|
||||
|
||||
type dictEncoder struct {
|
||||
v Value
|
||||
}
|
||||
|
||||
func (e *dictEncoder) Decode(raw string) (text string) {
|
||||
r := make([]rune, 0, len(raw))
|
||||
for i := 0; i < len(raw); i++ {
|
||||
ch := rune(raw[i])
|
||||
n := -1
|
||||
for j := 0; j < e.v.Len(); j++ {
|
||||
x := e.v.Index(j)
|
||||
if x.Kind() == Integer {
|
||||
n = int(x.Int64())
|
||||
continue
|
||||
}
|
||||
if x.Kind() == Name {
|
||||
if int(raw[i]) == n {
|
||||
r := nameToRune[x.Name()]
|
||||
if r != 0 {
|
||||
ch = r
|
||||
break
|
||||
}
|
||||
}
|
||||
n++
|
||||
}
|
||||
}
|
||||
r = append(r, ch)
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
// TextEncoding is a mapping from font code points to UTF-8 text.
type TextEncoding interface {
	// Decode takes the sequence of code points in raw and returns
	// the UTF-8 text it corresponds to.
	Decode(raw string) (text string)
}
|
||||
|
||||
// nopEncoder is a TextEncoding that performs no conversion.
type nopEncoder struct{}

// Decode returns raw unchanged.
func (e *nopEncoder) Decode(raw string) (text string) {
	text = raw
	return text
}
|
||||
|
||||
// byteEncoder is a TextEncoding for single-byte encodings: table gives
// the rune for each of the 256 possible byte values.
type byteEncoder struct {
	table *[256]rune
}

// Decode translates every byte of raw through the table and returns
// the resulting runes as UTF-8 text.
func (e *byteEncoder) Decode(raw string) (text string) {
	out := make([]rune, len(raw))
	for i := range out {
		out[i] = e.table[raw[i]]
	}
	return string(out)
}
|
||||
|
||||
type cmap struct {
|
||||
space [4][][2]string
|
||||
bfrange []bfrange
|
||||
}
|
||||
|
||||
func (m *cmap) Decode(raw string) (text string) {
|
||||
var r []rune
|
||||
Parse:
|
||||
for len(raw) > 0 {
|
||||
for n := 1; n <= 4 && n <= len(raw); n++ {
|
||||
for _, space := range m.space[n-1] {
|
||||
if space[0] <= raw[:n] && raw[:n] <= space[1] {
|
||||
text := raw[:n]
|
||||
raw = raw[n:]
|
||||
for _, bf := range m.bfrange {
|
||||
if len(bf.lo) == n && bf.lo <= text && text <= bf.hi {
|
||||
if bf.dst.Kind() == String {
|
||||
s := bf.dst.RawString()
|
||||
if bf.lo != text {
|
||||
b := []byte(s)
|
||||
b[len(b)-1] += text[len(text)-1] - bf.lo[len(bf.lo)-1]
|
||||
s = string(b)
|
||||
}
|
||||
r = append(r, []rune(utf16Decode(s))...)
|
||||
continue Parse
|
||||
}
|
||||
if bf.dst.Kind() == Array {
|
||||
fmt.Printf("array %v\n", bf.dst)
|
||||
} else {
|
||||
fmt.Printf("unknown dst %v\n", bf.dst)
|
||||
}
|
||||
r = append(r, noRune)
|
||||
continue Parse
|
||||
}
|
||||
}
|
||||
fmt.Printf("no text for %q", text)
|
||||
r = append(r, noRune)
|
||||
continue Parse
|
||||
}
|
||||
}
|
||||
}
|
||||
println("no code space found")
|
||||
r = append(r, noRune)
|
||||
raw = raw[1:]
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
type bfrange struct {
|
||||
lo string
|
||||
hi string
|
||||
dst Value
|
||||
}
|
||||
|
||||
func readCmap(toUnicode Value) *cmap {
|
||||
n := -1
|
||||
var m cmap
|
||||
ok := true
|
||||
Interpret(toUnicode, func(stk *Stack, op string) {
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
switch op {
|
||||
case "findresource":
|
||||
category := stk.Pop()
|
||||
key := stk.Pop()
|
||||
fmt.Println("findresource", key, category)
|
||||
stk.Push(newDict())
|
||||
case "begincmap":
|
||||
stk.Push(newDict())
|
||||
case "endcmap":
|
||||
stk.Pop()
|
||||
case "begincodespacerange":
|
||||
n = int(stk.Pop().Int64())
|
||||
case "endcodespacerange":
|
||||
if n < 0 {
|
||||
println("missing begincodespacerange")
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
hi, lo := stk.Pop().RawString(), stk.Pop().RawString()
|
||||
if len(lo) == 0 || len(lo) != len(hi) {
|
||||
println("bad codespace range")
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
m.space[len(lo)-1] = append(m.space[len(lo)-1], [2]string{lo, hi})
|
||||
}
|
||||
n = -1
|
||||
case "beginbfrange":
|
||||
n = int(stk.Pop().Int64())
|
||||
case "endbfrange":
|
||||
if n < 0 {
|
||||
panic("missing beginbfrange")
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
dst, srcHi, srcLo := stk.Pop(), stk.Pop().RawString(), stk.Pop().RawString()
|
||||
m.bfrange = append(m.bfrange, bfrange{srcLo, srcHi, dst})
|
||||
}
|
||||
case "defineresource":
|
||||
category := stk.Pop().Name()
|
||||
value := stk.Pop()
|
||||
key := stk.Pop().Name()
|
||||
fmt.Println("defineresource", key, value, category)
|
||||
stk.Push(value)
|
||||
default:
|
||||
println("interp\t", op)
|
||||
}
|
||||
})
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return &m
|
||||
}
|
||||
|
||||
// matrix is a 3x3 matrix of float64 in row-major order.
type matrix [3][3]float64

// ident is the 3x3 identity matrix.
var ident = matrix{
	{1, 0, 0},
	{0, 1, 0},
	{0, 0, 1},
}

// mul returns the matrix product x·y.
func (x matrix) mul(y matrix) matrix {
	var product matrix
	for row := range product {
		for col := range product[row] {
			var sum float64
			for k := 0; k < 3; k++ {
				sum += x[row][k] * y[k][col]
			}
			product[row][col] = sum
		}
	}
	return product
}
|
||||
|
||||
// A Text represents a single piece of text drawn on a page.
|
||||
type Text struct {
|
||||
Font string // the font used
|
||||
FontSize float64 // the font size, in points (1/72 of an inch)
|
||||
X float64 // the X coordinate, in points, increasing left to right
|
||||
Y float64 // the Y coordinate, in points, increasing bottom to top
|
||||
W float64 // the width of the text, in points
|
||||
S string // the actual UTF-8 text
|
||||
}
|
||||
|
||||
// A Rect represents a rectangle.
|
||||
type Rect struct {
|
||||
Min, Max Point
|
||||
}
|
||||
|
||||
// A Point represents an X, Y pair.
|
||||
type Point struct {
|
||||
X float64
|
||||
Y float64
|
||||
}
|
||||
|
||||
// Content describes the basic content on a page: the text and any drawn rectangles.
|
||||
type Content struct {
|
||||
Text []Text
|
||||
Rect []Rect
|
||||
}
|
||||
|
||||
type gstate struct {
|
||||
Tc float64
|
||||
Tw float64
|
||||
Th float64
|
||||
Tl float64
|
||||
Tf Font
|
||||
Tfs float64
|
||||
Tmode int
|
||||
Trise float64
|
||||
Tm matrix
|
||||
Tlm matrix
|
||||
Trm matrix
|
||||
CTM matrix
|
||||
}
|
||||
|
||||
// Content returns the page's content.
|
||||
func (p Page) Content() Content {
|
||||
strm := p.V.Key("Contents")
|
||||
var enc TextEncoding = &nopEncoder{}
|
||||
|
||||
var g = gstate{
|
||||
Th: 1,
|
||||
CTM: ident,
|
||||
}
|
||||
|
||||
var text []Text
|
||||
showText := func(s string) {
|
||||
n := 0
|
||||
for _, ch := range enc.Decode(s) {
|
||||
Trm := matrix{{g.Tfs * g.Th, 0, 0}, {0, g.Tfs, 0}, {0, g.Trise, 1}}.mul(g.Tm).mul(g.CTM)
|
||||
w0 := g.Tf.Width(int(s[n]))
|
||||
n++
|
||||
if ch != ' ' {
|
||||
f := g.Tf.BaseFont()
|
||||
if i := strings.Index(f, "+"); i >= 0 {
|
||||
f = f[i+1:]
|
||||
}
|
||||
text = append(text, Text{f, Trm[0][0], Trm[2][0], Trm[2][1], w0 / 1000 * Trm[0][0], string(ch)})
|
||||
}
|
||||
tx := w0/1000*g.Tfs + g.Tc
|
||||
if ch == ' ' {
|
||||
tx += g.Tw
|
||||
}
|
||||
tx *= g.Th
|
||||
g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm)
|
||||
}
|
||||
}
|
||||
|
||||
var rect []Rect
|
||||
var gstack []gstate
|
||||
Interpret(strm, func(stk *Stack, op string) {
|
||||
n := stk.Len()
|
||||
args := make([]Value, n)
|
||||
for i := n - 1; i >= 0; i-- {
|
||||
args[i] = stk.Pop()
|
||||
}
|
||||
switch op {
|
||||
default:
|
||||
//fmt.Println(op, args)
|
||||
return
|
||||
|
||||
case "cm": // update g.CTM
|
||||
if len(args) != 6 {
|
||||
panic("bad g.Tm")
|
||||
}
|
||||
var m matrix
|
||||
for i := 0; i < 6; i++ {
|
||||
m[i/2][i%2] = args[i].Float64()
|
||||
}
|
||||
m[2][2] = 1
|
||||
g.CTM = m.mul(g.CTM)
|
||||
|
||||
case "gs": // set parameters from graphics state resource
|
||||
gs := p.Resources().Key("ExtGState").Key(args[0].Name())
|
||||
font := gs.Key("Font")
|
||||
if font.Kind() == Array && font.Len() == 2 {
|
||||
//fmt.Println("FONT", font)
|
||||
}
|
||||
|
||||
case "f": // fill
|
||||
case "g": // setgray
|
||||
case "l": // lineto
|
||||
case "m": // moveto
|
||||
|
||||
case "cs": // set colorspace non-stroking
|
||||
case "scn": // set color non-stroking
|
||||
|
||||
case "re": // append rectangle to path
|
||||
if len(args) != 4 {
|
||||
panic("bad re")
|
||||
}
|
||||
x, y, w, h := args[0].Float64(), args[1].Float64(), args[2].Float64(), args[3].Float64()
|
||||
rect = append(rect, Rect{Point{x, y}, Point{x + w, y + h}})
|
||||
|
||||
case "q": // save graphics state
|
||||
gstack = append(gstack, g)
|
||||
|
||||
case "Q": // restore graphics state
|
||||
n := len(gstack) - 1
|
||||
g = gstack[n]
|
||||
gstack = gstack[:n]
|
||||
|
||||
case "BT": // begin text (reset text matrix and line matrix)
|
||||
g.Tm = ident
|
||||
g.Tlm = g.Tm
|
||||
|
||||
case "ET": // end text
|
||||
|
||||
case "T*": // move to start of next line
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
|
||||
case "Tc": // set character spacing
|
||||
if len(args) != 1 {
|
||||
panic("bad g.Tc")
|
||||
}
|
||||
g.Tc = args[0].Float64()
|
||||
|
||||
case "TD": // move text position and set leading
|
||||
if len(args) != 2 {
|
||||
panic("bad Td")
|
||||
}
|
||||
g.Tl = -args[1].Float64()
|
||||
fallthrough
|
||||
case "Td": // move text position
|
||||
if len(args) != 2 {
|
||||
panic("bad Td")
|
||||
}
|
||||
tx := args[0].Float64()
|
||||
ty := args[1].Float64()
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {tx, ty, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
|
||||
case "Tf": // set text font and size
|
||||
if len(args) != 2 {
|
||||
panic("bad TL")
|
||||
}
|
||||
f := args[0].Name()
|
||||
g.Tf = p.Font(f)
|
||||
enc = g.Tf.Encoder()
|
||||
if enc == nil {
|
||||
println("no cmap for", f)
|
||||
enc = &nopEncoder{}
|
||||
}
|
||||
g.Tfs = args[1].Float64()
|
||||
|
||||
case "\"": // set spacing, move to next line, and show text
|
||||
if len(args) != 3 {
|
||||
panic("bad \" operator")
|
||||
}
|
||||
g.Tw = args[0].Float64()
|
||||
g.Tc = args[1].Float64()
|
||||
args = args[2:]
|
||||
fallthrough
|
||||
case "'": // move to next line and show text
|
||||
if len(args) != 1 {
|
||||
panic("bad ' operator")
|
||||
}
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
fallthrough
|
||||
case "Tj": // show text
|
||||
if len(args) != 1 {
|
||||
panic("bad Tj operator")
|
||||
}
|
||||
showText(args[0].RawString())
|
||||
|
||||
case "TJ": // show text, allowing individual glyph positioning
|
||||
v := args[0]
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
x := v.Index(i)
|
||||
if x.Kind() == String {
|
||||
showText(x.RawString())
|
||||
} else {
|
||||
tx := -x.Float64() / 1000 * g.Tfs * g.Th
|
||||
g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm)
|
||||
}
|
||||
}
|
||||
|
||||
case "TL": // set text leading
|
||||
if len(args) != 1 {
|
||||
panic("bad TL")
|
||||
}
|
||||
g.Tl = args[0].Float64()
|
||||
|
||||
case "Tm": // set text matrix and line matrix
|
||||
if len(args) != 6 {
|
||||
panic("bad g.Tm")
|
||||
}
|
||||
var m matrix
|
||||
for i := 0; i < 6; i++ {
|
||||
m[i/2][i%2] = args[i].Float64()
|
||||
}
|
||||
m[2][2] = 1
|
||||
g.Tm = m
|
||||
g.Tlm = m
|
||||
|
||||
case "Tr": // set text rendering mode
|
||||
if len(args) != 1 {
|
||||
panic("bad Tr")
|
||||
}
|
||||
g.Tmode = int(args[0].Int64())
|
||||
|
||||
case "Ts": // set text rise
|
||||
if len(args) != 1 {
|
||||
panic("bad Ts")
|
||||
}
|
||||
g.Trise = args[0].Float64()
|
||||
|
||||
case "Tw": // set word spacing
|
||||
if len(args) != 1 {
|
||||
panic("bad g.Tw")
|
||||
}
|
||||
g.Tw = args[0].Float64()
|
||||
|
||||
case "Tz": // set horizontal text scaling
|
||||
if len(args) != 1 {
|
||||
panic("bad Tz")
|
||||
}
|
||||
g.Th = args[0].Float64() / 100
|
||||
}
|
||||
})
|
||||
return Content{text, rect}
|
||||
}
|
||||
|
||||
// TextVertical implements sort.Interface for sorting
|
||||
// a slice of Text values in vertical order, top to bottom,
|
||||
// and then left to right within a line.
|
||||
type TextVertical []Text
|
||||
|
||||
func (x TextVertical) Len() int { return len(x) }
|
||||
func (x TextVertical) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x TextVertical) Less(i, j int) bool {
|
||||
if x[i].Y != x[j].Y {
|
||||
return x[i].Y > x[j].Y
|
||||
}
|
||||
return x[i].X < x[j].X
|
||||
}
|
||||
|
||||
// TextVertical implements sort.Interface for sorting
|
||||
// a slice of Text values in horizontal order, left to right,
|
||||
// and then top to bottom within a column.
|
||||
type TextHorizontal []Text
|
||||
|
||||
func (x TextHorizontal) Len() int { return len(x) }
|
||||
func (x TextHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x TextHorizontal) Less(i, j int) bool {
|
||||
if x[i].X != x[j].X {
|
||||
return x[i].X < x[j].X
|
||||
}
|
||||
return x[i].Y > x[j].Y
|
||||
}
|
||||
|
||||
// An Outline is one node of a document's outline tree (the outline is
// also known as the table of contents). Each node carries its own title
// and the list of nodes nested beneath it.
type Outline struct {
	// Title is the title for this element.
	Title string
	// Child holds the child elements.
	Child []Outline
}
|
||||
|
||||
// Outline returns the document outline.
|
||||
// The Outline returned is the root of the outline tree and typically has no Title itself.
|
||||
// That is, the children of the returned root are the top-level entries in the outline.
|
||||
func (r *Reader) Outline() Outline {
|
||||
return buildOutline(r.Trailer().Key("Root").Key("Outlines"))
|
||||
}
|
||||
|
||||
func buildOutline(entry Value) Outline {
|
||||
var x Outline
|
||||
x.Title = entry.Key("Title").Text()
|
||||
for child := entry.Key("First"); child.Kind() == Dict; child = child.Key("Next") {
|
||||
x.Child = append(x.Child, buildOutline(child))
|
||||
}
|
||||
return x
|
||||
}
|
|
@ -0,0 +1,138 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// A Stack represents a stack of values.
|
||||
type Stack struct {
|
||||
stack []Value
|
||||
}
|
||||
|
||||
func (stk *Stack) Len() int {
|
||||
return len(stk.stack)
|
||||
}
|
||||
|
||||
func (stk *Stack) Push(v Value) {
|
||||
stk.stack = append(stk.stack, v)
|
||||
}
|
||||
|
||||
func (stk *Stack) Pop() Value {
|
||||
n := len(stk.stack)
|
||||
if n == 0 {
|
||||
return Value{}
|
||||
}
|
||||
v := stk.stack[n-1]
|
||||
stk.stack[n-1] = Value{}
|
||||
stk.stack = stk.stack[:n-1]
|
||||
return v
|
||||
}
|
||||
|
||||
func newDict() Value {
|
||||
return Value{nil, objptr{}, make(dict)}
|
||||
}
|
||||
|
||||
// Interpret interprets the content in a stream as a basic PostScript program,
|
||||
// pushing values onto a stack and then calling the do function to execute
|
||||
// operators. The do function may push or pop values from the stack as needed
|
||||
// to implement op.
|
||||
//
|
||||
// Interpret handles the operators "dict", "currentdict", "begin", "end", "def", and "pop" itself.
|
||||
//
|
||||
// Interpret is not a full-blown PostScript interpreter. Its job is to handle the
|
||||
// very limited PostScript found in certain supporting file formats embedded
|
||||
// in PDF files, such as cmap files that describe the mapping from font code
|
||||
// points to Unicode code points.
|
||||
//
|
||||
// There is no support for executable blocks, among other limitations.
|
||||
//
|
||||
func Interpret(strm Value, do func(stk *Stack, op string)) {
|
||||
rd := strm.Reader()
|
||||
b := newBuffer(rd, 0)
|
||||
b.allowEOF = true
|
||||
b.allowObjptr = false
|
||||
b.allowStream = false
|
||||
var stk Stack
|
||||
var dicts []dict
|
||||
Reading:
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == io.EOF {
|
||||
break
|
||||
}
|
||||
if kw, ok := tok.(keyword); ok {
|
||||
switch kw {
|
||||
case "null", "[", "]", "<<", ">>":
|
||||
break
|
||||
default:
|
||||
for i := len(dicts) - 1; i >= 0; i-- {
|
||||
if v, ok := dicts[i][name(kw)]; ok {
|
||||
stk.Push(Value{nil, objptr{}, v})
|
||||
continue Reading
|
||||
}
|
||||
}
|
||||
do(&stk, string(kw))
|
||||
continue
|
||||
case "dict":
|
||||
stk.Pop()
|
||||
stk.Push(Value{nil, objptr{}, make(dict)})
|
||||
continue
|
||||
case "currentdict":
|
||||
if len(dicts) == 0 {
|
||||
panic("no current dictionary")
|
||||
}
|
||||
stk.Push(Value{nil, objptr{}, dicts[len(dicts)-1]})
|
||||
continue
|
||||
case "begin":
|
||||
d := stk.Pop()
|
||||
if d.Kind() != Dict {
|
||||
panic("cannot begin non-dict")
|
||||
}
|
||||
dicts = append(dicts, d.data.(dict))
|
||||
continue
|
||||
case "end":
|
||||
if len(dicts) <= 0 {
|
||||
panic("mismatched begin/end")
|
||||
}
|
||||
dicts = dicts[:len(dicts)-1]
|
||||
continue
|
||||
case "def":
|
||||
if len(dicts) <= 0 {
|
||||
panic("def without open dict")
|
||||
}
|
||||
val := stk.Pop()
|
||||
key, ok := stk.Pop().data.(name)
|
||||
if !ok {
|
||||
panic("def of non-name")
|
||||
}
|
||||
dicts[len(dicts)-1][key] = val.data
|
||||
continue
|
||||
case "pop":
|
||||
stk.Pop()
|
||||
continue
|
||||
}
|
||||
}
|
||||
b.unreadToken(tok)
|
||||
obj := b.readObject()
|
||||
stk.Push(Value{nil, objptr{}, obj})
|
||||
}
|
||||
}
|
||||
|
||||
// seqReader provides a ReadAt method on top of a plain io.Reader.
// Only sequential access works: each ReadAt must begin at the offset
// where the previous one stopped.
type seqReader struct {
	rd     io.Reader // underlying sequential source
	offset int64     // number of bytes consumed from rd so far
}

// ReadAt fills buf from the underlying reader using io.ReadFull, provided
// offset equals the current position; the position then advances by the
// number of bytes read. Any other offset is rejected with an error and
// nothing is read.
func (r *seqReader) ReadAt(buf []byte, offset int64) (int, error) {
	if pos := r.offset; offset != pos {
		return 0, fmt.Errorf("non-sequential read of stream")
	}
	read, err := io.ReadFull(r.rd, buf)
	r.offset += int64(read)
	return read, err
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,158 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
)
|
||||
|
||||
// noRune is the value stored in the encoding tables below for byte values
// that have no mapping; it is the Unicode replacement character.
const noRune = unicode.ReplacementChar
|
||||
|
||||
func isPDFDocEncoded(s string) bool {
|
||||
if isUTF16(s) {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(s); i++ {
|
||||
if pdfDocEncoding[s[i]] == noRune {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func pdfDocDecode(s string) string {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] >= 0x80 || pdfDocEncoding[s[i]] != rune(s[i]) {
|
||||
goto Decode
|
||||
}
|
||||
}
|
||||
return s
|
||||
|
||||
Decode:
|
||||
r := make([]rune, len(s))
|
||||
for i := 0; i < len(s); i++ {
|
||||
r[i] = pdfDocEncoding[s[i]]
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
// isUTF16 reports whether s has even length and begins with the two bytes
// 0xFE 0xFF (a big-endian UTF-16 byte order mark).
func isUTF16(s string) bool {
	if len(s) < 2 || len(s)%2 != 0 {
		return false
	}
	return s[0] == 0xfe && s[1] == 0xff
}
|
||||
|
||||
// utf16Decode interprets s as a sequence of big-endian UTF-16 code units
// (two bytes each, high byte first) and returns the decoded text as a
// UTF-8 string. Surrogate pairs are combined by utf16.Decode.
//
// s is expected to have even length. If it does not, the dangling final
// byte cannot form a code unit; it is decoded as the Unicode replacement
// character instead of indexing past the end of s.
func utf16Decode(s string) string {
	units := make([]uint16, 0, (len(s)+1)/2)
	for i := 0; i+1 < len(s); i += 2 {
		units = append(units, uint16(s[i])<<8|uint16(s[i+1]))
	}
	if len(s)%2 != 0 {
		// Truncated trailing code unit.
		units = append(units, unicode.ReplacementChar)
	}
	return string(utf16.Decode(units))
}
|
||||
|
||||
// pdfDocEncoding maps each byte value (the array index) to the rune it
// decodes to. Entries set to noRune mark byte values that have no mapping.
// Each row below covers eight consecutive byte values, starting at 0x00.
// See PDF 32000-1:2008, Table D.2
|
||||
var pdfDocEncoding = [256]rune{
|
||||
noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune,
|
||||
noRune, 0x0009, 0x000a, noRune, noRune, 0x000d, noRune, noRune,
|
||||
noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune,
|
||||
0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, noRune,
|
||||
0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
|
||||
0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
|
||||
0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,
|
||||
0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, noRune,
|
||||
0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
|
||||
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, noRune, 0x00ae, 0x00af,
|
||||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
|
||||
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
|
||||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
|
||||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
|
||||
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
|
||||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
|
||||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
|
||||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
|
||||
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
|
||||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
||||
}
|
||||
|
||||
// winAnsiEncoding maps each byte value (the array index) to a rune.
// Byte values 0x00 through 0x7f map to themselves; entries set to noRune
// mark byte values that have no mapping. Each row below covers eight
// consecutive byte values, starting at 0x00.
// NOTE(review): presumably the WinAnsiEncoding of PDF 32000-1:2008, Annex D — confirm.
var winAnsiEncoding = [256]rune{
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x20ac, noRune, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
|
||||
0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, noRune, 0x017d, noRune,
|
||||
noRune, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
|
||||
0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, noRune, 0x017e, 0x0178,
|
||||
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
|
||||
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
|
||||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
|
||||
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
|
||||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
|
||||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
|
||||
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
|
||||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
|
||||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
|
||||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
|
||||
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
|
||||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
||||
}
|
||||
|
||||
// macRomanEncoding maps each byte value (the array index) to a rune.
// Byte values 0x00 through 0x7f map to themselves; every entry has a
// mapping (no noRune entries). Each row below covers eight consecutive
// byte values, starting at 0x00.
// NOTE(review): presumably the MacRomanEncoding of PDF 32000-1:2008, Annex D — confirm.
var macRomanEncoding = [256]rune{
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1,
|
||||
0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8,
|
||||
0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3,
|
||||
0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc,
|
||||
0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
|
||||
0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
|
||||
0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
|
||||
0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
|
||||
0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
|
||||
0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153,
|
||||
0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
|
||||
0x00ff, 0x0178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
|
||||
0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1,
|
||||
0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4,
|
||||
0xf8ff, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc,
|
||||
0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7,
|
||||
}
|
Loading…
Reference in New Issue