spaCy/spacy/cfile.pyx

104 lines
3.5 KiB
Cython
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
2017-03-21 20:08:54 +00:00
from libc.stdio cimport fopen, fclose, fread, fwrite
from libc.string cimport memcpy
2015-07-22 23:10:36 +00:00
cdef class CFile:
    """Thin wrapper around a C ``FILE*`` for binary reads and writes.

    The file is opened with ``fopen`` in ``__init__`` and closed either
    explicitly via ``close()`` or when the object is deallocated.
    """
    def __init__(self, loc, mode, on_open_error=None):
        """Open the file at `loc` with the given `fopen` mode.

        loc: A unicode or bytes path, or any object with an ``as_posix()``
            method (its result is used as the path).
        mode: The ``fopen`` mode, as unicode or bytes.
        on_open_error: Optional zero-argument callable invoked if the file
            cannot be opened. If it is None, an IOError is raised instead.
        """
        cdef bytes bytes_loc
        if isinstance(mode, unicode):
            mode_str = mode.encode('ascii')
        else:
            mode_str = mode
        if hasattr(loc, 'as_posix'):
            loc = loc.as_posix()
        self.mem = Pool()
        # Start out closed, so that a failed open never leaves a NULL
        # pointer flagged as open (which would make fclose crash later).
        self.is_open = False
        bytes_loc = loc.encode('utf8') if isinstance(loc, unicode) else loc
        self.fp = fopen(<char*>bytes_loc, mode_str)
        if self.fp == NULL:
            if on_open_error is None:
                raise IOError("Could not open binary file %s" % bytes_loc)
            on_open_error()
            return
        self.is_open = True

    def __dealloc__(self):
        # Only close a handle that was successfully opened and not yet closed.
        if self.is_open:
            fclose(self.fp)

    def close(self):
        """Close the underlying file. Calling this more than once is safe."""
        if self.is_open:
            fclose(self.fp)
            self.is_open = False

    cdef int read_into(self, void* dest, size_t number, size_t elem_size) except -1:
        """Read `number` elements of `elem_size` bytes each into `dest`.

        Raises IOError if fewer than `number` elements could be read.
        """
        cdef size_t n_read = fread(dest, elem_size, number, self.fp)
        if n_read != number:
            raise IOError
        return 0

    cdef int write_from(self, void* src, size_t number, size_t elem_size) except -1:
        """Write `number` elements of `elem_size` bytes each from `src`.

        Raises IOError if fewer than `number` elements could be written.
        """
        cdef size_t n_written = fwrite(src, elem_size, number, self.fp)
        if n_written != number:
            raise IOError
        return 0

    cdef void* alloc_read(self, Pool mem, size_t number, size_t elem_size) except *:
        """Allocate a buffer from `mem`, fill it from the file and return it."""
        cdef void* dest = mem.alloc(number, elem_size)
        self.read_into(dest, number, elem_size)
        return dest

    def write_unicode(self, unicode value):
        """Encode `value` as UTF-8 and write the resulting bytes to the file."""
        cdef bytes py_bytes = value.encode('utf8')
        cdef char* chars = <char*>py_bytes
        # Go through write_from: this class defines no `write` method.
        self.write_from(chars, len(py_bytes), sizeof(char))
cdef class StringCFile:
    """In-memory counterpart of CFile, backed by a growable byte buffer.

    Reads consume bytes starting at the cursor `self.i`; writes append at
    `self.size`, growing the buffer when needed.
    """
    def __init__(self, bytes data, mode, on_open_error=None):
        """Create the buffer, pre-filled with a copy of `data`.

        data: Initial contents of the buffer.
        mode: Mode string (unicode or bytes). The object is flagged as open
            only if it contains 'w'.
        on_open_error: Accepted for signature parity with CFile; unused.
        """
        cdef size_t n_bytes = len(data)
        # Accept bytes modes too, as CFile does ('w' in b'wb' fails on Py3).
        if isinstance(mode, bytes):
            mode = mode.decode('ascii')
        self.mem = Pool()
        self.is_open = 1 if 'w' in mode else 0
        # Keep a small minimum capacity so empty input still gets a buffer.
        self._capacity = n_bytes if n_bytes > 8 else 8
        self.size = n_bytes
        self.i = 0
        self.data = <unsigned char*>self.mem.alloc(1, self._capacity)
        if n_bytes > 0:
            memcpy(self.data, <char*>data, n_bytes)

    def __dealloc__(self):
        # Important to override this -- or
        # we try to close a non-existent file pointer!
        pass

    def close(self):
        """Mark the object as closed. The buffer itself is left intact."""
        self.is_open = False

    def string_data(self):
        """Return a copy of the first `self.size` bytes of the buffer."""
        # Slicing a char* with an explicit length builds a fresh bytes
        # object, rather than mutating an immutable one through a pointer.
        return (<char*>self.data)[:self.size]

    cdef int read_into(self, void* dest, size_t number, size_t elem_size) except -1:
        """Copy `number` elements of `elem_size` bytes from the cursor to `dest`.

        Advances the cursor by the number of bytes copied. Raises IOError if
        the buffer holds fewer unread bytes than requested, mirroring
        CFile.read_into.
        """
        cdef size_t n_bytes = number * elem_size
        # A read that ends exactly at the end of the data is valid.
        if self.i + n_bytes > self.size:
            raise IOError
        memcpy(dest, &self.data[self.i], n_bytes)
        self.i += n_bytes
        return 0

    cdef int write_from(self, void* src, size_t elem_size, size_t number) except -1:
        """Append `number` elements of `elem_size` bytes from `src`.

        Only the product of the two size arguments is used, so their order
        does not affect the result.
        """
        cdef size_t write_size = number * elem_size
        if (self.size + write_size) >= self._capacity:
            # Grow to twice the required size to amortise repeated appends.
            self._capacity = (self.size + write_size) * 2
            self.data = <unsigned char*>self.mem.realloc(self.data, self._capacity)
        memcpy(&self.data[self.size], src, write_size)
        self.size += write_size
        return 0

    cdef void* alloc_read(self, Pool mem, size_t number, size_t elem_size) except *:
        """Allocate a buffer from `mem`, fill it from the data and return it."""
        cdef void* dest = mem.alloc(number, elem_size)
        self.read_into(dest, number, elem_size)
        return dest

    def write_unicode(self, unicode value):
        """Encode `value` as UTF-8 and append the resulting bytes."""
        cdef bytes py_bytes = value.encode('utf8')
        cdef char* chars = <char*>py_bytes
        # Go through write_from: this class defines no `write` method.
        self.write_from(chars, sizeof(char), len(py_bytes))