fix single-byte codecs

Single-byte codecs would otherwise consume the in-band control codes (sequences starting with a 0xff byte) while decoding.
This commit is contained in:
ed 2018-01-16 20:05:36 +01:00
parent 9bf397bfcb
commit b84036a141
2 changed files with 32 additions and 15 deletions

View File

@ -184,37 +184,47 @@ class TelnetClient(VT100_Client):
while self.in_bytes:
len_at_start = len(self.in_bytes)
decode_until = len_at_start
# if the codec allows \xff as the 1st byte of a rune,
# make sure it doesn't consume it
if not self.inband_will_fail_decode:
ofs = self.in_bytes.find(b'\xff')
if ofs >= 0:
decode_until = ofs
try:
src = u'{0}'.format(self.in_bytes.decode(self.codec))
src = u'{0}'.format(self.in_bytes[:decode_until].decode(self.codec))
#print('got {0} no prob'.format(src))
#print('got {0} runes: {1}'.format(len(src),
# b2hex(src.encode('utf-8'))))
self.in_bytes = self.in_bytes[0:0]
self.in_bytes = self.in_bytes[decode_until:]
except UnicodeDecodeError as uee:
# first check whether the offending byte is an inband signal
if len(self.in_bytes) > uee.start and self.in_bytes[uee.start] == xff:
if decode_until > uee.start and self.in_bytes[uee.start] == xff:
# it is, keep the text before it
src = u'{0}'.format(self.in_bytes[:uee.start].decode(self.codec))
self.in_bytes = self.in_bytes[uee.start:]
elif len(self.in_bytes) < uee.start + 6 and self.codec != 'ascii':
elif decode_until < uee.start + 6 and not self.multibyte_codec:
print('need more data to parse unicode codepoint at {0} in {1} ...probably'.format(
uee.start, len(self.in_bytes)))
hexdump(self.in_bytes[-8:], 'XXX ')
uee.start, decode_until))
hexdump(self.in_bytes[decode_until-8:], 'XXX ')
return
else:
# it can't be helped
print('warning: unparseable data:')
print('warning: unparseable data before {0} in {1} total:'.format(
decode_until, len(self.in_bytes)))
hexdump(self.in_bytes, 'XXX ')
src = u'{0}'.format(self.in_bytes[:uee.start].decode(self.codec, 'backslashreplace'))
self.in_bytes = self.in_bytes[0:0] # todo: is this correct?
src = u'{0}'.format(self.in_bytes[:decode_until].decode(self.codec, 'backslashreplace'))
self.in_bytes = self.in_bytes[decode_until:]
#self.linebuf = self.linebuf[:self.linepos] + src + self.linebuf[self.linepos:]
#self.linepos += len(src)

View File

@ -103,7 +103,7 @@ class VT100_Client(asyncore.dispatcher):
self.echo_on = False # set true by buffy clients
self.vt100 = True # set nope by butty clients
self.slowmo_tx = SLOW_MOTION_TX
self.codec = 'utf-8'
self.set_codec('utf-8')
# outgoing data
self.outbox = Queue()
@ -187,6 +187,13 @@ class VT100_Client(asyncore.dispatcher):
def set_codec(self, codec_name):
    """Select the text codec and cache two decoder hints derived from it.

    Stores ``codec_name`` as ``self.codec`` and sets:

    * ``self.multibyte_codec`` -- True when the name is one of the
      multibyte codecs listed below.
    * ``self.inband_will_fail_decode`` -- True when the name is one of the
      codecs listed as treating a 0xff byte as illegal.

    The comparison is an exact match on the name as given; any other name
    leaves both flags False.
    """
    # codecs whose characters may span more than one byte
    multibyte_names = ('utf-8', 'shift_jis')
    # codecs listed as rejecting 0xff
    ff_rejecting_names = ('utf-8', 'shift_jis')

    self.codec = codec_name
    active = self.codec
    self.multibyte_codec = active in multibyte_names
    self.inband_will_fail_decode = active in ff_rejecting_names
def handshake_timeout(self):
time.sleep(1)
self.handshake_sz = True
@ -1155,18 +1162,18 @@ class VT100_Client(asyncore.dispatcher):
self.linemode = True
self.echo_on = True
self.vt100 = False
self.codec = 'cp437'
self.set_codec('cp437')
self.wizard_stage = 'end'
# cheatcode: windows telnet + join
elif self.in_text.startswith('wtn'):
self.codec = 'cp437'
self.set_codec('cp437')
self.wizard_stage = 'end'
join_ch = self.in_text[3:]
# cheatcode: linux telnet + join
elif self.in_text.startswith('ltn'):
self.codec = 'utf-8'
self.set_codec('utf-8')
self.wizard_stage = 'end'
join_ch = self.in_text[3:]
@ -1352,7 +1359,7 @@ class VT100_Client(asyncore.dispatcher):
for n, letter in enumerate(AZ[:int(2+len(encs)/2)].lower()):
if letter in text:
self.wizard_stage = 'end'
self.codec = encs[n*2]
self.set_codec(encs[n*2])
break