From 85bcc66bb492931b6ca3de21ca53ca53b754be33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Tue, 4 Sep 2007 09:18:06 +0000 Subject: [PATCH] Convert code from sys.stdin.encoding to UTF-8 in interactive mode. Fixes #1100. --- Include/parsetok.h | 3 ++- Include/pythonrun.h | 3 ++- Parser/parsetok.c | 9 +++++---- Parser/tokenizer.c | 36 +++++++++++++++++++++++++++++++++++- Parser/tokenizer.h | 3 ++- Python/import.c | 3 ++- Python/pythonrun.c | 29 ++++++++++++++++++++++------- 7 files changed, 70 insertions(+), 16 deletions(-) diff --git a/Include/parsetok.h b/Include/parsetok.h index 2b4ce1ea4ba..71033dc7047 100644 --- a/Include/parsetok.h +++ b/Include/parsetok.h @@ -34,7 +34,8 @@ PyAPI_FUNC(node *) PyParser_ParseFile (FILE *, const char *, grammar *, int, PyAPI_FUNC(node *) PyParser_ParseStringFlags(const char *, grammar *, int, perrdetail *, int); -PyAPI_FUNC(node *) PyParser_ParseFileFlags(FILE *, const char *, grammar *, +PyAPI_FUNC(node *) PyParser_ParseFileFlags(FILE *, const char *, + const char*, grammar *, int, char *, char *, perrdetail *, int); diff --git a/Include/pythonrun.h b/Include/pythonrun.h index 08278cf8fb5..607826ad199 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -40,7 +40,8 @@ PyAPI_FUNC(int) PyRun_InteractiveLoopFlags(FILE *, const char *, PyCompilerFlags PyAPI_FUNC(struct _mod *) PyParser_ASTFromString(const char *, const char *, int, PyCompilerFlags *flags, PyArena *); -PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, int, +PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, + const char*, int, char *, char *, PyCompilerFlags *, int *, PyArena *); diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 71bed299197..b9664ea3ea2 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -59,19 +59,20 @@ node * PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, char *ps1, char *ps2, perrdetail *err_ret) { - return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, - err_ret, 0); + return PyParser_ParseFileFlags(fp, filename, NULL, + g, start, ps1, ps2, err_ret, 0); } node * -PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, +PyParser_ParseFileFlags(FILE *fp, const char *filename, const char* enc, + grammar *g, int start, char *ps1, char *ps2, perrdetail *err_ret, int flags) { struct tok_state *tok; initerr(err_ret, filename); - if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { + if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) { err_ret->error = E_NOMEM; return NULL; } diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 776183d8058..7f51e143fa5 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -677,7 +677,7 @@ PyTokenizer_FromString(const char *str) /* Set up tokenizer for file */ struct tok_state * -PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2) +PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2) { struct tok_state *tok = tok_new(); if (tok == NULL) @@ -691,6 +691,17 @@ PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2) tok->fp = fp; tok->prompt = ps1; tok->nextprompt = ps2; + if (enc != NULL) { + /* Must copy encoding declaration since it + gets copied into the parse tree. */ + tok->encoding = PyMem_MALLOC(strlen(enc)+1); + if (!tok->encoding) { + PyTokenizer_Free(tok); + return NULL; + } + strcpy(tok->encoding, enc); + tok->decoding_state = -1; + } return tok; } @@ -742,6 +753,29 @@ tok_nextc(register struct tok_state *tok) } if (tok->prompt != NULL) { char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); +#ifndef PGEN + if (tok->encoding && newtok && *newtok) { + /* Recode to UTF-8 */ + Py_ssize_t buflen; + const char* buf; + PyObject *u = translate_into_utf8(newtok, tok->encoding); + PyMem_FREE(newtok); + if (!u) { + tok->done = E_DECODE; + return EOF; + } + buflen = PyBytes_Size(u); + buf = PyBytes_AsString(u); + if (!buf) { + Py_DECREF(u); + tok->done = E_DECODE; + return EOF; + } + newtok = PyMem_MALLOC(buflen+1); + strcpy(newtok, buf); + Py_DECREF(u); + } +#endif if (tok->nextprompt != NULL) tok->prompt = tok->nextprompt; if (newtok == NULL) diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 5e7ebf74f11..ba90a5f63c2 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -55,7 +55,8 @@ struct tok_state { }; extern struct tok_state *PyTokenizer_FromString(const char *); -extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *); +extern struct tok_state *PyTokenizer_FromFile(FILE *, char*, + char *, char *); extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); diff --git a/Python/import.c b/Python/import.c index 72837836bc0..c2f42e995b8 100644 --- a/Python/import.c +++ b/Python/import.c @@ -809,7 +809,8 @@ parse_source_module(const char *pathname, FILE *fp) if (arena == NULL) return NULL; - mod = PyParser_ASTFromFile(fp, pathname, Py_file_input, 0, 0, 0, + mod = PyParser_ASTFromFile(fp, pathname, NULL, + Py_file_input, 0, 0, 0, NULL, arena); if (mod) { co = PyAST_Compile(mod, pathname, NULL, arena); diff --git a/Python/pythonrun.c b/Python/pythonrun.c index eeed820faaa..d89f5f95812 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -744,12 +744,22 @@ PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flag int PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) { - PyObject *m, *d, *v, *w; + PyObject *m, *d, *v, *w, *oenc = NULL; mod_ty mod; PyArena *arena; - char *ps1 = "", *ps2 = ""; + char *ps1 = "", *ps2 = "", *enc = NULL; int errcode = 0; + if (fp == stdin) { + /* Fetch encoding from sys.stdin */ + v = PySys_GetObject("stdin"); + if (!v) + return -1; + oenc = PyObject_GetAttrString(v, "encoding"); + if (!oenc) + return -1; + enc = PyUnicode_AsString(oenc); + } v = PySys_GetObject("ps1"); if (v != NULL) { v = PyObject_Str(v); @@ -770,13 +780,15 @@ PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags if (arena == NULL) { Py_XDECREF(v); Py_XDECREF(w); + Py_XDECREF(oenc); return -1; } - mod = PyParser_ASTFromFile(fp, filename, + mod = PyParser_ASTFromFile(fp, filename, enc, Py_single_input, ps1, ps2, flags, &errcode, arena); Py_XDECREF(v); Py_XDECREF(w); + Py_XDECREF(oenc); if (mod == NULL) { PyArena_Free(arena); if (errcode == E_EOF) { @@ -1254,7 +1266,7 @@ PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals, if (arena == NULL) return NULL; - mod = PyParser_ASTFromFile(fp, filename, start, 0, 0, + mod = PyParser_ASTFromFile(fp, filename, NULL, start, 0, 0, flags, NULL, arena); if (closeit) fclose(fp); @@ -1379,13 +1391,15 @@ PyParser_ASTFromString(const char *s, const char *filename, int start, } mod_ty -PyParser_ASTFromFile(FILE *fp, const char *filename, int start, char *ps1, +PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc, + int start, char *ps1, char *ps2, PyCompilerFlags *flags, int *errcode, PyArena *arena) { mod_ty mod; perrdetail err; - node *n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, + node *n = PyParser_ParseFileFlags(fp, filename, enc, + &_PyParser_Grammar, start, ps1, ps2, &err, PARSER_FLAGS(flags)); if (n) { mod = PyAST_FromNode(n, flags, filename, arena); @@ -1406,7 +1420,8 @@ node * PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int flags) { perrdetail err; - node *n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, + node *n = PyParser_ParseFileFlags(fp, filename, NULL, + &_PyParser_Grammar, start, NULL, NULL, &err, flags); if (n == NULL) err_input(&err);