mirror of https://github.com/python/cpython.git
Bytes literal.
This commit is contained in:
parent
cf297e46b8
commit
00e41defe8
|
@ -176,9 +176,9 @@ struct _stmt {
|
|||
enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4,
|
||||
IfExp_kind=5, Dict_kind=6, Set_kind=7, ListComp_kind=8,
|
||||
GeneratorExp_kind=9, Yield_kind=10, Compare_kind=11,
|
||||
Call_kind=12, Num_kind=13, Str_kind=14, Ellipsis_kind=15,
|
||||
Attribute_kind=16, Subscript_kind=17, Name_kind=18,
|
||||
List_kind=19, Tuple_kind=20};
|
||||
Call_kind=12, Num_kind=13, Str_kind=14, Bytes_kind=15,
|
||||
Ellipsis_kind=16, Attribute_kind=17, Subscript_kind=18,
|
||||
Name_kind=19, List_kind=20, Tuple_kind=21};
|
||||
struct _expr {
|
||||
enum _expr_kind kind;
|
||||
union {
|
||||
|
@ -254,6 +254,10 @@ struct _expr {
|
|||
string s;
|
||||
} Str;
|
||||
|
||||
struct {
|
||||
string s;
|
||||
} Bytes;
|
||||
|
||||
struct {
|
||||
expr_ty value;
|
||||
identifier attr;
|
||||
|
@ -465,6 +469,8 @@ expr_ty _Py_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty
|
|||
expr_ty _Py_Num(object n, int lineno, int col_offset, PyArena *arena);
|
||||
#define Str(a0, a1, a2, a3) _Py_Str(a0, a1, a2, a3)
|
||||
expr_ty _Py_Str(string s, int lineno, int col_offset, PyArena *arena);
|
||||
#define Bytes(a0, a1, a2, a3) _Py_Bytes(a0, a1, a2, a3)
|
||||
expr_ty _Py_Bytes(string s, int lineno, int col_offset, PyArena *arena);
|
||||
#define Ellipsis(a0, a1, a2) _Py_Ellipsis(a0, a1, a2)
|
||||
expr_ty _Py_Ellipsis(int lineno, int col_offset, PyArena *arena);
|
||||
#define Attribute(a0, a1, a2, a3, a4, a5) _Py_Attribute(a0, a1, a2, a3, a4, a5)
|
||||
|
|
|
@ -72,7 +72,7 @@ extern "C" {
|
|||
#define LOAD_LOCALS 82
|
||||
#define RETURN_VALUE 83
|
||||
#define IMPORT_STAR 84
|
||||
|
||||
#define MAKE_BYTES 85
|
||||
#define YIELD_VALUE 86
|
||||
#define POP_BLOCK 87
|
||||
#define END_FINALLY 88
|
||||
|
|
|
@ -267,6 +267,20 @@ def getChildNodes(self):
|
|||
def __repr__(self):
|
||||
return "Break()"
|
||||
|
||||
class Bytes(Node):
    """AST node representing a bytes literal.

    value  -- the literal's payload, stored exactly as passed in
    lineno -- source line number of the literal, or None if not given
    """

    def __init__(self, value, lineno=None):
        self.lineno = lineno
        self.value = value

    def getChildren(self):
        """Return a one-element tuple containing the stored value."""
        return (self.value,)

    def getChildNodes(self):
        """Return an empty tuple; a Bytes node has no child nodes."""
        return ()

    def __repr__(self):
        shown = repr(self.value)
        return "Bytes(%s)" % (shown,)
|
||||
|
||||
class CallFunc(Node):
|
||||
def __init__(self, node, args, star_args = None, dstar_args = None, lineno=None):
|
||||
self.node = node
|
||||
|
|
|
@ -792,6 +792,7 @@ def findDepth(self, insts, debug=0):
|
|||
'DELETE_ATTR': -1,
|
||||
'STORE_GLOBAL': -1,
|
||||
'BUILD_MAP': 1,
|
||||
'MAKE_BYTES': 0,
|
||||
'COMPARE_OP': -1,
|
||||
'STORE_FAST': -1,
|
||||
'IMPORT_STAR': -1,
|
||||
|
|
|
@ -930,6 +930,10 @@ def visitDiscard(self, node):
|
|||
|
||||
def visitConst(self, node):
|
||||
self.emit('LOAD_CONST', node.value)
|
||||
|
||||
def visitBytes(self, node):
    """Emit the code for a Bytes node.

    The node's value is loaded as a constant and a MAKE_BYTES
    instruction is emitted directly after it.
    """
    payload = node.value
    self.emit('LOAD_CONST', payload)
    self.emit('MAKE_BYTES')
|
||||
|
||||
def visitKeyword(self, node):
|
||||
self.emit('LOAD_CONST', node.name)
|
||||
|
|
|
@ -745,9 +745,11 @@ def decode_literal(self, lit):
|
|||
return eval(lit)
|
||||
|
||||
def atom_string(self, nodelist):
|
||||
k = ''
|
||||
for node in nodelist:
|
||||
k = self.decode_literal(nodelist[0][1])
|
||||
for node in nodelist[1:]:
|
||||
k += self.decode_literal(node[1])
|
||||
if isinstance(k, bytes):
|
||||
return Bytes(str(k), lineno=nodelist[0][2])
|
||||
return Const(k, lineno=nodelist[0][2])
|
||||
|
||||
def atom_ellipsis(self, nodelist):
|
||||
|
|
|
@ -111,6 +111,7 @@ def jabs_op(name, op):
|
|||
def_op('LOAD_LOCALS', 82)
|
||||
def_op('RETURN_VALUE', 83)
|
||||
def_op('IMPORT_STAR', 84)
|
||||
def_op('MAKE_BYTES', 85)
|
||||
def_op('YIELD_VALUE', 86)
|
||||
def_op('POP_BLOCK', 87)
|
||||
def_op('END_FINALLY', 88)
|
||||
|
|
|
@ -403,7 +403,19 @@ def test_join(self):
|
|||
self.assertEqual(bytes.join(tuple(lst)), bytes("abc"))
|
||||
self.assertEqual(bytes.join(iter(lst)), bytes("abc"))
|
||||
# XXX more...
|
||||
|
||||
|
||||
def test_literal(self):
|
||||
tests = [
|
||||
(b"Wonderful spam", u"Wonderful spam"),
|
||||
(br"Wonderful spam too", u"Wonderful spam too"),
|
||||
(b"\xaa\x00\000\200", u"\xaa\x00\000\200"),
|
||||
(br"\xaa\x00\000\200", ur"\xaa\x00\000\200"),
|
||||
]
|
||||
for b, s in tests:
|
||||
self.assertEqual(b, bytes(s, 'latin-1'))
|
||||
for c in range(128, 256):
|
||||
self.assertRaises(SyntaxError, eval,
|
||||
'b"%s"' % chr(c))
|
||||
|
||||
# Optimizations:
|
||||
# __iter__? (optimization)
|
||||
|
|
|
@ -187,6 +187,30 @@ def testWithAss(self):
|
|||
exec(c, dct)
|
||||
self.assertEquals(dct.get('result'), 1)
|
||||
|
||||
def testBytesLiteral(self):
    """Bytes literals compile via the compiler package, evaluate to the
    expected value, and produce a new object on every evaluation."""
    # Expression form: the compiled literal must evaluate to the
    # bytes value itself, not merely compile and run.
    c = compiler.compile("b'foo'", '<string>', 'eval')
    b = eval(c)
    self.assertEquals(b, b"foo")

    # Default-argument form: the default is evaluated once at function
    # definition, so all three calls increment the same object's first
    # byte ('f' -> 'g' -> 'h' -> 'i').
    c = compiler.compile('def f(b=b"foo"):\n'
                         ' b[0] += 1\n'
                         ' return b\n'
                         'f(); f(); result = f()\n',
                         '<string>',
                         'exec')
    dct = {}
    exec(c, dct)
    self.assertEquals(dct.get('result'), b"ioo")

    # Local-variable form: the literal is evaluated on every call, so
    # each call starts from a fresh b"foo" and only increments once.
    c = compiler.compile('def f():\n'
                         ' b = b"foo"\n'
                         ' b[0] += 1\n'
                         ' return b\n'
                         'f(); f(); result = f()\n',
                         '<string>',
                         'exec')
    dct = {}
    exec(c, dct)
    self.assertEquals(dct.get('result'), b"goo")
|
||||
|
||||
NOLINENO = (compiler.ast.Module, compiler.ast.Stmt, compiler.ast.Discard)
|
||||
|
||||
|
|
|
@ -60,6 +60,7 @@ module Python version "$Revision$"
|
|||
expr? starargs, expr? kwargs)
|
||||
| Num(object n) -- a number as a PyObject.
|
||||
| Str(string s) -- need to specify raw, unicode, etc?
|
||||
| Bytes(string s)
|
||||
| Ellipsis
|
||||
-- other literals? bools?
|
||||
|
||||
|
|
|
@ -1244,6 +1244,14 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
|||
if (c == '"' || c == '\'')
|
||||
goto letter_quote;
|
||||
break;
|
||||
case 'b':
|
||||
case 'B':
|
||||
c = tok_nextc(tok);
|
||||
if (c == 'r' || c == 'R')
|
||||
c = tok_nextc(tok);
|
||||
if (c == '"' || c == '\'')
|
||||
goto letter_quote;
|
||||
break;
|
||||
}
|
||||
while (isalnum(c) || c == '_') {
|
||||
c = tok_nextc(tok);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
|
||||
/*
|
||||
__version__ 53731.
|
||||
__version__ 53866.
|
||||
|
||||
This module must be committed separately after each AST grammar change;
|
||||
The __version__ number is set to the revision number of the commit
|
||||
|
@ -216,6 +216,10 @@ static PyTypeObject *Str_type;
|
|||
static char *Str_fields[]={
|
||||
"s",
|
||||
};
|
||||
static PyTypeObject *Bytes_type;
|
||||
static char *Bytes_fields[]={
|
||||
"s",
|
||||
};
|
||||
static PyTypeObject *Ellipsis_type;
|
||||
static PyTypeObject *Attribute_type;
|
||||
static char *Attribute_fields[]={
|
||||
|
@ -547,6 +551,8 @@ static int init_types(void)
|
|||
if (!Num_type) return 0;
|
||||
Str_type = make_type("Str", expr_type, Str_fields, 1);
|
||||
if (!Str_type) return 0;
|
||||
Bytes_type = make_type("Bytes", expr_type, Bytes_fields, 1);
|
||||
if (!Bytes_type) return 0;
|
||||
Ellipsis_type = make_type("Ellipsis", expr_type, NULL, 0);
|
||||
if (!Ellipsis_type) return 0;
|
||||
Attribute_type = make_type("Attribute", expr_type, Attribute_fields, 3);
|
||||
|
@ -1586,6 +1592,27 @@ Str(string s, int lineno, int col_offset, PyArena *arena)
|
|||
return p;
|
||||
}
|
||||
|
||||
expr_ty
|
||||
Bytes(string s, int lineno, int col_offset, PyArena *arena)
|
||||
{
|
||||
expr_ty p;
|
||||
if (!s) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"field s is required for Bytes");
|
||||
return NULL;
|
||||
}
|
||||
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
|
||||
if (!p) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
p->kind = Bytes_kind;
|
||||
p->v.Bytes.s = s;
|
||||
p->lineno = lineno;
|
||||
p->col_offset = col_offset;
|
||||
return p;
|
||||
}
|
||||
|
||||
expr_ty
|
||||
Ellipsis(int lineno, int col_offset, PyArena *arena)
|
||||
{
|
||||
|
@ -2550,6 +2577,15 @@ ast2obj_expr(void* _o)
|
|||
goto failed;
|
||||
Py_DECREF(value);
|
||||
break;
|
||||
case Bytes_kind:
|
||||
result = PyType_GenericNew(Bytes_type, NULL, NULL);
|
||||
if (!result) goto failed;
|
||||
value = ast2obj_string(o->v.Bytes.s);
|
||||
if (!value) goto failed;
|
||||
if (PyObject_SetAttrString(result, "s", value) == -1)
|
||||
goto failed;
|
||||
Py_DECREF(value);
|
||||
break;
|
||||
case Ellipsis_kind:
|
||||
result = PyType_GenericNew(Ellipsis_type, NULL, NULL);
|
||||
if (!result) goto failed;
|
||||
|
@ -3089,7 +3125,7 @@ init_ast(void)
|
|||
if (PyDict_SetItemString(d, "AST", (PyObject*)AST_type) < 0) return;
|
||||
if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)
|
||||
return;
|
||||
if (PyModule_AddStringConstant(m, "__version__", "53731") < 0)
|
||||
if (PyModule_AddStringConstant(m, "__version__", "53866") < 0)
|
||||
return;
|
||||
if (PyDict_SetItemString(d, "mod", (PyObject*)mod_type) < 0) return;
|
||||
if (PyDict_SetItemString(d, "Module", (PyObject*)Module_type) < 0)
|
||||
|
@ -3155,6 +3191,7 @@ init_ast(void)
|
|||
if (PyDict_SetItemString(d, "Call", (PyObject*)Call_type) < 0) return;
|
||||
if (PyDict_SetItemString(d, "Num", (PyObject*)Num_type) < 0) return;
|
||||
if (PyDict_SetItemString(d, "Str", (PyObject*)Str_type) < 0) return;
|
||||
if (PyDict_SetItemString(d, "Bytes", (PyObject*)Bytes_type) < 0) return;
|
||||
if (PyDict_SetItemString(d, "Ellipsis", (PyObject*)Ellipsis_type) < 0)
|
||||
return;
|
||||
if (PyDict_SetItemString(d, "Attribute", (PyObject*)Attribute_type) <
|
||||
|
|
50
Python/ast.c
50
Python/ast.c
|
@ -33,8 +33,9 @@ static expr_ty ast_for_testlist_gexp(struct compiling *, const node *);
|
|||
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
|
||||
|
||||
static PyObject *parsenumber(const char *);
|
||||
static PyObject *parsestr(const char *s, const char *encoding);
|
||||
static PyObject *parsestrplus(struct compiling *, const node *n);
|
||||
static PyObject *parsestr(const node *n, const char *encoding, int *bytesmode);
|
||||
static PyObject *parsestrplus(struct compiling *, const node *n,
|
||||
int *bytesmode);
|
||||
|
||||
#ifndef LINENO
|
||||
#define LINENO(n) ((n)->n_lineno)
|
||||
|
@ -1383,6 +1384,7 @@ ast_for_atom(struct compiling *c, const node *n)
|
|||
| '{' [dictsetmaker] '}' | NAME | NUMBER | STRING+
|
||||
*/
|
||||
node *ch = CHILD(n, 0);
|
||||
int bytesmode = 0;
|
||||
|
||||
switch (TYPE(ch)) {
|
||||
case NAME:
|
||||
|
@ -1390,12 +1392,15 @@ ast_for_atom(struct compiling *c, const node *n)
|
|||
changed. */
|
||||
return Name(NEW_IDENTIFIER(ch), Load, LINENO(n), n->n_col_offset, c->c_arena);
|
||||
case STRING: {
|
||||
PyObject *str = parsestrplus(c, n);
|
||||
PyObject *str = parsestrplus(c, n, &bytesmode);
|
||||
if (!str)
|
||||
return NULL;
|
||||
|
||||
PyArena_AddPyObject(c->c_arena, str);
|
||||
return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
|
||||
if (bytesmode)
|
||||
return Bytes(str, LINENO(n), n->n_col_offset, c->c_arena);
|
||||
else
|
||||
return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
|
||||
}
|
||||
case NUMBER: {
|
||||
PyObject *pynum = parsenumber(STR(ch));
|
||||
|
@ -3254,9 +3259,10 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
|
|||
* parsestr parses it, and returns the decoded Python string object.
|
||||
*/
|
||||
static PyObject *
|
||||
parsestr(const char *s, const char *encoding)
|
||||
parsestr(const node *n, const char *encoding, int *bytesmode)
|
||||
{
|
||||
size_t len;
|
||||
const char *s = STR(n);
|
||||
int quote = Py_CHARMASK(*s);
|
||||
int rawmode = 0;
|
||||
int need_encoding;
|
||||
|
@ -3267,6 +3273,10 @@ parsestr(const char *s, const char *encoding)
|
|||
quote = *++s;
|
||||
unicode = 1;
|
||||
}
|
||||
if (quote == 'b' || quote == 'B') {
|
||||
quote = *++s;
|
||||
*bytesmode = 1;
|
||||
}
|
||||
if (quote == 'r' || quote == 'R') {
|
||||
quote = *++s;
|
||||
rawmode = 1;
|
||||
|
@ -3276,6 +3286,10 @@ parsestr(const char *s, const char *encoding)
|
|||
PyErr_BadInternalCall();
|
||||
return NULL;
|
||||
}
|
||||
if (unicode && *bytesmode) {
|
||||
ast_error(n, "string cannot be both bytes and unicode");
|
||||
return NULL;
|
||||
}
|
||||
s++;
|
||||
len = strlen(s);
|
||||
if (len > INT_MAX) {
|
||||
|
@ -3300,7 +3314,18 @@ parsestr(const char *s, const char *encoding)
|
|||
return decode_unicode(s, len, rawmode, encoding);
|
||||
}
|
||||
#endif
|
||||
need_encoding = (encoding != NULL &&
|
||||
if (*bytesmode) {
|
||||
/* Disallow non-ascii characters (but not escapes) */
|
||||
const char *c;
|
||||
for (c = s; *c; c++) {
|
||||
if (Py_CHARMASK(*c) >= 0x80) {
|
||||
ast_error(n, "bytes can only contain ASCII "
|
||||
"literal characters.");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
need_encoding = (!*bytesmode && encoding != NULL &&
|
||||
strcmp(encoding, "utf-8") != 0 &&
|
||||
strcmp(encoding, "iso-8859-1") != 0);
|
||||
if (rawmode || strchr(s, '\\') == NULL) {
|
||||
|
@ -3332,18 +3357,25 @@ parsestr(const char *s, const char *encoding)
|
|||
* pasting the intermediate results together.
|
||||
*/
|
||||
static PyObject *
|
||||
parsestrplus(struct compiling *c, const node *n)
|
||||
parsestrplus(struct compiling *c, const node *n, int *bytesmode)
|
||||
{
|
||||
PyObject *v;
|
||||
int i;
|
||||
REQ(CHILD(n, 0), STRING);
|
||||
if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
|
||||
v = parsestr(CHILD(n, 0), c->c_encoding, bytesmode);
|
||||
if (v != NULL) {
|
||||
/* String literal concatenation */
|
||||
for (i = 1; i < NCH(n); i++) {
|
||||
PyObject *s;
|
||||
s = parsestr(STR(CHILD(n, i)), c->c_encoding);
|
||||
int subbm = 0;
|
||||
s = parsestr(CHILD(n, i), c->c_encoding, &subbm);
|
||||
if (s == NULL)
|
||||
goto onError;
|
||||
if (*bytesmode != subbm) {
        /* Every piece of an implicitly concatenated literal must
           agree with the first piece on being bytes or not.
           The trailing space in the first fragment matters:
           adjacent string literals are joined with nothing in
           between.
           NOTE(review): `s` is still owned at this point; confirm
           that the onError path releases it. */
        ast_error(n, "cannot mix bytes and nonbytes "
                     "literals");
        goto onError;
}
|
||||
if (PyString_Check(v) && PyString_Check(s)) {
|
||||
PyString_ConcatAndDel(&v, s);
|
||||
if (v == NULL)
|
||||
|
|
|
@ -1885,6 +1885,19 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
PUSH(x);
|
||||
if (x != NULL) continue;
|
||||
break;
|
||||
|
||||
case MAKE_BYTES:
|
||||
w = POP();
|
||||
if (PyString_Check(w))
|
||||
x = PyBytes_FromStringAndSize(
|
||||
PyString_AS_STRING(w),
|
||||
PyString_GET_SIZE(w));
|
||||
else
|
||||
x = NULL;
|
||||
Py_DECREF(w);
|
||||
PUSH(x);
|
||||
if (x != NULL) continue;
|
||||
break;
|
||||
|
||||
case LOAD_ATTR:
|
||||
w = GETITEM(names, oparg);
|
||||
|
|
|
@ -789,6 +789,8 @@ opcode_stack_effect(int opcode, int oparg)
|
|||
return 1-oparg;
|
||||
case BUILD_MAP:
|
||||
return 1;
|
||||
case MAKE_BYTES:
|
||||
return 0;
|
||||
case LOAD_ATTR:
|
||||
return 0;
|
||||
case COMPARE_OP:
|
||||
|
@ -3077,6 +3079,10 @@ compiler_visit_expr(struct compiler *c, expr_ty e)
|
|||
case Str_kind:
|
||||
ADDOP_O(c, LOAD_CONST, e->v.Str.s, consts);
|
||||
break;
|
||||
case Bytes_kind:
|
||||
ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts);
|
||||
ADDOP(c, MAKE_BYTES);
|
||||
break;
|
||||
case Ellipsis_kind:
|
||||
ADDOP_O(c, LOAD_CONST, Py_Ellipsis, consts);
|
||||
break;
|
||||
|
@ -3426,7 +3432,6 @@ compiler_visit_slice(struct compiler *c, slice_ty s, expr_context_ty ctx)
|
|||
return compiler_handle_subscr(c, kindname, ctx);
|
||||
}
|
||||
|
||||
|
||||
/* End of the compiler section, beginning of the assembler section */
|
||||
|
||||
/* do depth-first search of basic block graph, starting with block.
|
||||
|
|
Loading…
Reference in New Issue