cpython/Parser/parsetok.c

/***********************************************************
Copyright (c) 2000, BeOpen.com.
Copyright (c) 1995-2000, Corporation for National Research Initiatives.
Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
All rights reserved.

See the file "Misc/COPYRIGHT" for information on usage and
redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
******************************************************************/

/* Parser-tokenizer link implementation */

#include "pgenheaders.h"
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
#include "parsetok.h"
#include "errcode.h"

/* Global switch consulted by the entry points in this file.  When it (or
   Py_VerboseFlag) is nonzero, they record a filename on the tokenizer and
   set its altwarning field; a value of 2 or more additionally increments
   the tokenizer's alterror field.
   NOTE(review): presumably driven by the -t / -tt command line options --
   confirm against the code that sets it. */
int Py_TabcheckFlag;


/* Forward declaration: parsetok() is defined at the bottom of this file but
   is called by the public entry points that precede it. */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *);

/* Parse input coming from a string.

   s        source text handed to PyTokenizer_FromString()
   g        grammar passed through to the parser
   start    start symbol passed through to the parser
   err_ret  error record; reset here, then filled in by parsetok()

   Returns the result of parsetok() (a parse tree, or NULL on failure).
   If no tokenizer can be created, returns NULL with err_ret->error set
   to E_NOMEM. */

node *
PyParser_ParseString(char *s, grammar *g, int start, perrdetail *err_ret)
{
	struct tok_state *ts;

	/* Begin with a clean error record; string input has no filename. */
	err_ret->text = NULL;
	err_ret->offset = 0;
	err_ret->lineno = 0;
	err_ret->filename = NULL;
	err_ret->error = E_OK;

	ts = PyTokenizer_FromString(s);
	if (ts == NULL) {
		err_ret->error = E_NOMEM;
		return NULL;
	}

	if (Py_TabcheckFlag || Py_VerboseFlag) {
		/* The tokenizer gets a placeholder name, so the
		   alternate-tab warning is unconditionally enabled. */
		ts->filename = "<string>";
		ts->altwarning = 1;
		if (Py_TabcheckFlag >= 2)
			ts->alterror += 1;
	}

	/* parsetok() releases the tokenizer on our behalf. */
	return parsetok(ts, g, start, err_ret);
}


/* Parse input coming from a file.

   fp        stream handed to PyTokenizer_FromFile()
   filename  name recorded in err_ret (and on the tokenizer when tab
             checking or verbose mode is active); may be NULL
   g         grammar passed through to the parser
   start     start symbol passed through to the parser
   ps1, ps2  passed unchanged to PyTokenizer_FromFile()
   err_ret   error record; reset here, then filled in by parsetok()

   Returns the result of parsetok() (a parse tree, or NULL on failure).
   If no tokenizer can be created, returns NULL with err_ret->error set
   to E_NOMEM. */

node *
PyParser_ParseFile(FILE *fp, char *filename, grammar *g, int start,
		   char *ps1, char *ps2, perrdetail *err_ret)
{
	struct tok_state *ts;

	/* Begin with a clean error record that remembers the filename. */
	err_ret->text = NULL;
	err_ret->offset = 0;
	err_ret->lineno = 0;
	err_ret->filename = filename;
	err_ret->error = E_OK;

	ts = PyTokenizer_FromFile(fp, ps1, ps2);
	if (ts == NULL) {
		err_ret->error = E_NOMEM;
		return NULL;
	}

	if (Py_TabcheckFlag || Py_VerboseFlag) {
		/* The alternate-tab warning is only enabled when there is
		   a filename to go with it. */
		ts->filename = filename;
		ts->altwarning = (filename != NULL) ? 1 : 0;
		if (Py_TabcheckFlag >= 2)
			ts->alterror += 1;
	}

	/* parsetok() releases the tokenizer on our behalf. */
	return parsetok(ts, g, start, err_ret);
}

/* Parse input coming from the given tokenizer structure.

   Tokens are pulled from tok with PyTokenizer_Get() and a freshly
   allocated copy of each token's text is fed to a parser created for
   grammar g and start symbol start.  The loop ends when the tokenizer
   returns ERRORTOKEN, when memory runs out, or when PyParser_AddToken()
   reports anything other than E_OK.

   Returns the parse tree if the parser finished with E_DONE, else NULL.
   On failure err_ret->error, ->lineno and ->offset are filled in, and
   ->text receives a PyMem_NEW'ed copy of the tokenizer's current buffer
   when that buffer exists and the allocation succeeds.

   The tokenizer is freed on every path, so the caller must not touch
   tok after this call. */

static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret)
{
	parser_state *ps;
	node *n;
	int started = 0;

	if ((ps = PyParser_New(g, start)) == NULL) {
		fprintf(stderr, "no mem for new parser\n");
		err_ret->error = E_NOMEM;
		/* We own the tokenizer; don't leak it on early exit. */
		PyTokenizer_Free(tok);
		return NULL;
	}

	for (;;) {
		char *a, *b;
		int type;
		size_t len;
		char *str;

		type = PyTokenizer_Get(tok, &a, &b);
		if (type == ERRORTOKEN) {
			err_ret->error = tok->done;
			break;
		}
		/* An ENDMARKER that follows at least one other token is
		   first delivered as a NEWLINE; clearing 'started' lets the
		   next ENDMARKER through unchanged. */
		if (type == ENDMARKER && started) {
			type = NEWLINE; /* Add an extra newline */
			started = 0;
		}
		else
			started = 1;
		/* The tokenizer may hand back NULL pointers for tokens
		   without text; subtracting them would be undefined. */
		len = (a != NULL && b != NULL) ? (size_t)(b - a) : 0;
		str = PyMem_NEW(char, len + 1);
		if (str == NULL) {
			fprintf(stderr, "no mem for next token\n");
			err_ret->error = E_NOMEM;
			break;
		}
		if (len > 0)
			strncpy(str, a, len);
		str[len] = '\0';
		if ((err_ret->error =
		     PyParser_AddToken(ps, type, str, tok->lineno,
				       &(err_ret->expected))) != E_OK) {
			/* str is released here only for real errors; on
			   E_OK / E_DONE it is left alone (presumably the
			   parser keeps it in the tree -- confirm). */
			if (err_ret->error != E_DONE)
				PyMem_DEL(str);
			break;
		}
	}

	if (err_ret->error == E_DONE) {
		/* Detach the tree so PyParser_Delete() leaves it alone. */
		n = ps->p_tree;
		ps->p_tree = NULL;
	}
	else
		n = NULL;

	PyParser_Delete(ps);

	if (n == NULL) {
		if (tok->lineno <= 1 && tok->done == E_EOF)
			err_ret->error = E_EOF;
		err_ret->lineno = tok->lineno;
		if (tok->buf != NULL) {
			size_t len = tok->inp - tok->buf;
			/* Only meaningful relative to a live buffer. */
			err_ret->offset = tok->cur - tok->buf;
			err_ret->text = PyMem_NEW(char, len + 1);
			if (err_ret->text != NULL) {
				if (len > 0)
					strncpy(err_ret->text, tok->buf, len);
				err_ret->text[len] = '\0';
			}
		}
		else
			err_ret->offset = 0;
	}

	PyTokenizer_Free(tok);

	return n;
}
Added copyright notice. 1991-02-19 12:39:46 +00:00			`/***********************************************************`
Change copyright notice. 2000-06-30 23:50:40 +00:00			`Copyright (c) 2000, BeOpen.com.`
			`Copyright (c) 1995-2000, Corporation for National Research Initiatives.`
			`Copyright (c) 1990-1995, Stichting Mathematisch Centrum.`
			`All rights reserved.`

			`See the file "Misc/COPYRIGHT" for information on usage and`
			`redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.`
Added copyright notice. 1991-02-19 12:39:46 +00:00			`******************************************************************/`

Initial revision 1990-10-14 12:07:46 +00:00			`/* Parser-tokenizer link implementation */`

"Compiling" version 1990-12-20 15:06:42 +00:00			`#include "pgenheaders.h"`
Initial revision 1990-10-14 12:07:46 +00:00			`#include "tokenizer.h"`
			`#include "node.h"`
			`#include "grammar.h"`
			`#include "parser.h"`
"Compiling" version 1990-12-20 15:06:42 +00:00			`#include "parsetok.h"`
Initial revision 1990-10-14 12:07:46 +00:00			`#include "errcode.h"`

Declare and use Py_TabcheckFlag here. 1998-04-10 19:35:06 +00:00			`int Py_TabcheckFlag;`

Initial revision 1990-10-14 12:07:46 +00:00
"Compiling" version 1990-12-20 15:06:42 +00:00			`/* Forward */`
Nuke all remaining occurrences of Py_PROTO and Py_FPROTO. 2000-07-09 03:09:57 +00:00			`static node parsetok(struct tok_state , grammar , int, perrdetail );`
Initial revision 1990-10-14 12:07:46 +00:00
"Compiling" version 1990-12-20 15:06:42 +00:00			`/* Parse input coming from a string. Return error code, print some errors. */`
Initial revision 1990-10-14 12:07:46 +00:00
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`node *`
Mass ANSIfication. Work around intrcheck.c's desire to pass 'PyErr_CheckSignals' to 'Py_AddPendingCall' by providing a (static) wrapper function that has the right number of arguments. 2000-07-22 19:20:54 +00:00			`PyParser_ParseString(char s, grammar g, int start, perrdetail *err_ret)`
Initial revision 1990-10-14 12:07:46 +00:00			`{`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`struct tok_state *tok;`

			`err_ret->error = E_OK;`
			`err_ret->filename = NULL;`
			`err_ret->lineno = 0;`
			`err_ret->offset = 0;`
			`err_ret->text = NULL;`

Another directory quickly renamed. 1997-04-29 21:03:06 +00:00			`if ((tok = PyTokenizer_FromString(s)) == NULL) {`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`err_ret->error = E_NOMEM;`
			`return NULL;`
Initial revision 1990-10-14 12:07:46 +00:00			`}`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00
Make sure that -t and -tt also work on strings passed to compile(). 1998-12-21 18:32:40 +00:00			`if (Py_TabcheckFlag \|\| Py_VerboseFlag) {`
			`tok->filename = "<string>";`
			`tok->altwarning = (tok->filename != NULL);`
			`if (Py_TabcheckFlag >= 2)`
			`tok->alterror++;`
			`}`

don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`return parsetok(tok, g, start, err_ret);`
Initial revision 1990-10-14 12:07:46 +00:00			`}`


"Compiling" version 1990-12-20 15:06:42 +00:00			`/* Parse input coming from a file. Return error code, print some errors. */`
Initial revision 1990-10-14 12:07:46 +00:00
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`node *`
Mass ANSIfication. Work around intrcheck.c's desire to pass 'PyErr_CheckSignals' to 'Py_AddPendingCall' by providing a (static) wrapper function that has the right number of arguments. 2000-07-22 19:20:54 +00:00			`PyParser_ParseFile(FILE fp, char filename, grammar *g, int start,`
			`char ps1, char ps2, perrdetail *err_ret)`
Initial revision 1990-10-14 12:07:46 +00:00			`{`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`struct tok_state *tok;`

			`err_ret->error = E_OK;`
			`err_ret->filename = filename;`
			`err_ret->lineno = 0;`
			`err_ret->offset = 0;`
			`err_ret->text = NULL;`

Another directory quickly renamed. 1997-04-29 21:03:06 +00:00			`if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) {`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`err_ret->error = E_NOMEM;`
			`return NULL;`
Initial revision 1990-10-14 12:07:46 +00:00			`}`
Declare and use Py_TabcheckFlag here. 1998-04-10 19:35:06 +00:00			`if (Py_TabcheckFlag \|\| Py_VerboseFlag) {`
			`tok->filename = filename;`
			`tok->altwarning = (filename != NULL);`
			`if (Py_TabcheckFlag >= 2)`
			`tok->alterror++;`
			`}`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00
"Compiling" version 1990-12-20 15:06:42 +00:00
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`return parsetok(tok, g, start, err_ret);`
			`}`
"Compiling" version 1990-12-20 15:06:42 +00:00
			`/* Parse input coming from the given tokenizer structure.`
			`Return error code. */`

don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`static node *`
Mass ANSIfication. Work around intrcheck.c's desire to pass 'PyErr_CheckSignals' to 'Py_AddPendingCall' by providing a (static) wrapper function that has the right number of arguments. 2000-07-22 19:20:54 +00:00			`parsetok(struct tok_state tok, grammar g, int start, perrdetail *err_ret)`
"Compiling" version 1990-12-20 15:06:42 +00:00			`{`
			`parser_state *ps;`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`node *n;`
Append a NEWLINE token at the end of a file. 1992-03-04 16:40:44 +00:00			`int started = 0;`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00
Another directory quickly renamed. 1997-04-29 21:03:06 +00:00			`if ((ps = PyParser_New(g, start)) == NULL) {`
"Compiling" version 1990-12-20 15:06:42 +00:00			`fprintf(stderr, "no mem for new parser\n");`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`err_ret->error = E_NOMEM;`
			`return NULL;`
"Compiling" version 1990-12-20 15:06:42 +00:00			`}`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00
"Compiling" version 1990-12-20 15:06:42 +00:00			`for (;;) {`
			`char a, b;`
			`int type;`
Trent Mick: familiar simple Win64 patches 2000-06-28 22:00:02 +00:00			`size_t len;`
"Compiling" version 1990-12-20 15:06:42 +00:00			`char *str;`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00
Another directory quickly renamed. 1997-04-29 21:03:06 +00:00			`type = PyTokenizer_Get(tok, &a, &b);`
"Compiling" version 1990-12-20 15:06:42 +00:00			`if (type == ERRORTOKEN) {`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`err_ret->error = tok->done;`
"Compiling" version 1990-12-20 15:06:42 +00:00			`break;`
			`}`
Append a NEWLINE token at the end of a file. 1992-03-04 16:40:44 +00:00			`if (type == ENDMARKER && started) {`
			`type = NEWLINE; /* Add an extra newline */`
			`started = 0;`
			`}`
			`else`
			`started = 1;`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`len = b - a; /* XXX this may compute NULL - NULL */`
Another directory quickly renamed. 1997-04-29 21:03:06 +00:00			`str = PyMem_NEW(char, len + 1);`
"Compiling" version 1990-12-20 15:06:42 +00:00			`if (str == NULL) {`
			`fprintf(stderr, "no mem for next token\n");`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`err_ret->error = E_NOMEM;`
"Compiling" version 1990-12-20 15:06:42 +00:00			`break;`
			`}`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`if (len > 0)`
			`strncpy(str, a, len);`
"Compiling" version 1990-12-20 15:06:42 +00:00			`str[len] = '\0';`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`if ((err_ret->error =`
Create two new exceptions: IndentationError and TabError. These are used for indentation related errors. This patch includes Ping's improvements for indentation-related error messages. Closes SourceForge patches #100734 and #100856. 2000-07-11 17:53:00 +00:00			`PyParser_AddToken(ps, (int)type, str, tok->lineno,`
			`&(err_ret->expected))) != E_OK) {`
Finally plug the memory leak caused by syntax error (including interactive EOF, which leaked one byte). 1997-07-27 01:52:50 +00:00			`if (err_ret->error != E_DONE)`
			`PyMem_DEL(str);`
"Compiling" version 1990-12-20 15:06:42 +00:00			`break;`
Finally plug the memory leak caused by syntax error (including interactive EOF, which leaked one byte). 1997-07-27 01:52:50 +00:00			`}`
"Compiling" version 1990-12-20 15:06:42 +00:00			`}`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00
			`if (err_ret->error == E_DONE) {`
			`n = ps->p_tree;`
			`ps->p_tree = NULL;`
			`}`
			`else`
			`n = NULL;`

Another directory quickly renamed. 1997-04-29 21:03:06 +00:00			`PyParser_Delete(ps);`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00
			`if (n == NULL) {`
			`if (tok->lineno <= 1 && tok->done == E_EOF)`
			`err_ret->error = E_EOF;`
			`err_ret->lineno = tok->lineno;`
			`err_ret->offset = tok->cur - tok->buf;`
			`if (tok->buf != NULL) {`
Trent Mick: familiar simple Win64 patches 2000-06-28 22:00:02 +00:00			`size_t len = tok->inp - tok->buf;`
Vladimir Marangozov's long-awaited malloc restructuring. For more comments, read the patches@python.org archives. For documentation read the comments in mymalloc.h and objimpl.h. (This is not exactly what Vladimir posted to the patches list; I've made a few changes, and Vladimir sent me a fix in private email for a problem that only occurs in debug mode. I'm also holding back on his change to main.c, which seems unnecessary to me.) 2000-05-03 23:44:39 +00:00			`err_ret->text = PyMem_NEW(char, len + 1);`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`if (err_ret->text != NULL) {`
fix strncpy call (uninitialized memory read) 1995-01-20 16:59:12 +00:00			`if (len > 0)`
			`strncpy(err_ret->text, tok->buf, len);`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00			`err_ret->text[len] = '\0';`
			`}`
			`}`
			`}`

Another directory quickly renamed. 1997-04-29 21:03:06 +00:00			`PyTokenizer_Free(tok);`
don't call strncpy(str, NULL, 0) 1994-08-29 12:25:45 +00:00
			`return n;`
"Compiling" version 1990-12-20 15:06:42 +00:00			`}`