cpython/Parser/tokenizer.h

#ifndef Py_TOKENIZER_H
#define Py_TOKENIZER_H
#ifdef __cplusplus
extern "C" {
#endif

#include "object.h"

/* Tokenizer interface */

#include "token.h"	/* For token types */

/* MAXINDENT is the element count of the indstack and altindstack
   arrays in struct tok_state. */
#define MAXINDENT 100	/* Max indentation level */

/*
 * Tokenizer state.
 *
 * Field order and types are part of the layout shared with the tokenizer
 * implementation; only the presentation of this declaration may vary.
 */
struct tok_state {
	/*
	 * Input state.
	 * Invariant: buf <= cur <= inp <= end.
	 * NB: an entire line is held in the buffer.
	 */
	char *buf;		/* Input buffer, or NULL; malloc'ed if fp != NULL */
	char *cur;		/* Next character in buffer */
	char *inp;		/* End of data in buffer */
	char *end;		/* End of input buffer if buf != NULL */
	char *start;		/* Start of current token if not NULL */

	/*
	 * Completion status: E_OK normally, E_EOF at EOF, otherwise an
	 * error code.
	 * NB: if done != E_OK, cur must be == inp!
	 */
	int done;

	FILE *fp;		/* Rest of input; NULL if tokenizing a string */

	/* Indentation tracking */
	int tabsize;			/* Tab spacing */
	int indent;			/* Current indentation index */
	int indstack[MAXINDENT];	/* Stack of indents */
	int atbol;			/* Nonzero if at begin of new line */
	int pendin;			/* Pending indents (> 0) or dedents (< 0) */

	/* Prompts used for interactive prompting */
	char *prompt;
	char *nextprompt;

	int lineno;		/* Current line number */

	/*
	 * Nesting level of () [] {} parentheses; used to allow free
	 * continuations inside them.
	 */
	int level;

	/* Checking on different tab sizes */
	char *filename;			/* For error messages */
	int altwarning;			/* Issue warning if alternate tabs don't match */
	int alterror;			/* Issue error if alternate tabs don't match */
	int alttabsize;			/* Alternate tab spacing */
	int altindstack[MAXINDENT];	/* Stack of alternate indents */

	/* PEP 0263 (source encoding) support */
	int decoding_state;		/* -1:decoding, 0:init, 1:raw */
	int decoding_erred;		/* Whether erred in decoding */
	int read_coding_spec;		/* Whether 'coding:...' has been read */
	int issued_encoding_warning;	/* Whether non-ASCII warning was issued */
	char *encoding;			/* NOTE(review): presumably the source
					   encoding name -- confirm in tokenizer.c */

	int cont_line;		/* Whether we are in a continuation line */

#ifndef PGEN
	/* These two members are compiled out when PGEN is defined. */
	PyObject *decoding_readline;	/* codecs.open(...).readline */
	PyObject *decoding_buffer;	/* NOTE(review): undocumented here --
					   see tokenizer.c for its use */
#endif

	/* NOTE(review): enc and str are undocumented in this header;
	   confirm their meaning against tokenizer.c. */
	const char *enc;
	const char *str;
};

/*
 * Tokenizer entry points.
 *
 * Parameter names are given for readability only and do not affect the
 * signatures.  NOTE(review): the names and the descriptions below are
 * inferred from the types and from the struct tok_state field comments;
 * confirm them against the definitions in tokenizer.c.
 */

/* Returns a tokenizer state for a string; presumably tokenizes `str`. */
extern struct tok_state *PyTokenizer_FromString(char *str);

/* Returns a tokenizer state for a file; the two char * arguments are
   presumably the interactive prompts (see prompt/nextprompt). */
extern struct tok_state *PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2);

/* Presumably releases a state obtained from one of the functions above. */
extern void PyTokenizer_Free(struct tok_state *tok);

/* Presumably yields the next token: the int result being a token type
   from token.h, with the token's start/end stored via the two char **. */
extern int PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKENIZER_H */
* Added support for X11 modules. * Makefile: change location of FORMS library. * posixmodule.c: turn #if 0 into #ifdef MSDOS (stuff in unistd.h or not) * Almost all .h files: added CPP magic to avoid duplicate inclusions and to support inclusion from C++. 1993-07-28 09:05:47 +00:00			`#ifndef Py_TOKENIZER_H`
			`#define Py_TOKENIZER_H`
			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

Patch #534304: Implement phase 1 of PEP 263. 2002-08-04 17:29:52 +00:00			`#include "object.h"`
Added copyright notice. 1991-02-19 12:39:46 +00:00
Initial revision 1990-10-14 12:07:46 +00:00			`/* Tokenizer interface */`

			`#include "token.h" /* For token types */`

			`#define MAXINDENT 100 /* Max indentation level */`

			`/* Tokenizer state */`
			`struct tok_state {`
			`/* Input state; buf <= cur <= inp <= end */`
* pythonrun.c: Print exception type+arg after stack trace instead of before it. * ceval.c, object.c: moved testbool() to object.c (now extern visible) * stringobject.c: fix bugs in and rationalize string resize in formatstring() * tokenizer.[ch]: fix non-working code for lines longer than BUFSIZ 1993-05-12 08:24:20 +00:00			`/* NB an entire line is held in the buffer */`
			`char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */`
Initial revision 1990-10-14 12:07:46 +00:00			`char *cur; /* Next character in buffer */`
			`char *inp; /* End of data in buffer */`
* pythonrun.c: Print exception type+arg after stack trace instead of before it. * ceval.c, object.c: moved testbool() to object.c (now extern visible) * stringobject.c: fix bugs in and rationalize string resize in formatstring() * tokenizer.[ch]: fix non-working code for lines longer than BUFSIZ 1993-05-12 08:24:20 +00:00			`char *end; /* End of input buffer if buf != NULL */`
Merge back to main trunk 1994-08-30 08:27:36 +00:00			`char *start; /* Start of current token if not NULL */`
* pythonrun.c: Print exception type+arg after stack trace instead of before it. * ceval.c, object.c: moved testbool() to object.c (now extern visible) * stringobject.c: fix bugs in and rationalize string resize in formatstring() * tokenizer.[ch]: fix non-working code for lines longer than BUFSIZ 1993-05-12 08:24:20 +00:00			`int done; /* E_OK normally, E_EOF at EOF, otherwise error code */`
			`/* NB If done != E_OK, cur must be == inp!!! */`
Initial revision 1990-10-14 12:07:46 +00:00			`FILE *fp; /* Rest of input; NULL if tokenizing a string */`
			`int tabsize; /* Tab spacing */`
			`int indent; /* Current indentation index */`
			`int indstack[MAXINDENT]; /* Stack of indents */`
			`int atbol; /* Nonzero if at begin of new line */`
			`int pendin; /* Pending indents (if > 0) or dedents (if < 0) */`
			`char *prompt, *nextprompt; /* For interactive prompting */`
			`int lineno; /* Current line number */`
* selectmodule.c: fix (another!) two memory leaks -- this time in list2set * tokenizer.[ch]: allow continuation without \ inside () [] {}. 1993-05-12 11:35:44 +00:00			`int level; /* () [] {} Parentheses nesting level */`
			`/* Used to allow free continuations inside them */`
Add checking for inconsistent tab usage 1998-04-09 21:38:06 +00:00			`/* Stuff for checking on different tab sizes */`
			`char *filename; /* For error messages */`
			`int altwarning; /* Issue warning if alternate tabs don't match */`
			`int alterror; /* Issue error if alternate tabs don't match */`
			`int alttabsize; /* Alternate tab spacing */`
			`int altindstack[MAXINDENT]; /* Stack of alternate indents */`
Patch #534304: Implement phase 1 of PEP 263. 2002-08-04 17:29:52 +00:00			`/* Stuff for PEP 0263 */`
			`int decoding_state; /* -1:decoding, 0:init, 1:raw */`
			`int decoding_erred; /* whether erred in decoding */`
			`int read_coding_spec; /* whether 'coding:...' has been read */`
			`int issued_encoding_warning; /* whether non-ASCII warning was issued */`
			`char *encoding;`
Ignore encoding declarations inside strings. Fixes #603509. 2002-09-03 11:52:44 +00:00			`int cont_line; /* whether we are in a continuation line. */`
Make pgen compile with pydebug. Duplicate normalized names, as it may be longer than the old string. 2002-08-04 20:10:29 +00:00			`#ifndef PGEN`
Patch #534304: Implement phase 1 of PEP 263. 2002-08-04 17:29:52 +00:00			`PyObject *decoding_readline; /* codecs.open(...).readline */`
			`PyObject *decoding_buffer;`
Make pgen compile with pydebug. Duplicate normalized names, as it may be longer than the old string. 2002-08-04 20:10:29 +00:00			`#endif`
Patch #534304: Implement phase 1 of PEP 263. 2002-08-04 17:29:52 +00:00			`const char* enc;`
			`const char* str;`
Initial revision 1990-10-14 12:07:46 +00:00			`};`

Nuke all remaining occurrences of Py_PROTO and Py_FPROTO. 2000-07-09 03:09:57 +00:00			`extern struct tok_state *PyTokenizer_FromString(char *);`
			`extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);`
			`extern void PyTokenizer_Free(struct tok_state *);`
			`extern int PyTokenizer_Get(struct tok_state *, char **, char **);`
* Added support for X11 modules. * Makefile: change location of FORMS library. * posixmodule.c: turn #if 0 into #ifdef MSDOS (stuff in unistd.h or not) * Almost all .h files: added CPP magic to avoid duplicate inclusions and to support inclusion from C++. 1993-07-28 09:05:47 +00:00
			`#ifdef __cplusplus`
			`}`
			`#endif`
			`#endif /* !Py_TOKENIZER_H */`