Skip to content
This repository has been archived by the owner on Jul 5, 2023. It is now read-only.

Commit

Permalink
Implement underscores in numeric literals (#21)
Browse files Browse the repository at this point in the history
Implements underscores in numeric literals. Cherry-picks the necessary parts from the original implementation in Python 3.6 by Georg and Serhiy.

This could be useful for mypy.
  • Loading branch information
ilevkivskyi authored and ddfisher committed Oct 31, 2016
1 parent 15e1bf1 commit 93d4e80
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 48 deletions.
6 changes: 4 additions & 2 deletions ast35/Include/Python-ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ struct _expr {

struct {
object n;
int contains_underscores;
} Num;

struct {
Expand Down Expand Up @@ -570,8 +571,9 @@ expr_ty _Ta35_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators,
#define Call(a0, a1, a2, a3, a4, a5) _Ta35_Call(a0, a1, a2, a3, a4, a5)
expr_ty _Ta35_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int
lineno, int col_offset, PyArena *arena);
#define Num(a0, a1, a2, a3) _Ta35_Num(a0, a1, a2, a3)
expr_ty _Ta35_Num(object n, int lineno, int col_offset, PyArena *arena);
#define Num(a0, a1, a2, a3, a4) _Ta35_Num(a0, a1, a2, a3, a4)
expr_ty _Ta35_Num(object n, int contains_underscores, int lineno, int
col_offset, PyArena *arena);
#define Str(a0, a1, a2, a3) _Ta35_Str(a0, a1, a2, a3)
expr_ty _Ta35_Str(string s, int lineno, int col_offset, PyArena *arena);
#define Bytes(a0, a1, a2, a3) _Ta35_Bytes(a0, a1, a2, a3)
Expand Down
4 changes: 3 additions & 1 deletion ast35/Parser/Python.asdl
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ module Python
-- x < 4 < 3 and (x < 4) < 3
| Compare(expr left, cmpop* ops, expr* comparators)
| Call(expr func, expr* args, keyword* keywords)
| Num(object n) -- a number as a PyObject.
-- contains_underscores is not part of standard Python ASDL
-- and exists here to signal that a Python 3.6 feature was used
| Num(object n, int? contains_underscores) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
| Bytes(bytes s)
| NameConstant(singleton value)
Expand Down
129 changes: 90 additions & 39 deletions ast35/Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1343,6 +1343,27 @@ verify_identifier(struct tok_state *tok)
}
#endif

/* Scan the remainder of a decimal digit sequence that may contain single
   underscores between digits.

   Characters are pulled with tok_nextc().  A run of digits is consumed; if
   the character that ends the run is '_', the character after it must be a
   digit, in which case scanning continues with the next run.

   Returns the first character read that is neither a digit nor an
   accepted underscore (that character is NOT pushed back).  If an
   underscore is not followed by a digit, sets tok->done to E_TOKEN, pushes
   the offending character back with tok_backup() and returns 0. */
static int
tok_decimal_tail(struct tok_state *tok)
{
    int ch;

    for (;;) {
        /* Consume a run of digits; one character is always read here. */
        ch = tok_nextc(tok);
        while (isdigit(ch)) {
            ch = tok_nextc(tok);
        }
        if (ch != '_') {
            /* End of the numeric tail: hand the terminator to the caller. */
            return ch;
        }
        /* An underscore is only legal directly in front of another digit. */
        ch = tok_nextc(tok);
        if (isdigit(ch)) {
            continue;
        }
        tok->done = E_TOKEN;
        tok_backup(tok, ch);
        return 0;
    }
}

/* Get next token, after space stripping etc. */

static int
Expand Down Expand Up @@ -1644,64 +1665,88 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
if (c == '0') {
/* Hex, octal or binary -- maybe. */
c = tok_nextc(tok);
if (c == '.')
goto fraction;
if (c == 'j' || c == 'J')
goto imaginary;
if (c == 'x' || c == 'X') {

/* Hex */
c = tok_nextc(tok);
if (!isxdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (isxdigit(c));
if (c == '_')
c = tok_nextc(tok);
if (!isxdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (isxdigit(c));
} while (c == '_');
}
else if (c == 'o' || c == 'O') {
/* Octal */
c = tok_nextc(tok);
if (c < '0' || c >= '8') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while ('0' <= c && c < '8');
if (c == '_')
c = tok_nextc(tok);
if (c < '0' || c >= '8') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while ('0' <= c && c < '8');
} while (c == '_');
}
else if (c == 'b' || c == 'B') {
/* Binary */
c = tok_nextc(tok);
if (c != '0' && c != '1') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (c == '0' || c == '1');
if (c == '_')
c = tok_nextc(tok);
if (c != '0' && c != '1') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (c == '0' || c == '1');
} while (c == '_');
}
else {
int nonzero = 0;
/* maybe old-style octal; c is first char of it */
/* in any case, allow '0' as a literal */
while (c == '0')
while (1) {
if (c == '_') {
c = tok_nextc(tok);
if (!isdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
}
if (c != '0')
break;
c = tok_nextc(tok);
while (isdigit(c)) {
}
if (isdigit(c)) {
nonzero = 1;
c = tok_nextc(tok);
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
if (c == '.')
if (c == '.') {
c = tok_nextc(tok);
goto fraction;
}
else if (c == 'e' || c == 'E')
goto exponent;
else if (c == 'j' || c == 'J')
goto imaginary;
else if (nonzero) {
/* Old-style octal: now disallowed. */
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
Expand All @@ -1710,17 +1755,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}
else {
/* Decimal */
do {
c = tok_nextc(tok);
} while (isdigit(c));
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
{
/* Accept floating point numbers. */
if (c == '.') {
c = tok_nextc(tok);
fraction:
/* Fraction */
do {
c = tok_nextc(tok);
} while (isdigit(c));
if (isdigit(c)) {
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
}
if (c == 'e' || c == 'E') {
int e;
Expand All @@ -1742,9 +1792,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_end = tok->cur;
return NUMBER;
}
do {
c = tok_nextc(tok);
} while (isdigit(c));
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
if (c == 'j' || c == 'J')
/* Imaginary part */
Expand Down
27 changes: 24 additions & 3 deletions ast35/Python/Python-ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,10 @@ static char *Call_fields[]={
};
static PyTypeObject *Num_type;
_Py_IDENTIFIER(n);
_Py_IDENTIFIER(contains_underscores);
static char *Num_fields[]={
"n",
"contains_underscores",
};
static PyTypeObject *Str_type;
_Py_IDENTIFIER(s);
Expand Down Expand Up @@ -937,7 +939,7 @@ static int init_types(void)
if (!Compare_type) return 0;
Call_type = make_type("Call", expr_type, Call_fields, 3);
if (!Call_type) return 0;
Num_type = make_type("Num", expr_type, Num_fields, 1);
Num_type = make_type("Num", expr_type, Num_fields, 2);
if (!Num_type) return 0;
Str_type = make_type("Str", expr_type, Str_fields, 1);
if (!Str_type) return 0;
Expand Down Expand Up @@ -2077,7 +2079,8 @@ Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int
}

expr_ty
Num(object n, int lineno, int col_offset, PyArena *arena)
Num(object n, int contains_underscores, int lineno, int col_offset, PyArena
*arena)
{
expr_ty p;
if (!n) {
Expand All @@ -2090,6 +2093,7 @@ Num(object n, int lineno, int col_offset, PyArena *arena)
return NULL;
p->kind = Num_kind;
p->v.Num.n = n;
p->v.Num.contains_underscores = contains_underscores;
p->lineno = lineno;
p->col_offset = col_offset;
return p;
Expand Down Expand Up @@ -3267,6 +3271,12 @@ ast2obj_expr(void* _o)
if (_PyObject_SetAttrId(result, &PyId_n, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->v.Num.contains_underscores);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_contains_underscores, value) ==
-1)
goto failed;
Py_DECREF(value);
break;
case Str_kind:
result = PyType_GenericNew(Str_type, NULL, NULL);
Expand Down Expand Up @@ -6267,6 +6277,7 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
}
if (isinstance) {
object n;
int contains_underscores;

if (_PyObject_HasAttrId(obj, &PyId_n)) {
int res;
Expand All @@ -6279,7 +6290,17 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
PyErr_SetString(PyExc_TypeError, "required field \"n\" missing from Num");
return 1;
}
*out = Num(n, lineno, col_offset, arena);
if (exists_not_none(obj, &PyId_contains_underscores)) {
int res;
tmp = _PyObject_GetAttrId(obj, &PyId_contains_underscores);
if (tmp == NULL) goto failed;
res = obj2ast_int(tmp, &contains_underscores, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
} else {
contains_underscores = 0;
}
*out = Num(n, contains_underscores, lineno, col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
Expand Down
34 changes: 31 additions & 3 deletions ast35/Python/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -2204,15 +2204,18 @@ ast_for_atom(struct compiling *c, const node *n)
return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
}
case NUMBER: {
PyObject *pynum = parsenumber(c, STR(ch));
const char *s = STR(ch);
int contains_underscores = strchr(s, '_') != NULL;
PyObject *pynum = parsenumber(c, s);
if (!pynum)
return NULL;

if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
Py_DECREF(pynum);
return NULL;
}
return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
return Num(pynum, contains_underscores, LINENO(n),
n->n_col_offset, c->c_arena);
}
case ELLIPSIS: /* Ellipsis */
return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena);
Expand Down Expand Up @@ -4124,7 +4127,7 @@ ast_for_stmt(struct compiling *c, const node *n)
}

static PyObject *
parsenumber(struct compiling *c, const char *s)
parsenumber_raw(struct compiling *c, const char *s)
{
const char *end;
long x;
Expand Down Expand Up @@ -4166,6 +4169,31 @@ parsenumber(struct compiling *c, const char *s)
}
}

/* Parse the text of a numeric literal that may contain underscores.

   If `s` contains no '_' it is passed to parsenumber_raw() unchanged.
   Otherwise a temporary copy of `s` with every '_' removed is built,
   parsed with parsenumber_raw(), and freed again before returning.

   Returns whatever parsenumber_raw() returns for the underscore-free text.
   If the temporary buffer cannot be allocated, returns the result of
   PyErr_NoMemory() instead of writing through a NULL pointer. */
static PyObject *
parsenumber(struct compiling *c, const char *s)
{
    char *dup, *end;
    PyObject *res;

    assert(s != NULL);

    if (strchr(s, '_') == NULL) {
        return parsenumber_raw(c, s);
    }
    /* Create a duplicate without underscores.  The copy can never be
       longer than the original, so strlen(s) + 1 bytes always suffice. */
    dup = PyMem_Malloc(strlen(s) + 1);
    if (dup == NULL) {
        /* Allocation failed: report it rather than dereferencing NULL. */
        return PyErr_NoMemory();
    }
    end = dup;
    for (; *s; s++) {
        if (*s != '_') {
            *end++ = *s;
        }
    }
    *end = '\0';
    res = parsenumber_raw(c, dup);
    PyMem_Free(dup);
    return res;
}

static PyObject *
decode_utf8(struct compiling *c, const char **sPtr, const char *end)
{
Expand Down

0 comments on commit 93d4e80

Please sign in to comment.