Skip to content

Commit

Permalink
Implement printf %()T %.s %*s %.*s (#668)
Browse files Browse the repository at this point in the history
* [builtin/printf] Support %()T

* [builtin/printf] Support %*.*s

* [builtin/printf] Support %6.s

* [test/spec] Fix test case for "printf # flag"

* [builtin/printf] Support width and precision for %()T

* [builtin/printf] Support multiple flags
  • Loading branch information
akinomyoga authored Mar 19, 2020
1 parent 99f3349 commit cde3f10
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 42 deletions.
2 changes: 1 addition & 1 deletion frontend/id_kind_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def AddKinds(spec):
spec.AddKind('Format', [
'EscapedPercent',
'Percent', # starts another lexer mode
'Flag', 'Num', 'Dot', 'Type',
'Flag', 'Num', 'Dot', 'Type', 'Star', 'Time', 'Zero',
])

# For parsing prompt strings like PS1.
Expand Down
7 changes: 5 additions & 2 deletions frontend/lexer_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,13 +530,16 @@ def IsKeyword(name):
# Maybe: bash also supports %(strftime)T
LEXER_DEF[lex_mode_e.PrintfPercent] = [
# Flags
R('[-0 +#]', Id.Format_Flag),
R('[- +#]', Id.Format_Flag),
C('0', Id.Format_Zero),

R('[1-9][0-9]*', Id.Format_Num),
C('*', Id.Format_Star),
C('.', Id.Format_Dot),
# We support dsq. The others we parse to display an error message.
R('[disqbcouxXeEfFgG]', Id.Format_Type),
R(r'[^\0]', Id.Unknown_Tok), # any otehr char
R('\([^()]*\)T', Id.Format_Time),
R(r'[^\0]', Id.Unknown_Tok), # any other char
]

LEXER_DEF[lex_mode_e.VSub_1] = [
Expand Down
2 changes: 1 addition & 1 deletion frontend/syntax.asdl
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ module syntax
Literal(Token token)
-- flags are 0 hyphen space + #
-- type is 's' for %s, etc.
| Percent(Token? flag, Token? width, Token? precision, Token type)
| Percent(Token* flags, Token? width, Token? precision, Token type)

--
-- OIL LANGUAGE
Expand Down
154 changes: 127 additions & 27 deletions osh/builtin_printf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@
from __future__ import print_function

from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.runtime_asdl import cmd_value__Argv
from _devbuild.gen.runtime_asdl import cmd_value__Argv, value_e, value__Str
from _devbuild.gen.syntax_asdl import (
printf_part, printf_part_t,
source
source,
)
from _devbuild.gen.types_asdl import lex_mode_e, lex_mode_t

import sys
import time
import os

from asdl import runtime
from core import error
Expand All @@ -27,7 +29,7 @@
from osh import string_ops
from osh import word_compile

from typing import Dict, List, TYPE_CHECKING
from typing import Dict, List, TYPE_CHECKING, cast

if TYPE_CHECKING:
from frontend.lexer import Lexer
Expand All @@ -40,12 +42,15 @@
PRINTF_SPEC = arg_def.Register('printf') # TODO: Don't need this?
PRINTF_SPEC.ShortFlag('-v', args.Str)

shell_start_time = time.time()

class _FormatStringParser(object):
"""
Grammar:
fmt = Format_Percent Flag? Num? (Dot Num)? Type
width = Num | Star
precision = Dot (Num | Star | Zero)?
fmt = Percent (Flag | Zero)* width? precision? (Type | Time)
part = Char_* | Format_EscapedPercent | fmt
printf_format = part* Eof_Real # we're using the main lexer
Expand All @@ -70,25 +75,27 @@ def _ParseFormatStr(self):
self._Next(lex_mode_e.PrintfPercent) # move past %

part = printf_part.Percent()
if self.token_type == Id.Format_Flag:
part.flag = self.cur_token
self._Next(lex_mode_e.PrintfPercent)

while self.token_type in (Id.Format_Flag, Id.Format_Zero):
# space and + could be implemented
flag = part.flag.val
flag = self.cur_token.val
if flag in '# +':
p_die("osh printf doesn't support the %r flag", flag, token=part.flag)
p_die("osh printf doesn't support the %r flag", flag, token=self.cur_token)

part.flags.append(self.cur_token)
self._Next(lex_mode_e.PrintfPercent)

if self.token_type == Id.Format_Num:
if self.token_type in (Id.Format_Num, Id.Format_Star):
part.width = self.cur_token
self._Next(lex_mode_e.PrintfPercent)

if self.token_type == Id.Format_Dot:
self._Next(lex_mode_e.PrintfPercent) # past dot
part.precision = self.cur_token
self._Next(lex_mode_e.PrintfPercent)
self._Next(lex_mode_e.PrintfPercent) # past dot
if self.token_type in (Id.Format_Num, Id.Format_Star, Id.Format_Zero):
part.precision = self.cur_token
self._Next(lex_mode_e.PrintfPercent)

if self.token_type == Id.Format_Type:
if self.token_type in (Id.Format_Type, Id.Format_Time):
part.type = self.cur_token

# ADDITIONAL VALIDATION outside the "grammar".
Expand All @@ -108,7 +115,7 @@ def _ParseFormatStr(self):
p_die(msg, token=self.cur_token)

# Do this check AFTER the floating point checks
if part.precision and part.type.val not in 'fs':
if part.precision and part.type.val[-1] not in 'fsT':
p_die("precision can't be specified when here",
token=part.precision)

Expand Down Expand Up @@ -205,28 +212,92 @@ def Run(self, cmd_val):
out.append(s)

elif isinstance(part, printf_part.Percent):
try:
flags = None
if len(part.flags) > 0:
flags = ''
for flag_token in part.flags:
flags += flag_token.val

width = None
if part.width:
if part.width.id in (Id.Format_Num, Id.Format_Zero):
width = part.width.val
width_spid = part.width.span_id
elif part.width.id == Id.Format_Star:
if arg_index < num_args:
width = varargs[arg_index]
width_spid = spids[arg_index]
arg_index += 1
else:
width = ''
width_spid = runtime.NO_SPID
else:
raise AssertionError()

try:
width = int(width)
except ValueError:
if width_spid == runtime.NO_SPID:
width_spid = part.width.span_id
self.errfmt.Print("printf got invalid number %r for the width", s,
span_id = width_spid)
return 1

precision = None
if part.precision:
if part.precision.id == Id.Format_Dot:
precision = '0'
precision_spid = part.precision.span_id
elif part.precision.id in (Id.Format_Num, Id.Format_Zero):
precision = part.precision.val
precision_spid = part.precision.span_id
elif part.precision.id == Id.Format_Star:
if arg_index < num_args:
precision = varargs[arg_index]
precision_spid = spids[arg_index]
arg_index += 1
else:
precision = ''
precision_spid = runtime.NO_SPID
else:
raise AssertionError()

try:
precision = int(precision)
except ValueError:
if precision_spid == runtime.NO_SPID:
precision_spid = part.precision.span_id
self.errfmt.Print("printf got invalid number %r for the precision", s,
span_id = precision_spid)
return 1

if arg_index < num_args:
s = varargs[arg_index]
word_spid = spids[arg_index]
except IndexError:
arg_index += 1
else:
s = ''
word_spid = runtime.NO_SPID

typ = part.type.val
if typ == 's':
if part.precision:
precision = int(part.precision.val)
if precision is not None:
s = s[:precision] # truncate
elif typ == 'q':
s = string_ops.ShellQuoteOneLine(s)
elif typ in 'diouxX':
elif typ in 'diouxX' or part.type.id == Id.Format_Time:
try:
d = int(s)
except ValueError:
if len(s) >= 2 and s[0] in '\'"':
# TODO: utf-8 decode s[1:] to be more correct. Probably
# depends on issue #366, a utf-8 library.
d = ord(s[1])
elif part.type.id == Id.Format_Time and len(s) == 0 and word_spid == runtime.NO_SPID:
# Note: No argument means -1 for %(...)T as in Bash Reference
# Manual 4.2 "If no argument is specified, conversion behaves
# as if -1 had been given."
d = -1
else:
# This works around the fact that in the arg recycling case, you have no spid.
if word_spid == runtime.NO_SPID:
Expand All @@ -252,27 +323,56 @@ def Run(self, cmd_val):
s = '%x' % d
elif typ == 'X':
s = '%X' % d
elif part.type.id == Id.Format_Time:
# %(...)T

# Initialize timezone:
# `localtime' uses the current timezone information initialized
# by `tzset'. The function `tzset' reads the environment
# variable `TZ'. When the exported shell variable `TZ' is present,
# its value must be copied into the real environment
# variable `TZ' before `tzset' is called.
tzcell = self.mem.GetCell('TZ')
if tzcell and tzcell.exported and tzcell.val.tag_() == value_e.Str:
tzval = cast(value__Str, tzcell.val)
os.environ['TZ'] = tzval.s
elif 'TZ' in os.environ:
del os.environ['TZ']
time.tzset()

# Handle special values:
# User can specify two special values -1 and -2 as in Bash
# Reference Manual 4.2: "Two special argument values may be
# used: -1 represents the current time, and -2 represents the
# time the shell was invoked." from
# https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf
if d == -1: # the current time
d = time.time()
elif d == -2: # the shell start time
d = shell_start_time

s = time.strftime(typ[1:-2], time.localtime(d))
if precision is not None:
s = s[:precision] # truncate

else:
raise AssertionError()

else:
raise AssertionError()

if part.width:
width = int(part.width.val)
if part.flag:
flag = part.flag.val
if flag == '-':
if width is not None:
if flags:
if '-' in flags:
s = s.ljust(width, ' ')
elif flag == '0':
elif '0' in flags:
s = s.rjust(width, '0')
else:
pass
else:
s = s.rjust(width, ' ')

out.append(s)
arg_index += 1

else:
raise AssertionError()
Expand Down
69 changes: 58 additions & 11 deletions spec/builtin-printf.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,30 @@ printf '[%0.0s]\n' foo
## N-I mksh stdout-json: "[ ]\n["
## N-I mksh status: 1

#### printf %6.s and %0.s
printf '[%6.s]\n' foo
printf '[%0.s]\n' foo
## STDOUT:
[ ]
[]
## END
## N-I mksh stdout-json: "[ ]\n["
## N-I mksh status: 1

#### printf %*.*s (width/precision from args)
printf '[%*s]\n' 9 hello
printf '[%.*s]\n' 3 hello
printf '[%*.3s]\n' 9 hello
printf '[%9.*s]\n' 3 hello
printf '[%*.*s]\n' 9 3 hello
## STDOUT:
[ hello]
[hel]
[ hel]
[ hel]
[ hel]
## END

#### unsigned / octal / hex
printf '[%u]\n' 42
printf '[%o]\n' 42
Expand Down Expand Up @@ -491,17 +515,22 @@ printf '[% d]\n' -42

#### printf # flag
# I didn't know these existed -- I only knew about - and 0 !
printf '[%#o]\n' 42
printf '[%#x]\n' 42
printf '[%#X]\n' 42
# Note: '#' flag for integers outputs a prefix ONLY WHEN the value is non-zero
printf '[%#o][%#o]\n' 0 42
printf '[%#x][%#x]\n' 0 42
printf '[%#X][%#X]\n' 0 42
echo ---
printf '[%#f]\n' 3
## STDOUT:
[052]
[0x2a]
[0X2A]
# Note: '#' flag for %f, %g always outputs the decimal point.
printf '[%.0f][%#.0f]\n' 3 3
# Note: In addition, the '#' flag for %g keeps trailing zeroes in the fraction
printf '[%g][%#g]\n' 3 3
## STDOUT:
[0][052]
[0][0x2a]
[0][0X2A]
---
[3.000000]
[3][3.]
[3][3.00000]
## END
## N-I osh STDOUT:
---
Expand Down Expand Up @@ -541,15 +570,33 @@ status=1
## END

#### %(strftime format)T
# The result depends on timezone
export TZ=Asia/Tokyo
printf '%(%Y-%m-%d)T\n' 1557978599
export TZ=US/Eastern
printf '%(%Y-%m-%d)T\n' 1557978599
echo status=$?
## STDOUT:
2019-05-16
2019-05-15
status=0
## END
## N-I dash/mksh/zsh/ash STDOUT:
status=1
## END
## N-I osh STDOUT:
status=2

#### %10.5(strftime format)T
# The result depends on timezone
export TZ=Asia/Tokyo
printf '[%10.5(%Y-%m-%d)T]\n' 1557978599
export TZ=US/Eastern
printf '[%10.5(%Y-%m-%d)T]\n' 1557978599
echo status=$?
## STDOUT:
[ 2019-]
[ 2019-]
status=0
## END
## N-I dash/mksh/zsh/ash STDOUT:
[[status=1
## END

0 comments on commit cde3f10

Please sign in to comment.