From dc2c4f1254603b88a2ff69c2f8bdfc3a8acfca60 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Wed, 15 Nov 2023 14:15:16 -0500 Subject: [PATCH] [REPL] fix computation of startpos for path completions (#52009) Fixes https://github.com/JuliaLang/julia/issues/51985 Ensure that the REPL completions escape and unescape text correctly, using the correct functions, and accounting for exactly what the user has currently typed. The old broken method is left around for Pkg, since it has an over-reliance on it returning incorrect answers. Once Pkg is fixed, we can delete that code. Co-authored-by: Jameson Nash (cherry picked from commit 5edcdc5a234fa69a65f5e2b8d85ac51cfb37a653) --- base/shell.jl | 24 ++- stdlib/REPL/src/REPLCompletions.jl | 266 +++++++++++++++++++--------- stdlib/REPL/test/replcompletions.jl | 58 +++--- test/spawn.jl | 16 +- 4 files changed, 246 insertions(+), 118 deletions(-) diff --git a/base/shell.jl b/base/shell.jl index 5bfd11fb46d29..48214418bdee5 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -18,12 +18,11 @@ end function shell_parse(str::AbstractString, interpolate::Bool=true; special::AbstractString="", filename="none") - s = SubString(str, firstindex(str)) + last_arg = firstindex(str) # N.B.: This is used by REPLCompletions + s = SubString(str, last_arg) s = rstrip_shell(lstrip(s)) - # N.B.: This is used by REPLCompletions - last_parse = 0:-1 - isempty(s) && return interpolate ? (Expr(:tuple,:()),last_parse) : ([],last_parse) + isempty(s) && return interpolate ? (Expr(:tuple,:()), last_arg) : ([], last_arg) in_single_quotes = false in_double_quotes = false @@ -32,6 +31,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; arg = [] i = firstindex(s) st = Iterators.Stateful(pairs(s)) + update_last_arg = false # true after spaces or interpolate function push_nonempty!(list, x) if !isa(x,AbstractString) || !isempty(x) @@ -54,6 +54,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; for (j, c) in st j, c = j::Int, c::C if !in_single_quotes && !in_double_quotes && isspace(c) + update_last_arg = true i = consume_upto!(arg, s, i, j) append_2to1!(args, arg) while !isempty(st) @@ -77,12 +78,17 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; # use parseatom instead of parse to respect filename (#28188) ex, j = Meta.parseatom(s, stpos, filename=filename) end - last_parse = (stpos:prevind(s, j)) .+ s.offset - push_nonempty!(arg, ex) + last_arg = stpos + s.offset + update_last_arg = true + push!(arg, ex) s = SubString(s, j) Iterators.reset!(st, pairs(s)) i = firstindex(s) else + if update_last_arg + last_arg = i + s.offset + update_last_arg = false + end if !in_double_quotes && c == '\'' in_single_quotes = !in_single_quotes i = consume_upto!(arg, s, i, j) @@ -124,14 +130,14 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; push_nonempty!(arg, s[i:end]) append_2to1!(args, arg) - interpolate || return args, last_parse + interpolate || return args, last_arg # construct an expression ex = Expr(:tuple) for arg in args push!(ex.args, Expr(:tuple, arg...)) end - return ex, last_parse + return ex, last_arg end function shell_split(s::AbstractString) @@ -216,7 +222,7 @@ function print_shell_escaped_posixly(io::IO, args::AbstractString...) function isword(c::AbstractChar) if '0' <= c <= '9' || 'a' <= c <= 'z' || 'A' <= c <= 'Z' # word characters - elseif c == '_' || c == '/' || c == '+' || c == '-' + elseif c == '_' || c == '/' || c == '+' || c == '-' || c == '.' # other common characters elseif c == '\'' have_single = true diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index debe568b25f64..94ca678b8f387 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -243,8 +243,21 @@ const sorted_keyvals = ["false", "true"] complete_keyval(s::Union{String,SubString{String}}) = complete_from_list(KeyvalCompletion, sorted_keyvals, s) -function complete_path(path::AbstractString, pos::Int; - use_envpath=false, shell_escape=false, +function do_raw_escape(s) + # escape_raw_string with delim='`' and ignoring the rule for the ending \ + return replace(s, r"(\\+)`" => s"\1\\`") +end +function do_shell_escape(s) + return Base.shell_escape_posixly(s) +end +function do_string_escape(s) + return escape_string(s, ('\"','$')) +end + +function complete_path(path::AbstractString; + use_envpath=false, + shell_escape=false, + raw_escape=false, string_escape=false) @assert !(shell_escape && string_escape) if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path) @@ -257,50 +270,49 @@ function complete_path(path::AbstractString, pos::Int; else dir, prefix = splitdir(path) end - local files - try + files = try if isempty(dir) - files = readdir() + readdir() elseif isdir(dir) - files = readdir(dir) + readdir(dir) else - return Completion[], 0:-1, false + return Completion[], dir, false end - catch - return Completion[], 0:-1, false + catch ex + ex isa Base.IOError || rethrow() + return Completion[], dir, false end matches = Set{String}() for file in files if startswith(file, prefix) p = joinpath(dir, file) - is_dir = try isdir(p) catch; false end - push!(matches, is_dir ? joinpath(file, "") : file) + is_dir = try isdir(p) catch ex; ex isa Base.IOError ? false : rethrow() end + push!(matches, is_dir ? file * "/" : file) end end - if use_envpath && length(dir) == 0 + if use_envpath && isempty(dir) # Look for files in PATH as well - local pathdirs = split(ENV["PATH"], @static Sys.iswindows() ? ";" : ":") + pathdirs = split(ENV["PATH"], @static Sys.iswindows() ? ";" : ":") for pathdir in pathdirs - local actualpath - try - actualpath = realpath(pathdir) - catch + actualpath = try + realpath(pathdir) + catch ex + ex isa Base.IOError || rethrow() # Bash doesn't expect every folder in PATH to exist, so neither shall we continue end - if actualpath != pathdir && in(actualpath,pathdirs) + if actualpath != pathdir && in(actualpath, pathdirs) # Remove paths which (after resolving links) are in the env path twice. # Many distros eg. point /bin to /usr/bin but have both in the env path. continue end - local filesinpath - try - filesinpath = readdir(pathdir) + filesinpath = try + readdir(pathdir) catch e # Bash allows dirs in PATH that can't be read, so we should as well. if isa(e, Base.IOError) || isa(e, Base.ArgumentError) @@ -321,18 +333,38 @@ function complete_path(path::AbstractString, pos::Int; end end - function do_escape(s) - return shell_escape ? replace(s, r"(\s|\\)" => s"\\\0") : - string_escape ? escape_string(s, ('\"','$')) : - s - end + matches = ((shell_escape ? do_shell_escape(s) : string_escape ? do_string_escape(s) : s) for s in matches) + matches = ((raw_escape ? do_raw_escape(s) : s) for s in matches) + matches = Completion[PathCompletion(s) for s in matches] + return matches, dir, !isempty(matches) +end - matchList = Completion[PathCompletion(do_escape(s)) for s in matches] - startpos = pos - lastindex(do_escape(prefix)) + 1 - # The pos - lastindex(prefix) + 1 is correct due to `lastindex(prefix)-lastindex(prefix)==0`, - # hence we need to add one to get the first index. This is also correct when considering - # pos, because pos is the `lastindex` a larger string which `endswith(path)==true`. - return matchList, startpos:pos, !isempty(matchList) +function complete_path(path::AbstractString, + pos::Int; + use_envpath=false, + shell_escape=false, + string_escape=false) + ## TODO: enable this depwarn once Pkg is fixed + #Base.depwarn("complete_path with pos argument is deprecated because the return value [2] is incorrect to use", :complete_path) + paths, dir, success = complete_path(path; use_envpath, shell_escape, string_escape) + if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path) + # if the path is just "~", don't consider the expanded username as a prefix + if path == "~" + dir, prefix = homedir(), "" + else + dir, prefix = splitdir(homedir() * path[2:end]) + end + else + dir, prefix = splitdir(path) + end + startpos = pos - lastindex(prefix) + 1 + Sys.iswindows() && map!(paths, paths) do c::PathCompletion + # emulation for unnecessarily complicated return value, since / is a + # perfectly acceptable path character which does not require quoting + # but is required by Pkg's awkward parser handling + return endswith(c.path, "/") ? PathCompletion(chop(c.path) * "\\\\") : c + end + return paths, startpos:pos, success end function complete_expanduser(path::AbstractString, r) @@ -784,10 +816,11 @@ function afterusing(string::String, startpos::Int) return occursin(r"^\b(using|import)\s*((\w+[.])*\w+\s*,\s*)*$", str[fr:end]) end -function close_path_completion(str, startpos, r, paths, pos) +function close_path_completion(dir, paths, str, pos) length(paths) == 1 || return false # Only close if there's a single choice... - _path = str[startpos:prevind(str, first(r))] * (paths[1]::PathCompletion).path - path = expanduser(unescape_string(replace(_path, "\\\$"=>"\$", "\\\""=>"\""))) + path = (paths[1]::PathCompletion).path + path = unescape_string(replace(path, "\\\$"=>"\$")) + path = joinpath(dir, path) # ...except if it's a directory... try isdir(path) @@ -1075,42 +1108,80 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif return complete_identifiers!(Completion[], ffunc, context_module, string, string[startpos:pos], pos, dotpos, startpos) elseif inc_tag === :cmd - m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) - startpos = nextind(partial, reverseind(partial, m.offset)) - r = startpos:pos - - # This expansion with "\\ "=>' ' replacement and shell_escape=true - # assumes the path isn't further quoted within the cmd backticks. - expanded = complete_expanduser(replace(string[r], r"\\ " => " "), r) - expanded[3] && return expanded # If user expansion available, return it - - paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos, - shell_escape=true) - - return sort!(paths, by=p->p.path), r, success + # TODO: should this call shell_completions instead of partially reimplementing it? + let m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) # fuzzy shell_parse in reverse + startpos = nextind(partial, reverseind(partial, m.offset)) + r = startpos:pos + scs::String = string[r] + + expanded = complete_expanduser(scs, r) + expanded[3] && return expanded # If user expansion available, return it + + path::String = replace(scs, r"(\\+)\g1(\\?)`" => "\1\2`") # fuzzy unescape_raw_string: match an even number of \ before ` and replace with half as many + # This expansion with "\\ "=>' ' replacement and shell_escape=true + # assumes the path isn't further quoted within the cmd backticks. + path = replace(path, r"\\ " => " ", r"\$" => "\$") # fuzzy shell_parse (reversed by shell_escape_posixly) + paths, dir, success = complete_path(path, shell_escape=true, raw_escape=true) + + if success && !isempty(dir) + let dir = do_raw_escape(do_shell_escape(dir)) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + if endswith(dir, "/") && startswith(scs, dir) + r = (startpos + sizeof(dir)):pos + elseif startswith(scs, dir * "/") + r = nextind(string, startpos + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + return PathCompletion(dir * "/" * c.path) + end + end + end + end + return sort!(paths, by=p->p.path), r, success + end elseif inc_tag === :string # Find first non-escaped quote - m = match(r"\"(?!\\)", reverse(partial)) - startpos = nextind(partial, reverseind(partial, m.offset)) - r = startpos:pos + let m = match(r"\"(?!\\)", reverse(partial)) + startpos = nextind(partial, reverseind(partial, m.offset)) + r = startpos:pos + scs::String = string[r] + + expanded = complete_expanduser(scs, r) + expanded[3] && return expanded # If user expansion available, return it + + path = try + unescape_string(replace(scs, "\\\$"=>"\$")) + catch ex + ex isa ArgumentError || rethrow() + nothing + end + if !isnothing(path) + paths, dir, success = complete_path(path::String, string_escape=true) - expanded = complete_expanduser(string[r], r) - expanded[3] && return expanded # If user expansion available, return it + if close_path_completion(dir, paths, path, pos) + paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") + end - path_prefix = try - unescape_string(replace(string[r], "\\\$"=>"\$", "\\\""=>"\"")) - catch - nothing - end - if !isnothing(path_prefix) - paths, r, success = complete_path(path_prefix, pos, string_escape=true) + if success && !isempty(dir) + let dir = do_string_escape(dir) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + if endswith(dir, "/") && startswith(scs, dir) + r = (startpos + sizeof(dir)):pos + elseif startswith(scs, dir * "/") + r = nextind(string, startpos + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + return PathCompletion(dir * "/" * c.path) + end + end + end + end - if close_path_completion(string, startpos, r, paths, pos) - paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") + # Fallthrough allowed so that Latex symbols can be completed in strings + success && return sort!(paths, by=p->p.path), r, success end - - # Fallthrough allowed so that Latex symbols can be completed in strings - success && return sort!(paths, by=p->p.path), r, success end end @@ -1196,35 +1267,58 @@ end function shell_completions(string, pos) # First parse everything up to the current position scs = string[1:pos] - local args, last_parse - try - args, last_parse = Base.shell_parse(scs, true)::Tuple{Expr,UnitRange{Int}} - catch + args, last_arg_start = try + Base.shell_parse(scs, true)::Tuple{Expr,Int} + catch ex + ex isa ArgumentError || ex isa ErrorException || rethrow() return Completion[], 0:-1, false end ex = args.args[end]::Expr # Now look at the last thing we parsed isempty(ex.args) && return Completion[], 0:-1, false - arg = ex.args[end] - if all(s -> isa(s, AbstractString), ex.args) - arg = arg::AbstractString - # Treat this as a path - - # As Base.shell_parse throws away trailing spaces (unless they are escaped), - # we need to special case here. - # If the last char was a space, but shell_parse ignored it search on "". - ignore_last_word = arg != " " && scs[end] == ' ' - prefix = ignore_last_word ? "" : join(ex.args) + lastarg = ex.args[end] + # As Base.shell_parse throws away trailing spaces (unless they are escaped), + # we need to special case here. + # If the last char was a space, but shell_parse ignored it search on "". + if isexpr(lastarg, :incomplete) || isexpr(lastarg, :error) + partial = string[last_arg_start:pos] + ret, range = completions(partial, lastindex(partial)) + range = range .+ (last_arg_start - 1) + return ret, range, true + elseif endswith(scs, ' ') && !endswith(scs, "\\ ") + r = pos+1:pos + paths, dir, success = complete_path("", use_envpath=false, shell_escape=true) + return paths, r, success + elseif all(arg -> arg isa AbstractString, ex.args) + # Join these and treat this as a path + path::String = join(ex.args) + r = last_arg_start:pos # Also try looking into the env path if the user wants to complete the first argument - use_envpath = !ignore_last_word && length(args.args) < 2 + use_envpath = length(args.args) < 2 - return complete_path(prefix, pos, use_envpath=use_envpath, shell_escape=true) - elseif isexpr(arg, :incomplete) || isexpr(arg, :error) - partial = scs[last_parse] - ret, range = completions(partial, lastindex(partial)) - range = range .+ (first(last_parse) - 1) - return ret, range, true + # TODO: call complete_expanduser here? + + paths, dir, success = complete_path(path, use_envpath=use_envpath, shell_escape=true) + + if success && !isempty(dir) + let dir = do_shell_escape(dir) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + partial = string[last_arg_start:pos] + if endswith(dir, "/") && startswith(partial, dir) + r = (last_arg_start + sizeof(dir)):pos + elseif startswith(partial, dir * "/") + r = nextind(string, last_arg_start + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + return PathCompletion(dir * "/" * c.path) + end + end + end + end + + return paths, r, success end return Completion[], 0:-1, false end diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index c5a2d1c8e006e..c515ec5927dd3 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -325,7 +325,7 @@ end let s = "\\alpha" c, r = test_bslashcomplete(s) @test c[1] == "α" - @test r == 1:length(s) + @test r == 1:lastindex(s) @test length(c) == 1 end @@ -1041,7 +1041,7 @@ let s, c, r s = "@show \"/dev/nul\"" c,r = completions(s, 15) c = map(completion_text, c) - @test "null" in c + @test "null\"" in c @test r == 13:15 @test s[r] == "nul" @@ -1065,8 +1065,8 @@ let s, c, r if !isdir(joinpath(s, "tmp")) c,r = test_scomplete(s) @test !("tmp/" in c) - @test r === length(s) + 1:0 - @test s[r] == "" + @test !("$s/tmp/" in c) + @test r === (sizeof(s) + 1):sizeof(s) end s = "cd \$(Iter" @@ -1091,7 +1091,7 @@ let s, c, r touch(file) s = string(tempdir(), "/repl\\ ") c,r = test_scomplete(s) - @test ["repl\\ completions"] == c + @test ["'repl completions'"] == c @test s[r] == "repl\\ " rm(file) end @@ -1185,7 +1185,7 @@ let current_dir, forbidden catch e e isa Base.IOError && occursin("ELOOP", e.msg) end - c, r = test_complete("\""*escape_string(joinpath(path, "selfsym"))) + c, r = test_complete("\"$(escape_string(path))/selfsym") @test c == ["selfsymlink"] end end @@ -1222,20 +1222,20 @@ mktempdir() do path dir_space = replace(space_folder, " " => "\\ ") s = Sys.iswindows() ? "cd $dir_space\\\\space" : "cd $dir_space/space" c, r = test_scomplete(s) - @test s[r] == "space" - @test "space\\ .file" in c + @test s[r] == (Sys.iswindows() ? "$dir_space\\\\space" : "$dir_space/space") + @test "'$space_folder'/'space .file'" in c # Also use shell escape rules within cmd backticks s = "`$s" c, r = test_scomplete(s) - @test s[r] == "space" - @test "space\\ .file" in c + @test s[r] == (Sys.iswindows() ? "$dir_space\\\\space" : "$dir_space/space") + @test "'$space_folder'/'space .file'" in c # escape string according to Julia escaping rules - julia_esc(str) = escape_string(str, ('\"','$')) + julia_esc(str) = REPL.REPLCompletions.do_string_escape(str) # For normal strings the string should be properly escaped according to # the usual rules for Julia strings. - s = "cd(\"" * julia_esc(joinpath(path, space_folder, "space")) + s = "cd(\"" * julia_esc(joinpath(path, space_folder) * "/space") c, r = test_complete(s) @test s[r] == "space" @test "space .file\"" in c @@ -1244,7 +1244,7 @@ mktempdir() do path # which needs to be escaped in Julia strings (on unix we could do this # test with all sorts of special chars) touch(joinpath(space_folder, "needs_escape\$.file")) - escpath = julia_esc(joinpath(path, space_folder, "needs_escape\$")) + escpath = julia_esc(joinpath(path, space_folder) * "/needs_escape\$") s = "cd(\"$escpath" c, r = test_complete(s) @test s[r] == "needs_escape\\\$" @@ -1281,12 +1281,12 @@ mktempdir() do path # in shell commands the shell path completion cannot complete # paths with these characters c, r, res = test_scomplete(test_dir) - @test c[1] == test_dir*(Sys.iswindows() ? "\\\\" : "/") + @test c[1] == "'$test_dir/'" @test res end escdir = julia_esc(test_dir) c, r, res = test_complete("\""*escdir) - @test c[1] == escdir*(Sys.iswindows() ? "\\\\" : "/") + @test c[1] == escdir * "/" @test res finally rm(joinpath(path, test_dir), recursive=true) @@ -1322,27 +1322,43 @@ if Sys.iswindows() cd(path) do s = "cd ..\\\\" c,r = test_scomplete(s) - @test r == length(s)+1:length(s) - @test temp_name * "\\\\" in c + @test r == lastindex(s)-3:lastindex(s) + @test "../$temp_name/" in c + + s = "cd ../" + c,r = test_scomplete(s) + @test r == lastindex(s)+1:lastindex(s) + @test "$temp_name/" in c s = "ls $(file[1:2])" c,r = test_scomplete(s) - @test r == length(s)-1:length(s) + @test r == lastindex(s)-1:lastindex(s) @test file in c s = "cd(\"..\\\\" c,r = test_complete(s) - @test r == length(s)+1:length(s) - @test temp_name * "\\\\" in c + @test r == lastindex(s)-3:lastindex(s) + @test "../$temp_name/" in c + + s = "cd(\"../" + c,r = test_complete(s) + @test r == lastindex(s)+1:lastindex(s) + @test "$temp_name/" in c s = "cd(\"$(file[1:2])" c,r = test_complete(s) - @test r == length(s) - 1:length(s) + @test r == lastindex(s) - 1:lastindex(s) @test (length(c) > 1 && file in c) || (["$file\""] == c) end rm(tmp) end +# issue 51985 +let s = "`\\" + c,r = test_scomplete(s) + @test r == lastindex(s)+1:lastindex(s) +end + # auto completions of true and false... issue #14101 let s = "tru" c, r, res = test_complete(s) diff --git a/test/spawn.jl b/test/spawn.jl index 3fdfa794ff39e..1fab652199ee0 100644 --- a/test/spawn.jl +++ b/test/spawn.jl @@ -660,9 +660,21 @@ let p = run(`$sleepcmd 100`, wait=false) kill(p) end -# Second argument of shell_parse +# Second return of shell_parse let s = " \$abc " - @test s[Base.shell_parse(s)[2]] == "abc" + @test Base.shell_parse(s)[2] === findfirst('a', s) + s = "abc def" + @test Base.shell_parse(s)[2] === findfirst('d', s) + s = "abc 'de'f\"\"g" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc \$x'de'f\"\"g" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc def\$x'g'" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc def\$x " + @test Base.shell_parse(s)[2] === findfirst('x', s) + s = "abc \$(d)ef\$(x " + @test Base.shell_parse(s)[2] === findfirst('x', s) - 1 end # Logging macros should not output to finalized streams (#26687)