Skip to content

Commit

Permalink
Merge pull request #5451 from tanmaykm/tanmaykm
Browse files Browse the repository at this point in the history
Faster bytestring for SubString{ByteString} and splitsub to split into substrings
  • Loading branch information
StefanKarpinski committed Jan 28, 2014
2 parents 6bebb9b + 16d9ef6 commit 1234340
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 14 deletions.
2 changes: 1 addition & 1 deletion base/help.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ function clear_cache()
end

function decor_help_desc(func::String, mfunc::String, desc::String)
sd = split(desc, '\n')
sd = convert(Array{ByteString,1}, split(desc, '\n'))
for i = 1:length(sd)
if beginswith(sd[i], func)
sd[i] = mfunc * sd[i][length(func)+1:end]
Expand Down
26 changes: 15 additions & 11 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,8 @@ prevind(s::SubString, i::Integer) = prevind(s.string, i+s.offset)-s.offset

convert{T<:String}(::Type{SubString{T}}, s::T) = SubString(s, 1, endof(s))

# Specialised bytestring for a SubString whose parent is a ByteString.
# It calls the (pointer, length) form of bytestring on the parent's `data`
# buffer instead of going through the generic String path:
#   - pointer: start of `p.string.data` advanced by `p.offset`
#   - length:  `nextind(p, p.endof)-1`; presumably the byte index of the last
#              byte of the final character, so a multi-byte last character is
#              included in full (TODO confirm against the SubString definition).
bytestring{T <: ByteString}(p::SubString{T}) = bytestring(pointer(p.string.data)+p.offset, nextind(p, p.endof)-1)

function serialize{T}(s, ss::SubString{T})
# avoid saving a copy of the parent string, keeping the type of ss
invoke(serialize, (Any,Any), s, convert(SubString{T}, convert(T,ss)))
Expand Down Expand Up @@ -1247,19 +1249,20 @@ lpad(s, n::Integer, p=" ") = lpad(string(s),n,string(p))
rpad(s, n::Integer, p=" ") = rpad(string(s),n,string(p))
cpad(s, n::Integer, p=" ") = rpad(lpad(s,div(n+strwidth(s),2),p),n,p)


# splitter can be a Char, Vector{Char}, String, Regex, ...
# any splitter that provides search(s::String, splitter)

function split(str::String, splitter, limit::Integer, keep_empty::Bool)
strs = String[]
# Entry points for the four-argument split. Both delegate to _split and differ
# only in the element type of the (initially empty) result vector they pass in:
#   - when `str` is already a SubString of type T, the pieces are collected in a T[];
#   - for any other String type T, the pieces are collected in a SubString{T}[].
split{T<:SubString}(str::T, splitter, limit::Integer, keep_empty::Bool) = _split(str, splitter, limit, keep_empty, T[])
split{T<:String}(str::T, splitter, limit::Integer, keep_empty::Bool) = _split(str, splitter, limit, keep_empty, SubString{T}[])
function _split{T<:String,U<:Array}(str::T, splitter, limit::Integer, keep_empty::Bool, strs::U)
i = start(str)
n = endof(str)
r = search(str,splitter,i)
j, k = first(r), nextind(str,last(r))
while 0 < j <= n && length(strs) != limit-1
if i < k
if keep_empty || i < j
push!(strs, str[i:prevind(str,j)])
push!(strs, SubString(str,i,prevind(str,j)))
end
i = k
end
Expand All @@ -1268,7 +1271,7 @@ function split(str::String, splitter, limit::Integer, keep_empty::Bool)
j, k = first(r), nextind(str,last(r))
end
if keep_empty || !done(str,i)
push!(strs, str[i:end])
push!(strs, SubString(str,i))
end
return strs
end
Expand All @@ -1278,34 +1281,35 @@ split(s::String, spl) = split(s, spl, 0, true)

# a bit oddball, but standard behavior in Perl, Ruby & Python:
const _default_delims = [' ','\t','\n','\v','\f','\r']
split(str::String) = split(str, _default_delims, 0, false)
split(str::String) = split(str, _default_delims, 0, false)

function rsplit(str::String, splitter, limit::Integer, keep_empty::Bool)
strs = String[]

# Entry points for the four-argument rsplit. Both delegate to _rsplit and differ
# only in the element type of the (initially empty) result vector they pass in:
#   - when `str` is already a SubString of type T, the pieces are collected in a T[];
#   - for any other String type T, the pieces are collected in a SubString{T}[].
rsplit{T<:SubString}(str::T, splitter, limit::Integer, keep_empty::Bool) = _rsplit(str, splitter, limit, keep_empty, T[])
rsplit{T<:String}(str::T, splitter, limit::Integer, keep_empty::Bool) = _rsplit(str, splitter, limit, keep_empty, SubString{T}[])
function _rsplit{T<:String,U<:Array}(str::T, splitter, limit::Integer, keep_empty::Bool, strs::U)
i = start(str)
n = endof(str)
r = rsearch(str,splitter)
j = first(r)-1
k = last(r)
while((0 <= j < n) && (length(strs) != limit-1))
if i <= k
(keep_empty || (k < n)) && unshift!(strs, str[k+1:n])
(keep_empty || (k < n)) && unshift!(strs, SubString(str,k+1,n))
n = j
end
(k <= j) && (j = prevind(str,j))
r = rsearch(str,splitter,j)
j = first(r)-1
k = last(r)
end
(keep_empty || (n > 0)) && unshift!(strs, str[1:n])
(keep_empty || (n > 0)) && unshift!(strs, SubString(str,1,n))
return strs
end
rsplit(s::String, spl, n::Integer) = rsplit(s, spl, n, true)
rsplit(s::String, spl, keep::Bool) = rsplit(s, spl, 0, keep)
rsplit(s::String, spl) = rsplit(s, spl, 0, true)
#rsplit(str::String) = rsplit(str, _default_delims, 0, false)


function replace(str::ByteString, pattern, repl::Function, limit::Integer)
n = 1
e = endof(str)
Expand Down
2 changes: 1 addition & 1 deletion doc/helpdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1509,7 +1509,7 @@

("Strings","Base","split","split(string, [chars, [limit,] [include_empty]])
Return an array of strings by splitting the given string on
Return an array of substrings by splitting the given string on
occurrences of the given character delimiters, which may be
specified in any of the formats allowed by \"search\"'s second
argument (i.e. a single character, collection of characters,
Expand Down
2 changes: 1 addition & 1 deletion doc/stdlib/base.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1011,7 +1011,7 @@ Strings

.. function:: split(string, [chars, [limit,] [include_empty]])

Return an array of strings by splitting the given string on occurrences of the given character delimiters, which may be specified in any of the formats allowed by ``search``'s second argument (i.e. a single character, collection of characters, string, or regular expression). If ``chars`` is omitted, it defaults to the set of all space characters, and ``include_empty`` is taken to be false. The last two arguments are also optional: they are are a maximum size for the result and a flag determining whether empty fields should be included in the result.
Return an array of substrings by splitting the given string on occurrences of the given character delimiters, which may be specified in any of the formats allowed by ``search``'s second argument (i.e. a single character, collection of characters, string, or regular expression). If ``chars`` is omitted, it defaults to the set of all space characters, and ``include_empty`` is taken to be false. The last two arguments are also optional: they are a maximum size for the result and a flag determining whether empty fields should be included in the result.

.. function:: rsplit(string, [chars, [limit,] [include_empty]])

Expand Down

0 comments on commit 1234340

Please sign in to comment.