-
Notifications
You must be signed in to change notification settings - Fork 370
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support multithreading in groupreduce #2491
base: main
Are you sure you want to change the base?
Changes from all commits
2d57734
713d5b8
3b5addb
ab76ff8
3e225ad
011a9b8
9595755
8000e2e
d7192d5
cc8d2d4
08aa0d9
2678ccb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ module DataFrames | |
using Statistics, Printf, REPL | ||
using Reexport, SortingAlgorithms, Compat, Unicode, PooledArrays | ||
@reexport using Missings, InvertedIndices | ||
using Base.Sort, Base.Order, Base.Iterators | ||
using Base.Sort, Base.Order, Base.Iterators, Base.Threads | ||
using TableTraits, IteratorInterfaceExtensions | ||
import LinearAlgebra: norm | ||
using Markdown | ||
|
@@ -91,6 +91,8 @@ else | |
export only | ||
end | ||
|
||
const NTHREADS = Ref(1) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe add a docstring? |
||
|
||
include("other/utils.jl") | ||
include("other/index.jl") | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -163,18 +163,80 @@ function groupreduce!(res::AbstractVector, f, op, condf, adjust, checkempty::Boo | |||||||||
counts = zeros(Int, n) | ||||||||||
end | ||||||||||
groups = gd.groups | ||||||||||
@inbounds for i in eachindex(incol, groups) | ||||||||||
gix = groups[i] | ||||||||||
x = incol[i] | ||||||||||
if gix > 0 && (condf === nothing || condf(x)) | ||||||||||
# this check should be optimized out if U is not Any | ||||||||||
if eltype(res) === Any && !isassigned(res, gix) | ||||||||||
res[gix] = f(x, gix) | ||||||||||
else | ||||||||||
res[gix] = op(res[gix], f(x, gix)) | ||||||||||
nt = min(NTHREADS[], Threads.nthreads()) | ||||||||||
if nt <= 1 || axes(incol) != axes(groups) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. when There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since |
||||||||||
@inbounds for i in eachindex(incol, groups) | ||||||||||
gix = groups[i] | ||||||||||
x = incol[i] | ||||||||||
if gix > 0 && (condf === nothing || condf(x)) | ||||||||||
# this check should be optimized out if eltype is not Any | ||||||||||
if eltype(res) === Any && !isassigned(res, gix) | ||||||||||
res[gix] = f(x, gix) | ||||||||||
else | ||||||||||
res[gix] = op(res[gix], f(x, gix)) | ||||||||||
end | ||||||||||
if adjust !== nothing || checkempty | ||||||||||
counts[gix] += 1 | ||||||||||
end | ||||||||||
end | ||||||||||
end | ||||||||||
else | ||||||||||
res_vec = Vector{typeof(res)}(undef, nt) | ||||||||||
# needs to be always allocated to fix type instability with @threads | ||||||||||
counts_vec = Vector{Vector{Int}}(undef, nt) | ||||||||||
res_vec[1] = res | ||||||||||
if adjust !== nothing || checkempty | ||||||||||
counts_vec[1] = counts | ||||||||||
end | ||||||||||
for i in 2:nt | ||||||||||
res_vec[i] = copy(res) | ||||||||||
if adjust !== nothing || checkempty | ||||||||||
counts[gix] += 1 | ||||||||||
counts_vec[i] = zeros(Int, n) | ||||||||||
end | ||||||||||
end | ||||||||||
@sync for tid in 1:nt | ||||||||||
Threads.@spawn begin | ||||||||||
res′ = res_vec[tid] | ||||||||||
if adjust !== nothing || checkempty | ||||||||||
counts′ = counts_vec[tid] | ||||||||||
end | ||||||||||
start = 1 + ((tid - 1) * length(groups)) ÷ nt | ||||||||||
stop = (tid * length(groups)) ÷ nt | ||||||||||
Comment on lines
+203
to
+204
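There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Avoid overflow on 32-bit machines, where `Int` is 32 bits wide and the products `(tid - 1) * length(groups)` and `tid * length(groups)` can exceed `typemax(Int32)`; on 64-bit machines this is a no-op.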
Suggested change
|
||||||||||
@inbounds for i in start:stop | ||||||||||
gix = groups[i] | ||||||||||
x = incol[i] | ||||||||||
if gix > 0 && (condf === nothing || condf(x)) | ||||||||||
# this check should be optimized out if eltype is not Any | ||||||||||
if eltype(res′) === Any && !isassigned(res′, gix) | ||||||||||
res′[gix] = f(x, gix) | ||||||||||
else | ||||||||||
res′[gix] = op(res′[gix], f(x, gix)) | ||||||||||
end | ||||||||||
if adjust !== nothing || checkempty | ||||||||||
counts′[gix] += 1 | ||||||||||
end | ||||||||||
end | ||||||||||
end | ||||||||||
end | ||||||||||
end | ||||||||||
for i in 2:length(res_vec) | ||||||||||
resi = res_vec[i] | ||||||||||
@inbounds @simd for j in eachindex(res) | ||||||||||
# this check should be optimized out if eltype is not Any | ||||||||||
if eltype(res) === Any | ||||||||||
if isassigned(resi, j) && isassigned(res, j) | ||||||||||
res[j] = op(res[j], resi[j]) | ||||||||||
elseif isassigned(resi, j) | ||||||||||
res[j] = resi[j] | ||||||||||
end | ||||||||||
else | ||||||||||
res[j] = op(res[j], resi[j]) | ||||||||||
end | ||||||||||
end | ||||||||||
end | ||||||||||
if adjust !== nothing || checkempty | ||||||||||
for i in 2:length(counts_vec) | ||||||||||
counts .+= counts_vec[i] | ||||||||||
end | ||||||||||
end | ||||||||||
end | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.