-
Notifications
You must be signed in to change notification settings - Fork 141
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize findIndex, findIndexEnd and map #347
Conversation
Here is the relevant piece of Core that @Bodigrim was curious about:
map [InlPrag=NOUSERINLINE[2]]
:: (Word8 -> Word8) -> ByteString -> ByteString
[GblId,
Arity=2,
Str=<L,C(U(U))><S,1*U(U,U,U)>,
Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
WorkFree=True, Expandable=True,
Guidance=ALWAYS_IF(arity=2,unsat_ok=True,boring_ok=False)
Tmpl= \ (w_sg45 [Occ=Once] :: Word8 -> Word8)
(w1_sg46 [Occ=Once!] :: ByteString) ->
case w1_sg46 of
{ BS ww1_sg49 [Occ=Once] ww2_sg4a [Occ=Once] ww3_sg4b [Occ=Once] ->
Data.ByteString.$wmap w_sg45 ww1_sg49 ww2_sg4a ww3_sg4b
}}]
map
= \ (w_sg45 :: Word8 -> Word8) (w1_sg46 :: ByteString) ->
case w1_sg46 of { BS ww1_sg49 ww2_sg4a ww3_sg4b ->
Data.ByteString.$wmap w_sg45 ww1_sg49 ww2_sg4a ww3_sg4b
}
Data.ByteString.$wmap [InlPrag=NOUSERINLINE[2]]
:: (Word8 -> Word8)
-> GHC.Prim.Addr#
-> GHC.ForeignPtr.ForeignPtrContents
-> GHC.Prim.Int#
-> ByteString
[GblId,
Arity=4,
Str=<L,C(U(U))><L,U><L,U><L,U>,
Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
WorkFree=True, Expandable=True, Guidance=IF_ARGS [60 0 0 0] 171 0}]
Data.ByteString.$wmap
= \ (w_sg45 :: Word8 -> Word8)
(ww_sg49 :: GHC.Prim.Addr#)
(ww1_sg4a :: GHC.ForeignPtr.ForeignPtrContents)
(ww2_sg4b :: GHC.Prim.Int#) ->
case GHC.Magic.runRW#
@ ('GHC.Types.TupleRep
'[ 'GHC.Types.TupleRep '[], 'GHC.Types.LiftedRep])
@ (# GHC.Prim.State# GHC.Prim.RealWorld, ByteString #)
(\ (s_i7oX [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
case GHC.Prim.<# ww2_sg4b 0# of {
__DEFAULT ->
case GHC.Prim.newPinnedByteArray#
@ GHC.Prim.RealWorld ww2_sg4b s_i7oX
of
{ (# ipv_i6vL, ipv1_i6vM #) ->
let {
ipv2_s9Jh :: GHC.Prim.Addr#
[LclId]
ipv2_s9Jh
= GHC.Prim.byteArrayContents#
(ipv1_i6vM
`cast` (UnsafeCo representational (GHC.Prim.MutableByteArray#
GHC.Prim.RealWorld) GHC.Prim.ByteArray#
:: GHC.Prim.MutableByteArray# GHC.Prim.RealWorld
~R# GHC.Prim.ByteArray#)) } in
join {
$w$j_sg3X [InlPrag=NOUSERINLINE[2], Dmd=<C(S),C(U)>]
:: GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld, ByteString #)
[LclId[JoinId(1)], Arity=1, Str=<L,U>]
$w$j_sg3X (w1_sg3V [OS=OneShot]
:: GHC.Prim.State# GHC.Prim.RealWorld)
= let {
ipv3_s9Jg :: GHC.ForeignPtr.ForeignPtrContents
[LclId, Unf=OtherCon []]
ipv3_s9Jg = GHC.ForeignPtr.PlainPtr ipv1_i6vM } in
case GHC.Prim.touch#
@ 'GHC.Types.LiftedRep
@ GHC.ForeignPtr.ForeignPtrContents
ipv3_s9Jg
w1_sg3V
of s'_i6w1
{ __DEFAULT ->
case GHC.Prim.touch#
@ 'GHC.Types.LiftedRep
@ GHC.ForeignPtr.ForeignPtrContents
ww1_sg4a
s'_i6w1
of s'1_i7pa
{ __DEFAULT ->
(# s'1_i7pa,
Data.ByteString.Internal.BS ipv2_s9Jh ipv3_s9Jg ww2_sg4b #)
}
} } in
joinrec {
$wmap__sg44 [InlPrag=NOUSERINLINE[2], Occ=LoopBreaker]
:: GHC.Prim.Int#
-> GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld, ByteString #)
[LclId[JoinId(2)], Arity=2, Str=<L,U><L,U>, Unf=OtherCon []]
$wmap__sg44 (ww3_sg42 :: GHC.Prim.Int#)
(w1_sg3Z :: GHC.Prim.State# GHC.Prim.RealWorld)
= case GHC.Prim.>=# ww3_sg42 ww2_sg4b of {
__DEFAULT ->
case GHC.Prim.readWord8OffAddr#
@ GHC.Prim.RealWorld
(GHC.Prim.plusAddr# ww_sg49 ww3_sg42)
0#
w1_sg3Z
of
{ (# ipv4_i7qw, ipv5_i7qx #) ->
case w_sg45 (GHC.Word.W8# ipv5_i7qx) of { W8# x_i7tG ->
case GHC.Prim.writeWord8OffAddr#
@ GHC.Prim.RealWorld
(GHC.Prim.plusAddr# ipv2_s9Jh ww3_sg42)
0#
x_i7tG
ipv4_i7qw
of s2_i7tI
{ __DEFAULT ->
jump $wmap__sg44 (GHC.Prim.+# ww3_sg42 1#) s2_i7tI
}
}
};
1# -> jump $w$j_sg3X w1_sg3Z
}; } in
jump $wmap__sg44 0# ipv_i6vL
};
1# ->
case GHC.ForeignPtr.mallocPlainForeignPtrBytes2 of wild_00 { }
})
of
{ (# ipv_i6Ap, ipv1_i6Aq #) ->
ipv1_i6Aq
}
It does indeed have the join points that you had mentioned wanting to see. |
And here is the old Core:
map [InlPrag=INLINE (sat-args=2)]
:: (Word8 -> Word8) -> ByteString -> ByteString
[GblId,
Arity=2,
Str=<L,C(U(U))><S,1*U(U,U,U)>,
Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
WorkFree=True, Expandable=True,
Guidance=ALWAYS_IF(arity=2,unsat_ok=False,boring_ok=False)
Tmpl= \ (f_a46u [Occ=OnceL!] :: Word8 -> Word8)
(ds_d6Xw [Occ=Once!] :: ByteString) ->
case ds_d6Xw of
{ BS dt_d7n1 [Occ=Once] dt1_d7n2 [Occ=Once] dt2_d7n3 ->
let {
len_a46w [Occ=OnceL] :: Int
[LclId, Unf=OtherCon []]
len_a46w = GHC.Types.I# dt2_d7n3 } in
letrec {
map__a46x [Occ=LoopBreaker]
:: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
[LclId, Arity=3, Unf=OtherCon []]
map__a46x
= \ (n_a46y [Occ=Once!] :: Int)
(p1_a46z [Occ=Once!] :: Ptr Word8)
(p2_a46A [Occ=Once!] :: Ptr Word8) ->
case n_a46y of n1_X46K [Occ=Once] { GHC.Types.I# ipv_s87g ->
case p1_a46z of p4_X46M [Occ=Once]
{ GHC.Ptr.Ptr ipv1_s87j [Occ=Once] ->
case p2_a46A of p5_X46O [Occ=Once]
{ GHC.Ptr.Ptr ipv2_s87m [Occ=Once] ->
case GHC.Classes.geInt n1_X46K len_a46w of {
False ->
(\ (s_i7pp [Occ=Once] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
case GHC.Prim.readWord8OffAddr#
@ GHC.Prim.RealWorld
(GHC.Prim.plusAddr# ipv1_s87j ipv_s87g)
0#
s_i7pp
of
{ (# ipv3_i7qk [Occ=Once], ipv4_i7ql [Occ=Once] #) ->
case f_a46u (GHC.Word.W8# ipv4_i7ql) of { W8# x_i7EL [Occ=Once] ->
case GHC.Prim.writeWord8OffAddr#
@ GHC.Prim.RealWorld
(GHC.Prim.plusAddr# ipv2_s87m ipv_s87g)
0#
x_i7EL
ipv3_i7qk
of s2_i7EN [Occ=Once]
{ __DEFAULT ->
((map__a46x
(GHC.Types.I# (GHC.Prim.+# ipv_s87g 1#)) p4_X46M p5_X46O)
`cast` (GHC.Types.N:IO[0] <()>_R
:: IO ()
~R# (GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld, () #))))
s2_i7EN
}
}
})
`cast` (Sym (GHC.Types.N:IO[0] <()>_R)
:: (GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld, () #))
~R# IO ());
True ->
(\ (s_i7qz [Occ=Once] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
(# s_i7qz, GHC.Tuple.() #))
`cast` (Sym (GHC.Types.N:IO[0] <()>_R)
:: (GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld, () #))
~R# IO ())
}
}
}
}; } in
case GHC.Magic.runRW#
@ ('GHC.Types.TupleRep
'[ 'GHC.Types.TupleRep '[], 'GHC.Types.LiftedRep])
@ (# GHC.Prim.State# GHC.Prim.RealWorld, ByteString #)
(\ (s_i7oL [Occ=Once, OS=OneShot]
:: GHC.Prim.State# GHC.Prim.RealWorld) ->
case GHC.Prim.<# dt2_d7n3 0# of {
__DEFAULT ->
case GHC.Prim.newPinnedByteArray#
@ GHC.Prim.RealWorld dt2_d7n3 s_i7oL
of
{ (# ipv_i6vt [Occ=Once], ipv1_i6vu #) ->
let {
ipv2_i6vs :: GHC.Prim.Addr#
[LclId]
ipv2_i6vs
= GHC.Prim.byteArrayContents#
(ipv1_i6vu
`cast` (UnsafeCo representational (GHC.Prim.MutableByteArray#
GHC.Prim.RealWorld) GHC.Prim.ByteArray#
:: GHC.Prim.MutableByteArray# GHC.Prim.RealWorld
~R# GHC.Prim.ByteArray#)) } in
let {
ipv3_i6vw :: GHC.ForeignPtr.ForeignPtrContents
[LclId, Unf=OtherCon []]
ipv3_i6vw = GHC.ForeignPtr.PlainPtr ipv1_i6vu } in
case ((map__a46x
(GHC.Types.I# 0#)
(GHC.Ptr.Ptr @ Word8 dt_d7n1)
(GHC.Ptr.Ptr @ Word8 ipv2_i6vs))
`cast` (GHC.Types.N:IO[0] <()>_R
:: IO ()
~R# (GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld,
() #))))
ipv_i6vt
of
{ (# ipv4_i6vG [Occ=Once], _ [Occ=Dead] #) ->
case GHC.Prim.touch#
@ 'GHC.Types.LiftedRep
@ GHC.ForeignPtr.ForeignPtrContents
ipv3_i6vw
ipv4_i6vG
of s'_i6vJ [Occ=Once]
{ __DEFAULT ->
case GHC.Prim.touch#
@ 'GHC.Types.LiftedRep
@ GHC.ForeignPtr.ForeignPtrContents
dt1_d7n2
s'_i6vJ
of s'1_i7oY [Occ=Once]
{ __DEFAULT ->
(# s'1_i7oY,
Data.ByteString.Internal.BS ipv2_i6vs ipv3_i6vw dt2_d7n3 #)
}
}
}
};
1# -> case GHC.ForeignPtr.mallocPlainForeignPtrBytes2 of { }
})
of
{ (# _ [Occ=Dead], ipv1_i6A8 [Occ=Once] #) ->
ipv1_i6A8
}
}}]
map
= \ (f_a46u :: Word8 -> Word8) (ds_d6Xw :: ByteString) ->
case ds_d6Xw of { BS dt_d7n1 dt1_d7n2 dt2_d7n3 ->
case GHC.Magic.runRW#
@ ('GHC.Types.TupleRep
'[ 'GHC.Types.TupleRep '[], 'GHC.Types.LiftedRep])
@ (# GHC.Prim.State# GHC.Prim.RealWorld, ByteString #)
(\ (s_i7oL [OS=OneShot] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
case GHC.Prim.<# dt2_d7n3 0# of {
__DEFAULT ->
case GHC.Prim.newPinnedByteArray#
@ GHC.Prim.RealWorld dt2_d7n3 s_i7oL
of
{ (# ipv_i6vt, ipv1_i6vu #) ->
let {
ipv2_s9HJ :: GHC.Prim.Addr#
[LclId]
ipv2_s9HJ
= GHC.Prim.byteArrayContents#
(ipv1_i6vu
`cast` (UnsafeCo representational (GHC.Prim.MutableByteArray#
GHC.Prim.RealWorld) GHC.Prim.ByteArray#
:: GHC.Prim.MutableByteArray# GHC.Prim.RealWorld
~R# GHC.Prim.ByteArray#)) } in
join {
$w$j_sgj9 [InlPrag=NOUSERINLINE[2], Dmd=<C(S),C(U)>]
:: GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld, ByteString #)
[LclId[JoinId(1)], Arity=1, Str=<L,U>]
$w$j_sgj9 (w_sgj7 [OS=OneShot]
:: GHC.Prim.State# GHC.Prim.RealWorld)
= let {
ipv3_s9HI :: GHC.ForeignPtr.ForeignPtrContents
[LclId, Unf=OtherCon []]
ipv3_s9HI = GHC.ForeignPtr.PlainPtr ipv1_i6vu } in
case GHC.Prim.touch#
@ 'GHC.Types.LiftedRep
@ GHC.ForeignPtr.ForeignPtrContents
ipv3_s9HI
w_sgj7
of s'_i6vJ
{ __DEFAULT ->
case GHC.Prim.touch#
@ 'GHC.Types.LiftedRep
@ GHC.ForeignPtr.ForeignPtrContents
dt1_d7n2
s'_i6vJ
of s'1_i7oY
{ __DEFAULT ->
(# s'1_i7oY,
Data.ByteString.Internal.BS ipv2_s9HJ ipv3_s9HI dt2_d7n3 #)
}
} } in
joinrec {
$wmap__sgjq [InlPrag=NOUSERINLINE[2], Occ=LoopBreaker]
:: GHC.Prim.Int#
-> GHC.Prim.Addr#
-> GHC.Prim.Addr#
-> GHC.Prim.State# GHC.Prim.RealWorld
-> (# GHC.Prim.State# GHC.Prim.RealWorld, ByteString #)
[LclId[JoinId(4)],
Arity=4,
Str=<L,U><L,U><L,U><L,U>,
Unf=OtherCon []]
$wmap__sgjq (ww_sgjg :: GHC.Prim.Int#)
(ww1_sgjk :: GHC.Prim.Addr#)
(ww2_sgjo :: GHC.Prim.Addr#)
(w_sgjd :: GHC.Prim.State# GHC.Prim.RealWorld)
= case GHC.Prim.>=# ww_sgjg dt2_d7n3 of {
__DEFAULT ->
case GHC.Prim.readWord8OffAddr#
@ GHC.Prim.RealWorld
(GHC.Prim.plusAddr# ww1_sgjk ww_sgjg)
0#
w_sgjd
of
{ (# ipv4_i7qk, ipv5_i7ql #) ->
case f_a46u (GHC.Word.W8# ipv5_i7ql) of { W8# x_i7EL ->
case GHC.Prim.writeWord8OffAddr#
@ GHC.Prim.RealWorld
(GHC.Prim.plusAddr# ww2_sgjo ww_sgjg)
0#
x_i7EL
ipv4_i7qk
of s2_i7EN
{ __DEFAULT ->
jump $wmap__sgjq (GHC.Prim.+# ww_sgjg 1#) ww1_sgjk ww2_sgjo s2_i7EN
}
}
};
1# -> jump $w$j_sgj9 w_sgjd
}; } in
jump $wmap__sgjq 0# dt_d7n1 ipv2_s9HJ ipv_i6vt
};
1# ->
case GHC.ForeignPtr.mallocPlainForeignPtrBytes2 of wild1_00 { }
})
of
{ (# ipv_i6A7, ipv1_i6A8 #) ->
ipv1_i6A8
}
}
Not entirely sure what lessons to draw from it, though it looks to me like the previous version doesn't have the worker/wrapper transformation and the core inner loop takes 3 arguments (both addresses and the offset). |
Not much, it is too verbose to diff. Could you please regenerate Core using |
Here you go:
Old:
map :: (Word8 -> Word8) -> ByteString -> ByteString
map
= \ (f_a46t :: Word8 -> Word8) (ds_d6Xv :: ByteString) ->
case ds_d6Xv of { BS dt_d7n0 dt1_d7n1 dt2_d7n2 ->
case runRW#
(\ (s_i7oK :: State# RealWorld) ->
case <# dt2_d7n2 0# of {
__DEFAULT ->
case newPinnedByteArray# dt2_d7n2 s_i7oK of
{ (# ipv_i6vs, ipv1_i6vt #) ->
let {
ipv2_s9HI :: Addr#
ipv2_s9HI = byteArrayContents# (ipv1_i6vt `cast` <Co:5>) } in
join {
$w$j_sgj8 :: State# RealWorld -> (# State# RealWorld, ByteString #)
$w$j_sgj8 (w_sgj6 :: State# RealWorld)
= let {
ipv3_s9HH :: ForeignPtrContents
ipv3_s9HH = PlainPtr ipv1_i6vt } in
case touch# ipv3_s9HH w_sgj6 of s'_i6vI { __DEFAULT ->
case touch# dt1_d7n1 s'_i6vI of s'1_i7oX { __DEFAULT ->
(# s'1_i7oX, BS ipv2_s9HI ipv3_s9HH dt2_d7n2 #)
}
} } in
joinrec {
$wmap__sgjp
:: Int#
-> Addr#
-> Addr#
-> State# RealWorld
-> (# State# RealWorld, ByteString #)
$wmap__sgjp (ww_sgjf :: Int#)
(ww1_sgjj :: Addr#)
(ww2_sgjn :: Addr#)
(w_sgjc :: State# RealWorld)
= case >=# ww_sgjf dt2_d7n2 of {
__DEFAULT ->
case readWord8OffAddr# (plusAddr# ww1_sgjj ww_sgjf) 0# w_sgjc of
{ (# ipv4_i7qj, ipv5_i7qk #) ->
case f_a46t (W8# ipv5_i7qk) of { W8# x_i7EK ->
case writeWord8OffAddr#
(plusAddr# ww2_sgjn ww_sgjf) 0# x_i7EK ipv4_i7qj
of s2_i7EM
{ __DEFAULT ->
jump $wmap__sgjp (+# ww_sgjf 1#) ww1_sgjj ww2_sgjn s2_i7EM
}
}
};
1# -> jump $w$j_sgj8 w_sgjc
}; } in
jump $wmap__sgjp 0# dt_d7n0 ipv2_s9HI ipv_i6vs
};
1# -> case mallocPlainForeignPtrBytes2 of wild1_00 { }
})
of
{ (# ipv_i6A6, ipv1_i6A7 #) ->
ipv1_i6A7
}
}
New:
map :: (Word8 -> Word8) -> ByteString -> ByteString
map
= \ (w_sg44 :: Word8 -> Word8) (w1_sg45 :: ByteString) ->
case w1_sg45 of { BS ww1_sg48 ww2_sg49 ww3_sg4a ->
$wmap w_sg44 ww1_sg48 ww2_sg49 ww3_sg4a
}
$wmap
:: (Word8 -> Word8)
-> Addr# -> ForeignPtrContents -> Int# -> ByteString
$wmap
= \ (w_sg44 :: Word8 -> Word8)
(ww_sg48 :: Addr#)
(ww1_sg49 :: ForeignPtrContents)
(ww2_sg4a :: Int#) ->
case runRW#
(\ (s_i7oW :: State# RealWorld) ->
case <# ww2_sg4a 0# of {
__DEFAULT ->
case newPinnedByteArray# ww2_sg4a s_i7oW of
{ (# ipv_i6vK, ipv1_i6vL #) ->
let {
ipv2_s9Jg :: Addr#
ipv2_s9Jg = byteArrayContents# (ipv1_i6vL `cast` <Co:5>) } in
join {
$w$j_sg3W :: State# RealWorld -> (# State# RealWorld, ByteString #)
$w$j_sg3W (w1_sg3U :: State# RealWorld)
= let {
ipv3_s9Jf :: ForeignPtrContents
ipv3_s9Jf = PlainPtr ipv1_i6vL } in
case touch# ipv3_s9Jf w1_sg3U of s'_i6w0 { __DEFAULT ->
case touch# ww1_sg49 s'_i6w0 of s'1_i7p9 { __DEFAULT ->
(# s'1_i7p9, BS ipv2_s9Jg ipv3_s9Jf ww2_sg4a #)
}
} } in
joinrec {
$wmap__sg43
:: Int# -> State# RealWorld -> (# State# RealWorld, ByteString #)
$wmap__sg43 (ww3_sg41 :: Int#) (w1_sg3Y :: State# RealWorld)
= case >=# ww3_sg41 ww2_sg4a of {
__DEFAULT ->
case readWord8OffAddr# (plusAddr# ww_sg48 ww3_sg41) 0# w1_sg3Y of
{ (# ipv4_i7qv, ipv5_i7qw #) ->
case w_sg44 (W8# ipv5_i7qw) of { W8# x_i7tF ->
case writeWord8OffAddr#
(plusAddr# ipv2_s9Jg ww3_sg41) 0# x_i7tF ipv4_i7qv
of s2_i7tH
{ __DEFAULT ->
jump $wmap__sg43 (+# ww3_sg41 1#) s2_i7tH
}
}
};
1# -> jump $w$j_sg3W w1_sg3Y
}; } in
jump $wmap__sg43 0# ipv_i6vK
};
1# -> case mallocPlainForeignPtrBytes2 of wild_00 { }
})
of
{ (# ipv_i6Ao, ipv1_i6Ap #) ->
ipv1_i6Ap
} |
Thanks. All right, comparing Core we see that there are no new |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And please rebase, there is a conflict in benchmarks.
Ah, I realised that the accidentally missing INLINE was making a big difference and leading to results that were too good to be true. Once I add INLINE back in, here is the performance I get from the old and new map:
If I remove the INLINE pragma from each definition then I get:
This is presumably because, when we don't have INLINE, both definitions can correctly get inlined even if unsaturated. This leads me to think that we really should have a look at using INLINABLE instead of INLINE on some of these functions, as I think writing `map f $ bs` rather than `map f bs` should not lead to a potential 6x performance degradation. |
This would be extremely surprising, but I must admit that I do not know much about GHC optimizations. It would be important to experiment with |
@Boarders looks good to me except a merge conflict in |
@Boarders thanks, well done! |
This makes the suggested change from #338 so that constant arguments are not passed to helper functions. Here are the benchmarks for findIndex and findIndexEnd:
Here are the benchmarks for map: