-
Notifications
You must be signed in to change notification settings - Fork 416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implemented CountBy (Issue 104) #207
Changes from 12 commits
3588216
987ef8c
0526568
2da65a8
db337ac
4fadb74
6e218c8
9bc8c95
3389416
74964a7
ae13491
0162049
5e6808a
1e6b47a
203c5db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
#region License and Terms | ||
// MoreLINQ - Extensions to LINQ to Objects | ||
// Copyright (c) 2016 Leandro F. Vieira (leandromoh). All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
#endregion | ||
|
||
using NUnit.Framework; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using LinqEnumerable = System.Linq.Enumerable; | ||
|
||
namespace MoreLinq.Test | ||
{ | ||
[TestFixture] | ||
public class CountByTest | ||
{ | ||
[Test] | ||
[ExpectedException(typeof(ArgumentNullException))] | ||
public void CountByWithNullSequence() | ||
{ | ||
IEnumerable<int> sequence = null; | ||
sequence.CountBy(x => x % 2 == 0); | ||
} | ||
|
||
[Test] | ||
[ExpectedException(typeof(ArgumentNullException))] | ||
public void CountByWithNullProjection() | ||
{ | ||
Func<int, bool> projection = null; | ||
Enumerable.Range(1, 10).CountBy(projection); | ||
} | ||
|
||
[Test] | ||
public void CountBySimpleTest() | ||
{ | ||
var result = new[] { 1, 2, 3, 4, 5, 6, 1, 2, 3, 1, 1, 2 }.CountBy(c => c); | ||
|
||
var expectations = new List<KeyValuePair<int, int>>() | ||
{ | ||
{ 1, 4 }, | ||
{ 2, 3 }, | ||
{ 3, 2 }, | ||
{ 4, 1 }, | ||
{ 5, 1 }, | ||
{ 6, 1 }, | ||
}; | ||
|
||
result.AssertSequenceEqual(expectations); | ||
} | ||
|
||
[Test] | ||
public void CountByEvenOddTest() | ||
{ | ||
var result = Enumerable.Range(1, 100).CountBy(c => c % 2); | ||
|
||
var expectations = new List<KeyValuePair<int, int>>() | ||
{ | ||
{ 1, 50 }, | ||
{ 0, 50 }, | ||
}; | ||
|
||
result.AssertSequenceEqual(expectations); | ||
} | ||
|
||
[Test] | ||
public void CountByWithEqualityComparer() | ||
{ | ||
var result = new[] { "a", "B", "c", "A", "b", "A" }.CountBy(c => c, StringComparer.OrdinalIgnoreCase); | ||
|
||
var expectations = new List<KeyValuePair<string, int>>() | ||
{ | ||
{ "a", 3 }, | ||
{ "B", 2 }, | ||
{ "c", 1 }, | ||
}; | ||
|
||
result.AssertSequenceEqual(expectations); | ||
} | ||
|
||
[Test] | ||
public void CountByHasKeysOrderedLikeGroupBy() | ||
{ | ||
var randomSequence = MoreEnumerable.Random(0, 100).Take(100).ToArray(); | ||
|
||
var countByKeys = randomSequence.CountBy(x => x).Select(x => x.Key); | ||
var groupByKeys = randomSequence.GroupBy(x => x).Select(x => x.Key); | ||
|
||
countByKeys.AssertSequenceEqual(groupByKeys); | ||
} | ||
|
||
[Test] | ||
public void CountByIsLazy() | ||
{ | ||
new BreakingSequence<string>().CountBy(x => x.Length); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#region License and Terms | ||
// MoreLINQ - Extensions to LINQ to Objects | ||
// Copyright (c) 2016 Leandro F. Vieira (leandromoh). All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
#endregion | ||
|
||
namespace MoreLinq | ||
{ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Runtime.CompilerServices; | ||
|
||
static partial class MoreEnumerable | ||
{ | ||
/// <summary> | ||
/// Applies a key-generating function to each element of a sequence and returns a sequence of | ||
/// unique keys and their number of occurrences in the original sequence. | ||
/// </summary> | ||
/// <typeparam name="TSource">Type of the elements of the source sequence.</typeparam> | ||
/// <typeparam name="TKey">Type of the projected element.</typeparam> | ||
/// <param name="source">Source sequence.</param> | ||
/// <param name="keySelector">Function that transforms each item of source sequence into a key to be compared against the others.</param> | ||
/// <returns>A sequence of unique keys and their number of occurrences in the original sequence.</returns> | ||
public static IEnumerable<KeyValuePair<TKey, int>> CountBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector) | ||
{ | ||
return source.CountBy(keySelector, null); | ||
} | ||
|
||
/// <summary> | ||
/// Applies a key-generating function to each element of a sequence and returns a sequence of | ||
/// unique keys and their number of occurrences in the original sequence. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This does not mention how this overload differs from the other, and could therefore look odd on a page of summaries. It should mention the additional argument. Consider adding:
|
||
/// An additional argument specifies a comparer to use for testing equivalence of keys. | ||
/// </summary> | ||
/// <typeparam name="TSource">Type of the elements of the source sequence.</typeparam> | ||
/// <typeparam name="TKey">Type of the projected element.</typeparam> | ||
/// <param name="source">Source sequence.</param> | ||
/// <param name="keySelector">Function that transforms each item of source sequence into a key to be compared against the others.</param> | ||
/// <param name="comparer">The equality comparer to use to determine whether or not keys are equal. | ||
        /// If null, the default equality comparer for <c>TKey</c> is used.</param> | ||
atifaziz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// <returns>A sequence of unique keys and their number of occurrences in the original sequence.</returns> | ||
public static IEnumerable<KeyValuePair<TKey, int>> CountBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey> comparer) | ||
{ | ||
if (source == null) throw new ArgumentNullException("source"); | ||
if (keySelector == null) throw new ArgumentNullException("keySelector"); | ||
|
||
return CountByImpl(source, keySelector, comparer); | ||
} | ||
|
||
private static IEnumerable<KeyValuePair<TKey, int>> CountByImpl<TSource, TKey>(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey> comparer) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now that we have all the tests in place, it would be good to work in some optimisations. The current implementation does too many lookups:
Another optimisation could be is that as long as the current key is the same as the last, just increase the counter. This way, adjacent keys don't require containment tests on the dictionary. |
||
{ | ||
var dic = new Dictionary<TKey, int>(comparer); | ||
var keys = new List<TKey>(); | ||
var counts = new List<int>(); | ||
var havePrevKey = false; | ||
var prevKey = default(TKey); | ||
var index = 0; | ||
|
||
foreach (var item in source) | ||
{ | ||
var key = keySelector(item); | ||
|
||
if (// key same as the previous? then re-use the index | ||
(havePrevKey && dic.Comparer.GetHashCode(prevKey) == dic.Comparer.GetHashCode(key) | ||
&& dic.Comparer.Equals(prevKey, key)) | ||
// otherwise try & find index of the key | ||
|| dic.TryGetValue(key, out index)) | ||
{ | ||
counts[index]++; | ||
} | ||
else | ||
{ | ||
dic[key] = keys.Count; | ||
keys.Add(key); | ||
counts.Add(1); | ||
} | ||
|
||
prevKey = key; | ||
havePrevKey = true; | ||
} | ||
|
||
// The dictionary is no longer needed from this point forward so | ||
// lose the reference and make it available as food for the GC. | ||
// This optimization is designed to help cases where a slow running | ||
// loop over the yielded pairs could span GC cycles. However, | ||
// instead of doing simply the following: | ||
// | ||
// dic = null; | ||
// | ||
// the reference is nulled through a method that the JIT compiler | ||
// is told not to try and inline; done so assuming that the above | ||
// method could have been turned into a NOP (in theory). | ||
|
||
Null(ref dic); // dic = null; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be less magic and more readable to simply split the counting loop to a separate function. For other readers (and myself, I had to google this), setting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was too terse I think: by split the counting loop into a separate function, I mean that that function runs eagerly, not lazily, thus avoiding this problem. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That's an excellent suggestion! 👍 It doesn't take away the magic one would need to understand that is the raison d'être for split. Someone in the future could say, “This is stupid. Let me inline this for simplicity's sake!”
Yeah it would be, especially if it's been lifted into a field but it depends on how clever the compiler can get about tying the local variable lifetimes to various stages of the state machine it generates. Technically, the |
||
|
||
for (var i = 0; i < keys.Count; i++) | ||
yield return new KeyValuePair<TKey, int>(keys[i], counts[i]); | ||
} | ||
|
||
// ReSharper disable once RedundantAssignment | ||
[MethodImpl(MethodImplOptions.NoInlining)] | ||
static void Null<T>(ref T obj) where T : class { obj = null; } | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I change the last line of
CountByImpl
:to simply return
dic
instead:then this test (
CountByHasKeysOrderedLikeGroupBy
) doesn't fail.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In practice, yes, it doesn't fail (because, currently, the dictionary returns its entries in the order in which the keys were inserted), but as you once said yourself
So the enumeration order of a dictionary can change any day with a new implementation. To ensure CountBy always returns keys in the order in which they were found, the current last line is necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What should the
CountByIsLazy
test do exactly? If I understood correctly, the implementation isn't lazy, since I must iterate over the whole IEnumerable before returning any KeyValuePair. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CountBy
isn't lazy but it can and should be (likeGroupBy
). The iterator doesn't need to run until someone iterates the results! You can make it lazy very easily by usingyield
to return the results. So instead of the following as the last line ofCountByImpl
:Do instead:
Now the compiler will re-write the code to run during iteration, rendering it lazy!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But It isn't the same thing since Select is lazy (do exactly the
foreach yield
) ?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How could I write a test that checks whether the result of CountBy is lazy (for CountByIsLazy)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See some of the existing tests that test for laziness for inspiration.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes,
Select
is lazy except you compute the result imperatively before you send them back viaSelect
. That computation is happening whenCountBy
is called when it should happen when the enumerable returned by your method is enumerated by the caller (and which could be some time after).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, thanks for the explanation.
CountByIsLazy
was implemented.