diff --git a/src/array/dictionary/iterator.rs b/src/array/dictionary/iterator.rs index ae7d6951dd5..cc7bb4d5d31 100644 --- a/src/array/dictionary/iterator.rs +++ b/src/array/dictionary/iterator.rs @@ -1,5 +1,6 @@ use crate::array::Array; use crate::bitmap::utils::{zip_validity, ZipValidity}; +use crate::scalar::Scalar; use crate::trusted_len::TrustedLen; use super::{DictionaryArray, DictionaryKey}; @@ -23,7 +24,7 @@ impl<'a, K: DictionaryKey> DictionaryValuesIter<'a, K> { } impl<'a, K: DictionaryKey> Iterator for DictionaryValuesIter<'a, K> { - type Item = Box; + type Item = Box; #[inline] fn next(&mut self) -> Option { @@ -56,10 +57,10 @@ impl<'a, K: DictionaryKey> DoubleEndedIterator for DictionaryValuesIter<'a, K> { } type ValuesIter<'a, K> = DictionaryValuesIter<'a, K>; -type ZipIter<'a, K> = ZipValidity<'a, Box, ValuesIter<'a, K>>; +type ZipIter<'a, K> = ZipValidity<'a, Box, ValuesIter<'a, K>>; impl<'a, K: DictionaryKey> IntoIterator for &'a DictionaryArray { - type Item = Option>; + type Item = Option>; type IntoIter = ZipIter<'a, K>; fn into_iter(self) -> Self::IntoIter { diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index d63d82bccb2..1d911a440b7 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use crate::{ bitmap::Bitmap, datatypes::DataType, + scalar::{new_scalar, Scalar}, types::{NativeType, NaturalDataType}, }; @@ -90,11 +91,11 @@ impl DictionaryArray { &self.values } - /// Returns the values of the [`DictionaryArray`]. + /// Returns the value of the [`DictionaryArray`] at position `i`. #[inline] - pub fn value(&self, index: usize) -> Box { + pub fn value(&self, index: usize) -> Box { let index = self.keys.value(index).to_usize().unwrap(); - self.values.clone().slice(index, 1) + new_scalar(self.values.as_ref(), index) } } diff --git a/src/array/equal/dictionary.rs b/src/array/equal/dictionary.rs index d1e91fcc9e9..8c879ff8370 100644 --- a/src/array/equal/dictionary.rs +++ b/src/array/equal/dictionary.rs @@ -1,5 +1,14 @@ use crate::array::{Array, DictionaryArray, DictionaryKey}; pub(super) fn equal(lhs: &DictionaryArray, rhs: &DictionaryArray) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + if !(lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len()) { + return false; + }; + + // if x is not valid and y is but its child is not, the slots are equal. + lhs.iter().zip(rhs.iter()).all(|(x, y)| match (&x, &y) { + (None, Some(y)) => !y.is_valid(), + (Some(x), None) => !x.is_valid(), + _ => x == y, + }) } diff --git a/tests/it/array/equal/dictionary.rs b/tests/it/array/equal/dictionary.rs index c93a62a64c6..8ae82e7d691 100644 --- a/tests/it/array/equal/dictionary.rs +++ b/tests/it/array/equal/dictionary.rs @@ -4,9 +4,9 @@ use arrow2::array::*; use super::test_equal; -fn create_dictionary_array(values: &[&str], keys: &[Option]) -> DictionaryArray { +fn create_dictionary_array(values: &[Option<&str>], keys: &[Option]) -> DictionaryArray { let keys = Int16Array::from(keys); - let values = Utf8Array::::from_slice(values); + let values = Utf8Array::::from(values); DictionaryArray::from_data(keys, Arc::new(values)) } @@ -14,45 +14,86 @@ fn create_dictionary_array(values: &[&str], keys: &[Option]) -> DictionaryA #[test] fn dictionary_equal() { // (a, b, c), (0, 1, 0, 2) => (a, b, a, c) - let a = create_dictionary_array(&["a", "b", "c"], &[Some(0), Some(1), Some(0), Some(2)]); + let a = create_dictionary_array( + &[Some("a"), Some("b"), Some("c")], + &[Some(0), Some(1), Some(0), Some(2)], + ); // different representation (values and keys are swapped), same result - let b = create_dictionary_array(&["a", "c", "b"], &[Some(0), Some(2), Some(0), Some(1)]); + let b = create_dictionary_array( + &[Some("a"), Some("c"), Some("b")], + &[Some(0), Some(2), Some(0), Some(1)], + ); test_equal(&a, &b, true); // different len - let b = create_dictionary_array(&["a", "c", "b"], &[Some(0), Some(2), Some(1)]); + let b = create_dictionary_array( + &[Some("a"), Some("c"), Some("b")], + &[Some(0), Some(2), Some(1)], + ); test_equal(&a, &b, false); // different key - let b = create_dictionary_array(&["a", "c", "b"], &[Some(0), Some(2), Some(0), Some(0)]); + let b = create_dictionary_array( + &[Some("a"), Some("c"), Some("b")], + &[Some(0), Some(2), Some(0), Some(0)], + ); test_equal(&a, &b, false); // different values, same keys - let b = create_dictionary_array(&["a", "b", "d"], &[Some(0), Some(1), Some(0), Some(2)]); + let b = create_dictionary_array( + &[Some("a"), Some("b"), Some("d")], + &[Some(0), Some(1), Some(0), Some(2)], + ); test_equal(&a, &b, false); } #[test] fn dictionary_equal_null() { // (a, b, c), (1, 2, 1, 3) => (a, b, a, c) - let a = create_dictionary_array(&["a", "b", "c"], &[Some(0), None, Some(0), Some(2)]); + let a = create_dictionary_array( + &[Some("a"), Some("b"), Some("c")], + &[Some(0), None, Some(0), Some(2)], + ); // equal to self test_equal(&a, &a, true); // different representation (values and keys are swapped), same result - let b = create_dictionary_array(&["a", "c", "b"], &[Some(0), None, Some(0), Some(1)]); + let b = create_dictionary_array( + &[Some("a"), Some("c"), Some("b")], + &[Some(0), None, Some(0), Some(1)], + ); test_equal(&a, &b, true); // different null position - let b = create_dictionary_array(&["a", "c", "b"], &[Some(0), Some(2), Some(0), None]); + let b = create_dictionary_array( + &[Some("a"), Some("c"), Some("b")], + &[Some(0), Some(2), Some(0), None], + ); test_equal(&a, &b, false); // different key - let b = create_dictionary_array(&["a", "c", "b"], &[Some(0), None, Some(0), Some(0)]); + let b = create_dictionary_array( + &[Some("a"), Some("c"), Some("b")], + &[Some(0), None, Some(0), Some(0)], + ); test_equal(&a, &b, false); // different values, same keys - let b = create_dictionary_array(&["a", "b", "d"], &[Some(0), None, Some(0), Some(2)]); + let b = create_dictionary_array( + &[Some("a"), Some("b"), Some("d")], + &[Some(0), None, Some(0), Some(2)], + ); test_equal(&a, &b, false); + + // different nulls in keys and values + let a = create_dictionary_array( + &[Some("a"), Some("b"), None], + &[Some(0), None, Some(0), Some(2)], + ); + let b = create_dictionary_array( + &[Some("a"), Some("b"), Some("c")], + &[Some(0), None, Some(0), None], + ); + test_equal(&a, &b, true); }