Skip to content

Commit

Permalink
ICU-22876 C++ UnicodeSet/USet easy item iteration
Browse files Browse the repository at this point in the history
See #3120
  • Loading branch information
markusicu committed Sep 11, 2024
1 parent 263c735 commit 37b4149
Show file tree
Hide file tree
Showing 9 changed files with 1,087 additions and 62 deletions.
114 changes: 113 additions & 1 deletion icu4c/source/common/unicode/uniset.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ class U_COMMON_API UnicodeSet final : public UnicodeFilter {
char16_t *pat = nullptr;
int32_t patLen = 0;

UVector* strings = nullptr; // maintained in sorted order
UVector* strings_ = nullptr; // maintained in sorted order
UnicodeSetStringSpan *stringSpan = nullptr;

/**
Expand Down Expand Up @@ -1102,6 +1102,118 @@ class U_COMMON_API UnicodeSet final : public UnicodeFilter {
*/
UChar32 charAt(int32_t index) const;

#ifndef U_HIDE_DRAFT_API
/**
 * Provides a C++ "range" over the individual code points of this set,
 * for use with a range-based for loop.
 *
 * \code
 * UnicodeSet set(u"[abcçカ🚴]", errorCode);
 * for (UChar32 c : set.codePoints()) {
 *     printf("set.codePoint U+%04lx\n", (long)c);
 * }
 * \endcode
 *
 * @return a "range" object for iterating over the code points of this set.
 * @draft ICU 76
 * @see ranges
 * @see strings
 * @see begin
 * @see end
 */
inline U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const {
    // The range object is constructed from the value returned by toUSet().
    const auto uset = toUSet();
    return U_HEADER_NESTED_NAMESPACE::USetCodePoints(uset);
}

/**
 * Provides a C++ "range" over the code point ranges of this set,
 * for use with a range-based for loop.
 * As the example shows, each range can be unpacked into its start and end,
 * or iterated in turn to visit its code points.
 *
 * \code
 * UnicodeSet set(u"[abcçカ🚴]", errorCode);
 * for (auto [start, end] : set.ranges()) {
 *     printf("set.range U+%04lx..U+%04lx\n", (long)start, (long)end);
 * }
 * for (auto range : set.ranges()) {
 *     for (UChar32 c : range) {
 *         printf("set.range.c U+%04lx\n", (long)c);
 *     }
 * }
 * \endcode
 *
 * @return a "range" object for iterating over the code point ranges of this set.
 * @draft ICU 76
 * @see codePoints
 * @see strings
 * @see begin
 * @see end
 */
inline U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const {
    // The range object is constructed from the value returned by toUSet().
    const auto uset = toUSet();
    return U_HEADER_NESTED_NAMESPACE::USetRanges(uset);
}

/**
 * Provides a C++ "range" over the empty and multi-character strings of this set,
 * for use with a range-based for loop.
 * Each string is handed out as a std::u16string_view; its contents are not copied.
 *
 * \code
 * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
 * for (auto s : set.strings()) {
 *     UnicodeString us(s);
 *     std::string u8;
 *     printf("set.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());
 * }
 * \endcode
 *
 * @return a "range" object for iterating over the strings of this set.
 * @draft ICU 76
 * @see codePoints
 * @see ranges
 * @see begin
 * @see end
 */
inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
    // The range object is constructed from the value returned by toUSet().
    const auto uset = toUSet();
    return U_HEADER_NESTED_NAMESPACE::USetStrings(uset);
}

/**
 * Provides a C++ iterator positioned at the start of all of the elements of this set.
 * This is a convenient all-in-one iteration, at the cost of creating
 * a UnicodeString for each code point or string.
 * (Similar to how Java UnicodeSet *is an* Iterable<String>.)
 *
 * Code points are returned first, then empty and multi-character strings.
 *
 * \code
 * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
 * for (auto el : set) {
 *     std::string u8;
 *     printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
 * }
 * \endcode
 *
 * @return an all-elements iterator.
 * @draft ICU 76
 * @see end
 * @see codePoints
 * @see ranges
 * @see strings
 */
inline U_HEADER_NESTED_NAMESPACE::USetElementIterator begin() const {
    // A short-lived USetElements object serves only to produce the iterator.
    U_HEADER_NESTED_NAMESPACE::USetElements elements(toUSet());
    return elements.begin();
}

/**
 * Counterpart to begin(): marks where iteration over all of the elements
 * of this set stops.
 *
 * @return an exclusive-end sentinel for iterating over all of the elements of this set.
 * @draft ICU 76
 * @see begin
 * @see codePoints
 * @see ranges
 * @see strings
 */
inline U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const {
    // A short-lived USetElements object serves only to produce the sentinel.
    U_HEADER_NESTED_NAMESPACE::USetElements elements(toUSet());
    return elements.end();
}
#endif // U_HIDE_DRAFT_API

/**
* Adds the specified range to this set if it is not already
* present. If this set already contains the specified range,
Expand Down
Loading

0 comments on commit 37b4149

Please sign in to comment.