Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Leaving ICU behind (proof of concept) #68

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 0 additions & 38 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,44 +38,6 @@ set_target_properties(
include(CMakePackageConfigHelpers)
include(GNUInstallDirs)

if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
message(STATUS "Apple system detected.")
# People who run macOS often use brew.
if(EXISTS /opt/homebrew/opt/icu4c)
message(STATUS "icu is provided by homebrew at /opt/homebrew/opt/icu4c.")
## This is a bit awkward, but it is a lot better than asking the
## user to figure that out.
list(APPEND CMAKE_PREFIX_PATH "/opt/homebrew/opt/icu4c/include")
list(APPEND CMAKE_LIBRARY_PATH "/opt/homebrew/opt/icu4c/lib")
elseif(EXISTS /usr/local/opt/icu4c)
message(STATUS "icu is provided by homebrew at /usr/local/opt/icu4c.")
list(APPEND CMAKE_PREFIX_PATH "/usr/local/opt/icu4c/include")
list(APPEND CMAKE_LIBRARY_PATH "/usr/local/opt/icu4c/lib")
endif()
endif()

## If we put 'REQUIRED' under find_package, it is a hard error and
## we get no chance to provide instructions to the user.
find_package(ICU COMPONENTS uc i18n)

### If the user does not have ICU, let us help them with instructions:
if (NOT ICU_FOUND)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
if(EXISTS /opt/homebrew)
message(STATUS "Under macOS, you may install ICU with brew, using 'brew install icu4c'.")
else()
message(STATUS "Under macOS, you should install brew (see https://brew.sh) and then icu4c ('brew install icu4c').")
endif()
elif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
message(STATUS "Under Linux, you may be able to install ICU with a command such as 'apt-get install libicu-dev'." )
endif()
message(SEND_ERROR "ICU with components uc and i18n is required for building ada.")
return()
endif (NOT ICU_FOUND)
### Going forward, we have ICU for sure.

target_link_libraries(ada PRIVATE ICU::uc ICU::i18n)

install(
FILES include/ada.h
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
Expand Down
67 changes: 3 additions & 64 deletions src/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "ada/checkers.h"
#include "ada/unicode.h"
#include "ada/url.h"
#include "puny.cpp"

#include <array>
#include <algorithm>
Expand All @@ -14,18 +15,11 @@

#include <optional>
#include <string_view>
#include <unicode/utypes.h>
#include <unicode/uidna.h>
#include <unicode/utf8.h>

namespace ada::parser {

/**
* @see https://url.spec.whatwg.org/#concept-domain-to-ascii
*
* The only difference between domain_to_ascii and to_ascii is that
* to_ascii does not expect the input to be percent decoded. This is
* mostly used to conform with the test suite.
*/
bool to_ascii(std::optional<std::string>& out, const std::string_view plain, const bool be_strict, size_t first_percent) {
std::string percent_decoded_buffer;
Expand All @@ -34,70 +28,15 @@ namespace ada::parser {
percent_decoded_buffer = unicode::percent_decode(plain, first_percent);
input = percent_decoded_buffer;
}
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_NONTRANSITIONAL_TO_ASCII;

if (be_strict) {
options |= UIDNA_USE_STD3_RULES;
}

UIDNA* uidna = uidna_openUTS46(options, &status);
if (U_FAILURE(status)) {
return false;
}

UIDNAInfo info = UIDNA_INFO_INITIALIZER;
out = std::string(255, 0);
int32_t length = uidna_nameToASCII_UTF8(uidna,
input.data(),
int32_t(input.length()),
out.value().data(), 255,
&info,
&status);

if (status == U_BUFFER_OVERFLOW_ERROR) {
status = U_ZERO_ERROR;
out.value().resize(length);
length = uidna_nameToASCII_UTF8(uidna,
input.data(),
int32_t(input.length()),
out.value().data(), length,
&info,
&status);
}

// A label contains hyphen-minus ('-') in the third and fourth positions.
info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
// A label starts with a hyphen-minus ('-').
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
// A label ends with a hyphen-minus ('-').
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;

if (!be_strict) {
// A non-final domain name label (or the whole domain name) is empty.
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
// A domain name label is longer than 63 bytes.
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
// A domain name is longer than 255 bytes in its storage form.
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
}

uidna_close(uidna);

if (U_FAILURE(status) || info.errors != 0 || length == 0) {
out = std::nullopt;
return false;
}

out.value().resize(length);
out = ada::puny::convert_domain_to_puny(input, be_strict);
if(!out.has_value()) { return false; }
if(std::any_of(out.value().begin(), out.value().end(), ada::unicode::is_forbidden_domain_code_point)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move this inside the punycode?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this line is probably useless.

out = std::nullopt;
return false;
}
return true;
}


url parse_url(std::string_view user_input,
std::optional<ada::url> base_url,
ada::encoding_type encoding,
Expand Down
Loading