-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ARROW-6678: [C++][Parquet] Binary data stored in Parquet metadata must be base64-encoded to be UTF-8 compliant
I have added a simple base64 implementation (Zlib license) to arrow/vendored from https://github.com/ReneNyffenegger/cpp-base64. Closes #5493 from wesm/ARROW-6678 and squashes the following commits: c058e86 <Wes McKinney> Simplify, add MSVC exports; 06f75cd <Wes McKinney> Fix Python unit test that needs to base64-decode now; eabb121 <Wes McKinney> Fix LICENSE.txt, add iwyu export; b3a584a <Wes McKinney> Add vendored base64 C++ implementation and ensure that Thrift KeyValue in Parquet metadata is UTF-8. Authored-by: Wes McKinney <[email protected]> Signed-off-by: Micah Kornfield <[email protected]>
- Loading branch information
1 parent
199d3cf
commit 4fe330a
Showing
7 changed files
with
206 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1874,3 +1874,31 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
---------------------------------------------------------------------- | ||
|
||
cpp/src/arrow/vendored/base64.cpp has the following license | ||
|
||
ZLIB License | ||
|
||
Copyright (C) 2004-2017 René Nyffenegger | ||
|
||
This source code is provided 'as-is', without any express or implied | ||
warranty. In no event will the author be held liable for any damages arising | ||
from the use of this software. | ||
|
||
Permission is granted to anyone to use this software for any purpose, including | ||
commercial applications, and to alter it and redistribute it freely, subject to | ||
the following restrictions: | ||
|
||
1. The origin of this source code must not be misrepresented; you must not | ||
claim that you wrote the original source code. If you use this source code | ||
in a product, an acknowledgment in the product documentation would be | ||
appreciated but is not required. | ||
|
||
2. Altered source versions must be plainly marked as such, and must not be | ||
misrepresented as being the original source code. | ||
|
||
3. This notice may not be removed or altered from any source distribution. | ||
|
||
René Nyffenegger [email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#pragma once | ||
|
||
#include <string> | ||
|
||
#include "arrow/util/visibility.h" | ||
|
||
namespace arrow { | ||
namespace util { | ||
|
||
ARROW_EXPORT | ||
std::string base64_encode(unsigned char const*, unsigned int len); | ||
|
||
ARROW_EXPORT | ||
std::string base64_decode(std::string const& s); | ||
|
||
} // namespace util | ||
} // namespace arrow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
/* | ||
base64.cpp and base64.h | ||
base64 encoding and decoding with C++. | ||
Version: 1.01.00 | ||
Copyright (C) 2004-2017 René Nyffenegger | ||
This source code is provided 'as-is', without any express or implied | ||
warranty. In no event will the author be held liable for any damages | ||
arising from the use of this software. | ||
Permission is granted to anyone to use this software for any purpose, | ||
including commercial applications, and to alter it and redistribute it | ||
freely, subject to the following restrictions: | ||
1. The origin of this source code must not be misrepresented; you must not | ||
claim that you wrote the original source code. If you use this source code | ||
in a product, an acknowledgment in the product documentation would be | ||
appreciated but is not required. | ||
2. Altered source versions must be plainly marked as such, and must not be | ||
misrepresented as being the original source code. | ||
3. This notice may not be removed or altered from any source distribution. | ||
René Nyffenegger [email protected] | ||
*/ | ||
|
||
#include "arrow/util/base64.h" | ||
#include <iostream> | ||
|
||
namespace arrow {
namespace util {

// NOTE: the code below is an altered version of the upstream cpp-base64
// implementation credited in this file's license header. Changes relative to
// upstream:
//   * characters are classified by a locale-independent helper instead of
//     isalnum() followed by a linear search of the alphabet;
//   * the decoder no longer reads scratch bytes that were never written when
//     the input ends in a partial group;
//   * output strings reserve their storage up front.

// The base64 alphabet; the index of a character is the 6-bit value it encodes.
static const std::string base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

// Maps a character to the 6-bit value it stands for, or -1 when the character
// is not part of the base64 alphabet (this includes the '=' padding character).
// Deliberately avoids <cctype> so the result never depends on the C locale.
// The range checks assume letters and digits are contiguous, as in ASCII.
static inline int base64_value(unsigned char c) {
  if (c >= 'A' && c <= 'Z') return c - 'A';
  if (c >= 'a' && c <= 'z') return c - 'a' + 26;
  if (c >= '0' && c <= '9') return c - '0' + 52;
  if (c == '+') return 62;
  if (c == '/') return 63;
  return -1;
}

// Encodes `in_len` bytes starting at `bytes_to_encode` as base64 text.
//
// Every complete group of three input bytes yields four output characters. A
// trailing group of one or two bytes yields two or three characters followed
// by '=' padding, so the result length is always a multiple of four.
// An `in_len` of zero returns an empty string without touching the pointer.
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
  std::string ret;
  ret.reserve((static_cast<std::string::size_type>(in_len) + 2) / 3 * 4);

  unsigned int pos = 0;

  // Complete 3-byte groups. `in_len - pos` cannot wrap because pos <= in_len.
  for (; in_len - pos >= 3; pos += 3) {
    const unsigned char b0 = bytes_to_encode[pos];
    const unsigned char b1 = bytes_to_encode[pos + 1];
    const unsigned char b2 = bytes_to_encode[pos + 2];

    ret += base64_chars[b0 >> 2];
    ret += base64_chars[((b0 & 0x03) << 4) | (b1 >> 4)];
    ret += base64_chars[((b1 & 0x0f) << 2) | (b2 >> 6)];
    ret += base64_chars[b2 & 0x3f];
  }

  // Trailing partial group: missing bytes count as zero and are replaced by
  // '=' in the output.
  const unsigned int remaining = in_len - pos;
  if (remaining == 1) {
    const unsigned char b0 = bytes_to_encode[pos];
    ret += base64_chars[b0 >> 2];
    ret += base64_chars[(b0 & 0x03) << 4];
    ret += "==";
  } else if (remaining == 2) {
    const unsigned char b0 = bytes_to_encode[pos];
    const unsigned char b1 = bytes_to_encode[pos + 1];
    ret += base64_chars[b0 >> 2];
    ret += base64_chars[((b0 & 0x03) << 4) | (b1 >> 4)];
    ret += base64_chars[(b1 & 0x0f) << 2];
    ret += '=';
  }

  return ret;
}

// Decodes base64 text into the bytes it represents.
//
// Decoding is lenient: it stops at the first '=' or at the first character
// outside the base64 alphabet and returns whatever was decoded up to that
// point. No error is reported for malformed or truncated input. A trailing
// group of two or three characters yields one or two bytes; a single trailing
// character carries fewer than eight bits and is dropped.
std::string base64_decode(std::string const& encoded_string) {
  std::string ret;
  ret.reserve(encoded_string.size() / 4 * 3 + 2);

  // Zero-initialised so that no path ever reads an indeterminate value.
  unsigned char sextets[4] = {0, 0, 0, 0};
  int filled = 0;

  for (const char ch : encoded_string) {
    const int value = base64_value(static_cast<unsigned char>(ch));
    if (value < 0) break;  // padding or a foreign character ends the payload

    sextets[filled++] = static_cast<unsigned char>(value);
    if (filled == 4) {
      const unsigned char b0 =
          static_cast<unsigned char>((sextets[0] << 2) | (sextets[1] >> 4));
      const unsigned char b1 =
          static_cast<unsigned char>(((sextets[1] & 0x0f) << 4) | (sextets[2] >> 2));
      const unsigned char b2 =
          static_cast<unsigned char>(((sextets[2] & 0x03) << 6) | sextets[3]);
      ret += static_cast<char>(b0);
      ret += static_cast<char>(b1);
      ret += static_cast<char>(b2);
      filled = 0;
    }
  }

  // Partial final group: only the bytes fully covered by the available
  // sextets are emitted, and only those sextets are read.
  if (filled >= 2) {
    const unsigned char b0 =
        static_cast<unsigned char>((sextets[0] << 2) | (sextets[1] >> 4));
    ret += static_cast<char>(b0);
    if (filled == 3) {
      const unsigned char b1 =
          static_cast<unsigned char>(((sextets[1] & 0x0f) << 4) | (sextets[2] >> 2));
      ret += static_cast<char>(b1);
    }
  }

  return ret;
}

}  // namespace util
}  // namespace arrow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters