Skip to content

Commit

Permalink
Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
co63oc committed Oct 10, 2024
1 parent 33616e3 commit 3921f96
Show file tree
Hide file tree
Showing 26 changed files with 932 additions and 795 deletions.
5 changes: 0 additions & 5 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,6 @@ foreach(OP_DEF_FILE ${OP_DEF_FILES})
endforeach()
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/op_def.pbtxt "{\"\",\"\"}};\n}")

cc_library(
string_array
SRCS string_array.cc
DEPS utf8proc phi common)

cc_library(
data_type
SRCS data_type.cc
Expand Down
20 changes: 13 additions & 7 deletions paddle/fluid/framework/feed_fetch_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ limitations under the License. */
#include "paddle/fluid/framework/string_array.h"
#include "paddle/phi/core/extended_tensor.h"

namespace paddle {
namespace framework {
using FeedType =
paddle::variant<phi::DenseTensor, Strings, phi::SparseCooTensor>;
namespace phi {
using FeedType = paddle::
variant<phi::DenseTensor, paddle::framework::Strings, phi::SparseCooTensor>;
using FetchType = paddle::variant<phi::DenseTensor,
phi::TensorArray,
framework::Vocab,
paddle::framework::Vocab,
phi::SparseCooTensor>;

template <>
Expand All @@ -40,9 +39,16 @@ struct PhiVectorType<FetchType> {
const char *type_name = "PhiVectorFetchType";
};

using FeedList = paddle::framework::PhiVector<FeedType>;
using FetchList = paddle::framework::PhiVector<FetchType>;
using FeedList = PhiVector<FeedType>;
using FetchList = PhiVector<FetchType>;
} // namespace phi

namespace paddle {
namespace framework {
using FeedType = phi::FeedType;
using FetchType = phi::FetchType;
using FeedList = phi::FeedList;
using FetchList = phi::FetchList;
using FetchUnmergedList = std::vector<std::vector<FetchType>>;

inline bool data_is_lod_tensor(const FetchType &data) {
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3253,6 +3253,9 @@ void OperatorWithKernel::BuildPhiKernelContext(
} else if (var->IsType<framework::Vocab>()) {
tensor_in = &(var->Get<framework::Vocab>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} else if (var->IsType<framework::Strings>()) {
tensor_in = &(var->Get<framework::Strings>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} else if (var->IsType<framework::FeedList>()) {
tensor_in = &(var->Get<framework::FeedList>());
phi_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
Expand Down
117 changes: 1 addition & 116 deletions paddle/fluid/framework/string_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,119 +14,4 @@ limitations under the License. */

#pragma once

#include <codecvt>
#include <iostream>
#include <locale>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/phi_tensor_base_vector.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/extended_tensor.h"

namespace paddle {
namespace framework {

// Note(YuanRisheng): Vocab is mainly used for faster_tokenizer_op, and we
// don't recommend using it widely, because faster_tokenizer_op may be deleted
// in the future, in which case this class will be deleted as well.

class Vocab : public phi::ExtendedTensor,
              public phi::TypeInfoTraits<phi::TensorBase, Vocab> {
 private:
  // Underlying storage type: wide-string keys mapped to 32-bit integer values.
  using VocabMap = std::unordered_map<std::wstring, std::int32_t>;

 public:
  // Default construction, copy and move are all compiler-generated.
  Vocab() = default;
  Vocab(const Vocab& other) = default;
  Vocab(Vocab&& other) = default;
  Vocab& operator=(const Vocab& other) = default;
  Vocab& operator=(Vocab&& other) = default;

  /// \brief Destroy the Vocab and release exclusive resources.
  virtual ~Vocab() = default;

  /// \brief Replaces the stored entries with a copy of `other`.
  /// \param other The map whose contents are copied into this object.
  /// \return A reference to this object.
  Vocab& operator=(const VocabMap& other) {
    data_ = other;
    return *this;
  }

  /// \brief Returns the name of the class for type traits.
  /// \return The name of the class.
  static const char* name() { return "Vocab"; }

  /// \brief Returns the number of stored entries.
  size_t size() const { return data_.size(); }

  /// \brief Removes every stored entry.
  void clear() { data_.clear(); }

  /// \brief Forwards `key` and `value` to the underlying map's emplace.
  void emplace(const std::wstring& key, std::int32_t value) {
    data_.emplace(key, value);
  }

  /// \brief Returns the value stored for `key` via the underlying map's at(),
  /// which throws std::out_of_range when the key is absent.
  std::int32_t at(const std::wstring& key) { return data_.at(key); }
  std::int32_t at(const std::wstring& key) const { return data_.at(key); }

  /// \brief Looks up `key`; the result equals end() when nothing is found.
  VocabMap::iterator find(const std::wstring& key) { return data_.find(key); }
  VocabMap::const_iterator find(const std::wstring& key) const {
    return data_.find(key);
  }

  /// \brief Iterator access to the underlying map.
  VocabMap::iterator begin() { return data_.begin(); }
  VocabMap::const_iterator begin() const { return data_.begin(); }
  VocabMap::iterator end() { return data_.end(); }
  VocabMap::const_iterator end() const { return data_.end(); }

 private:
  VocabMap data_;
};

// Note(YuanRisheng): PhiVector is essentially a vector that is only used for
// PHI kernels. It can be used when you define a non-tensor type that needs to
// be stored in a vector and passed as a PHI kernel argument.

// Explicit specialization of PhiVectorType for std::string. Its only member
// is the type-name string "PhiVectorString".
template <>
struct PhiVectorType<std::string> {
const char* type_name = "PhiVectorString";
};

using String = std::string;
using Strings = PhiVector<std::string>;

// Convert the std::string type to the std::wstring type.
bool ConvertStrToWstr(const std::string& src, std::wstring* res);
// Convert the std::wstring type to the std::string type.
void ConvertWstrToStr(const std::wstring& src, std::string* res);
// Normalization Form Canonical Decomposition.
void NFD(const std::string& s, std::string* ret);

// Write data of type
// std::unordered_map<std::string, int32_t> to an ostream.
void StringMapToStream(std::ostream& os,
const std::unordered_map<std::string, int32_t>& data);

// Read data of type
// std::unordered_map<std::string, int32_t> from an istream.
void StringMapFromStream(std::istream& is,
std::unordered_map<std::string, int32_t>* data);
} // namespace framework
} // namespace paddle
#include "paddle/phi/core/vocab/string_array.h"
12 changes: 7 additions & 5 deletions paddle/fluid/framework/tensor_ref_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@

#pragma once

#include "paddle/fluid/framework/phi_tensor_base_vector.h"

namespace paddle {
namespace framework {
#include "paddle/phi/core/vocab/phi_tensor_base_vector.h"

namespace phi {
template <>
struct PhiVectorType<const framework::Variable*> {
struct PhiVectorType<const paddle::framework::Variable*> {
const char* type_name = "VariableRefArray";
};
} // namespace phi

namespace paddle {
namespace framework {

using VariableRefArray = PhiVector<const framework::Variable*>;

Expand Down
2 changes: 0 additions & 2 deletions paddle/fluid/framework/type_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ bool TypeInfoTraits<BaseT, DerivedT>::classof(const BaseT* obj) {
}

template class TypeInfoTraits<phi::TensorBase, paddle::framework::RawTensor>;
template class TypeInfoTraits<phi::TensorBase, paddle::framework::Vocab>;
template class TypeInfoTraits<phi::TensorBase, paddle::framework::Strings>;
template class TypeInfoTraits<phi::TensorBase, paddle::framework::FeedList>;
template class TypeInfoTraits<phi::TensorBase, egr::VariableCompatTensor>;
template class TypeInfoTraits<phi::TensorBase, paddle::prim::DescTensor>;
Expand Down
17 changes: 15 additions & 2 deletions paddle/fluid/imperative/prepared_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_context.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/core/vocab/string_array.h"

COMMON_DECLARE_bool(use_mkldnn);

Expand Down Expand Up @@ -307,8 +308,14 @@ void BuildDygraphPhiKernelContext(const phi::KernelSignature& kernel_signature,
kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
continue;
} else if (input_defs[i].type_index ==
std::type_index(typeid(
paddle::optional<std::vector<const phi::DenseTensor*>>))) {
std::type_index(
typeid(paddle::optional<phi::ExtendedTensor>)) ||
input_defs[i].type_index ==
std::type_index(typeid(paddle::optional<phi::Strings>)) ||
input_defs[i].type_index ==
std::type_index(
typeid(paddle::optional<
std::vector<const phi::DenseTensor*>>))) {
kernel_ctx->EmplaceBackInputWithoutSetRange(nullptr);
auto end_idx = start_idx + 1;
kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
Expand Down Expand Up @@ -338,6 +345,12 @@ void BuildDygraphPhiKernelContext(const phi::KernelSignature& kernel_signature,
} else if (var.template IsType<phi::TensorArray>()) {
tensor_in = &(var.template Get<phi::TensorArray>());
kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in);
} else if (var.template IsType<phi::Vocab>()) {
tensor_in = &(var.template Get<phi::Vocab>());
kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in);
} else if (var.template IsType<phi::Strings>()) {
tensor_in = &(var.template Get<phi::Strings>());
kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in);
} else {
PADDLE_THROW(common::errors::Unimplemented(
"Unsupported input `%s` type when call pt kernel.",
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ op_library(generated_op UNITY SRCS generated_op1.cc generated_op2.cc generated_o
op_library(run_program_op DEPS executor_cache ${OP_HEADER_DEPS})
target_link_libraries(run_program_op phi common)
op_library(quantize_linear_op DEPS phi common)
op_library(save_combine_op DEPS string_array phi common)
op_library(load_combine_op DEPS string_array)
op_library(save_combine_op DEPS phi)
op_library(load_combine_op DEPS phi)

op_library(activation_op SRCS activation_op.cc DEPS ${OP_HEADER_DEPS})

Expand Down
33 changes: 33 additions & 0 deletions paddle/fluid/operators/ops_signature/faster_tokenizer_sig.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/core/compat/op_utils.h"

namespace phi {

// Builds the KernelSignature for the "faster_tokenizer" kernel.
//
// The argument-mapping context is not consulted (it is marked UNUSED); the
// same signature is returned on every call:
//   - first list:  "Vocab", "Text", "TextPair"
//   - second list: "do_lower_case", "is_split_into_words", "max_seq_len",
//                  "pad_to_max_seq_len"
//   - third list:  "InputIds", "SegmentIds"
// NOTE(review): the lists presumably correspond to inputs, attributes and
// outputs respectively -- confirm against the KernelSignature constructor.
KernelSignature FasterTokenizerOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("faster_tokenizer",
{"Vocab", "Text", "TextPair"},
{"do_lower_case",
"is_split_into_words",
"max_seq_len",
"pad_to_max_seq_len"},
{"InputIds", "SegmentIds"});
}

} // namespace phi

PD_REGISTER_ARG_MAPPING_FN(faster_tokenizer,
phi::FasterTokenizerOpArgumentMapping);
2 changes: 1 addition & 1 deletion paddle/fluid/operators/string/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/sequence_ops.
include(unity_build_rule.cmake)
endif()
register_operators(DEPS op_version_registry utf8proc string_array)
register_operators(DEPS op_version_registry phi)
Loading

0 comments on commit 3921f96

Please sign in to comment.