17 #include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h" 18 #include "fastdeploy/fastdeploy_model.h" 19 #include "fastdeploy/utils/unique_ptr.h" 23 #include <unordered_map> 31 struct FASTDEPLOY_DECL UIEResult {
// NOTE: this is a numbered listing; the declarations of start_, end_,
// probability_ and text_ (used by the constructor below) and the struct's
// closing brace are not visible here.
//
// Nested results: maps a string key to a list of further UIEResult entries.
// (Presumably relation name -> related results; confirm against the
// implementation.)
36 std::unordered_map<std::string, std::vector<UIEResult>> relation_;
// Compiler-generated default constructor.
37 UIEResult() =
default;
// Initializes start_, end_, probability_ and text_ from the arguments.
// relation_ is left default-constructed (empty).
// NOTE(review): `text` is taken by value and then copied into text_;
// initializing with std::move(text) would avoid the second copy.
38 UIEResult(
size_t start,
size_t end,
double probability, std::string text)
39 : start_(start), end_(end), probability_(probability), text_(text) {}
// Returns a string form of this result. Declared const; no body is visible
// in this listing.
40 std::string Str()
const;
// Stream-insertion overload for a single UIEResult; returns the stream
// reference so calls can be chained.
43 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& os,
44 const UIEResult& result);
// Stream-insertion overload for a vector of (string -> vector<UIEResult>)
// maps. The lines carrying the stream parameter and the parameter name are
// not shown in this listing.
45 FASTDEPLOY_DECL std::ostream& operator<<(
47 const std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>&
// A node of a schema tree: carries a name (name_, set by the constructor
// below; its declaration is not visible in this listing), a list of child
// nodes, and two per-node tables (prefix_, relations_).
// NOTE: several original lines, including closing braces, are absent from
// this listing.
50 struct FASTDEPLOY_DECL SchemaNode {
// Lists of string lists; filled in outside this struct.
52 std::vector<std::vector<std::string>> prefix_;
// Lists of raw UIEResult pointers. Ownership/lifetime of the pointees is not
// established by anything visible here. TODO confirm with the code that
// populates it.
53 std::vector<std::vector<UIEResult*>> relations_;
// Child nodes, stored by value.
54 std::vector<SchemaNode> children_;
// Compiler-generated default and copy constructors.
55 SchemaNode() =
default;
56 SchemaNode(
const SchemaNode&) =
default;
// Creates a node called `name`, copying `children` into children_.
// `children` defaults to an empty list. Marked explicit, so a std::string
// does not implicitly convert to a SchemaNode.
57 explicit SchemaNode(
const std::string& name,
58 const std::vector<SchemaNode>& children = {})
59 : name_(name), children_(children) {}
// Appends a child constructed in place from the string `schema`.
60 void AddChild(
const std::string& schema) { children_.emplace_back(schema); }
// Appends a copy of an existing node as a child.
61 void AddChild(
const SchemaNode& schema) { children_.push_back(schema); }
// Appends a child named `schema` that itself has one child per entry of
// `children`, each constructed from the corresponding string.
62 void AddChild(
const std::string& schema,
63 const std::vector<std::string>& children) {
64 SchemaNode schema_node(schema);
65 for (
auto& child : children) {
66 schema_node.children_.emplace_back(child);
// NOTE(review): schema_node is an lvalue, so this copy-constructs the new
// element (including its children) even though the local is not used again;
// moving it would avoid that copy.
68 children_.emplace_back(schema_node);
// Appends a child named `schema` whose children are a copy of `children`.
70 void AddChild(
const std::string& schema,
71 const std::vector<SchemaNode>& children) {
72 SchemaNode schema_node(schema);
73 schema_node.children_ = children;
// NOTE(review): same as above, the local is copied rather than moved.
74 children_.emplace_back(schema_node);
// NOTE: the enclosing type's header is not visible in this listing; the
// constructor names and the unique_ptr<Schema> used elsewhere in this file
// indicate it is `Schema`.
//
// Four explicit constructors, one per accepted schema representation: a
// single string, a list of strings, a list of SchemaNode, or a single
// SchemaNode. Each takes an optional `name` that defaults to "root".
// No bodies are visible here.
84 explicit Schema(
const std::string& schema,
const std::string& name =
"root");
85 explicit Schema(
const std::vector<std::string>& schema_list,
86 const std::string& name =
"root");
87 explicit Schema(
const std::vector<SchemaNode>& schema_list,
88 const std::string& name =
"root");
89 explicit Schema(
const SchemaNode& schema,
const std::string& name =
"root");
// Helper taking a name; presumably creates root_ with that name (body not
// visible, confirm in the implementation file).
92 void CreateRoot(
const std::string& name);
// Uniquely owned SchemaNode; by its name, the root of the schema tree.
93 std::unique_ptr<SchemaNode> root_;
// Gives UIEModel access to this type's non-public members.
94 friend class UIEModel;
// Model type publicly derived from FastDeployModel.
// NOTE: this is a numbered listing with lines missing; each constructor
// below has further parameters on lines that are not shown (between
// `schema` / `batch_size` and `schema_language`).
97 struct FASTDEPLOY_DECL UIEModel :
public FastDeployModel {
// Constructor taking the schema as a list of strings.
// Visible parameters: model/params/vocab file paths, position_prob,
// max_length, schema, and schema_language (defaults to SchemaLanguage::ZH).
99 UIEModel(
const std::string& model_file,
const std::string& params_file,
100 const std::string& vocab_file,
float position_prob,
101 size_t max_length,
const std::vector<std::string>& schema,
107 SchemaLanguage schema_language = SchemaLanguage::ZH);
// Constructor taking the schema as a single SchemaNode. This overload's
// visible lines also show an `int batch_size` parameter.
108 UIEModel(
const std::string& model_file,
const std::string& params_file,
109 const std::string& vocab_file,
float position_prob,
110 size_t max_length,
const SchemaNode& schema,
int batch_size,
115 SchemaLanguage schema_language = SchemaLanguage::ZH);
// Constructor taking the schema as a list of SchemaNode.
116 UIEModel(
const std::string& model_file,
const std::string& params_file,
117 const std::string& vocab_file,
float position_prob,
118 size_t max_length,
const std::vector<SchemaNode>& schema,
124 SchemaLanguage schema_language = SchemaLanguage::ZH);
// Returns the fixed identifier "UIEModel".
// NOTE(review): if this overrides a virtual declared in FastDeployModel,
// marking it `override` would let the compiler verify the signature.
// The base class is not visible here; confirm before changing.
125 virtual std::string ModelName()
const {
return "UIEModel"; }
// Three SetSchema overloads mirroring the three constructor schema forms:
// list of strings, list of SchemaNode, single SchemaNode. Bodies are not
// visible; presumably they replace schema_. Confirm in the implementation.
126 void SetSchema(
const std::vector<std::string>& schema);
127 void SetSchema(
const std::vector<SchemaNode>& schema);
128 void SetSchema(
const SchemaNode& schema);
// Declarations only; no bodies are visible in this listing. Inputs are
// passed by const reference and results are written through pointer
// out-parameters.
//
// Takes the raw texts plus one schema node's name and prefix table, and
// fills input_texts, prompts and two index-mapping tables. Returns bool
// (meaning of the value not visible here).
// NOTE(review): node_prefix is passed by const *value*, so the nested
// vector is copied on every call; a const reference would avoid that, but
// the out-of-line definition would have to change in step.
130 bool ConstructTextsAndPrompts(
131 const std::vector<std::string>& raw_texts,
const std::string& node_name,
132 const std::vector<std::vector<std::string>> node_prefix,
133 std::vector<std::string>* input_texts, std::vector<std::string>* prompts,
134 std::vector<std::vector<size_t>>* input_mapping_with_raw_texts,
135 std::vector<std::vector<size_t>>* input_mapping_with_short_text);
// Takes texts and prompts; outputs tokenizer encodings and input tensors.
136 void Preprocess(
const std::vector<std::string>& input_texts,
137 const std::vector<std::string>& prompts,
138 std::vector<fast_tokenizer::core::Encoding>* encodings,
139 std::vector<fastdeploy::FDTensor>* inputs);
// Parameter list of a declaration whose first line (return type and name)
// is not shown in this listing. Presumably the post-processing counterpart
// of Preprocess; confirm. Consumes output tensors, encodings, the short
// texts/prompts and a mapping table; writes per-input UIEResult lists.
141 const std::vector<fastdeploy::FDTensor>& outputs,
142 const std::vector<fast_tokenizer::core::Encoding>& encodings,
143 const std::vector<std::string>& short_input_texts,
144 const std::vector<std::string>& short_prompts,
145 const std::vector<std::vector<size_t>>& input_mapping_with_short_text,
146 std::vector<std::vector<UIEResult>>* results);
// Takes a mapping table and a list of result lists; writes a table of
// strings to `prefix` (same type as SchemaNode::prefix_).
147 void ConstructChildPromptPrefix(
148 const std::vector<std::vector<size_t>>& input_mapping_with_raw_texts,
149 const std::vector<std::vector<UIEResult>>& results_list,
150 std::vector<std::vector<std::string>>* prefix);
// Takes the previous relation pointers, a mapping table, result lists and a
// node name; writes a vector of (string -> results) maps (its parameter
// name is on a line not shown) and a new table of UIEResult pointers.
// The pointer tables are non-owning as far as the types show; what they
// point into is not visible here. TODO confirm lifetime in the definition.
151 void ConstructChildRelations(
152 const std::vector<std::vector<UIEResult*>>& old_relations,
153 const std::vector<std::vector<size_t>>& input_mapping_with_raw_texts,
154 const std::vector<std::vector<UIEResult>>& results_list,
155 const std::string& node_name,
156 std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
158 std::vector<std::vector<UIEResult*>>* new_relations);
// Prediction entry point: takes the input texts and a pointer to a vector of
// (string -> vector<UIEResult>) maps for the output. The return type and the
// output parameter's name are on lines not shown in this listing.
160 Predict(
const std::vector<std::string>& texts,
161 std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
// Pair of a 64-bit integer and a float; by its name, an index together with
// its probability.
165 using IDX_PROB = std::pair<int64_t, float>;
// Call operator comparing two (IDX_PROB, IDX_PROB) pairs. Its enclosing type
// is not shown in this listing; presumably it is IdxProbCmp, the comparator
// named in SPAN_SET below.
167 bool operator()(
const std::pair<IDX_PROB, IDX_PROB>& lhs,
168 const std::pair<IDX_PROB, IDX_PROB>& rhs)
const;
// Ordered set of (IDX_PROB, IDX_PROB) pairs, ordered by IdxProbCmp.
170 using SPAN_SET = std::set<std::pair<IDX_PROB, IDX_PROB>, IdxProbCmp>;
// A tokenizer Offset member. NOTE(review): the type SpanIdx is used by later
// declarations but its definition is not visible here; this member may
// belong to it rather than to UIEModel. Confirm against the full header.
172 fast_tokenizer::core::Offset offset_;
// Declarations only; no bodies are visible in this listing.
//
// No parameters, no return value; by its name, configures which backends
// are valid for this model (confirm in the implementation).
175 void SetValidBackend();
// Takes texts and a max_length; outputs short_texts and an index mapping.
// Presumably splits texts longer than max_length and records which pieces
// came from which input. Confirm in the implementation.
177 void AutoSplitter(
const std::vector<std::string>& texts,
size_t max_length,
178 std::vector<std::string>* short_texts,
179 std::vector<std::vector<size_t>>* input_mapping);
// Takes the short texts and the index mapping; reads and/or writes
// `results` through the pointer. Presumably the inverse of AutoSplitter.
180 void AutoJoiner(
const std::vector<std::string>& short_texts,
181 const std::vector<std::vector<size_t>>& input_mapping,
182 std::vector<std::vector<UIEResult>>* results);
// Span-extraction helpers. All four are const member functions and write
// their results through pointer out-parameters; bodies are not visible here.
//
// Reads a float buffer described by batch_size and seq_len and fills one
// IDX_PROB list per batch entry. `threshold` defaults to 0.5.
// Assumes probs holds batch_size * seq_len values. TODO confirm.
185 void GetCandidateIdx(
const float* probs, int64_t batch_size, int64_t seq_len,
186 std::vector<std::vector<IDX_PROB>>* candidate_idx_prob,
187 float threshold = 0.5)
const;
// Combines start and end candidates into a SPAN_SET.
188 void GetSpan(
const std::vector<IDX_PROB>& start_idx_prob,
189 const std::vector<IDX_PROB>& end_idx_prob,
190 SPAN_SET* span_set)
const;
// Takes a span set and the tokenizer offset mapping; outputs SpanIdx entries
// and a parallel list of float probabilities.
191 void GetSpanIdxAndProbs(
192 const SPAN_SET& span_set,
193 const std::vector<fast_tokenizer::core::Offset>& offset_mapping,
194 std::vector<SpanIdx>* span_idxs, std::vector<float>* probs)
const;
// Takes texts, prompts, per-input span indices and probabilities; outputs
// per-input UIEResult lists. The return type is on a line not shown.
196 ConvertSpanToUIEResult(
const std::vector<std::string>& texts,
197 const std::vector<std::string>& prompts,
198 const std::vector<std::vector<SpanIdx>>& span_idxs,
199 const std::vector<std::vector<float>>& probs,
200 std::vector<std::vector<UIEResult>>* results)
const;
// Data members. NOTE: some member lines between these are not shown in
// this listing.
//
// Uniquely owned Schema object.
201 std::unique_ptr<Schema> schema_;
// Float setting; a `position_prob` value is accepted by the constructors.
203 float position_prob_;
// Schema language; the constructors default this argument to
// SchemaLanguage::ZH.
205 SchemaLanguage schema_language_;
// ERNIE fast tokenizer instance, held by value.
206 fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer_;
Option object used when creating a new Runtime object.
Definition: runtime_option.h:40
ModelFormat
Definition: enum_variables.h:67
Model in PaddlePaddle format.
Definition: enum_variables.h:69
All C++ FastDeploy APIs are defined inside this namespace.
Definition: option.h:16