| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- /**
- * Copyright (c) 2017 Baidu.com, Inc. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
- * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * @author baidu aip
- */
- #ifndef __AIP_NLP_H__
- #define __AIP_NLP_H__
- #include "base/base.h"
- namespace aip {
- class Nlp: public AipBase
- {
- public:
-
- std::string _lexer =
- "https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer";
-
- std::string _wordembedding =
- "https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_vec";
-
- std::string _depparser =
- "https://aip.baidubce.com/rpc/2.0/nlp/v1/depparser";
-
- std::string _dnnlm_cn =
- "https://aip.baidubce.com/rpc/2.0/nlp/v2/dnnlm_cn";
-
- std::string _word_sim_embedding =
- "https://aip.baidubce.com/rpc/2.0/nlp/v1/word_emb_sim";
-
- std::string _simnet =
- "https://aip.baidubce.com/rpc/2.0/nlp/v2/simnet";
-
- std::string _comment_tag =
- "https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag";
-
- std::string _sentiment_classify =
- "https://aip.baidubce.com/rpc/2.0/nlp/v2/sentiment_classify";
-
- Nlp(const std::string & app_id, const std::string & ak, const std::string & sk): AipBase(app_id, ak, sk)
- {
- }
-
- /**
- * lexer
- * 词法分析接口向用户提供分词、词性标注、专名识别三大功能;能够识别出文本串中的基本词汇(分词),对这些词汇进行重组、标注组合后词汇的词性,并进一步识别出命名实体。
- * @param text 待分析文本(目前仅支持UTF8编码),长度不超过65536字节
- * options 可选参数:
- */
- Json::Value lexer(
- std::string const & text,
- const std::map<std::string, std::string> & options)
- {
- Json::Value data;
-
- data["text"] = text;
- std::map<std::string, std::string>::const_iterator it;
- for(it=options.begin(); it!=options.end(); it++)
- {
- data[it->first] = it->second;
- }
- Json::Value result =
- this->request(_lexer, null, data.toStyledString(), null);
- return result;
- }
-
- /**
- * wordembedding
- * 词向量表示接口提供中文词向量的查询功能。
- * @param word 文本内容(UTF8编码),最大64字节
- * options 可选参数:
- */
- Json::Value wordembedding(
- std::string const & word,
- const std::map<std::string, std::string> & options)
- {
- Json::Value data;
-
- data["word"] = word;
- std::map<std::string, std::string>::const_iterator it;
- for(it=options.begin(); it!=options.end(); it++)
- {
- data[it->first] = it->second;
- }
- Json::Value result =
- this->request(_wordembedding, null, data.toStyledString(), null);
- return result;
- }
-
- /**
- * depparser
- * 词向量表示接口提供中文词向量的查询功能。
- * @param text 待分析文本(目前仅支持UTF8编码),长度不超过256字节
- * options 可选参数:
- * mode 模型选择。默认值为0,可选值mode=0(对应web模型);mode=1(对应query模型)
- */
- Json::Value depparser(
- std::string const & text,
- const std::map<std::string, std::string> & options)
- {
- Json::Value data;
-
- data["text"] = text;
- std::map<std::string, std::string>::const_iterator it;
- for(it=options.begin(); it!=options.end(); it++)
- {
- data[it->first] = it->second;
- }
- Json::Value result =
- this->request(_depparser, null, data.toStyledString(), null);
- return result;
- }
-
- /**
- * dnnlm_cn
- * 中文DNN语言模型接口用于输出切词结果并给出每个词在句子中的概率值,判断一句话是否符合语言表达习惯。
- * @param text 文本内容(UTF8编码),最大10240字节,不需要切词
- * options 可选参数:
- */
- Json::Value dnnlm_cn(
- std::string const & text,
- const std::map<std::string, std::string> & options)
- {
- Json::Value data;
-
- data["text"] = text;
- std::map<std::string, std::string>::const_iterator it;
- for(it=options.begin(); it!=options.end(); it++)
- {
- data[it->first] = it->second;
- }
- Json::Value result =
- this->request(_dnnlm_cn, null, data.toStyledString(), null);
- return result;
- }
-
- /**
- * word_sim_embedding
- * 输入两个词,得到两个词的相似度结果。
- * @param word_1 词1(UTF8编码),最大64字节
- * @param word_2 词1(UTF8编码),最大64字节
- * options 可选参数:
- * mode 预留字段,可选择不同的词义相似度模型。默认值为0,目前仅支持mode=0
- */
- Json::Value word_sim_embedding(
- std::string const & word_1,
- std::string const & word_2,
- const std::map<std::string, std::string> & options)
- {
- Json::Value data;
-
- data["word_1"] = word_1;
- data["word_2"] = word_2;
- std::map<std::string, std::string>::const_iterator it;
- for(it=options.begin(); it!=options.end(); it++)
- {
- data[it->first] = it->second;
- }
- Json::Value result =
- this->request(_word_sim_embedding, null, data.toStyledString(), null);
- return result;
- }
-
- /**
- * simnet
- * 短文本相似度接口用来判断两个文本的相似度得分。
- * @param text_1 待比较文本1(UTF8编码),最大512字节
- * @param text_2 待比较文本2(UTF8编码),最大512字节
- * options 可选参数:
- * model 默认为"BOW",可选"BOW"、"CNN"与"GRNN"
- */
- Json::Value simnet(
- std::string const & text_1,
- std::string const & text_2,
- const std::map<std::string, std::string> & options)
- {
- Json::Value data;
-
- data["text_1"] = text_1;
- data["text_2"] = text_2;
- std::map<std::string, std::string>::const_iterator it;
- for(it=options.begin(); it!=options.end(); it++)
- {
- data[it->first] = it->second;
- }
- Json::Value result =
- this->request(_simnet, null, data.toStyledString(), null);
- return result;
- }
-
- /**
- * comment_tag
- * 评论观点抽取接口用来提取一条评论句子的关注点和评论观点,并输出评论观点标签及评论观点极性。
- * @param text 评论内容(UTF8编码),最大10240字节
- * options 可选参数:
- * type 评论行业类型,默认为4(餐饮美食)
- */
- Json::Value comment_tag(
- std::string const & text,
- const std::map<std::string, std::string> & options)
- {
- Json::Value data;
-
- data["text"] = text;
- std::map<std::string, std::string>::const_iterator it;
- for(it=options.begin(); it!=options.end(); it++)
- {
- data[it->first] = it->second;
- }
- Json::Value result =
- this->request(_comment_tag, null, data.toStyledString(), null);
- return result;
- }
-
- /**
- * sentiment_classify
- * 对包含主观观点信息的文本进行情感极性类别(积极、消极、中性)的判断,并给出相应的置信度。
- * @param text 文本内容(UTF8编码),最大102400字节
- * options 可选参数:
- */
- Json::Value sentiment_classify(
- std::string const & text,
- const std::map<std::string, std::string> & options)
- {
- Json::Value data;
-
- data["text"] = text;
- std::map<std::string, std::string>::const_iterator it;
- for(it=options.begin(); it!=options.end(); it++)
- {
- data[it->first] = it->second;
- }
- Json::Value result =
- this->request(_sentiment_classify, null, data.toStyledString(), null);
- return result;
- }
-
- };
- }
- #endif
|