|
|
@@ -1,13 +1,8 @@
|
|
1
|
1
|
package api.service.quality.impl;
|
|
2
|
|
-
|
|
3
|
|
-import api.entity.database.call.Translate;
|
|
4
|
|
-import api.entity.database.patient.Label;
|
|
5
|
|
-import api.entity.input.quality.RepTranslate;
|
|
6
|
|
-import api.entity.view.quality.ExcelSentiment;
|
|
|
2
|
+import api.entity.view.quality.TextSample;
|
|
7
|
3
|
import api.service.call.ITranslateService;
|
|
8
|
4
|
import api.service.patient.ILabelService;
|
|
9
|
5
|
import api.service.quality.IQualityModelService;
|
|
10
|
|
-
|
|
11
|
6
|
import opennlp.tools.doccat.*;
|
|
12
|
7
|
import opennlp.tools.tokenize.SimpleTokenizer;
|
|
13
|
8
|
import opennlp.tools.util.CollectionObjectStream;
|
|
|
@@ -15,14 +10,30 @@ import opennlp.tools.util.ObjectStream;
|
|
15
|
10
|
import opennlp.tools.util.model.ModelUtil;
|
|
16
|
11
|
import org.apache.poi.ss.usermodel.*;
|
|
17
|
12
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|
18
|
|
-import org.apache.sis.util.resources.Vocabulary;
|
|
19
|
13
|
|
|
|
14
|
+import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
|
|
|
15
|
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
|
|
|
16
|
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
|
|
|
17
|
+import org.deeplearning4j.nn.conf.layers.EmbeddingLayer;
|
|
|
18
|
+import org.deeplearning4j.nn.conf.layers.LSTM;
|
|
|
19
|
+import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
|
|
|
20
|
+import org.deeplearning4j.nn.weights.WeightInit;
|
|
|
21
|
+import org.nd4j.evaluation.classification.Evaluation;
|
|
|
22
|
+import org.nd4j.linalg.activations.Activation;
|
|
|
23
|
+
|
|
|
24
|
+import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
|
|
|
25
|
+import org.nd4j.linalg.learning.config.Adam;
|
|
|
26
|
+import org.nd4j.linalg.lossfunctions.LossFunctions;
|
|
20
|
27
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
21
|
28
|
import org.springframework.stereotype.Service;
|
|
22
|
29
|
import org.springframework.transaction.annotation.Transactional;
|
|
23
|
30
|
import java.io.*;
|
|
24
|
31
|
import java.util.*;
|
|
25
|
32
|
|
|
|
33
|
+
|
|
|
34
|
+
|
|
|
35
|
+
|
|
|
36
|
+
|
|
26
|
37
|
@Transactional
|
|
27
|
38
|
@Service
|
|
28
|
39
|
public class QualityModelServiceImpl implements IQualityModelService {
|
|
|
@@ -58,7 +69,7 @@ public class QualityModelServiceImpl implements IQualityModelService {
|
|
58
|
69
|
);
|
|
59
|
70
|
// 3. 训练模型
|
|
60
|
71
|
DoccatModel model = DocumentCategorizerME.train(
|
|
61
|
|
- "en",
|
|
|
72
|
+ "zh",
|
|
62
|
73
|
sampleStream,
|
|
63
|
74
|
ModelUtil.createDefaultTrainingParameters(),
|
|
64
|
75
|
factory
|
|
|
@@ -80,7 +91,14 @@ public class QualityModelServiceImpl implements IQualityModelService {
|
|
80
|
91
|
@Override
|
|
81
|
92
|
public String OpenNLPDetection(String text)
|
|
82
|
93
|
{
|
|
83
|
|
- return GetSentiment(text);
|
|
|
94
|
+ if(isNumeric(text))
|
|
|
95
|
+ {
|
|
|
96
|
+ return "中立";
|
|
|
97
|
+ }
|
|
|
98
|
+ else
|
|
|
99
|
+ {
|
|
|
100
|
+ return GetSentiment(text);
|
|
|
101
|
+ }
|
|
84
|
102
|
}
|
|
85
|
103
|
private String GetSentiment(String text) {
|
|
86
|
104
|
try {
|
|
|
@@ -103,6 +121,13 @@ public class QualityModelServiceImpl implements IQualityModelService {
|
|
103
|
121
|
return e.getMessage();
|
|
104
|
122
|
}
|
|
105
|
123
|
}
|
|
|
124
|
+ //判断是否为纯数字
|
|
|
125
|
+ public boolean isNumeric(String str) {
|
|
|
126
|
+ if (str == null || str.isEmpty()) {
|
|
|
127
|
+ return false;
|
|
|
128
|
+ }
|
|
|
129
|
+ return str.matches("[0-9]+"); // 匹配1个或多个数字
|
|
|
130
|
+ }
|
|
106
|
131
|
|
|
107
|
132
|
private static List<DocumentSample> loadExcelData(String filePath) {
|
|
108
|
133
|
List<DocumentSample> samples = new ArrayList<>();
|
|
|
@@ -150,21 +175,6 @@ public class QualityModelServiceImpl implements IQualityModelService {
|
|
150
|
175
|
|
|
151
|
176
|
|
|
152
|
177
|
|
|
153
|
|
- // 文本向量化方法(需与训练时完全一致)
|
|
154
|
|
- private double[] vectorizeText1(String text) {
|
|
155
|
|
- // 此处应实现与训练一致的向量化逻辑
|
|
156
|
|
- // 示例:简单词频统计(需替换为实际处理逻辑)
|
|
157
|
|
- double[] features = new double[100];
|
|
158
|
|
- String[] words = text.toLowerCase().split("\\s+");
|
|
159
|
|
- for (String word : words) {
|
|
160
|
|
- int index = Math.abs(word.hashCode()) % 100;
|
|
161
|
|
- features[index] += 1.0;
|
|
162
|
|
- }
|
|
163
|
|
- return features;
|
|
164
|
|
- }
|
|
165
|
|
-
|
|
166
|
|
-
|
|
167
|
|
-
|
|
168
|
178
|
|
|
169
|
179
|
|
|
170
|
180
|
|