|
@@ -0,0 +1,380 @@
|
|
|
|
|
+package com.giantan.data.mds.service.impl;
|
|
|
|
|
+
|
|
|
|
|
+import cnnlp.util.MultiValueHashMap;
|
|
|
|
|
+import com.giantan.data.common.model.CollsSearchRequest;
|
|
|
|
|
+import com.giantan.data.index.dto.DocSearchResp;
|
|
|
|
|
+import com.giantan.data.mds.repository.MdDynamicRepository;
|
|
|
|
|
+import com.giantan.data.mds.repository.MdIndexer;
|
|
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
|
|
+
|
|
|
|
|
+import java.util.*;
|
|
|
|
|
+import java.util.stream.Collectors;
|
|
|
|
|
+
|
|
|
|
|
+@Service
|
|
|
|
|
+public class MdCollsSearchService {
|
|
|
|
|
+ private static final org.slf4j.Logger log
|
|
|
|
|
+ = org.slf4j.LoggerFactory.getLogger(MdCollsSearchService.class);
|
|
|
|
|
+
|
|
|
|
|
+ @Autowired
|
|
|
|
|
+ MdCollectionsService qaCollectionService;
|
|
|
|
|
+
|
|
|
|
|
+ @Autowired
|
|
|
|
|
+ MdDynamicRepository qaDocRepository;
|
|
|
|
|
+
|
|
|
|
|
+ @Autowired
|
|
|
|
|
+ MdIndexer hybridSearch;
|
|
|
|
|
+
|
|
|
|
|
+// @Autowired
|
|
|
|
|
+// GoeSimService goeSimService;
|
|
|
|
|
+
|
|
|
|
|
+// @Autowired
|
|
|
|
|
+// QaDocsService qaDocsService;
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+ public List<DocSearchResp> federatedSearch(CollsSearchRequest req) throws Throwable {
|
|
|
|
|
+ List<String> colls = req.getCollections();
|
|
|
|
|
+ if (colls == null || colls.isEmpty()) {
|
|
|
|
|
+ throw new RuntimeException("No collections found");
|
|
|
|
|
+ }
|
|
|
|
|
+ List<DocSearchResp> rets = new ArrayList<>();
|
|
|
|
|
+
|
|
|
|
|
+ MultiValueHashMap mapping = new MultiValueHashMap();
|
|
|
|
|
+
|
|
|
|
|
+ List<String> collNames = new ArrayList<>();
|
|
|
|
|
+ for (String coll : colls) {
|
|
|
|
|
+ String collId = getStrOfCollId(coll);
|
|
|
|
|
+ String qasCollName = qaDocRepository.getMappingCollection(collId);
|
|
|
|
|
+ collNames.add(qasCollName);
|
|
|
|
|
+ mapping.put(qasCollName, collId);
|
|
|
|
|
+ }
|
|
|
|
|
+ req.setCollections(collNames);
|
|
|
|
|
+
|
|
|
|
|
+ Map<String, List<DocSearchResp>> rs = hybridSearch.federatedSearch(req.toMap());
|
|
|
|
|
+ if (rs != null && !rs.isEmpty()) {
|
|
|
|
|
+ String q = req.getQuery();
|
|
|
|
|
+ rs.forEach((k, v) -> {
|
|
|
|
|
+ String[] collIds = mapping.get(k);
|
|
|
|
|
+ try {
|
|
|
|
|
+ List<DocSearchResp> rs1 = getEntitiesBySearch2(collIds, q, v);
|
|
|
|
|
+ if (rs1 != null && !rs1.isEmpty()) {
|
|
|
|
|
+ rets.addAll(rs1);
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Throwable e) {
|
|
|
|
|
+ throw new RuntimeException(e);
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (rets != null && !rets.isEmpty()) {
|
|
|
|
|
+ // 按 score 降序
|
|
|
|
|
+ rets.sort((a, b) -> {
|
|
|
|
|
+ double sa = toScore(a.getScore());
|
|
|
|
|
+ double sb = toScore(b.getScore());
|
|
|
|
|
+ return Double.compare(sb, sa);
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+ return rets;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String toCollStr(Object o){
|
|
|
|
|
+ if (o instanceof String) {
|
|
|
|
|
+ return (String) o;
|
|
|
|
|
+ }else if (o instanceof Double) {
|
|
|
|
|
+ Double d = (Double) o;
|
|
|
|
|
+ String s = String.valueOf(d.longValue());
|
|
|
|
|
+ return s;
|
|
|
|
|
+ }else if (o instanceof Integer) {
|
|
|
|
|
+ return String.valueOf(o);
|
|
|
|
|
+ }
|
|
|
|
|
+ return String.valueOf(o);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ protected List<DocSearchResp> getEntitiesBySearch2(String[] collIds, String q, List<DocSearchResp> resps) throws Throwable {
|
|
|
|
|
+ if (resps.isEmpty()) {
|
|
|
|
|
+ return List.of();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ Set<String> collSet = new HashSet<>(Arrays.asList(collIds));
|
|
|
|
|
+ List<DocSearchResp> rets = new ArrayList<>();
|
|
|
|
|
+ for (DocSearchResp resp : resps) {
|
|
|
|
|
+ Map<String, Object> metadata = resp.getMetadata();
|
|
|
|
|
+
|
|
|
|
|
+ if (metadata != null) {
|
|
|
|
|
+ Object o = metadata.get(MdIndexer.COLL_ID);
|
|
|
|
|
+ if (o != null) {
|
|
|
|
|
+ String collId1 = toCollStr(o);
|
|
|
|
|
+ if (collSet.contains(collId1)) {
|
|
|
|
|
+ String collectionName = qaCollectionService.getCollectionName(toDocId(collId1));
|
|
|
|
|
+ metadata.put(MdIndexer.COLL_NAME, collectionName);
|
|
|
|
|
+ rets.add(resp);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return rets;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+// public List<GBaseKeyValue> hybridSearch(CollsSearchRequest query) throws Throwable {
|
|
|
|
|
+// List<String> colls = query.getCollections();
|
|
|
|
|
+// if (colls == null || colls.isEmpty()) {
|
|
|
|
|
+// //throw new RuntimeException("No collections found");
|
|
|
|
|
+// colls = new ArrayList<>();
|
|
|
|
|
+// List<GBaseKeyValue> allCollections = qaCollectionService.getAllCollections();
|
|
|
|
|
+// for (GBaseKeyValue coll : allCollections) {
|
|
|
|
|
+// colls.add(coll.getName());
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+//
|
|
|
|
|
+// List<GBaseKeyValue> rs = new ArrayList<>();
|
|
|
|
|
+//
|
|
|
|
|
+// for (String coll : colls) {
|
|
|
|
|
+// Map<String, Object> queryMap = query.toMap();
|
|
|
|
|
+// List<GBaseKeyValue> rs1 = hybridSearch(coll, queryMap);
|
|
|
|
|
+// if (rs1 != null) {
|
|
|
|
|
+// rs.addAll(rs1);
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// if (rs != null && !rs.isEmpty()) {
|
|
|
|
|
+// // 按 score 降序
|
|
|
|
|
+// rs.sort((a, b) -> {
|
|
|
|
|
+// double sa = toScore(a.get("score"));
|
|
|
|
|
+// double sb = toScore(b.get("score"));
|
|
|
|
|
+// return Double.compare(sb, sa);
|
|
|
|
|
+// });
|
|
|
|
|
+// }
|
|
|
|
|
+// return rs;
|
|
|
|
|
+// }
|
|
|
|
|
+
|
|
|
|
|
+ private double toScore(Object v) {
|
|
|
|
|
+ if (v == null) return Double.NEGATIVE_INFINITY;
|
|
|
|
|
+ if (v instanceof Number) return ((Number) v).doubleValue();
|
|
|
|
|
+ return Double.parseDouble(v.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+// public List<GBaseKeyValue> hybridSearch(String coll, Map<String, Object> query) throws Throwable {
|
|
|
|
|
+// String collId = getStrOfCollId(coll);
|
|
|
|
|
+//
|
|
|
|
|
+// String qasCollName = qaDocRepository.getMappingCollection(collId);
|
|
|
|
|
+//
|
|
|
|
|
+// // 这里要判断 collection 是不是 index 到一个同一个milvus的collection,如果 是的话,就要加上 qasName 做过滤
|
|
|
|
|
+// boolean isIcludeCollName = true;
|
|
|
|
|
+// query.put(HybridIndexer.TABLE_ID, collId);
|
|
|
|
|
+// List<DocSearchResp> resps = hybridSearch.hybridSearch(qasCollName, query, isIcludeCollName);
|
|
|
|
|
+//
|
|
|
|
|
+// String q = query.get("query").toString();
|
|
|
|
|
+// List<GBaseKeyValue> rets = getEntitiesBySearch(collId, q, resps);
|
|
|
|
|
+// return rets;
|
|
|
|
|
+// }
|
|
|
|
|
+
|
|
|
|
|
+ protected String getStrOfCollId(String coll) {
|
|
|
|
|
+ int id = qaCollectionService.getCollectionId(coll);
|
|
|
|
|
+ if (id <= 0) {
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
+ return Integer.toString(id);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private int toDocId(Object o) {
|
|
|
|
|
+ if (o instanceof Integer) {
|
|
|
|
|
+ return ((Integer) o).intValue();
|
|
|
|
|
+ } else {
|
|
|
|
|
+ return Integer.parseInt(o.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// private List<GBaseKeyValue> getCollInfos(List<String> colls) throws Throwable {
|
|
|
|
|
+// List<GBaseKeyValue> allCollections = new ArrayList<GBaseKeyValue>();
|
|
|
|
|
+// if (colls.size() == 1 && (colls.get(0).equals("*") || colls.get(0).equalsIgnoreCase("all"))) {
|
|
|
|
|
+// allCollections = qaCollectionService.getAllCollections();
|
|
|
|
|
+// } else {
|
|
|
|
|
+// for (int i = 0; i < colls.size(); i++) {
|
|
|
|
|
+// GBaseKeyValue collObj = qaCollectionService.getKvByName(colls.get(i));
|
|
|
|
|
+// allCollections.add(collObj);
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// return allCollections;
|
|
|
|
|
+// }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+ private void addIntList(List<Integer> is, Object v) {
|
|
|
|
|
+ if (v == null) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (v instanceof Integer) {
|
|
|
|
|
+ int id1 = (Integer) v;
|
|
|
|
|
+ if (!is.contains(id1)) {
|
|
|
|
|
+ is.add(id1);
|
|
|
|
|
+ }
|
|
|
|
|
+ } else if (v instanceof String) {
|
|
|
|
|
+ int id1 = Integer.parseInt((String) v);
|
|
|
|
|
+ if (!is.contains(id1)) {
|
|
|
|
|
+ is.add(id1);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+// protected List<GBaseKeyValue> getEntitiesBySearch(String collId, String q, List<DocSearchResp> resps) throws Throwable {
|
|
|
|
|
+// if (resps.isEmpty()) {
|
|
|
|
|
+// return List.of();
|
|
|
|
|
+// }
|
|
|
|
|
+// List<Integer> ids = new ArrayList<>();
|
|
|
|
|
+// List<GBaseKeyValue> rets = new ArrayList<>();
|
|
|
|
|
+// Map<Integer, Double> scoreMap = new HashMap<>();
|
|
|
|
|
+//
|
|
|
|
|
+// for (DocSearchResp resp : resps) {
|
|
|
|
|
+// Map<String, Object> metadata = resp.getMetadata();
|
|
|
|
|
+//
|
|
|
|
|
+// if (metadata != null) {
|
|
|
|
|
+// Object o = metadata.get(QaIndexer.COLL_ID);
|
|
|
|
|
+// if (o != null && o instanceof String collId1) {
|
|
|
|
|
+// if (collId.equals(collId1)) {
|
|
|
|
|
+// o = metadata.get(QaIndexer.DOC_ID);
|
|
|
|
|
+// addIntList(ids, o);
|
|
|
|
|
+// scoreMap.putIfAbsent(ids.get(ids.size() - 1), resp.getScore());
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// if (!ids.isEmpty()) {
|
|
|
|
|
+// List<GBaseKeyValue> rs2 = qaDocRepository.findAllByIds(collId, ids);
|
|
|
|
|
+// if (rs2 != null) {
|
|
|
|
|
+// for (GBaseKeyValue ro : rs2) {
|
|
|
|
|
+// // 用 oe 计算相似度
|
|
|
|
|
+// GoeSimService.Pair<String, Double> r1 = getOeSimilarity(q, ro);
|
|
|
|
|
+//
|
|
|
|
|
+// double score1 = scoreMap.get(ro.getIntId());
|
|
|
|
|
+//
|
|
|
|
|
+// if (score1 < r1.right()) {
|
|
|
|
|
+// score1 = r1.right();
|
|
|
|
|
+// }
|
|
|
|
|
+// ro.put("score", score1);
|
|
|
|
|
+// ro.put("matched", r1.left());
|
|
|
|
|
+// rets.add(ro);
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+//
|
|
|
|
|
+// return rets;
|
|
|
|
|
+// }
|
|
|
|
|
+
|
|
|
|
|
+// private GoeSimService.Pair<String, Double> getOeSimilarity(String q, GBaseKeyValue qa) {
|
|
|
|
|
+// List<String> ls = new ArrayList<>();
|
|
|
|
|
+// Object o = qa.get("name");
|
|
|
|
|
+// if (o != null) {
|
|
|
|
|
+// ls.add(o.toString());
|
|
|
|
|
+// }
|
|
|
|
|
+// Object o1 = qa.get("altlabels");
|
|
|
|
|
+// if (o1 != null && o1 instanceof List) {
|
|
|
|
|
+// List<String> ls1 = (List<String>) o1;
|
|
|
|
|
+// ls.addAll(ls1);
|
|
|
|
|
+// }
|
|
|
|
|
+// GoeSimService.Pair r = goeSimService.search(q, ls);
|
|
|
|
|
+// return r;
|
|
|
|
|
+// }
|
|
|
|
|
+
|
|
|
|
|
+// public List<GBaseKeyValue> federatedSearch(CollsSearchRequest req) throws Throwable {
|
|
|
|
|
+// List<String> colls = req.getCollections();
|
|
|
|
|
+// if (colls == null || colls.isEmpty()) {
|
|
|
|
|
+// throw new RuntimeException("No collections found");
|
|
|
|
|
+// }
|
|
|
|
|
+// List<GBaseKeyValue> rets = new ArrayList<>();
|
|
|
|
|
+//
|
|
|
|
|
+// MultiValueHashMap mapping = new MultiValueHashMap();
|
|
|
|
|
+//
|
|
|
|
|
+// List<String> collNames = new ArrayList<>();
|
|
|
|
|
+// for (String coll : colls) {
|
|
|
|
|
+// String collId = getStrOfCollId(coll);
|
|
|
|
|
+// String qasCollName = qaDocRepository.getMappingCollection(collId);
|
|
|
|
|
+// collNames.add(qasCollName);
|
|
|
|
|
+// mapping.put(qasCollName, collId);
|
|
|
|
|
+// }
|
|
|
|
|
+// req.setCollections(collNames);
|
|
|
|
|
+//
|
|
|
|
|
+// Map<String, List<DocSearchResp>> rs = hybridSearch.federatedSearch(req.toMap());
|
|
|
|
|
+// if (rs != null && !rs.isEmpty()) {
|
|
|
|
|
+// String q = req.getQuery();
|
|
|
|
|
+// rs.forEach((k, v) -> {
|
|
|
|
|
+// String[] collIds = mapping.get(k);
|
|
|
|
|
+// try {
|
|
|
|
|
+// List<GBaseKeyValue> rs1 = getEntitiesBySearch2(collIds, q, v);
|
|
|
|
|
+// if (rs1 != null && !rs1.isEmpty()) {
|
|
|
|
|
+// rets.addAll(rs1);
|
|
|
|
|
+// }
|
|
|
|
|
+// } catch (Throwable e) {
|
|
|
|
|
+// throw new RuntimeException(e);
|
|
|
|
|
+// }
|
|
|
|
|
+// });
|
|
|
|
|
+// }
|
|
|
|
|
+//
|
|
|
|
|
+// if (rets != null && !rets.isEmpty()) {
|
|
|
|
|
+// // 按 score 降序
|
|
|
|
|
+// rets.sort((a, b) -> {
|
|
|
|
|
+// double sa = toScore(a.get("score"));
|
|
|
|
|
+// double sb = toScore(b.get("score"));
|
|
|
|
|
+// return Double.compare(sb, sa);
|
|
|
|
|
+// });
|
|
|
|
|
+// }
|
|
|
|
|
+// return rets;
|
|
|
|
|
+// }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// protected List<GBaseKeyValue> getEntitiesBySearch2(String[] collIds, String q, List<DocSearchResp> resps) throws Throwable {
|
|
|
|
|
+// if (resps.isEmpty()) {
|
|
|
|
|
+// return List.of();
|
|
|
|
|
+// }
|
|
|
|
|
+//
|
|
|
|
|
+// List<GBaseKeyValue> rets = new ArrayList<>();
|
|
|
|
|
+//
|
|
|
|
|
+// MultiValueHashMap mapping = new MultiValueHashMap();
|
|
|
|
|
+// Set<String> collSet = new HashSet<>(Arrays.asList(collIds));
|
|
|
|
|
+//
|
|
|
|
|
+// Map<String, Double> scoreMap = new HashMap<>();
|
|
|
|
|
+//
|
|
|
|
|
+// for (DocSearchResp resp : resps) {
|
|
|
|
|
+// Map<String, Object> metadata = resp.getMetadata();
|
|
|
|
|
+//
|
|
|
|
|
+// if (metadata != null) {
|
|
|
|
|
+// Object o = metadata.get(QaIndexer.COLL_ID);
|
|
|
|
|
+// if (o != null && o instanceof String collId1) {
|
|
|
|
|
+//
|
|
|
|
|
+// if (collSet.contains(collId1)) {
|
|
|
|
|
+// o = metadata.get(QaIndexer.DOC_ID);
|
|
|
|
|
+// //addIntList(ids, o);
|
|
|
|
|
+// //scoreMap.putIfAbsent(ids.get(ids.size() - 1), resp.getScore());
|
|
|
|
|
+// String did1 = o.toString();
|
|
|
|
|
+// mapping.put(collId1, o.toString());
|
|
|
|
|
+// scoreMap.putIfAbsent(collId1 + ":" + did1, resp.getScore());
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+// Set<Map.Entry<String, String[]>> ens = mapping.entrySet();
|
|
|
|
|
+// for (Map.Entry<String, String[]> e : ens) {
|
|
|
|
|
+// String[] value = e.getValue();
|
|
|
|
|
+// List<Integer> idList = Arrays.stream(value)
|
|
|
|
|
+// .map(Integer::valueOf)
|
|
|
|
|
+// .collect(Collectors.toList());
|
|
|
|
|
+//
|
|
|
|
|
+// String collId = e.getKey();
|
|
|
|
|
+// List<GBaseKeyValue> rs2 = qaDocRepository.findAllByIds(collId, idList);
|
|
|
|
|
+// if (rs2 != null) {
|
|
|
|
|
+// for (GBaseKeyValue ro : rs2) {
|
|
|
|
|
+// // 用 oe 计算相似度
|
|
|
|
|
+// GoeSimService.Pair<String, Double> r1 = getOeSimilarity(q, ro);
|
|
|
|
|
+// double score1 = scoreMap.get(collId + ":" + ro.getIntId());
|
|
|
|
|
+//
|
|
|
|
|
+// if (score1 < r1.right()) {
|
|
|
|
|
+// score1 = r1.right();
|
|
|
|
|
+// }
|
|
|
|
|
+// ro.put("score", score1);
|
|
|
|
|
+// ro.put("matched", r1.left());
|
|
|
|
|
+// rets.add(ro);
|
|
|
|
|
+// }
|
|
|
|
|
+// }
|
|
|
|
|
+//
|
|
|
|
|
+// }
|
|
|
|
|
+// return rets;
|
|
|
|
|
+// }
|
|
|
|
|
+}
|