Przeglądaj źródła

增加了HybridSearch(连Qas Server),完善Task框架

dwp 7 miesięcy temu
rodzic
commit
674eb8d364
61 zmienionych plików z 2390 dodań i 875 usunięć
  1. 49 0
      gtbook/src/test/java/org/cnnlp/data/util/WikidataParentFetcher.java
  2. 74 0
      gtbook/src/test/java/org/cnnlp/data/util/WikidataTranslator.java
  3. 2 2
      server/pom.xml
  4. 0 49
      server/src/main/java/com/giantan/ai/common/config/DataSourceConfig.java
  5. 0 66
      server/src/main/java/com/giantan/ai/common/config/DataSourceProperties.java
  6. 1 0
      server/src/main/java/com/giantan/ai/common/exception/GlobalExceptionHandler.java
  7. 17 0
      server/src/main/java/com/giantan/ai/common/util/JsonUtil.java
  8. 131 0
      server/src/main/java/com/giantan/data/index/HybridSearch.java
  9. 52 0
      server/src/main/java/com/giantan/data/index/HybridSearch2Controller.java
  10. 108 0
      server/src/main/java/com/giantan/data/index/HybridSearchController.java
  11. 19 0
      server/src/main/java/com/giantan/data/index/IHybridSearch.java
  12. 84 0
      server/src/main/java/com/giantan/data/index/IndexUtils.java
  13. 1 1
      server/src/main/java/com/giantan/data/index/dto/DocReq.java
  14. 56 0
      server/src/main/java/com/giantan/data/index/dto/DocResp.java
  15. 35 0
      server/src/main/java/com/giantan/data/index/dto/DocSearchResp.java
  16. 0 10
      server/src/main/java/com/giantan/data/kvs/kvstore/IDataSource.java
  17. 0 26
      server/src/main/java/com/giantan/data/kvs/pg/AgtypeAwareDataSource.java
  18. 0 31
      server/src/main/java/com/giantan/data/kvs/pg/AgtypeConnectionProvider.java
  19. 0 53
      server/src/main/java/com/giantan/data/kvs/pg/CustomHikariDataSource.java
  20. 0 105
      server/src/main/java/com/giantan/data/kvs/pg/GSysTables.java
  21. 0 49
      server/src/main/java/com/giantan/data/kvs/pg/PgDataSource.java
  22. 1 1
      server/src/main/java/com/giantan/data/kvs/repository/GDynamicRepository.java
  23. 0 12
      server/src/main/java/com/giantan/data/kvs/repository/GRepository.java
  24. 3 3
      server/src/main/java/com/giantan/data/mds/MdsApplication.java
  25. 45 16
      server/src/main/java/com/giantan/data/mds/bot/GChatClient.java
  26. 86 29
      server/src/main/java/com/giantan/data/mds/chunk/DynamicChunkRepository.java
  27. 44 30
      server/src/main/java/com/giantan/data/mds/chunk/MdChunkRepository.java
  28. 14 24
      server/src/main/java/com/giantan/data/mds/config/TaskConfiguration.java
  29. 24 0
      server/src/main/java/com/giantan/data/mds/controller/ChunkController.java
  30. 9 2
      server/src/main/java/com/giantan/data/mds/controller/DownloadController.java
  31. 51 0
      server/src/main/java/com/giantan/data/mds/controller/StatusController.java
  32. 41 9
      server/src/main/java/com/giantan/data/mds/controller/TaskController.java
  33. 0 12
      server/src/main/java/com/giantan/data/mds/service/IHybridSearch.java
  34. 5 2
      server/src/main/java/com/giantan/data/mds/service/IMdChunksService.java
  35. 0 69
      server/src/main/java/com/giantan/data/mds/service/impl/HybridSearch.java
  36. 47 2
      server/src/main/java/com/giantan/data/mds/service/impl/MdChunksService.java
  37. 2 2
      server/src/main/java/com/giantan/data/mds/service/impl/MdDocsService.java
  38. 57 0
      server/src/main/java/com/giantan/data/mds/task/PersistentTaskService.java
  39. 14 13
      server/src/main/java/com/giantan/data/mds/task/impl/BaseTaskHandler.java
  40. 281 5
      server/src/main/java/com/giantan/data/mds/task/impl/ChunksTaskHandler.java
  41. 0 13
      server/src/main/java/com/giantan/data/mds/task/impl/KeywordsTaskHandler.java
  42. 166 0
      server/src/main/java/com/giantan/data/mds/task/impl/MdsTaskHandler.java
  43. 15 2
      server/src/main/java/com/giantan/data/mds/task/impl/SliceTaskHandler.java
  44. 16 0
      server/src/main/java/com/giantan/data/tasks/IPersistentTaskService.java
  45. 9 1
      server/src/main/java/com/giantan/data/tasks/ITaskManager.java
  46. 14 0
      server/src/main/java/com/giantan/data/tasks/Readme.java
  47. 74 49
      server/src/main/java/com/giantan/data/tasks/TaskContext.java
  48. 8 3
      server/src/main/java/com/giantan/data/tasks/TaskEvent.java
  49. 10 32
      server/src/main/java/com/giantan/data/tasks/TaskEventListener.java
  50. 60 9
      server/src/main/java/com/giantan/data/tasks/TaskManager.java
  51. 12 0
      server/src/main/java/com/giantan/data/tasks/TaskObjectStatus.java
  52. 66 0
      server/src/main/java/com/giantan/data/tasks/TaskOperationsStatus.java
  53. 1 1
      server/src/main/java/com/giantan/data/tasks/TaskType.java
  54. 187 3
      server/src/main/java/com/giantan/data/tasks/repository/DynamicTaskRepository.java
  55. 0 30
      server/src/main/java/com/giantan/data/tasks/repository/JsonMapper2.java
  56. 18 1
      server/src/main/java/com/giantan/data/tasks/repository/PersistentTaskManager.java
  57. 153 0
      server/src/main/java/com/giantan/data/tasks/repository/TaskConverter.java
  58. 5 3
      server/src/main/java/com/giantan/data/tasks/repository/TaskStatusHistory.java
  59. 95 95
      server/src/main/java/com/giantan/data/tasks/repository/TaskStatusHistoryRepository.java
  60. 105 0
      server/src/test/java/com/giantan/data/mds/MapDoubleToInt.java
  61. 23 10
      server/src/test/java/com/giantan/data/mds/MdsApplicationTests.java

+ 49 - 0
gtbook/src/test/java/org/cnnlp/data/util/WikidataParentFetcher.java

@@ -0,0 +1,49 @@
+package org.cnnlp.data.util;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.URLEncoder;
+import java.net.URL;
+
+public class WikidataParentFetcher {
+
+    private static final ObjectMapper MAPPER = new ObjectMapper();
+
+    public static void getAllParents(String qid) {
+        try {
+            String sparql = String.format("""
+                SELECT ?parent ?parentLabel WHERE {
+                  wd:%s wdt:P279+ ?parent.
+                  SERVICE wikibase:label { bd:serviceParam wikibase:language "zh,en". }
+                }
+            """, qid);
+
+            String endpoint = "https://query.wikidata.org/sparql?query=" + URLEncoder.encode(sparql, "UTF-8") + "&format=json";
+
+            HttpURLConnection conn = (HttpURLConnection) new URL(endpoint).openConnection();
+            conn.setRequestProperty("User-Agent", "Java Wikidata Client");
+            conn.setRequestMethod("GET");
+
+            try (InputStream is = conn.getInputStream()) {
+                JsonNode root = MAPPER.readTree(is);
+                JsonNode bindings = root.path("results").path("bindings");
+
+                for (JsonNode node : bindings) {
+                    String parentUrl = node.path("parent").path("value").asText();
+                    String label = node.path("parentLabel").path("value").asText();
+                    System.out.println(parentUrl + " → " + label);
+                }
+            }
+
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    public static void main(String[] args) {
+        getAllParents("Q106402388"); // 替换为你的实体ID
+    }
+}

+ 74 - 0
gtbook/src/test/java/org/cnnlp/data/util/WikidataTranslator.java

@@ -0,0 +1,74 @@
+package org.cnnlp.data.util;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+
+public class WikidataTranslator {
+
+    private static final ObjectMapper MAPPER = new ObjectMapper();
+
+    /**
+     * 获取 Wikidata 中文 label
+     *
+     * @param qid Wikidata 实体 ID,例如 Q42
+     * @return 中文 label,如果不存在则返回 null
+     */
+    //"https://www.wikidata.org/w/api.php?action=wbgetentities&ids=%s&languages=zh&format=json",
+    public static String getChineseLabel(String qid) {
+        try {
+            String apiUrl = String.format(
+                    "https://www.wikidata.org/w/api.php?action=wbgetentities&ids=%s&format=json",
+                    qid
+            );
+
+            HttpURLConnection conn = (HttpURLConnection) new URL(apiUrl).openConnection();
+            conn.setRequestMethod("GET");
+            conn.setRequestProperty("User-Agent", "Java Wikidata Client");
+
+            try (InputStream is = conn.getInputStream()) {
+                JsonNode root = MAPPER.readTree(is);
+                JsonNode entities = root.path("entities").path(qid).path("labels").path("zh").path("value");
+                if (!entities.isMissingNode()) {
+                    return entities.asText();
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return null;
+    }
+
+    /**
+     * Fallback: 简单机器翻译示例(可以替换为真实 MT API)
+     */
+    public static String machineTranslate(String text, String targetLang) {
+        // TODO: 调用真实翻译 API
+        return "[翻译]" + text;
+    }
+
+    /**
+     * 综合获取中文 label
+     */
+    public static String getLabel(String qid, String fallbackEnglishLabel) {
+        String zhLabel = getChineseLabel(qid);
+        if (zhLabel != null) {
+            return zhLabel;
+        }
+        // TODO: 可进一步尝试中文维基百科查询
+        return machineTranslate(fallbackEnglishLabel, "zh");
+    }
+
+    public static void main(String[] args) {
+        String qid = "Q42"; // 例如道格拉斯·亚当斯
+        String englishLabel = "Douglas Adams";
+
+        qid = "Q106402388";//"Q112210971";  //United States Army aviation company
+        englishLabel="United States Army aviation company";
+        String zh = getLabel(qid, englishLabel);
+        System.out.println("中文 label: " + zh);
+    }
+}

+ 2 - 2
server/pom.xml

@@ -9,14 +9,14 @@
         <version>1.0.0</version>
     </parent>
 
-    <version>1.6.3</version>
+    <version>2.0.0</version>
     <artifactId>mdserver</artifactId>
 
     <properties>
         <maven.compiler.source>17</maven.compiler.source>
         <maven.compiler.target>17</maven.compiler.target>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <spring.boot.version>3.5.0</spring.boot.version>
+        <spring.boot.version>3.5.4</spring.boot.version>
         <spring-ai.version>1.0.1</spring-ai.version>
     </properties>
 

+ 0 - 49
server/src/main/java/com/giantan/ai/common/config/DataSourceConfig.java

@@ -1,49 +0,0 @@
-package com.giantan.ai.common.config;
-
-import com.giantan.data.kvs.pg.AgtypeAwareDataSource;
-import com.giantan.data.kvs.kvstore.IDataSource;
-import com.zaxxer.hikari.HikariConfig;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-
-import javax.sql.DataSource;
-import java.sql.Connection;
-import java.sql.SQLException;
-
-//@Configuration
-public class DataSourceConfig {
-
-    //@Autowired
-    DataSourceProperties dataSourceProperties;
-
-    public DataSourceConfig() {
-
-    }
-
-    @Bean
-    //@ConfigurationProperties("spring.datasource")
-    public DataSource dataSource() {
-        //return DataSourceBuilder.create().type(HikariDataSource.class).build();
-        HikariConfig config = new HikariConfig();
-        config.setJdbcUrl(dataSourceProperties.getUrl());
-        config.setUsername(dataSourceProperties.getUsername());
-        config.setPassword(dataSourceProperties.getPassword());
-        //config.setConnectionInitSql("CREATE EXTENSION IF NOT EXISTS age;LOAD 'age';SET search_path = ag_catalog, \"$user\", public;");
-        config.setConnectionInitSql("LOAD 'age';SET search_path = ag_catalog, \"$user\", public;");
-
-        //return new CustomHikariDataSource(config);
-        return new AgtypeAwareDataSource(config);
-    }
-
-    @Bean
-    public IDataSource myDataSource() {
-        IDataSource conn = new IDataSource() {
-            @Override
-            public Connection getConnection() throws SQLException {
-                return dataSource().getConnection();
-            }
-        };
-        return conn;
-    }
-}

+ 0 - 66
server/src/main/java/com/giantan/ai/common/config/DataSourceProperties.java

@@ -1,66 +0,0 @@
-package com.giantan.ai.common.config;
-
-//import com.giantan.ai.kvstore.IDataSource;
-//import com.giantan.ai.pg.PgDataSource;
-//import lombok.Data;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.boot.context.properties.ConfigurationProperties;
-import org.springframework.stereotype.Component;
-
-//import java.sql.Connection;
-//import java.sql.SQLException;
-
-@Component
-@ConfigurationProperties(prefix = "spring.datasource")
-//@Data
-public class DataSourceProperties {
-
-    @Value("${spring.datasource.url}")
-    private String url;
-
-    @Value("${spring.datasource.username}")
-    private String username;
-
-    @Value("${spring.datasource.password}")
-    private String password;
-    public DataSourceProperties() {}
-    public DataSourceProperties(String url, String username, String password) {
-        this.url = url;
-        this.username = username;
-        this.password = password;
-    }
-
-    public String getUrl() {
-        return url;
-    }
-
-    public void setUrl(String url) {
-        this.url = url;
-    }
-
-    public String getUsername() {
-        return username;
-    }
-
-    public void setUsername(String username) {
-        this.username = username;
-    }
-
-    public String getPassword() {
-        return password;
-    }
-
-    public void setPassword(String password) {
-        this.password = password;
-    }
-
-//    public IDataSource getConnection() {
-//        IDataSource conn = new IDataSource() {
-//            @Override
-//            public Connection getConnection() throws SQLException {
-//                return PgDataSource.getAgeConnection();
-//            }
-//        };
-//        return conn;
-//    }
-}

+ 1 - 0
server/src/main/java/com/giantan/ai/common/exception/GlobalExceptionHandler.java

@@ -35,6 +35,7 @@ public class GlobalExceptionHandler {
     public ResponseEntity<R<Object>> handleGenericException(Exception ex) {
         ex.printStackTrace();
         R<Object> response = R.fail(500, "服务器内部错误");
+
         return new ResponseEntity<>(response, HttpStatus.INTERNAL_SERVER_ERROR);
     }
 

+ 17 - 0
server/src/main/java/com/giantan/ai/common/util/JsonUtil.java

@@ -4,12 +4,29 @@ import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 
 public class JsonUtil {
     static ObjectMapper objectMapper = new ObjectMapper();
 
    /**
     * Serializes {@code obj} to a JSON string.
     *
     * @throws RuntimeException wrapping the Jackson failure (cause preserved)
     */
    public static String toJson(Object obj) {
        try {
            return objectMapper.writeValueAsString(obj);
        } catch (JsonProcessingException e) {
            throw new RuntimeException(e);
        }
    }
+
    /**
     * Deserializes {@code json} into the type described by {@code typeRef}
     * (use TypeReference to keep generic types, e.g. {@code Map<String, Object>}).
     *
     * @throws RuntimeException wrapping the Jackson failure (cause preserved)
     */
    public static <T> T fromJson(String json, TypeReference<T> typeRef) {
        try {
            return objectMapper.readValue(json, typeRef);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
+
     public static String toJsonString(Object obj) throws JsonProcessingException {
         String s = objectMapper.writeValueAsString(obj);
         return s;

+ 131 - 0
server/src/main/java/com/giantan/data/index/HybridSearch.java

@@ -0,0 +1,131 @@
+package com.giantan.data.index;
+
+
+import com.giantan.ai.common.util.JsonUtil;
+import com.giantan.data.index.dto.DocReq;
+import com.giantan.data.index.dto.DocResp;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+@Service
+public class HybridSearch implements IHybridSearch {
+
+    @Value("${qas.url}")
+    String url = "http://120.78.4.46:7387/v1/collections/";
+    //String url = "http://120.78.4.46:7387/v1/embeddings/embed";
+    String clientInfo = "myClient/11.0.2";
+
+    private List<DocResp> toDocResps(Object o) {
+        List<DocResp> ls = new ArrayList<>();
+        if (o != null && o instanceof List) {
+            List<Map<String, Object>> docList = (List<Map<String, Object>>) o;
+            for (Map<String, Object> doc : docList) {
+                Map<String, Object> cDoc = (Map<String, Object>) IndexUtils.deepCopyAndConvert(doc);
+                DocResp dr = DocResp.fromMap(cDoc);
+                ls.add(dr);
+            }
+        }
+        return ls;
+    }
+
+    @Override
+    public List<DocResp> add(String coll, List<DocReq> docs) throws IOException, InterruptedException {
+        String body = JsonUtil.toJsonString(docs);
+
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(url + coll + "/documents/insert"))
+                .header("Content-Type", "application/json")
+                .header("User-Agent", clientInfo)
+                .method("POST", HttpRequest.BodyPublishers.ofString(body))
+                .build();
+        HttpResponse<String> response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
+        //System.out.println(response.body());
+        Map<String, Object> ret = JsonUtil.fromJsonString(response.body());
+        Object o = ret.get("data");
+        List<DocResp> ls = toDocResps(o);
+        return ls;
+    }
+
+
+    @Override
+    public int delete(String coll, List<String> ids) throws IOException, InterruptedException {
+        String body = JsonUtil.toJsonString(ids);
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(url + coll + "/documents"))
+                .header("Content-Type", "application/json")
+                .header("User-Agent", clientInfo)
+                .method("DELETE", HttpRequest.BodyPublishers.ofString(body))
+                .build();
+        HttpResponse<String> response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
+        //System.out.println(response.body());
+        Map<String, Object> ret = JsonUtil.fromJsonString(response.body());
+        Object o = ret.get("data");
+        if (o != null && o instanceof Integer) {
+            return (Integer) o;
+        }
+        return 0;
+    }
+
+
+    @Override
+    public List<DocResp> getDocumentsByIds(String coll, List<String> ids) throws IOException, InterruptedException {
+        String body = JsonUtil.toJsonString(ids);
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(url + coll + "/documents/batch"))
+                .header("Content-Type", "application/json")
+                // .header("User-Agent", clientInfo)
+                .header("User-Agent", clientInfo)
+                .method("POST", HttpRequest.BodyPublishers.ofString(body))
+                .build();
+        HttpResponse<String> response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
+        String body1 = response.body();
+        //System.out.println(body1);
+        Map<String, Object> ret = JsonUtil.fromJsonString(body1);
+        Object o = ret.get("data");
+        List<DocResp> ls = toDocResps(o);
+        return ls;
+    }
+
+    @Override
+    public List<String> getAllIds(String coll) throws IOException, InterruptedException {
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(url + coll + "/documents/ids"))
+                .header("Content-Type", "application/json")
+                .header("User-Agent", clientInfo)
+                .method("GET", HttpRequest.BodyPublishers.noBody())
+                .build();
+        HttpResponse<String> response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
+
+        Map<String, Object> ret = JsonUtil.fromJsonString(response.body());
+        Object o = ret.get("data");
+        if (o != null && o instanceof List) {
+            return (List<String>) o;
+        }
+        return new ArrayList<>();
+    }
+
+    @Override
+    public boolean deleteAll(String coll) throws IOException, InterruptedException {
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(url + coll + "/clear"))
+                .header("User-Agent", clientInfo)
+                .method("DELETE", HttpRequest.BodyPublishers.noBody())
+                .build();
+        HttpResponse<String> response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
+        //System.out.println(response.body());
+        Map<String, Object> ret = JsonUtil.fromJsonString(response.body());
+        Object o = ret.get("data");
+        return IndexUtils.toBoolean(o);
+    }
+
+
+}

+ 52 - 0
server/src/main/java/com/giantan/data/index/HybridSearch2Controller.java

@@ -0,0 +1,52 @@
+package com.giantan.data.index;
+
+import com.giantan.ai.common.reponse.R;
+import com.giantan.data.index.dto.DocResp;
+import com.giantan.data.kvs.constant.KvConstants;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+
+
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+import org.springframework.web.client.RestTemplate;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Collections;
+import java.util.List;
+
+@RestController
+    @RequestMapping(KvConstants.API_PREFIX + "/collections/{coll}/se")
+    public class HybridSearch2Controller {
+        private static final org.slf4j.Logger log
+                = org.slf4j.LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+        @Autowired
+        IHybridSearch hybridSearch;
+
+        // /by-ids
+        @PostMapping("/documents/batch")
+        public ResponseEntity<R> getDocumentsByIds(@PathVariable String coll, @RequestBody List<String> ids) throws IOException, InterruptedException {
+            List<DocResp> docs = hybridSearch.getDocumentsByIds(coll, ids);
+            return ResponseEntity.ok(R.data(docs));
+        }
+
+        @GetMapping("/{id}")
+        public ResponseEntity<R> getDocuments(@PathVariable String coll, @PathVariable String id) throws IOException, InterruptedException {
+            List<DocResp> docs = hybridSearch.getDocumentsByIds(coll, List.of(id));
+            if (docs.isEmpty()) {
+                return ResponseEntity.ok(R.data(null));
+            }
+            return ResponseEntity.ok(R.data(docs.get(0)));
+        }
+
+        @GetMapping("/ids")
+        public ResponseEntity<R> getAllDocumentIds(@PathVariable String coll) throws IOException, InterruptedException {
+            List<String> ids = hybridSearch.getAllIds(coll);
+            return ResponseEntity.ok(R.data(ids));
+
+        }
+    }

+ 108 - 0
server/src/main/java/com/giantan/data/index/HybridSearchController.java

@@ -0,0 +1,108 @@
+package com.giantan.data.index;
+
+import com.giantan.data.kvs.constant.KvConstants;
+import jakarta.servlet.http.HttpServletRequest;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.http.HttpEntity;
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.HttpMethod;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+import org.springframework.web.client.RestTemplate;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Collections;
+
+@RestController
+@RequestMapping(KvConstants.API_PREFIX + "/collections/{coll}/indexes")
+public class HybridSearchController {
+    private static final org.slf4j.Logger log
+            = org.slf4j.LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+//    @Autowired
+//    IHybridSearch hybridSearch;
+//
+//    // /by-ids
+//    @PostMapping("/documents/batch")
+//    public ResponseEntity<R> getDocumentsByIds(@PathVariable String coll, @RequestBody List<String> ids) throws IOException, InterruptedException {
+//        List<DocResp> docs = hybridSearch.getDocumentsByIds(coll, ids);
+//        return ResponseEntity.ok(R.data(docs));
+//    }
+//
+//    @GetMapping("/{id}")
+//    public ResponseEntity<R> getDocuments( @PathVariable String coll, @PathVariable String id) throws IOException, InterruptedException {
+//        List<DocResp> docs = hybridSearch.getDocumentsByIds(coll, List.of(id));
+//        if (docs.isEmpty()) {
+//            return ResponseEntity.ok(R.data(null));
+//        }
+//        return ResponseEntity.ok(R.data(docs.get(0)));
+//    }
+//
+//    @GetMapping("/ids")
+//    public ResponseEntity<R> getAllDocumentIds( @PathVariable String coll) {
+//        List<String> ids = hybridSearch.getAllIds(coll);
+//        return ResponseEntity.ok(R.data(ids));
+//
+//    }
+
+    @Value("${qas.url}")
+    String url = "http://120.78.4.46:7387/v1/collections/";
+
+    private final RestTemplate restTemplate = new RestTemplate();
+
+    //private static final String SRC_PREFIX = "http://127.0.0.1:18211/v1/md";
+    private String TARGET_PREFIX = "http://120.78.4.46:7387/v1";
+
+    @RequestMapping("/**")
+    public ResponseEntity<byte[]> proxyAll(HttpServletRequest request,
+                                           @RequestBody(required = false) byte[] body) {
+
+        String requestUri = request.getRequestURI(); // 原始 URI
+        String query = request.getQueryString();
+
+        // 替换前缀
+        //String targetPath = requestUri.replaceFirst("/v1/md", "");
+        int ii = requestUri.indexOf(KvConstants.API_PREFIX);
+
+        // 去掉 /indexes
+        String targetPath = requestUri.substring(ii+KvConstants.API_PREFIX.length());
+        targetPath = targetPath.replaceFirst("/indexes", "");
+
+        TARGET_PREFIX = url.replaceFirst("/collections","");
+        // 拼接目标 URL
+        String targetUrl = TARGET_PREFIX + targetPath + (query != null ? "?" + query : "");
+
+
+        // 构造请求头
+        HttpHeaders headers = new HttpHeaders();
+        Collections.list(request.getHeaderNames())
+                .forEach(name -> headers.add(name, request.getHeader(name)));
+
+        // 读取 HTTP 方法
+        HttpMethod method = HttpMethod.valueOf(request.getMethod());
+
+        // 打印请求日志
+        int requestSize = body != null ? body.length : 0;
+        log.info("[Qas] " + method + " " + targetUrl + " | request size: " + requestSize + " bytes");
+
+        // 构造请求实体
+        HttpEntity<byte[]> entity = new HttpEntity<>(body, headers);
+
+        // 转发请求
+        ResponseEntity<byte[]> response = restTemplate.exchange(
+                targetUrl,
+                method,
+                entity,
+                byte[].class
+        );
+
+        // 打印响应日志
+        int responseSize = response.getBody() != null ? response.getBody().length : 0;
+        log.info("[Qas] response status: " + response.getStatusCode() + " | response size: " + responseSize + " bytes");
+
+        // 返回原始响应
+        return ResponseEntity
+                .status(response.getStatusCode())
+                .headers(response.getHeaders())
+                .body(response.getBody());
+    }
+}

+ 19 - 0
server/src/main/java/com/giantan/data/index/IHybridSearch.java

@@ -0,0 +1,19 @@
+package com.giantan.data.index;
+
+import com.giantan.data.index.dto.DocReq;
+import com.giantan.data.index.dto.DocResp;
+
+import java.io.IOException;
+import java.util.List;
+
/**
 * Contract for the Qas hybrid-search document store. Implementations talk to a
 * remote collections API over HTTP, so every method may block on I/O and may
 * throw {@link IOException} / {@link InterruptedException}.
 */
public interface IHybridSearch {
    /** Inserts the given documents into collection {@code coll}; returns the documents as echoed back by the server. */
    List<DocResp> add(String coll, List<DocReq> docs) throws IOException, InterruptedException;

    /** Deletes the documents with the given ids; returns the deletion count reported by the server (0 when unknown). */
    int delete(String coll, List<String> ids) throws IOException, InterruptedException;

    /** Fetches the documents with the given ids from collection {@code coll}. */
    List<DocResp> getDocumentsByIds(String coll, List<String> ids) throws IOException, InterruptedException;

    /** Lists every document id stored in collection {@code coll}. */
    List<String> getAllIds(String coll) throws IOException, InterruptedException;

    /** Removes every document from collection {@code coll}; returns the server's boolean result. */
    boolean deleteAll(String coll) throws IOException, InterruptedException;
}

+ 84 - 0
server/src/main/java/com/giantan/data/index/IndexUtils.java

@@ -0,0 +1,84 @@
+package com.giantan.data.index;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
public class IndexUtils {

    /**
     * Narrows a whole-number Double to the smallest fitting integral box:
     * Integer when the value fits in int range, otherwise Long.
     *
     * <p>Fix: the two branches were previously inverted, so out-of-range values
     * were truncated via {@code intValue()} (silent overflow) and in-range
     * values were boxed as Long.
     */
    public static Object toIntOrLong(Double d) {
        if (d > Integer.MAX_VALUE || d < Integer.MIN_VALUE) {
            return d.longValue(); // too big for int: keep full precision in a Long
        }
        return d.intValue();
    }

    /**
     * In place: walks a nested Map/List structure and replaces every
     * whole-number Double value with an Integer/Long (JSON parsers often
     * decode all numbers as Double).
     */
    @SuppressWarnings("unchecked")
    public static void convertDoubleToInt(Object obj) {
        if (obj instanceof Map<?, ?> map) {
            Map<String, Object> typedMap = (Map<String, Object>) map;
            typedMap.forEach((k, v) -> {
                if (v instanceof Double d) {
                    if (d % 1 == 0) {
                        // Replacing an existing key is not a structural
                        // modification, so this is safe inside forEach.
                        typedMap.put((String) k, toIntOrLong(d));
                    }
                } else if (v instanceof Map<?, ?> || v instanceof List<?>) {
                    convertDoubleToInt(v); // recurse into nested containers
                }
            });
        } else if (obj instanceof List<?> list) {
            for (int i = 0; i < list.size(); i++) {
                Object v = list.get(i);
                if (v instanceof Double d) {
                    if (d % 1 == 0) {
                        List<Object> typedList = (List<Object>) list;
                        typedList.set(i, toIntOrLong(d));
                    }
                } else if (v instanceof Map<?, ?> || v instanceof List<?>) {
                    convertDoubleToInt(v);
                }
            }
        }
    }

    /**
     * Deep-copies a nested Map/List structure, converting whole-number Doubles
     * to Integer/Long along the way. Leaf values other than Double are shared
     * with the original, not cloned.
     */
    public static Object deepCopyAndConvert(Object obj) {
        if (obj instanceof Map<?, ?> map) {
            Map<String, Object> newMap = new HashMap<>();
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                newMap.put((String) entry.getKey(), deepCopyAndConvert(entry.getValue()));
            }
            return newMap;
        } else if (obj instanceof List<?> list) {
            List<Object> newList = new ArrayList<>(list.size());
            for (Object item : list) {
                newList.add(deepCopyAndConvert(item));
            }
            return newList;
        } else if (obj instanceof Double d) {
            return d % 1 == 0 ? toIntOrLong(d) : d;
        }
        return obj; // every other type passes through unchanged
    }

    /**
     * Lenient boolean coercion: a Boolean is unboxed, a String is parsed
     * ("true", case-insensitive), everything else — including null — is false.
     *
     * <p>Fix: previously compared {@code b == Boolean.TRUE} by reference, which
     * fails for Boolean instances not from the cache (e.g. deserialized ones).
     */
    public static boolean toBoolean(Object obj) {
        if (obj instanceof Boolean b) {
            return b; // unboxing, not reference identity
        }
        if (obj instanceof String s) {
            return Boolean.parseBoolean(s);
        }
        return false;
    }
}

+ 1 - 1
server/src/main/java/com/giantan/data/mds/service/impl/DocReq.java → server/src/main/java/com/giantan/data/index/dto/DocReq.java

@@ -1,4 +1,4 @@
-package com.giantan.data.mds.service.impl;
+package com.giantan.data.index.dto;
 
 import lombok.Data;
 import java.util.List;

+ 56 - 0
server/src/main/java/com/giantan/data/index/dto/DocResp.java

@@ -0,0 +1,56 @@
+package com.giantan.data.index.dto;
+
+import lombok.Data;
+
+import java.util.List;
+import java.util.Map;
+
+@Data
+public class DocResp {
+    protected String id;
+    protected String text;
+    protected List<String> tags;
+    protected Map<String, Object> metadata;
+    //    protected long createTime;
+    protected String createTime;
+
+    public DocResp() {
+    }
+
+    public DocResp(String id, String text, List<String> tags, Map<String, Object> metadata) {
+        this.id = id;
+        this.text = text;
+        this.tags = tags;
+        this.metadata = metadata;
+    }
+
+    public DocResp(String id, String text, List<String> tags, Map<String, Object> metadata, String createTime) {
+        this.id = id;
+        this.text = text;
+        this.tags = tags;
+        this.metadata = metadata;
+        this.createTime = createTime;
+    }
+
+    public static DocResp fromMap(Map<String, Object> map) {
+        DocResp req = new DocResp();
+        // guard against a missing "id" key like every other field below,
+        // instead of NPE-ing on map.get("id").toString()
+        Object idObj = map.get("id");
+        req.id = idObj == null ? null : idObj.toString();
+        req.text = (String) map.get("text");
+
+        Object o = map.get("tags");
+        if (o != null) {
+            req.tags = (List<String>) o;
+        }
+
+        o = map.get("metadata");
+        if (o != null) {
+            req.metadata = (Map<String, Object>) o;
+        }
+
+        o = map.get("createTime");
+        if (o != null) {
+            req.createTime = (String) o;
+        }
+        return req;
+    }
+}

+ 35 - 0
server/src/main/java/com/giantan/data/index/dto/DocSearchResp.java

@@ -0,0 +1,35 @@
+package com.giantan.data.index.dto;
+
+import lombok.Data;
+
+import java.util.List;
+import java.util.Map;
+
+@Data
+public class DocSearchResp extends DocResp {
+    protected double score;
+
+    public DocSearchResp() {
+    }
+
+    public DocSearchResp(String id, String text, List<String> tags, Map<String, Object> metadata, double score) {
+        super(id, text, tags, metadata);
+        this.score = score;
+    }
+
+    public DocSearchResp(String id, String text, List<String> tags, Map<String, Object> metadata, String createTime, double score) {
+        super(id, text, tags, metadata, createTime);
+        this.score = score;
+    }
+
+    @Override
+    public String toString() {
+        return "DocSearchResp{" +
+                "id='" + id + '\'' +
+                ", text='" + text + '\'' +
+                ", tags=" + tags +
+                ", metadata=" + metadata +
+                ", score=" + score +
+                '}';
+    }
+}

+ 0 - 10
server/src/main/java/com/giantan/data/kvs/kvstore/IDataSource.java

@@ -1,10 +0,0 @@
-package com.giantan.data.kvs.kvstore;
-
-import java.sql.Connection;
-import java.sql.SQLException;
-
-public interface IDataSource {
-
-   Connection getConnection() throws SQLException;
-
-}

+ 0 - 26
server/src/main/java/com/giantan/data/kvs/pg/AgtypeAwareDataSource.java

@@ -1,26 +0,0 @@
-package com.giantan.data.kvs.pg;
-
-import com.zaxxer.hikari.HikariConfig;
-import com.zaxxer.hikari.HikariDataSource;
-
-import java.sql.Connection;
-import java.sql.SQLException;
-
-public class AgtypeAwareDataSource extends HikariDataSource {
-
-    public AgtypeAwareDataSource(HikariConfig config) {
-        super(config);
-    }
-
-    @Override
-    public Connection getConnection() throws SQLException {
-        Connection conn = super.getConnection();
-        return AgtypeConnectionProvider.prepare(conn);
-    }
-
-    @Override
-    public Connection getConnection(String username, String password) throws SQLException {
-        Connection conn = super.getConnection(username, password);
-        return AgtypeConnectionProvider.prepare(conn);
-    }
-}

+ 0 - 31
server/src/main/java/com/giantan/data/kvs/pg/AgtypeConnectionProvider.java

@@ -1,31 +0,0 @@
-package com.giantan.data.kvs.pg;
-
-
-import org.apache.age.jdbc.base.Agtype;
-import org.postgresql.jdbc.PgConnection;
-
-import java.sql.Connection;
-import java.sql.SQLException;
-import java.util.Collections;
-import java.util.Set;
-import java.util.WeakHashMap;
-
-public class AgtypeConnectionProvider {
-
-    // 用于标记哪些连接已经注册过 agtype
-    private static final Set<PgConnection> registeredConnections =
-            Collections.newSetFromMap(new WeakHashMap<>());
-
-    public static Connection prepare(Connection conn) throws SQLException {
-        PgConnection pgConn = conn.unwrap(PgConnection.class);
-
-        synchronized (registeredConnections) {
-            if (!registeredConnections.contains(pgConn)) {
-                pgConn.addDataType("agtype", Agtype.class);
-                registeredConnections.add(pgConn);
-            }
-        }
-
-        return conn;
-    }
-}

+ 0 - 53
server/src/main/java/com/giantan/data/kvs/pg/CustomHikariDataSource.java

@@ -1,53 +0,0 @@
-package com.giantan.data.kvs.pg;
-
-import com.zaxxer.hikari.HikariConfig;
-import com.zaxxer.hikari.HikariDataSource;
-import org.apache.age.jdbc.base.Agtype;
-import org.postgresql.jdbc.PgConnection;
-
-import java.sql.Connection;
-import java.sql.SQLException;
-import java.util.Collections;
-import java.util.Set;
-import java.util.WeakHashMap;
-
-public class CustomHikariDataSource extends HikariDataSource {
-
-    private static final Set<PgConnection> registeredConnections =
-            Collections.newSetFromMap(new WeakHashMap<>());
-
-    public CustomHikariDataSource() {}
-    public CustomHikariDataSource(HikariConfig config) {
-        super(config);
-    }
-
-    @Override
-    public Connection getConnection() throws SQLException {
-        Connection con = super.getConnection();
-
-        // unwrap 获取底层 PgConnection 并处理
-        PgConnection pgConn = con.unwrap(PgConnection.class);
-
-        //PostgreSQL JDBC 驱动默认不持久化 ClientInfo 设置
-        //pgConn.setClientInfo(...) 实际上是一个 空操作(no-op),不会真的存储或返回设置的值。
-//        if (pgConn != null) {
-//            // 防止重复注册
-//            String marker = pgConn.getClientInfo("agtype-registered");
-//            System.out.println("marker=" + marker);
-//            if (!"true".equals(marker)) {
-//                pgConn.addDataType("agtype", Agtype.class);
-//                pgConn.setClientInfo("agtype-registered", "true");
-//            }
-//            //pgConn.addDataType("agtype", Agtype.class);
-//        }
-        synchronized (registeredConnections) {
-            // 利用 Connection 上的 hashCode 做注册标记(不影响池复用)
-            if (!registeredConnections.contains(pgConn)) {
-                //System.out.println("Registered connection: " + pgConn);
-                pgConn.addDataType("agtype", Agtype.class);
-                registeredConnections.add(pgConn);
-            }
-        }
-        return con;
-    }
-}

+ 0 - 105
server/src/main/java/com/giantan/data/kvs/pg/GSysTables.java

@@ -1,105 +0,0 @@
-package com.giantan.data.kvs.pg;
-
-import java.sql.*;
-import java.util.List;
-
-//// gkg_catalog
-public class GSysTables {
-
-    private static final org.slf4j.Logger log
-            = org.slf4j.LoggerFactory.getLogger(GSysTables.class);
-
-//    public static PgConnection connection;
-//    static final String DB_URL = "jdbc:postgresql://localhost:5432/gkg3";
-//    static final String USER = "postgres";
-//    static final String PASS = "123456";
-
-    public static Connection getConnection() throws SQLException {
-        //return DriverManager.getConnection(DB_URL, USER, PASS);
-        return PgDataSource.getConnection();
-    }
-
-    // 创建 schema
-    public static void createSchema(String schemaName) {
-        String sql = "CREATE SCHEMA IF NOT EXISTS " + schemaName;
-        try (Connection conn = getConnection();
-             PreparedStatement stmt = conn.prepareStatement(sql)) {
-            stmt.executeUpdate();
-            log.info("Schema " + schemaName + " created successfully.");
-        } catch (SQLException e) {
-            e.printStackTrace();
-        }
-    }
-
-    // 删除 schema
-    public static void dropSchema(String schemaName, boolean cascade) {
-        String sql = "DROP SCHEMA IF EXISTS " + schemaName;
-        if (cascade) {
-            sql += " CASCADE";  // 删除该 schema 中所有对象
-        }
-        try (Connection conn = getConnection();
-             PreparedStatement stmt = conn.prepareStatement(sql)) {
-            stmt.executeUpdate();
-            log.info("Schema " + schemaName + " dropped successfully.");
-        } catch (SQLException e) {
-            e.printStackTrace();
-        }
-    }
-
-    // 查询所有 schema
-    public static void listSchemas() {
-        String sql = "SELECT schema_name FROM information_schema.schemata";
-        try (Connection conn = getConnection();
-             PreparedStatement stmt = conn.prepareStatement(sql);
-             ResultSet rs = stmt.executeQuery()) {
-            while (rs.next()) {
-                String schemaName = rs.getString("schema_name");
-                log.info("Schema: " + schemaName);
-            }
-        } catch (SQLException e) {
-            e.printStackTrace();
-        }
-    }
-
-    // 重命名 schema
-    public static void renameSchema(String oldSchemaName, String newSchemaName) {
-        String sql = "ALTER SCHEMA " + oldSchemaName + " RENAME TO " + newSchemaName;
-        try (Connection conn = getConnection();
-             PreparedStatement stmt = conn.prepareStatement(sql)) {
-            stmt.executeUpdate();
-            log.info("Schema " + oldSchemaName + " renamed to " + newSchemaName + ".");
-        } catch (SQLException e) {
-            e.printStackTrace();
-        }
-    }
-
-    public static void executeSql(String sql) {
-        try (Connection conn = getConnection();
-             Statement stmt = conn.createStatement()) {
-            // 启用 hstore 扩展
-            //stmt.execute("CREATE EXTENSION IF NOT EXISTS hstore;");
-            stmt.execute(sql);
-            //System.out.println("Table 'kv_db.test1' created successfully (if not exists).");
-
-        } catch (SQLException e) {
-            e.printStackTrace();
-            throw new RuntimeException("Failed to create table", e);
-        }
-    }
-
-    public static void executeSql(List<String> sqls) {
-        try (Connection conn = getConnection();
-             Statement stmt = conn.createStatement()) {
-            // 启用 hstore 扩展
-            //stmt.execute("CREATE EXTENSION IF NOT EXISTS hstore;");
-            for (String sql : sqls) {
-                log.info("Executing sql: " + sql);
-                stmt.execute(sql);
-            }
-            log.info("Successfully execute " + sqls.size() + " statements.");
-        } catch (SQLException e) {
-            //log.error("Failed to create table", e);
-            throw new RuntimeException("Failed to execute sql ", e);
-        }
-    }
-}

+ 0 - 49
server/src/main/java/com/giantan/data/kvs/pg/PgDataSource.java

@@ -1,49 +0,0 @@
-package com.giantan.data.kvs.pg;
-
-import com.giantan.ai.common.config.DataSourceProperties;
-import com.zaxxer.hikari.HikariConfig;
-import com.zaxxer.hikari.HikariDataSource;
-import jakarta.annotation.PostConstruct;
-import lombok.RequiredArgsConstructor;
-import org.apache.age.jdbc.base.Agtype;
-import org.postgresql.jdbc.PgConnection;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Component;
-
-import java.sql.Connection;
-import java.sql.SQLException;
-
-//@Component
-@RequiredArgsConstructor
-public class PgDataSource {
-
-    //@Autowired
-    private DataSourceProperties properties;
-    private static HikariDataSource ds;
-
-    @PostConstruct
-    public void init() {
-        HikariConfig config = new HikariConfig();
-        config.setJdbcUrl(properties.getUrl());
-        config.setUsername(properties.getUsername());
-        config.setPassword(properties.getPassword());
-        //config.setConnectionInitSql("CREATE EXTENSION IF NOT EXISTS age;LOAD 'age';SET search_path = ag_catalog, \"$user\", public;");
-        config.setConnectionInitSql("LOAD 'age';SET search_path = ag_catalog, \"$user\", public;");
-
-        ds = new HikariDataSource(config);
-       }
-
-    public static Connection getAgeConnection() throws SQLException {
-        Connection con = ds.getConnection();
-        Connection realConnection = con.unwrap(PgConnection.class);
-        if (realConnection instanceof PgConnection) {
-            ((PgConnection)realConnection).addDataType("agtype", Agtype.class);
-        }
-        return con;
-    }
-
-    public static Connection getConnection() throws SQLException {
-        Connection con = ds.getConnection();
-        return con;
-    }
-}

+ 1 - 1
server/src/main/java/com/giantan/data/kvs/repository/GDynamicRepository.java

@@ -256,7 +256,7 @@ public class GDynamicRepository implements IGDynamicRepository {
     @Override
     public List<Integer> saveAll(String collId, List<GBaseKeyValue> kvs) throws Throwable {
         //String sql = String.format("INSERT INTO %s.%s (gid, name, attributes) VALUES (?, ?, ?::jsonb)", schema, tablePrefix);
-        String sql = String.format("INSERT INTO %s.%s (gid, name, altlabels, mark, description, tags, path, attributes) VALUES (?, ?, ?, ?, ?, ?, ?, ?::jsonb) ;", schema, tableName(collId));
+        String sql = String.format("INSERT INTO %s.%s (gid, name, altlabels, mark, description, tags, path, attributes) VALUES (?, ?, ?, ?, ?, ?, ?, ?::jsonb) ", schema, tableName(collId));
 
         List<GEntity> entities = GConverter.toEntity(kvs, idGenerator);
 

+ 0 - 12
server/src/main/java/com/giantan/data/kvs/repository/GRepository.java

@@ -146,18 +146,6 @@ public class GRepository implements IGkvRepository {
         }
     }
 
-//    private Array toSqlArray(List<String> ls) {
-//        if (ls == null) {
-//            return null;
-//        }
-//        try {
-//            // 使用 JdbcTemplate 连接的 DataSource 创建 Array
-//            return jdbc.getDataSource().getConnection().createArrayOf("TEXT", ls.toArray(new String[0]));
-//        } catch (SQLException e) {
-//            throw new RuntimeException("Error creating SQL Array", e);
-//        }
-//    }
-
     private Array toSqlArray(List<String> ls) {
         if (ls == null) {
             return null;

+ 3 - 3
server/src/main/java/com/giantan/data/mds/MdsApplication.java

@@ -1,7 +1,6 @@
 package com.giantan.data.mds;
 
 
-import com.giantan.data.mds.task.impl.BaseTaskHandler;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.context.annotation.ComponentScan;
@@ -9,14 +8,15 @@ import org.springframework.scheduling.annotation.EnableAsync;
 
 @SpringBootApplication
 @EnableAsync
-//@ComponentScan(basePackages = "com.giantan")
+@ComponentScan(basePackages = {"com.giantan.data.mds", "com.giantan.data.index"})
+//@ComponentScan("com.giantan.data.se")
 public class MdsApplication {
     private static final org.slf4j.Logger log
             = org.slf4j.LoggerFactory.getLogger(MdsApplication.class);
 
     public static void main(String[] args) {
         SpringApplication.run(MdsApplication.class, args);
-        log.info("Mds server started. Version 1.6.3");
+        log.info("Mds server started. Version 2.0.0");
     }
 
 }

+ 45 - 16
server/src/main/java/com/giantan/data/mds/bot/GChatClient.java

@@ -54,20 +54,32 @@ public class GChatClient {
     ChatClient openAiChatClient;
 
     //
+
+    private String getJsonString(String js) {
+        if (js == null) return null;
+        int i1 = js.indexOf("{");
+        int i2 = js.lastIndexOf("}");
+        if (i1 >= 0 && i2 >= 0 && i1 < i2) {
+            return js.substring(i1, i2 + 1);
+        }
+        return null;
+    }
+
     public String ask(String question) {
         //SystemPromptTemplate st = new SystemPromptTemplate();
         ChatClient.CallResponseSpec ret = deepSeekChatClient.prompt(question).call();
         String s = ret.content();
-        if (s != null){
-            boolean b = s.startsWith("```json");
-            if (b){
-                s = s.substring("```json".length());
-            }
-            b = s.endsWith("```");
-            if (b){
-                s = s.substring(0, s.length()-3);
-            }
-        }
+//        if (s != null){
+//            boolean b = s.startsWith("```json");
+//            if (b){
+//                s = s.substring("```json".length());
+//            }
+//            b = s.endsWith("```");
+//            if (b){
+//                s = s.substring(0, s.length()-3);
+//            }
+//        }
+        s = getJsonString(s);
         return s;
     }
 
@@ -84,15 +96,32 @@ public class GChatClient {
 //        return ret;
 //    }
 
-    public Map<String, Object> getKeywordsAndQuestions(String text,Map<String,String> metadata) throws JsonProcessingException {
+    public Map<String, Object> getKeywordsAndQuestions(String text, Map<String, String> metadata) {
         String prompt = ExtractPrompts.getKeywordAndQuestionPrompt(text, metadata);
         //String ret = askOpenai(prompt);
-        String ret = ask(prompt);
-        if (ret == null) {
-            return null;
+        for (int i = 0; i < 2; i++) {
+            String ret = ask(prompt);
+            if (ret == null) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                    // restore the interrupt flag instead of swallowing it
+                    Thread.currentThread().interrupt();
+                }
+            } else {
+                //System.out.println(ret);
+                try {
+                    Map<String, Object> map = JsonUtil.fromJsonString(ret);
+                    if (map != null) {
+                        return map;
+                    }
+                } catch (Exception e) {
+                    try {
+                        Thread.sleep(100);
+                    } catch (InterruptedException e1) {
+                        // restore the interrupt flag instead of swallowing it
+                        Thread.currentThread().interrupt();
+                    }
+                }
+            }
+            }
         }
-        //System.out.println(ret);
-        return JsonUtil.fromJsonString(ret);
+        return null;
     }
 
 }

+ 86 - 29
server/src/main/java/com/giantan/data/mds/chunk/DynamicChunkRepository.java

@@ -4,7 +4,6 @@ import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.giantan.ai.util.id.IdGenerator;
 import com.giantan.ai.util.id.UuidGenerator;
-import com.giantan.data.kvs.kvstore.GBaseKeyValue;
 import com.giantan.data.util.JdbcUtils;
 import org.springframework.jdbc.core.JdbcTemplate;
 
@@ -89,22 +88,80 @@ public class DynamicChunkRepository {
     }
 
     public List<MdChunk> findAll(String collId) {
-        String sql = String.format("SELECT * FROM %s ORDER BY chunk_index",tableName(collId));
+        String sql = String.format("SELECT * FROM %s ORDER BY chunk_index", tableName(collId));
         return jdbc.query(sql, this::mapRow);
     }
 
     public MdChunk findById(String collId, Long id) {
-        String sql = String.format("SELECT * FROM %s WHERE id = ?",tableName(collId));
+        String sql = String.format("SELECT * FROM %s WHERE id = ?", tableName(collId));
         List<MdChunk> rets = jdbc.query(sql, new Object[]{id}, this::mapRow);
         return rets.isEmpty() ? null : rets.get(0);
     }
 
+    public List<String> findUidsByMdId(String collId, int mdId) {
+        String sql1 = "SELECT section_path FROM %s WHERE md_id = ? ORDER BY section_path";
+        String sql = String.format(sql1, tableName(collId));
+        return jdbc.queryForList(sql, String.class, mdId);
+    }
+
+    public String findUidById(String collId, Long id) {
+        String sql1 = "SELECT section_path FROM %s WHERE id = ?";
+        String sql = String.format(sql1, tableName(collId));
+        List<String> rets = jdbc.queryForList(sql, String.class, id);
+        return rets.isEmpty() ? null : rets.get(0);
+    }
+
+    public List<String> findUidsByIds(String collId, List<Long> ids) {
+        String sql1 = """
+                SELECT section_path
+                FROM %s
+                WHERE id = ANY (?)
+                ORDER BY array_position(?, id)
+                """;
+        String sql = String.format(sql1, tableName(collId));
+        Long[] idArray = ids.toArray(new Long[0]);
+        return jdbc.queryForList(sql, String.class, idArray, idArray);
+    }
+
     public long deleteByMdId(String collId, Integer mdId) {
         //String sql = "DELETE FROM %s WHERE md_id = ?";
         String sql = String.format("DELETE FROM %s WHERE md_id = ?", tableName(collId));
         return jdbc.update(sql, mdId);
     }
 
+//    public int deleteByIds1(String collId, List<Long> ids) {
+//        String sql1 = "DELETE FROM %s WHERE id = ANY(?)";
+//        String sql = String.format(sql1, tableName(collId));
+//        Long[] idArray = ids.toArray(new Long[0]);
+//        return jdbc.update(sql, (Object) idArray);
+//    }
+
+
+    //
+    public int deleteByIds(String collId, List<Long> ids) {
+        if (ids == null || ids.isEmpty()) {
+            return 0;
+        }
+
+        String sql1 = "DELETE FROM %s WHERE id = ANY(?)";
+        String sql = String.format(sql1, tableName(collId));
+
+        final int batchSize = 1000;
+        int totalDeleted = 0;
+        if (ids.size() > batchSize) {
+            for (int i = 0; i < ids.size(); i += batchSize) {
+                List<Long> batch = ids.subList(i, Math.min(i + batchSize, ids.size()));
+                Long[] idArray = batch.toArray(new Long[0]);
+                totalDeleted += jdbc.update(sql, (Object) idArray);
+            }
+        } else {
+            Long[] idArray = ids.toArray(new Long[0]);
+            totalDeleted = jdbc.update(sql, (Object) idArray);
+        }
+        return totalDeleted;
+    }
+
+
 //    public Integer save(String collId, MdChunk chunk) {
 //        String sql1 = """
 //                    INSERT INTO %s (
@@ -137,12 +194,12 @@ public class DynamicChunkRepository {
 
     public Integer save(String collId, MdChunk chunk) {
         String sqlTemplate = """
-            INSERT INTO %s (
-                md_id, chunk_index, content, plain_text, embedding, chunk_type,
-                paragraph_start, paragraph_end, offset_start, offset_end, section_path,
-                keywords, metadata, extra
-            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?::jsonb, ?::jsonb)
-            """;
+                INSERT INTO %s (
+                    md_id, chunk_index, content, plain_text, embedding, chunk_type,
+                    paragraph_start, paragraph_end, offset_start, offset_end, section_path,
+                    keywords, metadata, extra
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?::jsonb, ?::jsonb)
+                """;
         String sql = String.format(sqlTemplate, tableName(collId));
 
         return jdbc.update(con -> {
@@ -189,7 +246,6 @@ public class DynamicChunkRepository {
     }
 
 
-
     public List<Integer> saveAll(String collId, List<MdChunk> chunks) {
         String sql1 = """
                     INSERT INTO %s (
@@ -222,7 +278,7 @@ public class DynamicChunkRepository {
                 ps.setObject(13, toJson(chunk.getMetadata()));
                 ps.setObject(14, toJson(chunk.getExtra()));
 
-                JdbcUtils.setStringArray(ps,12,chunk.getKeywords());
+                JdbcUtils.setStringArray(ps, 12, chunk.getKeywords());
             });
             for (int j = 0; j < batched.length; j++) {
                 for (int k = 0; k < batched[j].length; k++) {
@@ -245,21 +301,22 @@ public class DynamicChunkRepository {
 //        }
 //    }
 
-    public List<Map<String,Object>> getKeywordsByMdId(String collId, Integer mdId) {
+
+    public List<Map<String, Object>> getKeywordsByMdId(String collId, Integer mdId) {
         String sql1 = "SELECT id,keywords FROM %s WHERE md_id = ?";
         String sql = String.format(sql1, tableName(collId));
-        List<Map<String,Object>> keywordsList = jdbc.query(
+        List<Map<String, Object>> keywordsList = jdbc.query(
                 sql,
                 new Object[]{mdId},
                 (rs, rowNum) -> {
-                    Map<String,Object> map = new HashMap<>();
+                    Map<String, Object> map = new HashMap<>();
                     long id = rs.getLong("id");
                     map.put("id", id);
                     Array array = rs.getArray("keywords");
                     if (array != null) {
-                        map.put("keywords",(String[]) array.getArray());
-                    }else{
-                        map.put("keywords",null);
+                        map.put("keywords", (String[]) array.getArray());
+                    } else {
+                        map.put("keywords", null);
                     }
                     //return new String[0];
                     return map;
@@ -296,21 +353,21 @@ public class DynamicChunkRepository {
 
     public int updateKeywords(String collId, Long id, List<String> keywords, Map<String, Object> metadata) {
         String sqlTemplate = """
-            UPDATE %s
-            SET 
-                keywords = COALESCE(?, keywords),
-                metadata = CASE 
-                              WHEN ?::jsonb IS NOT NULL 
-                              THEN metadata || ?::jsonb
-                              ELSE metadata
-                           END
-            WHERE id = ?
-            """;
+                UPDATE %s
+                SET 
+                    keywords = COALESCE(?, keywords),
+                    metadata = CASE 
+                                  WHEN ?::jsonb IS NOT NULL 
+                                  THEN metadata || ?::jsonb
+                                  ELSE metadata
+                               END
+                WHERE id = ?
+                """;
 
         String sql = String.format(sqlTemplate, tableName(collId));
         String json = toJson(metadata); // 可能为 null
 
-        return jdbc.update(con -> {
+        int updated = jdbc.update(con -> {
             PreparedStatement ps = con.prepareStatement(sql);
 
             // keywords → text[]
@@ -333,6 +390,7 @@ public class DynamicChunkRepository {
             ps.setLong(4, id);
             return ps;
         });
+        return updated;
     }
 
 
@@ -387,5 +445,4 @@ public class DynamicChunkRepository {
         }
     }
 
-
 }

+ 44 - 30
server/src/main/java/com/giantan/data/mds/chunk/MdChunkRepository.java

@@ -6,10 +6,7 @@ import com.giantan.data.util.JdbcUtils;
 import org.springframework.jdbc.core.JdbcTemplate;
 import org.springframework.stereotype.Repository;
 
-import java.sql.Array;
-import java.sql.Connection;
-import java.sql.ResultSet;
-import java.sql.SQLException;
+import java.sql.*;
 import java.util.List;
 import java.util.Map;
 
@@ -32,33 +29,50 @@ public class MdChunkRepository {
             ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?::jsonb, ?::jsonb)
         """;
 
-        int updated = jdbcTemplate.update(sql,
-                chunk.getMdId(),
-                chunk.getChunkIndex(),
-                chunk.getContent(),
-                chunk.getPlainText(),
-                chunk.getEmbedding(),
-                chunk.getChunkType(),
-                chunk.getParagraphStart(),
-                chunk.getParagraphEnd(),
-                chunk.getOffsetStart(),
-                chunk.getOffsetEnd(),
-                chunk.getSectionPath(),
-                toSqlArray(chunk.getKeywords()),
-                toJson(chunk.getMetadata()),
-                toJson(chunk.getExtra())
-        );
-    }
+//        int updated = jdbcTemplate.update(sql,
+//                chunk.getMdId(),
+//                chunk.getChunkIndex(),
+//                chunk.getContent(),
+//                chunk.getPlainText(),
+//                chunk.getEmbedding(),
+//                chunk.getChunkType(),
+//                chunk.getParagraphStart(),
+//                chunk.getParagraphEnd(),
+//                chunk.getOffsetStart(),
+//                chunk.getOffsetEnd(),
+//                chunk.getSectionPath(),
+//                toSqlArray(chunk.getKeywords()),
+//                toJson(chunk.getMetadata()),
+//                toJson(chunk.getExtra())
+//        );
+
+
+        jdbcTemplate.update(connection -> {
+            PreparedStatement ps = connection.prepareStatement(sql);
+            ps.setInt(1, chunk.getMdId());
+            ps.setInt(2, chunk.getChunkIndex());
+            ps.setString(3, chunk.getContent());
+            ps.setString(4, chunk.getPlainText());
+            ps.setString(5, chunk.getEmbedding());
+            ps.setString(6, chunk.getChunkType());
+            ps.setObject(7, chunk.getParagraphStart());
+            ps.setObject(8, chunk.getParagraphEnd());
+            ps.setObject(9, chunk.getOffsetStart());
+            ps.setObject(10, chunk.getOffsetEnd());
+            ps.setString(11, chunk.getSectionPath());
+
+            // keywords array: preserve the old toSqlArray semantics
+            // (null list -> SQL NULL, not an empty text[])
+            if (chunk.getKeywords() != null) {
+                ps.setArray(12, connection.createArrayOf("text", chunk.getKeywords().toArray(new String[0])));
+            } else {
+                ps.setNull(12, Types.ARRAY);
+            }
+
+            // metadata JSON
+            ps.setString(13, toJson(chunk.getMetadata()));
+
+            // extra JSON
+            ps.setString(14, toJson(chunk.getExtra()));
 
-    private Array toSqlArray(List<String> ls) {
-        if (ls == null) {
-            return null;
-        }
-        try (Connection conn = jdbcTemplate.getDataSource().getConnection()) {
-            return conn.createArrayOf("TEXT", ls.toArray(new String[0]));
-        } catch (SQLException e) {
-            throw new RuntimeException("Error creating SQL Array", e);
-        }
+            return ps;
+        });
     }
 
     public List<MdChunk> findByMdId(int mdId) {

+ 14 - 24
server/src/main/java/com/giantan/data/mds/config/TaskConfiguration.java

@@ -1,18 +1,13 @@
 package com.giantan.data.mds.config;
 
 import com.giantan.data.mds.bot.GChatClient;
-import com.giantan.data.mds.service.IHybridSearch;
+import com.giantan.data.index.IHybridSearch;
+import com.giantan.data.mds.repository.MdDynamicTaskRepository;
 import com.giantan.data.mds.service.IMdChunksService;
 import com.giantan.data.mds.service.IMdFilesService;
 import com.giantan.data.mds.service.IVectorization;
-import com.giantan.data.mds.task.impl.EmbeddingTaskHandler;
-import com.giantan.data.mds.task.impl.IndexTaskHandler;
-import com.giantan.data.tasks.TaskEventListener;
-import com.giantan.data.tasks.ITaskHandler;
-import com.giantan.data.tasks.TaskHandlerRegistry;
-import com.giantan.data.tasks.TaskManager;
-import com.giantan.data.mds.task.impl.KeywordsTaskHandler;
-import com.giantan.data.mds.task.impl.SliceTaskHandler;
+import com.giantan.data.mds.task.impl.*;
+import com.giantan.data.tasks.*;
 import com.google.common.eventbus.AsyncEventBus;
 import com.google.common.eventbus.EventBus;
 import org.springframework.beans.factory.annotation.Autowired;
@@ -46,6 +41,9 @@ class TaskConfiguration {
     @Autowired
     IHybridSearch hybridSearch;
 
+    @Autowired
+    IPersistentTaskService persistentTaskService;
+
     @Bean
     public Executor taskExecutor() {
         //return Executors.newFixedThreadPool(10);
@@ -64,7 +62,9 @@ class TaskConfiguration {
 
     @Bean
     public TaskManager taskManager(EventBus eventBus) {
-        return new TaskManager(eventBus);
+        TaskManager taskManager = new TaskManager(eventBus);
+        taskManager.setPersistentTaskService(persistentTaskService);
+        return taskManager;
     }
 
     @Bean
@@ -80,22 +80,12 @@ class TaskConfiguration {
     }
 
     @Bean
-    public SliceTaskHandler sliceTaskHandler() {
-        return new SliceTaskHandler(mdFilesService,mdChunksService);
-    }
-
-    @Bean
-    public KeywordsTaskHandler keywordsTaskHandler(){
-        return new KeywordsTaskHandler(mdChunksService,gChatClient);
-    }
-
-    @Bean
-    public EmbeddingTaskHandler embeddingTaskHandler(){
-        return new EmbeddingTaskHandler(mdChunksService,vectorizationService);
+    public MdsTaskHandler mdTaskHandler() {
+        return new MdsTaskHandler(mdFilesService,mdChunksService);
     }
 
     @Bean
-    public IndexTaskHandler indexTaskHandler(){
-        return new IndexTaskHandler(mdChunksService,hybridSearch);
+    public ChunksTaskHandler chunkTaskHandler() {
+        return new ChunksTaskHandler(mdChunksService,vectorizationService,hybridSearch,gChatClient);
     }
 }

+ 24 - 0
server/src/main/java/com/giantan/data/mds/controller/ChunkController.java

@@ -56,4 +56,28 @@ public class ChunkController {
         return R.data(Map.of("deleted",true));
     }
 
+    @DeleteMapping("/{id}")
+    public R deleteByIds(@PathVariable String coll, @PathVariable List<String> ids) {
+        long ret = 0;
+        try {
+            ret = mdChunksService.deleteByIds(coll, ids);
+        } catch (Throwable e) {
+            throw new RuntimeException(e);
+        }
+        return R.data(ret);
+    }
+
+    @DeleteMapping("/by-md/{mdId}")
+    public R deleteByMdId(@PathVariable String coll, @PathVariable Integer id) {
+        long ret = 0;
+        try {
+            ret = mdChunksService.deleteByMdId(coll, id);
+        } catch (Throwable e) {
+            throw new RuntimeException(e);
+        }
+        return R.data(ret);
+    }
+
+
+
 }

+ 9 - 2
server/src/main/java/com/giantan/data/mds/controller/DownloadController.java

@@ -5,11 +5,14 @@ import com.giantan.data.kvs.constant.KvConstants;
 import com.giantan.data.mds.service.IMdFilesService;
 import jakarta.servlet.http.HttpServletResponse;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.http.ContentDisposition;
+import org.springframework.http.HttpHeaders;
 import org.springframework.web.bind.annotation.*;
 import org.apache.commons.io.IOUtils;
 
 import java.io.InputStream;
 import java.net.URLConnection;
+import java.nio.charset.StandardCharsets;
 
 @RestController
 @RequestMapping(KvConstants.API_PREFIX + "/collections/{collId}")
@@ -33,8 +36,12 @@ public class DownloadController {
             if (contentType == null) contentType = "application/octet-stream";
             response.setContentType(contentType);
 
-            response.setHeader("Content-Disposition", "attachment; filename=\"" + filename + "\"");
-
+            //response.setHeader("Content-Disposition", "attachment; filename=\"" + filename + "\"");
+            ContentDisposition contentDisposition = ContentDisposition
+                    .attachment()
+                    .filename(filename, StandardCharsets.UTF_8)
+                    .build();
+            response.setHeader(HttpHeaders.CONTENT_DISPOSITION, contentDisposition.toString());
             // 把文件内容写入 response 输出流
             IOUtils.copy(stream, response.getOutputStream());
             response.flushBuffer();

+ 51 - 0
server/src/main/java/com/giantan/data/mds/controller/StatusController.java

@@ -0,0 +1,51 @@
+package com.giantan.data.mds.controller;
+
+import com.zaxxer.hikari.HikariDataSource;
+import jakarta.annotation.PostConstruct;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.lang.invoke.MethodHandles;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+@RestController
+@RequestMapping("/v1/server")
+public class StatusController {
+    private static final org.slf4j.Logger log
+            = org.slf4j.LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    private final HikariDataSource dataSource;
+
+    public StatusController(HikariDataSource dataSource) {
+        this.dataSource = dataSource;
+    }
+
+    /**
+     * 启动时预热 Hikari 连接池
+     */
+    @PostConstruct
+    public void init() {
+        try (var conn = dataSource.getConnection()) {
+            // 触发 HikariDataSource 初始化
+            log.info("HikariDataSource initialized at startup");
+        } catch (Exception e) {
+            throw new RuntimeException("Failed to initialize HikariDataSource", e);
+        }
+    }
+
+    @GetMapping("/hikari/status")
+    public Map<String, Object> getStatus() {
+        var mxBean = dataSource.getHikariPoolMXBean();
+        if (mxBean == null) {
+            return Map.of("status", "HikariDataSource not initialized yet");
+        }
+        LinkedHashMap<String, Object> map = new LinkedHashMap<>();
+        map.put("total", mxBean.getTotalConnections());
+        map.put("active", mxBean.getActiveConnections());
+        map.put("idle", mxBean.getIdleConnections());
+        map.put("waiting", mxBean.getThreadsAwaitingConnection());
+        return map;
+    }
+}

+ 41 - 9
server/src/main/java/com/giantan/data/mds/controller/TaskController.java

@@ -1,16 +1,15 @@
 package com.giantan.data.mds.controller;
 
 import com.giantan.data.kvs.constant.KvConstants;
-import com.giantan.data.tasks.TaskContext;
+import com.giantan.data.tasks.*;
+import com.giantan.data.tasks.repository.TaskStatusHistory;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.format.annotation.DateTimeFormat;
 import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RestController;
 import org.springframework.web.bind.annotation.*;
 
-import com.giantan.data.tasks.TaskManager;
-import com.giantan.data.tasks.TaskType;
-import com.giantan.data.tasks.TaskState;
-
+import java.time.LocalDateTime;
 import java.util.*;
 
 @RestController
@@ -74,7 +73,7 @@ public class TaskController {
     }
 
     @GetMapping("/{id}/status")
-    public Map<String, TaskState> status(@PathVariable String collId, @PathVariable String id) {
+    public Map<String, TaskOperationsStatus> status(@PathVariable String collId, @PathVariable String id) {
         TaskContext ctx = manager.getTask(collId, id);
         return ctx != null ? ctx.getObjectStatus() : Collections.emptyMap();
     }
@@ -94,9 +93,6 @@ public class TaskController {
     @GetMapping
     public Collection<TaskContext> listAllTasks(@PathVariable String collId) {
         return manager.allTasks(collId);
-//                .stream()
-//                .filter(t -> collId.equals(t.getCollection()))
-//                .collect(Collectors.toList());
     }
 
     @GetMapping("/status/{status}")
@@ -105,4 +101,40 @@ public class TaskController {
         return manager.findByStatus(collId, status);
     }
 
+    @GetMapping("/history")
+    public List<TaskStatusHistory> getTasks(@PathVariable String collId,
+                                            @RequestParam(value = "createdAtStart", required = false) String createdAtStart,
+                                            @RequestParam(value = "createdAtEnd", required = false) String createdAtEnd,
+                                            @RequestParam(value = "status", required = false) String status
+    ) {
+        LocalDateTime startTime = null;
+        if (createdAtStart != null) {
+            startTime = LocalDateTime.parse(createdAtStart);
+        }
+
+        LocalDateTime endTime = null;
+        if (createdAtEnd != null) {
+            endTime = LocalDateTime.parse(createdAtEnd);
+        }
+        return manager.getHistoryTasks(collId,startTime, endTime, status);
+    }
+
+    @DeleteMapping("/history/cleanup")
+    public int deleteHistory(@PathVariable String collId,
+                             @RequestParam(value = "createdAtStart", required = false) String createdAtStart,
+                             @RequestParam(value = "createdAtEnd", required = false) String createdAtEnd,
+                             @RequestParam(value = "status", required = false) String status
+    ) {
+        LocalDateTime startTime = null;
+        if (createdAtStart != null) {
+            startTime = LocalDateTime.parse(createdAtStart);
+        }
+
+        LocalDateTime endTime = null;
+        if (createdAtEnd != null) {
+            endTime = LocalDateTime.parse(createdAtEnd);
+        }
+        return manager.deleteHistoryTasks(collId,startTime, endTime, status);
+    }
+
 }

+ 0 - 12
server/src/main/java/com/giantan/data/mds/service/IHybridSearch.java

@@ -1,12 +0,0 @@
-package com.giantan.data.mds.service;
-
-import com.giantan.data.mds.service.impl.DocReq;
-
-import java.io.IOException;
-import java.util.List;
-
-public interface IHybridSearch {
-    List<DocReq> add(String coll, List<DocReq> docs) throws IOException, InterruptedException;
-
-    int delete(String coll, List<String> ids) throws IOException, InterruptedException;
-}

+ 5 - 2
server/src/main/java/com/giantan/data/mds/service/IMdChunksService.java

@@ -4,12 +4,13 @@ import com.giantan.data.kvs.kvstore.GBaseKeyValue;
 import com.giantan.data.mds.chunk.MdChunk;
 import org.cnnlp.data.document.GDocument;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 
 public interface IMdChunksService {
 
-    long deleteByMdId(String coll, Integer mdId);
+    long deleteByMdId(String coll, Integer mdId) throws IOException, InterruptedException;
 
     List<Integer> saveAll(String coll, List<MdChunk> chunks) throws Throwable;
 
@@ -25,7 +26,9 @@ public interface IMdChunksService {
 
     int setEmbedding(String coll, Long id, String es);
 
-    void deleteAll(String coll);
+    void deleteAll(String coll) throws IOException, InterruptedException;
+
+    long deleteByIds(String coll, List<String> ids) throws IOException, InterruptedException;
 
     //String getEmbedding(String coll, Long id);
 

+ 0 - 69
server/src/main/java/com/giantan/data/mds/service/impl/HybridSearch.java

@@ -1,69 +0,0 @@
-package com.giantan.data.mds.service.impl;
-
-
-import com.giantan.ai.common.util.JsonUtil;
-import com.giantan.data.mds.service.IHybridSearch;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.stereotype.Service;
-
-import java.io.IOException;
-import java.net.URI;
-import java.net.http.HttpClient;
-import java.net.http.HttpRequest;
-import java.net.http.HttpResponse;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-@Service
-public class HybridSearch implements IHybridSearch {
-
-    @Value("${qas.url}")
-    String url = "http://120.78.4.46:7387/v1/collections/";
-    //String url = "http://120.78.4.46:7387/v1/embeddings/embed";
-
-    @Override
-    public List<DocReq> add(String coll, List<DocReq> docs) throws IOException, InterruptedException {
-        String body = JsonUtil.toJsonString(docs);
-
-        HttpRequest request = HttpRequest.newBuilder()
-                .uri(URI.create(url+coll+"/documents/insert"))
-                .header("Content-Type", "application/json")
-                .header("User-Agent", "myClient/11.0.2")
-                .method("POST", HttpRequest.BodyPublishers.ofString(body))
-                .build();
-        HttpResponse<String> response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
-        //System.out.println(response.body());
-        Map<String, Object> ret = JsonUtil.fromJsonString(response.body());
-        Object o = ret.get("data");
-        List<DocReq> ls = new ArrayList<>();
-        if (o != null && o instanceof List) {
-            List<Map<String, Object>> docList = (List<Map<String, Object>>) o;
-            for (Map<String, Object> doc : docList) {
-                DocReq dr = DocReq.fromMap(doc);
-                ls.add(dr);
-            }
-        }
-        return ls;
-    }
-
-
-    @Override
-    public int delete(String coll, List<String> ids) throws IOException, InterruptedException {
-        String body = JsonUtil.toJsonString(ids);
-        HttpRequest request = HttpRequest.newBuilder()
-                .uri(URI.create(url+coll+"/documents"))
-                .header("Content-Type", "application/json")
-                .header("User-Agent", "myClient/11.0.2")
-                .method("DELETE", HttpRequest.BodyPublishers.ofString(body))
-                .build();
-        HttpResponse<String> response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
-        //System.out.println(response.body());
-        Map<String, Object> ret = JsonUtil.fromJsonString(response.body());
-        Object o = ret.get("data");
-        if (o != null && o instanceof Integer) {
-            return (Integer) o;
-        }
-        return 0;
-    }
-}

+ 47 - 2
server/src/main/java/com/giantan/data/mds/service/impl/MdChunksService.java

@@ -1,5 +1,7 @@
 package com.giantan.data.mds.service.impl;
 
+import com.giantan.data.index.HybridSearch;
+import com.giantan.data.index.IHybridSearch;
 import com.giantan.data.mds.chunk.MdChunk;
 import com.giantan.data.mds.repository.MdDynamicChunkRepository;
 import com.giantan.data.mds.service.IMdChunksService;
@@ -7,6 +9,8 @@ import jakarta.annotation.PostConstruct;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 
+import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
@@ -19,6 +23,11 @@ public class MdChunksService implements IMdChunksService {
     @Autowired
     MdDynamicChunkRepository mdDynamicChunkRepository;
 
+    @Autowired
+    IHybridSearch hybridSearch;
+
+    boolean isIndexDeleted = true;
+
     public MdChunksService() {
 
     }
@@ -28,8 +37,14 @@ public class MdChunksService implements IMdChunksService {
     }
 
     @Override
-    public long deleteByMdId(String coll, Integer mdId) {
+    public long deleteByMdId(String coll, Integer mdId) throws IOException, InterruptedException {
         int collId = mdCollectionsService.getCollectionId(coll);
+        if (isIndexDeleted) {
+            List<String> uids = mdDynamicChunkRepository.findUidsByMdId(Integer.toString(collId), mdId);
+            if (uids != null && !uids.isEmpty()) {
+                hybridSearch.delete(coll, uids);
+            }
+        }
         long r = mdDynamicChunkRepository.deleteByMdId(Integer.toString(collId), mdId);
         return r;
     }
@@ -79,11 +94,41 @@ public class MdChunksService implements IMdChunksService {
     }
 
     @Override
-    public void deleteAll(String coll) {
+    public void deleteAll(String coll) throws IOException, InterruptedException {
         int collId = mdCollectionsService.getCollectionId(coll);
+        if (isIndexDeleted) {
+            hybridSearch.deleteAll(coll);
+        }
         mdDynamicChunkRepository.deleteAll(Integer.toString(collId));
     }
 
+    @Override
+    public long deleteByIds(String coll, List<String> ids) throws IOException, InterruptedException {
+        int collId = mdCollectionsService.getCollectionId(coll);
+        List<String> uids = null;
+        if (isIndexDeleted) {
+            uids = mdDynamicChunkRepository.findUidsByIds(Integer.toString(collId), toListLong(ids));
+            if (uids != null && !uids.isEmpty()) {
+                hybridSearch.delete(coll, uids);
+            }
+        }
+        int r = 0;
+        if (uids != null && !uids.isEmpty()) {
+            r = mdDynamicChunkRepository.deleteByIds(Integer.toString(collId),toListLong(ids));
+        }
+        return r;
+    }
+
+    private List<Long> toListLong(List<String> ss) {
+        if (ss == null || ss.isEmpty()) {
+            return null;
+        }
+        List<Long> ls = new ArrayList<>();
+        for (String s : ss) {
+            ls.add(Long.parseLong(s));
+        }
+        return ls;
+    }
 //    private MdChunk mapRow(GDocument rs) throws SQLException {
 //        MdChunk c = new MdChunk();
 //        //c.setId(rs.getLong("id"));

+ 2 - 2
server/src/main/java/com/giantan/data/mds/service/impl/MdDocsService.java

@@ -41,8 +41,8 @@ public class MdDocsService implements IMdDocsService {
     public static final String RESP_MATCH = "match";
     public static final String RESP_HEADINGS = "headings";
 
-    @Autowired
-    private TaskStatusManager taskStatusManager;
+    //@Autowired
+    //private TaskStatusManager taskStatusManager;
 
     @Autowired
     S3GkbService gkbStorer;

+ 57 - 0
server/src/main/java/com/giantan/data/mds/task/PersistentTaskService.java

@@ -0,0 +1,57 @@
+package com.giantan.data.mds.task;
+
+import com.giantan.data.mds.repository.MdDynamicTaskRepository;
+import com.giantan.data.mds.service.impl.MdCollectionsService;
+import com.giantan.data.tasks.IPersistentTaskService;
+import com.giantan.data.tasks.TaskContext;
+import com.giantan.data.tasks.repository.TaskConverter;
+import com.giantan.data.tasks.repository.TaskStatusHistory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.time.LocalDateTime;
+import java.util.List;
+
+@Service
+public class PersistentTaskService implements IPersistentTaskService {
+
+    @Autowired
+    MdCollectionsService mdCollectionsService;
+
+    @Autowired
+    MdDynamicTaskRepository mdDynamicTaskRepository;
+
+    public PersistentTaskService() {
+    }
+
+    @Override
+    public int save(String coll, TaskContext task) {
+        int collId = mdCollectionsService.getCollectionId(coll);
+        TaskStatusHistory history = TaskConverter.toHistory(task);
+        int saved = mdDynamicTaskRepository.save(Integer.toString(collId), history);
+        return saved;
+    }
+
+    @Override
+    public int updateTaskStatus(String coll, TaskContext task) {
+        int collId = mdCollectionsService.getCollectionId(coll);
+        TaskStatusHistory history = TaskConverter.toHistory(task);
+        int saved = mdDynamicTaskRepository.updateTaskStatus(Integer.toString(collId), history);
+        return saved;
+    }
+
+    @Override
+    public List<TaskStatusHistory> getHistoryTasks(String coll, LocalDateTime startTime, LocalDateTime endTime, String status) {
+        int collId = mdCollectionsService.getCollectionId(coll);
+        List<TaskStatusHistory> rs = mdDynamicTaskRepository.getHistoryTasks(Integer.toString(collId),startTime,endTime,status);
+        return rs;
+    }
+
+    @Override
+    public int deleteHistoryTasks(String coll, LocalDateTime startTime, LocalDateTime endTime, String status) {
+        int collId = mdCollectionsService.getCollectionId(coll);
+        int rs = mdDynamicTaskRepository.deleteHistoryTasks(Integer.toString(collId),startTime,endTime,status);
+        return rs;
+    }
+
+}

+ 14 - 13
server/src/main/java/com/giantan/data/mds/task/impl/BaseTaskHandler.java

@@ -1,9 +1,6 @@
 package com.giantan.data.mds.task.impl;
 
-import com.giantan.data.tasks.ITaskHandler;
-import com.giantan.data.tasks.TaskContext;
-import com.giantan.data.tasks.TaskState;
-import com.giantan.data.tasks.TaskType;
+import com.giantan.data.tasks.*;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -26,15 +23,19 @@ public abstract class BaseTaskHandler implements ITaskHandler {
             = org.slf4j.LoggerFactory.getLogger(BaseTaskHandler.class);
 
     protected void preProcess(final TaskContext taskContext) {
-        List<Object> objects = null;
+        List<Object> objects = new ArrayList<>();
+        taskContext.setObjectIds(objects);
         Map<String, Object> payload = taskContext.getParams();
         if (payload.containsKey(MD_IDS)) {
-            objects = (List<Object>) payload.remove(MD_IDS);
+            Object o =  payload.get(MD_IDS);
+            if (o != null && o instanceof List) {
+                objects = (List<Object>) o;
+            }
             taskContext.setObjectIds(objects);
         } else if (payload.containsKey(MD_START_ID) && payload.containsKey(MD_END_ID)) {
-            int from = (int) payload.remove(MD_START_ID);
-            int to = (int) payload.remove(MD_END_ID);
-            objects = new ArrayList<>();
+            int from = (int) payload.get(MD_START_ID);
+            int to = (int) payload.get(MD_END_ID);
+            //objects = new ArrayList<>();
             //for (int i = from; i <= to; i++) {
             for (int i = from; i < to; i++) {
                 objects.add(i);
@@ -78,7 +79,7 @@ public abstract class BaseTaskHandler implements ITaskHandler {
             //long endTime = System.currentTimeMillis()-startTime;
             //log.info(getType() + " task: {} finished. Used time = {}", context.getTaskId(), endTime);
             context.setStatus(TaskState.SUCCESS);
-            log.info(getType() + " task: {} finished. No mdIds provided.", context.getTaskId());
+            log.info(getType() + " task: {} finished. No mdIds/chunkIds provided.", context.getTaskId());
             return;
         }
         log.info(getType() + " task: {} started", context.getTaskId());
@@ -91,15 +92,15 @@ public abstract class BaseTaskHandler implements ITaskHandler {
             }
             try {
                 doing(context, objectId);
-                context.logSuccess(objectId.toString());
+                //context.logSuccess(objectId.toString());
             } catch (Exception e) {
-                context.logFailure(objectId.toString(), e.getMessage());
+                //context.logFailure(objectId.toString(), e.getMessage());
                 log.error(getType() + " task: {} (objectId: {}) error: {}", context.getTaskId(), objectId, e.getMessage());
             }
         }
         if (!isCanceled) {
 
-            Map<String, TaskState> objectStatus = context.getObjectStatus();
+            Map<String, TaskOperationsStatus> objectStatus = context.getObjectStatus();
             long endTime = System.currentTimeMillis()-startTime;
             if (objectStatus.containsValue(TaskState.FAILED)){
                 context.setStatus(TaskState.FAILED);

+ 281 - 5
server/src/main/java/com/giantan/data/mds/task/impl/ChunksTaskHandler.java

@@ -1,20 +1,43 @@
 package com.giantan.data.mds.task.impl;
 
-import com.giantan.data.mds.service.IHybridSearch;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.giantan.ai.common.util.JsonUtil;
+import com.giantan.data.mds.bot.GChatClient;
+import com.giantan.data.mds.chunk.MdChunk;
+import com.giantan.data.index.IHybridSearch;
 import com.giantan.data.mds.service.IMdChunksService;
 import com.giantan.data.mds.service.IVectorization;
+import com.giantan.data.index.dto.DocReq;
+import com.giantan.data.index.dto.DocResp;
 import com.giantan.data.tasks.TaskContext;
 import com.giantan.data.tasks.TaskType;
 
+import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 public class ChunksTaskHandler extends BaseTaskHandler {
 
+    private static final org.slf4j.Logger log
+            = org.slf4j.LoggerFactory.getLogger(ChunksTaskHandler.class);
+
     IMdChunksService mdChunksService;
     IVectorization vectorizationService;
     IHybridSearch hybridSearch;
+    GChatClient gChatClient;
+
+    public ChunksTaskHandler(IMdChunksService mdChunksService,
+                             IVectorization vectorizationService,
+                             IHybridSearch hybridSearch,
+                             GChatClient gChatClient
+    ) {
+        this.mdChunksService = mdChunksService;
+        this.vectorizationService = vectorizationService;
+        this.hybridSearch = hybridSearch;
+        this.gChatClient = gChatClient;
+    }
 
     @Override
     public TaskType getType() {
@@ -23,22 +46,27 @@ public class ChunksTaskHandler extends BaseTaskHandler {
 
     protected void preProcess(final TaskContext context) {
         List<Object> objects = new ArrayList<>();
+        context.setObjectIds(objects);
+
         String coll = context.getCollection();
         Map<String, Object> payload = context.getParams();
 
         if (payload.containsKey(CHUNK_IDS)) {
-            objects = (List<Object>) payload.get(CHUNK_IDS);
+            Object o = payload.get(CHUNK_IDS);
             //context.setObjectIds(objects);
+            if (o != null && o instanceof List) {
+                objects = (List<Object>) o;
+            }
         } else if (payload.containsKey(CHUNK_START_ID) && payload.containsKey(CHUNK_END_ID)) {
             int from = (int) payload.get(CHUNK_START_ID);
             int to = (int) payload.get(CHUNK_END_ID);
-            objects = new ArrayList<>();
+            //objects = new ArrayList<>();
             //for (int i = from; i <= to; i++) {
             for (int i = from; i < to; i++) {
                 objects.add(i);
             }
             //context.setObjectIds(objects);
-        }else if (payload.containsKey(MD_IDS)) {
+        } else if (payload.containsKey(MD_IDS)) {
             List<Object> mdIds = (List<Object>) payload.get(MD_IDS);
             for (Object mdId : mdIds) {
                 List<Map<String, Object>> rets = mdChunksService.getKeywordsByMdId(coll, toInt(mdId));
@@ -49,7 +77,7 @@ public class ChunksTaskHandler extends BaseTaskHandler {
                     }
                 }
             }
-        }else if (payload.containsKey(MD_START_ID) && payload.containsKey(MD_END_ID)) {
+        } else if (payload.containsKey(MD_START_ID) && payload.containsKey(MD_END_ID)) {
             int from = (int) payload.get(MD_START_ID);
             int to = (int) payload.get(MD_END_ID);
             for (int i = from; i < to; i++) {
@@ -68,6 +96,254 @@ public class ChunksTaskHandler extends BaseTaskHandler {
     @Override
     public void doing(TaskContext context, Object objectId) {
         List<String> operations = context.getOperations();
+        for (String operation : operations) {
+            doing(context, objectId, operation);
+        }
+    }
+
+    public void doing(TaskContext context, Object objectId, String operation) {
+        if (operation.equalsIgnoreCase("keywords")) {
+            extractKeywords(context, objectId, "keywords");
+        } else if (operation.equalsIgnoreCase("vectorization")) {
+            vectorization(context, objectId, "vectorization");
+        } else if (operation.equalsIgnoreCase("indexCreate")) {
+            indexCreate(context, objectId, "indexCreate");
+        } else if (operation.equalsIgnoreCase("indexDelete")) {
+            indexDelete(context, objectId, "indexDelete");
+        }
+    }
+
+    private void indexDelete(TaskContext context, Object objectId, String operation) {
+        try {
+            String coll = context.getCollection();
+            Long chunkId = toLong(objectId);
+            MdChunk chunk = mdChunksService.findById(coll, chunkId);
+            String uid = chunk.getSectionPath();
+            hybridSearch.delete(coll, List.of(uid));
+            context.logSuccess(objectId.toString(), operation);
+        } catch (Exception e) {
+            context.logFailure(objectId.toString(), operation, e.getMessage());
+            e.printStackTrace();
+            throw new RuntimeException(e);
+        }
+    }
+
+    private void indexCreate(TaskContext context, Object objectId, String operation) {
+        try {
+            String coll = context.getCollection();
+            Map<String, Object> params = context.getParams();
+            Long chunkId = toLong(objectId);
+            MdChunk chunk = mdChunksService.findById(coll, chunkId);
+
+            String plainText = chunk.getPlainText();
+            Object o = params.get("embeddingMetadata");
+
+            Map<String, Object> extra = null;
+            if (o != null && o instanceof Map) {
+                Map<String, String> mapping = (Map<String, String>) o;
+                //Map<String, String> kvs = getChunkMetadata(mapping, chunkMetadata);
+                extra = buildExtra(mapping, chunk);
+            }
 
+            Boolean skipChatGptIfExists = (Boolean) params.getOrDefault("skipChatGptIfExists", true);
+
+            callIndex(coll, chunkId, chunk, extra);
+            context.logSuccess(objectId.toString(), operation);
+        } catch (Exception e) {
+            context.logFailure(objectId.toString(), operation, e.getMessage());
+            e.printStackTrace();
+            throw new RuntimeException(e);
+        }
+    }
+
+    private void vectorization(TaskContext context, Object objectId, String operation) {
+        try {
+            String coll = context.getCollection();
+            Map<String, Object> params = context.getParams();
+            Long chunkId = toLong(objectId);
+            MdChunk chunk = mdChunksService.findById(coll, chunkId);
+            String plainText = chunk.getPlainText();
+
+            Object o = params.get("embeddingMetadata");
+
+            Map<String, Object> extra = null;
+            if (o != null && o instanceof Map) {
+                Map<String, String> mapping = (Map<String, String>) o;
+                //Map<String, String> kvs = getChunkMetadata(mapping, chunkMetadata);
+                extra = buildExtra(mapping, chunk);
+            }
+
+            //System.out.println("kvs=" + kvs);
+
+            Boolean skipChatGptIfExists = (Boolean) params.getOrDefault("skipChatGptIfExists", true);
+            //System.out.println("skipChatGptIfExists=" + skipChatGptIfExists);
+
+            String embedding = chunk.getEmbedding();
+            if (skipChatGptIfExists) {
+                if (embedding != null) {
+
+                } else {
+                    // 调用LLM
+                    callEmbedding(coll, chunkId, plainText, extra);
+                }
+            } else {
+                // 调用LLM
+                callEmbedding(coll, chunkId, plainText, extra);
+            }
+            context.logSuccess(objectId.toString(), operation);
+        } catch (Exception e) {
+            context.logFailure(objectId.toString(), operation, e.getMessage());
+            e.printStackTrace();
+            throw new RuntimeException(e);
+        }
     }
+
+    private void extractKeywords(TaskContext context, Object objectId, String operation) {
+        try {
+            String coll = context.getCollection();
+            Map<String, Object> params = context.getParams();
+            Long chunkId = toLong(objectId);
+            MdChunk chunk = mdChunksService.findById(coll, chunkId);
+            //System.out.println("chunk=" + chunk);
+            Map<String, Object> metadata = chunk.getMetadata();
+            //System.out.println("metadata=" + metadata);
+            String plainText = chunk.getPlainText();
+            //System.out.println("plainText=" + plainText);
+
+            Map<String, String> kvs = new HashMap<>();
+
+            Object o = params.get("chunkMetadata");
+            if (o != null && o instanceof Map) {
+                Map<String, String> mapping = (Map<String, String>) o;
+                kvs = getChunkMetadata(mapping, metadata);
+            }
+
+            Boolean skipChatGptIfExists = (Boolean) params.getOrDefault("skipChatGptIfExists", true);
+            //System.out.println("skipChatGptIfExists=" + skipChatGptIfExists);
+            List<String> keywords = chunk.getKeywords();
+            if (skipChatGptIfExists) {
+                if (keywords != null && keywords.size() > 0) {
+
+                } else {
+                    // 调用LLM
+                    callLLM(coll, chunkId, plainText, kvs);
+                }
+            } else {
+                // 调用LLM
+                callLLM(coll, chunkId, plainText, kvs);
+            }
+            context.logSuccess(objectId.toString(), operation);
+        } catch (Exception e) {
+            context.logFailure(objectId.toString(), operation, e.getMessage());
+            e.printStackTrace();
+            throw new RuntimeException(e);
+        }
+    }
+
+    private void callLLM(String coll, Long chunkId, String plainText, Map<String, String> kvs) throws JsonProcessingException {
+        //long t = System.currentTimeMillis();
+        Map<String, Object> kws = gChatClient.getKeywordsAndQuestions(plainText, kvs);
+        //t = System.currentTimeMillis() - t;
+        //System.out.println("kws=" + kws);
+        //System.out.println("used time = " + t);
+        if (kws != null && kws.size() > 0) {
+            List<String> keywords = null;
+            List<String> questions = null;
+            Object o = kws.get("keywords");
+            if (o != null && o instanceof List) {
+                keywords = (List<String>) o;
+            }
+            o = kws.get("questions");
+            if (o != null && o instanceof List) {
+                questions = (List<String>) o;
+            }
+
+            if (keywords != null && questions != null) {
+                int i = mdChunksService.updateKeywordsOrMetadata(coll, chunkId, keywords, Map.of("llm_questions", questions));
+                //System.out.println("i=" + i);
+            }
+        }
+    }
+
+
+    private Map<String, String> getChunkMetadata(Map<String, String> mapping, Map<String, Object> metadata) {
+        Map<String, String> m2 = new HashMap<String, String>();
+        mapping.forEach((k, v) -> {
+            Object o = metadata.get(k);
+            if (o != null) {
+                if (o instanceof String) {
+                    m2.put(v, (String) o);
+                } else if (o instanceof List) {
+                    List l = (List) o;
+                    if (l.size() > 0) {
+                        m2.put(v, String.join(",", l));
+                    }
+                }
+            }
+        });
+        return m2;
+    }
+
+    private int callEmbedding(String coll, Long chunkId, String plainText, Map<String, Object> extra) throws IOException, InterruptedException {
+        StringBuilder sb = new StringBuilder();
+        extra.forEach((k, v) -> {
+            sb.append(k).append(": ").append(v).append("\n");
+        });
+
+        sb.append(plainText);
+        float[] fs = vectorizationService.singleVectorization(sb.toString());
+        if (fs != null) {
+            String js = JsonUtil.toJsonString(fs);
+            int r = mdChunksService.setEmbedding(coll, chunkId, js);
+            return r;
+        }
+        return 0;
+    }
+
+    private void callIndex(String coll, Long chunkId, MdChunk chunk, Map<String, Object> extra) throws IOException, InterruptedException {
+
+        Map<String, Object> metadata = chunk.getMetadata();
+        metadata.put("chunkId", chunk.getChunkIndex());
+        metadata.put("mdId", chunk.getMdId());
+        metadata.put("iid", chunk.getId());
+        metadata.put("offsetStart", chunk.getOffsetStart());
+        metadata.put("offsetEnd", chunk.getOffsetEnd());
+        metadata.put("content", chunk.getContent());
+
+        DocReq doc = new DocReq();
+        doc.setId(chunk.getSectionPath());
+        doc.setText(chunk.getPlainText());
+        doc.setEmbedding(chunk.getEmbedding());
+        doc.setMetadata(metadata);
+
+        List<DocResp> ret = hybridSearch.add(coll, List.of(doc));
+        //System.out.println(ret);
+    }
+
+    private Map<String, Object> buildExtra(Map<String, String> mapping, MdChunk chunk) {
+        Map<String, Object> extra = new HashMap<String, Object>();
+        Map<String, Object> metadata = chunk.getMetadata();
+        mapping.forEach((k, v) -> {
+            if (k.equals("keywords")) {
+                List<String> keywords = chunk.getKeywords();
+                if (keywords != null && keywords.size() > 0) {
+                    extra.put(v, String.join(",", keywords));
+                }
+            } else {
+                Object o = metadata.get(k);
+                if (o != null) {
+                    if (o instanceof String) {
+                        extra.put(v, (String) o);
+                    } else if (o instanceof List) {
+                        List l = (List) o;
+                        if (l.size() > 0) {
+                            extra.put(v, String.join("\n", l));
+                        }
+                    }
+                }
+            }
+        });
+        return extra;
+    }
+
 }

+ 0 - 13
server/src/main/java/com/giantan/data/mds/task/impl/KeywordsTaskHandler.java

@@ -24,19 +24,6 @@ public class KeywordsTaskHandler extends BaseTaskHandler {
         this.mdChunksService = mdChunksService;
         this.gChatClient = gChatClient;
     }
-//    @Override
-//    public void handle(TaskContext context) {
-//        for (Object objectId : context.getObjectIds()) {
-//            if (context.isCancelled()) break;
-//            try {
-//                System.out.println("Extract keywords: " + objectId);
-//                Thread.sleep(300); // simulate
-//                context.logSuccess(objectId.toString());
-//            } catch (Exception e) {
-//                context.logFailure(objectId.toString(), e.getMessage());
-//            }
-//        }
-//    }
 
     // mdIds  chunkIds
     protected void preProcess(final TaskContext context) {

+ 166 - 0
server/src/main/java/com/giantan/data/mds/task/impl/MdsTaskHandler.java

@@ -0,0 +1,166 @@
+package com.giantan.data.mds.task.impl;
+
+import com.giantan.ai.common.util.JsonUtil;
+import com.giantan.data.kvs.kvstore.GBaseKeyValue;
+import com.giantan.data.mds.chunk.MdChunk;
+import com.giantan.data.mds.service.IMdChunksService;
+import com.giantan.data.mds.service.IMdFilesService;
+import com.giantan.data.tasks.TaskContext;
+import com.giantan.data.tasks.TaskType;
+import org.cnnlp.data.document.GDocConstants;
+import org.cnnlp.data.document.GDocument;
+import org.cnnlp.data.splitter.IMdChunking;
+import org.cnnlp.data.splitter.MdChunking;
+import org.cnnlp.data.splitter.SplitUtils;
+import org.cnnlp.data.util.BaseParameters;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Task handler for {@link TaskType#MD}: slices a markdown file into chunks and
+ * persists them via {@link IMdChunksService}, replacing any existing chunks for
+ * the same markdown id.
+ *
+ * For each object id the configured operations are executed in order; currently
+ * only "slice" is recognized — any other operation string is silently ignored
+ * (NOTE(review): consider logging unknown operations).
+ */
+public class MdsTaskHandler extends BaseTaskHandler {
+
+    private static final org.slf4j.Logger log
+            = org.slf4j.LoggerFactory.getLogger(MdsTaskHandler.class);
+
+    // Parameter key selecting the markdown chunking strategy.
+    private static final String MD_TYPE = "mdType";
+    // Parameter key carrying user-supplied metadata to stamp on every chunk.
+    private static final String CHUNK_METADATA = "chunkMetadata";
+    // Metadata key under which the source file's base name is stored per chunk.
+    private static final String FILE_NAME = "_fileName";
+    //private static final String CHUNK_ULID = "_ulid";
+
+    IMdFilesService mdFilesService;
+    IMdChunksService mdChunksService;
+
+    public MdsTaskHandler(IMdFilesService mdFilesService, IMdChunksService mdChunksService) {
+        this.mdFilesService = mdFilesService;
+        this.mdChunksService = mdChunksService;
+    }
+
+    @Override
+    public TaskType getType() {
+        return TaskType.MD;
+    }
+
+    /** Runs every configured operation of the context against one object id. */
+    @Override
+    public void doing(TaskContext context, Object objectId) {
+        List<String> operations = context.getOperations();
+        for (String operation : operations) {
+            doing(context, objectId, operation);
+        }
+    }
+
+    /** Dispatches a single operation; only "slice" (case-insensitive) is handled. */
+    public void doing(TaskContext context, Object objectId, String operation) {
+        if (operation.equalsIgnoreCase("slice")) {
+            slicing(context, objectId, "slice");
+        }
+    }
+
+    /**
+     * Slices the markdown file identified by {@code objectId} into chunks and
+     * stores them, deleting any previously stored chunks for the same md id first.
+     *
+     * Per-object success/failure is recorded on the context; on failure the
+     * exception is additionally rethrown so the task framework can retry.
+     *
+     * NOTE(review): when no file metadata is found for the id the method is a
+     * no-op but still logs success — confirm that is intended.
+     */
+    public void slicing(TaskContext context, Object objectId, String operation) {
+
+        try {
+            String coll = context.getCollection();
+            Map<String, Object> params = context.getParams();
+
+            String mdId = objectId.toString();
+            GBaseKeyValue mdMeta = mdFilesService.findByMdid(coll, mdId);
+            if (mdMeta != null) {
+                String text = mdFilesService.getMdFileContent(coll, mdId);
+                String mdType = getMdType(params);
+                String name = mdMeta.getName();
+                String baseName = SplitUtils.getBaseName(name);
+
+                List<GDocument> chunks = splitToChunks(text, mdType, params);
+
+                if (chunks != null) {
+                    // Delete the existing chunks for this md id, then store the new ones.
+                    //System.out.println(chunks.get(0));
+                    Map<String, Object> chunkMetadata = new HashMap<>();
+                    Object mo = params.get(CHUNK_METADATA);
+                    if (mo != null && mo instanceof Map) {
+                        chunkMetadata.putAll((Map) mo);
+                    }
+                    mdChunksService.deleteByMdId(coll, toInt(mdId));
+                    List<MdChunk> mdChunks = new ArrayList<MdChunk>();
+                    for (int i = 0; i < chunks.size(); i++) {
+                        MdChunk chunk = toChunk(toInt(mdId), chunks.get(i), i, baseName, chunkMetadata);
+                        mdChunks.add(chunk);
+                    }
+
+                    List<Integer> rets = mdChunksService.saveAll(coll, mdChunks);
+                    //System.out.println("saved="+rets.size());
+                }
+            }
+            context.logSuccess(mdId,operation);
+        } catch (Throwable e) {
+            // Record the per-object failure, then propagate for framework-level retry.
+            context.logFailure(objectId.toString(),operation,e.getMessage());
+            throw new RuntimeException(e);
+        }
+    }
+
+    /** Resolves the chunking type from params, defaulting to the simple strategy. */
+    private String getMdType(Map<String, Object> params) {
+        String type = MdChunking.MD_TYPE_SIMPLE;
+        if (params != null) {
+            Object o = params.get(MD_TYPE);
+            if (o != null) {
+                type = o.toString();
+            }
+        }
+        return type;
+    }
+
+
+    /**
+     * Converts one split {@link GDocument} into a persistable {@link MdChunk}.
+     *
+     * Offsets, paragraph bounds and raw content are moved out of the document
+     * metadata into dedicated chunk fields (control characters are stripped from
+     * textual content). Remaining document metadata is merged into the chunk's
+     * metadata map on top of the user metadata, with the file's base name stored
+     * under {@code _fileName}. Document metadata can therefore overwrite
+     * user-supplied keys and {@code _fileName} — NOTE(review): confirm precedence.
+     *
+     * @param mdId         owning markdown file id
+     * @param doc          split document fragment
+     * @param idx          zero-based chunk index within the file
+     * @param baseName     source file base name
+     * @param userMetadata caller-supplied metadata applied to every chunk
+     */
+    protected MdChunk toChunk(Integer mdId, GDocument doc, int idx, String baseName, Map<String, Object> userMetadata) {
+        MdChunk chunk = new MdChunk();
+        chunk.setMdId(mdId);
+        chunk.setChunkIndex(idx);
+        chunk.setPlainText(JsonUtil.cleanControlChars(doc.getText()));
+        chunk.setSectionPath(doc.getId());
+        chunk.setCreatedAt(new Date().toInstant());
+
+        // remove(...) is used deliberately so these keys do not also end up in
+        // the chunk's metadata map below.
+        Map<String, Object> metadata = doc.getMetadata();
+        Object o = metadata.remove(GDocConstants.RAW_CONTENT);
+        if (o != null) {
+            chunk.setContent(JsonUtil.cleanControlChars(o.toString()));
+        }
+
+        o = metadata.remove(GDocConstants.START_OFFSET);
+        if (o != null) {
+            chunk.setOffsetStart(toInt(o));
+        }
+        o = metadata.remove(GDocConstants.END_OFFSET);
+        if (o != null) {
+            chunk.setOffsetEnd(toInt(o));
+        }
+
+        o = metadata.remove(GDocConstants.FROM_IDX);
+        if (o != null) {
+            chunk.setParagraphStart(toInt(o));
+        }
+        o = metadata.remove(GDocConstants.TO_IDX);
+        if (o != null) {
+            chunk.setParagraphEnd(toInt(o));
+        }
+
+        // Ensure the chunk has a mutable metadata map to merge into.
+        Map<String, Object> metadata1 = chunk.getMetadata();
+        if (metadata1 == null) {
+            metadata1 = new HashMap<String, Object>();
+            chunk.setMetadata(metadata1);
+        }
+
+        if (userMetadata.size() > 0) {
+            metadata1.putAll(userMetadata);
+        }
+        metadata1.put(FILE_NAME, baseName);
+        metadata1.putAll(metadata);
+
+        return chunk;
+    }
+
+    /**
+     * Splits markdown text into documents using {@link MdChunking} configured
+     * with the given type and caller params.
+     *
+     * @throws IOException if the chunker fails to process the text
+     */
+    public List<GDocument> splitToChunks(String text, String type, Map<String, Object> params) throws IOException {
+        BaseParameters params2 = new BaseParameters(params);
+        params2.put(MD_TYPE, type);
+        IMdChunking chunker = new MdChunking();
+        List<GDocument> chunks = chunker.chunking(text, params2);
+        return chunks;
+    }
+}

+ 15 - 2
server/src/main/java/com/giantan/data/mds/task/impl/SliceTaskHandler.java

@@ -65,6 +65,19 @@ public class SliceTaskHandler extends BaseTaskHandler {
 
     @Override
     public void doing(TaskContext context, Object objectId) {
+        List<String> operations = context.getOperations();
+        for (String operation : operations) {
+            doing(context, objectId, operation);
+        }
+    }
+
+    public void doing(TaskContext context, Object objectId, String operation) {
+        if (operation.equals("slice")) {
+            slicing(context, objectId, "slice");
+        }
+    }
+
+    public void slicing(TaskContext context, Object objectId, String operation) {
 
         //System.out.println("Slicing object: " + objectId);
         try {
@@ -101,9 +114,9 @@ public class SliceTaskHandler extends BaseTaskHandler {
                     //System.out.println("saved="+rets.size());
                 }
             }
-        } catch (Exception e) {
-            throw new RuntimeException(e);
+            context.logSuccess(mdId,operation);
         } catch (Throwable e) {
+            context.logFailure(objectId.toString(),operation,e.getMessage());
             throw new RuntimeException(e);
         }
     }

+ 16 - 0
server/src/main/java/com/giantan/data/tasks/IPersistentTaskService.java

@@ -0,0 +1,16 @@
+package com.giantan.data.tasks;
+
+import com.giantan.data.tasks.repository.TaskStatusHistory;
+
+import java.time.LocalDateTime;
+import java.util.List;
+
+/**
+ * Persistence boundary for task contexts: saving new tasks, updating their
+ * status, and querying/pruning historical task records per collection.
+ */
+public interface IPersistentTaskService {
+    /** Persists a newly submitted task; returns the number of rows written. */
+    int save(String coll, TaskContext task);
+
+    /** Writes the context's current status/progress back to storage; returns rows updated. */
+    int updateTaskStatus(String coll, TaskContext context);
+
+    /**
+     * Lists historical tasks for the collection within [startTime, endTime],
+     * optionally filtered by status (NOTE(review): null-filter semantics are
+     * implementation-defined — confirm against the repository).
+     */
+    List<TaskStatusHistory> getHistoryTasks(String coll, LocalDateTime startTime, LocalDateTime endTime, String status);
+
+    /** Deletes matching historical tasks; returns the number of rows removed. */
+    int deleteHistoryTasks(String coll, LocalDateTime startTime, LocalDateTime endTime, String status);
+}

+ 9 - 1
server/src/main/java/com/giantan/data/tasks/ITaskManager.java

@@ -1,13 +1,15 @@
 package com.giantan.data.tasks;
 
+import com.giantan.data.tasks.repository.TaskStatusHistory;
 import com.google.common.eventbus.EventBus;
 
+import java.time.LocalDateTime;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
 public interface ITaskManager {
-    String submit(String coll, TaskType type, List<Object> objectIds, Map<String, Object> params);
+    String submit(String coll, TaskType type, Map<String, Object> params, List<String> operations);
 
     boolean cancel(String coll, String taskId);
 
@@ -26,4 +28,10 @@ public interface ITaskManager {
     boolean delete(String coll, String id);
 
     Collection<TaskContext> findByStatus(String coll, String status);
+
+    void updateTaskStatus(TaskContext context);
+
+    List<TaskStatusHistory> getHistoryTasks(String coll, LocalDateTime startTime, LocalDateTime endTime, String status);
+
+    int deleteHistoryTasks(String collId, LocalDateTime startTime, LocalDateTime endTime, String status);
 }

+ 14 - 0
server/src/main/java/com/giantan/data/tasks/Readme.java

@@ -1,5 +1,8 @@
 package com.giantan.data.tasks;
 
+import com.google.common.eventbus.AllowConcurrentEvents;
+import com.google.common.eventbus.Subscribe;
+
 public class Readme {
     // 要修改的地方
     // 1、public enum TaskType {
@@ -8,4 +11,15 @@ public class Readme {
     // 2、实现以上的ITaskHandler
     // 3、TaskConfiguration.java 是 @Configuration
     // 4、TaskController
+
+
+    // TaskEventListener listener = new TaskEventListener(registry, manager);
+    //        eventBus.register(listener); // 注册监听器
+    //TaskEventListener
+    // 必须加上 @AllowConcurrentEvents 才会并发执行
+    //
+    //    @Subscribe
+    //    @AllowConcurrentEvents
+    //    public void onTask(TaskEvent event) {
+
 }

+ 74 - 49
server/src/main/java/com/giantan/data/tasks/TaskContext.java

@@ -1,6 +1,5 @@
 package com.giantan.data.tasks;
 
-import com.giantan.data.tasks.repository.TaskStatusHistory;
 
 import java.io.Serializable;
 import java.time.Instant;
@@ -16,76 +15,58 @@ public class TaskContext implements Serializable {
 
     private final String taskId;
     private final String collection;
+
     private final TaskType type;
     private List<Object> objectIds;
     private List<String> operations;
     private final Map<String, Object> params;
     private final AtomicBoolean cancelled = new AtomicBoolean(false);
     private volatile TaskState status;
-    private final Map<String, TaskState> objectStatus = new ConcurrentHashMap<>();
+    //private final Map<String, TaskState> objectStatus = new ConcurrentHashMap<>();
+    private final Map<String, TaskOperationsStatus> objectStatus = new ConcurrentHashMap<>();
     private Map<String, Object> extra = new HashMap<>();
     private volatile String error;
-
     private Instant createdAt;
     private Instant completedAt;
     private int retryCount;
     private int maxRetries;
     private long retryDelayMillis;
 
-    public TaskContext(String taskId, String collection,TaskType type, List<Object> objectIds, Map<String, Object> params) {
+    public TaskContext(String taskId, String collection, TaskType type, Map<String, Object> params) {
         this.taskId = taskId;
         this.collection = collection;
         this.type = type;
-        this.objectIds = objectIds;
+        this.objectIds = new ArrayList<>();
         this.params = params;
         this.createdAt = Instant.now();
         this.status = TaskState.PENDING;
     }
 
-    public static TaskContext from(TaskStatusHistory history) {
-        TaskContext ctx = new TaskContext(
-                history.getTaskId(),
-                history.getCollection(),
-                TaskType.valueOf(history.getTaskType()),
-                new ArrayList<>(history.getObjectIds()),
-                history.getParams()
-        );
-        ctx.status = history.getStatus();
-        ctx.error = history.getError();
-        ctx.createdAt = history.getCreatedAt();
-        ctx.completedAt = history.getCompletedAt();
-        ctx.retryCount = history.getRetryCount();
-        ctx.maxRetries = history.getMaxRetries();
-        ctx.retryDelayMillis = history.getRetryDelayMillis();
-
-        // 将 object_statuses 映射为 objectStatus 中的 status 字段
-        Map<String, TaskObjectStatus> detailed = history.getObjectStatuses();
-        for (Map.Entry<String, TaskObjectStatus> e : detailed.entrySet()) {
-            ctx.objectStatus.put(e.getKey(), e.getValue().getStatus());
-        }
 
-        return ctx;
+    public record Pair<L, R>(L left, R right) {
     }
 
-    public record Pair<L, R>(L left, R right) {}
-
     public String getTaskId() {
         return taskId;
     }
 
-    public void setObjectIds(List<Object> objectIds) {
+    public TaskType getType() {
+        return type;
+    }
+
+    public synchronized void setObjectIds(List<Object> objectIds) {
         this.objectIds = objectIds;
     }
 
-    public List<Object> getObjectIds() {
+    public synchronized List<Object> getObjectIds() {
         return objectIds;
     }
 
-    public List<String> getOperations() {
+    public synchronized List<String> getOperations() {
         return operations;
     }
 
-    public void setOperations(List<String> operations) {
+    public synchronized void setOperations(List<String> operations) {
         this.operations = operations;
     }
 
@@ -105,11 +86,11 @@ public class TaskContext implements Serializable {
         return createdAt;
     }
 
-    public TaskState getStatus() {
+    public synchronized TaskState getStatus() {
         return status;
     }
 
-    public void setStatus(TaskState status) {
+    public synchronized void setStatus(TaskState status) {
         this.status = status;
         if (isTerminal()) {
             markCompleted();
@@ -120,7 +101,7 @@ public class TaskContext implements Serializable {
         return completedAt;
     }
 
-    public void markCompleted() {
+    public synchronized void markCompleted() {
         this.completedAt = Instant.now();
     }
 
@@ -136,7 +117,7 @@ public class TaskContext implements Serializable {
         return retryCount;
     }
 
-    public void setRetryCount(int retryCount) {
+    public synchronized void setRetryCount(int retryCount) {
         this.retryCount = retryCount;
     }
 
@@ -144,7 +125,7 @@ public class TaskContext implements Serializable {
         return maxRetries;
     }
 
-    public void setMaxRetries(int maxRetries) {
+    public synchronized void setMaxRetries(int maxRetries) {
         this.maxRetries = maxRetries;
     }
 
@@ -152,7 +133,7 @@ public class TaskContext implements Serializable {
         return retryDelayMillis;
     }
 
-    public void setRetryDelayMillis(long retryDelayMillis) {
+    public synchronized void setRetryDelayMillis(long retryDelayMillis) {
         this.retryDelayMillis = retryDelayMillis;
     }
 
@@ -160,24 +141,68 @@ public class TaskContext implements Serializable {
         return retryCount < maxRetries;
     }
 
-    public void incrementRetry() {
+    public synchronized void incrementRetry() {
         retryCount++;
     }
 
-    public void logSuccess(String objectId) {
-        objectStatus.put(objectId, TaskState.SUCCESS);
+    public synchronized void logSuccess(String objectId, String operation) {
+        TaskOperationsStatus status1 = objectStatus.get(objectId);
+        if (status1 == null) {
+            status1 = new TaskOperationsStatus();
+            objectStatus.put(objectId, status1);
+        }
+        status1.logSuccess(operation);
+    }
+
+    public synchronized void logFailure(String objectId, String operation, String reason) {
+        TaskOperationsStatus status1 = objectStatus.get(objectId);
+        if (status1 == null) {
+            status1 = new TaskOperationsStatus();
+            objectStatus.put(objectId, status1);
+        }
+        status1.logFailure(operation, reason);
     }
 
-    public void logFailure(String objectId, String reason) {
-        objectStatus.put(objectId, TaskState.FAILED);
+    public synchronized Map<String, TaskOperationsStatus> getObjectStatus() {
+        Map<String, TaskOperationsStatus> clone = Map.copyOf(objectStatus);
+        return clone;
     }
 
-    public Map<String, TaskState> getObjectStatus() {
-        return objectStatus;
+    public synchronized void putOperationsStatus(String objectId, TaskOperationsStatus status) {
+        objectStatus.put(objectId, status);
     }
 
-    public String getError() { return error; }
-    public void setError(String error) { this.error = error; }
+    public synchronized boolean isAllSuccess() {
+        if (objectIds.size() == objectStatus.size()) {
+            final int[] fails = new int[1];
+            objectStatus.forEach((k, v) -> {
+                if (!v.isAllSuccess()) {
+                    fails[0]++;
+                }
+            });
+            if (fails[0] > 0) {
+                return false;
+            }
+            return true;
+        }
+        return false;
+    }
+
+    public void setCreatedAt(Instant createdAt) {
+        this.createdAt = createdAt;
+    }
+
+    public void setCompletedAt(Instant completedAt) {
+        this.completedAt = completedAt;
+    }
+
+    public String getError() {
+        return error;
+    }
+
+    public synchronized void setError(String error) {
+        this.error = error;
+    }
 
     public Map<String, Object> getParams() {
         return params;
@@ -187,7 +212,7 @@ public class TaskContext implements Serializable {
         return extra;
     }
 
-    public void setExtra(Map<String, Object> extra) {
+    public synchronized void setExtra(Map<String, Object> extra) {
         this.extra = extra;
     }
 

+ 8 - 3
server/src/main/java/com/giantan/data/tasks/TaskEvent.java

@@ -1,20 +1,25 @@
 package com.giantan.data.tasks;
 
+import lombok.Data;
+
 import java.util.List;
 import java.util.Map;
 
+@Data
 public class TaskEvent {
     public String taskId;
     public String collection;
     public TaskType type;
-    public List<Object> objectIds;
+    //public List<Object> objectIds;
     public Map<String, Object> params;
 
-    public TaskEvent(String taskId, String collection, TaskType type, List<Object> objectIds, Map<String, Object> params) {
+    public TaskEvent(String taskId, String collection, TaskType type,  Map<String, Object> params) {
         this.taskId = taskId;
         this.collection = collection;
         this.type = type;
-        this.objectIds = objectIds;
+        //this.objectIds = objectIds;
         this.params = params;
     }
+
+
 }

+ 10 - 32
server/src/main/java/com/giantan/data/tasks/TaskEventListener.java

@@ -1,9 +1,9 @@
 package com.giantan.data.tasks;
 
+import com.google.common.eventbus.AllowConcurrentEvents;
 import com.google.common.eventbus.Subscribe;
 
 import java.lang.invoke.MethodHandles;
-import java.util.Map;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
@@ -20,34 +20,11 @@ public class TaskEventListener {
         this.manager = manager;
     }
 
-//    @PostConstruct
-//    public void init() {
-//        eventBus.register(this);
-//    }
-
-//    @Subscribe
-//    public void onTask(TaskEvent event) {
-//
-//        TaskContext context = manager.getTask(event.taskId);
-//        if (context == null || context.isCancelled()) return;
-//
-//        ITaskHandler handler = registry.get(event.type);
-//        if (handler != null) {
-//            try {
-//                handler.handle(context);
-//                context.setStatus(TaskStatus.SUCCESS);
-//            } catch (Exception e) {
-//                context.setStatus(TaskStatus.FAILED);
-//                context.setError(e.getMessage());
-//            }
-//        } else {
-//            context.setStatus(TaskStatus.UNKNOWN);
-//            context.setError("No handler found");
-//        }
-//    }
-
+    // 必须加上 @AllowConcurrentEvents 才会并发执行
     @Subscribe
+    @AllowConcurrentEvents
     public void onTask(TaskEvent event) {
+        log.info("Task event: {}", event);
         TaskContext context = manager.getTask(event.collection, event.taskId);
         if (context == null || context.isCancelled()) return;
 
@@ -66,16 +43,16 @@ public class TaskEventListener {
             // 下面 细化一下
             if (context.getStatus() == TaskState.RUNNING) {
                 context.setStatus(TaskState.SUCCESS);
-                Map<String, TaskState> objectStatus = context.getObjectStatus();
-                if (objectStatus != null && objectStatus.size() > 0) {
-                    if (objectStatus.containsValue(TaskState.FAILED)) {
-                        context.setStatus(TaskState.FAILED);
-                    }
+
+                if (!context.isAllSuccess()) {
+                    context.setStatus(TaskState.FAILED);
                 }
             }
+            manager.updateTaskStatus(context);
         } catch (Exception e) {
             context.incrementRetry();
             context.setError(e.getMessage());
+            e.printStackTrace();
             log.error("Task: {} failed. {}", context.getTaskId(), e.getMessage());
             if (context.canRetry()) {
                 log.error("任务失败,准备重试:" + context.getTaskId() + ",第 " + context.getRetryCount() + " 次");
@@ -88,6 +65,7 @@ public class TaskEventListener {
                     log.error("任务失败,超过最大重试次数:" + context.getTaskId());
                 }
             }
+            manager.updateTaskStatus(context);
         }
     }
 

+ 60 - 9
server/src/main/java/com/giantan/data/tasks/TaskManager.java

@@ -1,9 +1,12 @@
 package com.giantan.data.tasks;
 
+
+import com.giantan.data.tasks.repository.TaskStatusHistory;
 import com.google.common.eventbus.EventBus;
 
 import java.time.Duration;
 import java.time.Instant;
+import java.time.LocalDateTime;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Executors;
@@ -14,6 +17,9 @@ import java.util.stream.Collectors;
 public class TaskManager implements ITaskManager {
 
     private final Map<String, TaskContext> tasks = new ConcurrentHashMap<>();
+
+    private IPersistentTaskService persistentTaskService;
+
     private final EventBus eventBus;
 
     //@Value("${task.cleanup.expire-minutes:10}")
@@ -34,9 +40,17 @@ public class TaskManager implements ITaskManager {
         return eventBus;
     }
 
+    public IPersistentTaskService getPersistentTaskService() {
+        return persistentTaskService;
+    }
+
+    public void setPersistentTaskService(IPersistentTaskService persistentTaskService) {
+        this.persistentTaskService = persistentTaskService;
+    }
+
 
     public String submit(String coll, TaskType type, Map<String, Object> payload) {
-        List<Object> objects = null;
+        //List<Object> objects = new ArrayList<>();
 
 //        if (payload.containsKey("objectIds")) {
 //            objects = (List<Object>) payload.remove("objectIds");
@@ -51,17 +65,54 @@ public class TaskManager implements ITaskManager {
 //        } else {
 //            throw new IllegalArgumentException("必须提供 objectIds 或 fromId/toId");
 //        }
-        return submit(coll,type,objects,payload);
+        List<String> operations = null;
+        Object o = payload.get("operations");
+        if (o != null && o instanceof List) {
+            operations = (List<String>) o;
+        }else{
+            throw new IllegalArgumentException("必须提供 operations");
+        }
+        return submit(coll,type,payload, operations);
     }
 
-    public String submit(String coll, TaskType type, List<Object> objectIds, Map<String, Object> params) {
+    @Override
+    public synchronized String submit(String coll, TaskType type, Map<String, Object> params, List<String> operations) {
         String taskId = UUID.randomUUID().toString();
-        TaskContext context = new TaskContext(taskId, coll, type, objectIds, params);
+        List<Object> objectIds = new ArrayList<>();
+        TaskContext context = new TaskContext(taskId, coll, type, params);
+        context.setOperations(operations);
+
         tasks.put(taskId, context);
-        eventBus.post(new TaskEvent(taskId, coll, type, objectIds, params));
+
+        // 存 db
+        save(context);
+
+        eventBus.post(new TaskEvent(taskId, coll, type, params));
         return taskId;
     }
 
+    private void save(TaskContext context){
+        persistentTaskService.save(context.getCollection(),context);
+    }
+
+    @Override
+    public void updateTaskStatus(TaskContext context) {
+       // objectIds,status,objectStatus,extra,error,completedAt,retryCount,maxRetries,retryDelayMillis,
+        persistentTaskService.updateTaskStatus(context.getCollection(),context);
+    }
+
+    @Override
+    public List<TaskStatusHistory> getHistoryTasks(String coll, LocalDateTime startTime, LocalDateTime endTime, String status) {
+        List<TaskStatusHistory> rs = persistentTaskService.getHistoryTasks(coll,startTime,endTime,status);
+        return rs;
+    }
+
+    @Override
+    public int deleteHistoryTasks(String coll, LocalDateTime startTime, LocalDateTime endTime, String status) {
+        int r =  persistentTaskService.deleteHistoryTasks(coll,startTime,endTime,status);
+        return r;
+    }
+
     public boolean cancel(String coll, String taskId) {
         TaskContext context = tasks.get(taskId);
         if (context != null) {
@@ -72,7 +123,7 @@ public class TaskManager implements ITaskManager {
     }
 
     @Override
-    public boolean delete(String coll, String taskId) {
+    public synchronized boolean delete(String coll, String taskId) {
         TaskContext context = tasks.get(taskId);
         if (context != null && context.isTerminal()) {
             tasks.remove(taskId);
@@ -88,7 +139,6 @@ public class TaskManager implements ITaskManager {
         Collection<TaskContext> values2 = new ArrayList<>();
 
         for (TaskContext context : values) {
-
             if (context.getCollection().equals(coll) && context.getStatus() == taskType) {
                 values2.add(context);
             }
@@ -96,6 +146,7 @@ public class TaskManager implements ITaskManager {
         return values2;
     }
 
+
     public TaskContext getTask(String coll, String taskId) {
         return tasks.get(taskId);
     }
@@ -112,7 +163,7 @@ public class TaskManager implements ITaskManager {
         return values2;
     }
 
-    private void cleanupTasks() {
+    private synchronized void cleanupTasks() {
         Instant now = Instant.now();
         List<Map.Entry<String, TaskContext>> candidates = tasks.entrySet()
                 .stream()
@@ -134,7 +185,7 @@ public class TaskManager implements ITaskManager {
     }
 
     //@Override
-    protected int cleanupTasks(String coll) {
+    protected synchronized int cleanupTasks(String coll) {
         //Instant now = Instant.now();
         List<Map.Entry<String, TaskContext>> candidates = tasks.entrySet()
                 .stream()

+ 12 - 0
server/src/main/java/com/giantan/data/tasks/TaskObjectStatus.java

@@ -8,7 +8,19 @@ import lombok.NoArgsConstructor;
 @NoArgsConstructor
 @AllArgsConstructor
 public class TaskObjectStatus {
+    String operation;
     private TaskState status;
     private String error;
     private int attempt;
+
+    public TaskObjectStatus(String operation, TaskState status) {
+        this.operation = operation;
+        this.status = status;
+    }
+
+    public TaskObjectStatus(String operation, TaskState status, String error) {
+        this.operation = operation;
+        this.status = status;
+        this.error = error;
+    }
 }

+ 66 - 0
server/src/main/java/com/giantan/data/tasks/TaskOperationsStatus.java

@@ -0,0 +1,66 @@
+package com.giantan.data.tasks;
+
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Per-object status record: a task contains multiple operations, and this class
+ * accumulates one {@link TaskObjectStatus} entry per executed operation.
+ *
+ * Not thread-safe on its own (plain ArrayList); callers such as TaskContext are
+ * expected to synchronize access — NOTE(review): confirm all mutation paths do.
+ */
+public class TaskOperationsStatus implements Serializable {
+    List<TaskObjectStatus> states;
+
+    public TaskOperationsStatus() {
+        states = new ArrayList<TaskObjectStatus>();
+    }
+
+    public List<TaskObjectStatus> getStates() {
+        return states;
+    }
+
+    public void setStates(List<TaskObjectStatus> states) {
+        this.states = states;
+    }
+
+    /** Appends a pre-built status entry. */
+    public void addState(TaskObjectStatus state) {
+        states.add(state);
+    }
+
+    /** Records an operation result with an explicit state. */
+    public void log(String operation, TaskState status) {
+        states.add(new TaskObjectStatus(operation, status));
+    }
+
+    /** Records a successful operation. */
+    public void logSuccess(String operation) {
+        states.add(new TaskObjectStatus(operation, TaskState.SUCCESS));
+    }
+
+    /** Records a failed operation without a reason. */
+    public void logFailure(String operation) {
+        states.add(new TaskObjectStatus(operation, TaskState.FAILED));
+    }
+
+    /** Records a failed operation with a failure reason. */
+    public void logFailure(String operation, String reason) {
+        states.add(new TaskObjectStatus(operation, TaskState.FAILED, reason));
+    }
+
+    /**
+     * True when every recorded operation succeeded. Vacuously true for an empty
+     * list — NOTE(review): an object with no recorded operations counts as
+     * all-success; confirm that matches TaskContext.isAllSuccess expectations.
+     * Excluded from JSON serialization via @JsonIgnore.
+     */
+    @JsonIgnore
+    public boolean isAllSuccess() {
+        boolean success = true;
+        for (TaskObjectStatus state : states) {
+            if (state.getStatus() != TaskState.SUCCESS) {
+                success = false;
+                break;
+            }
+        }
+        return success;
+    }
+
+    // NOTE(review): class is named TaskOperationsStatus but toString prints
+    // "TaskOperationStatus" — runtime string left unchanged here.
+    @Override
+    public String toString() {
+        return "TaskOperationStatus{" +
+                "states=" + states +
+                '}';
+    }
+
+
+}

+ 1 - 1
server/src/main/java/com/giantan/data/tasks/TaskType.java

@@ -1,5 +1,5 @@
 package com.giantan.data.tasks;
 
 public enum TaskType {
-    UPLOAD, SLICE, CHUNK,EXTRACT_KEYWORDS, EMBEDDING, INDEX
+    UPLOAD, SLICE,EXTRACT_KEYWORDS, EMBEDDING, INDEX, MD,CHUNK
 }

+ 187 - 3
server/src/main/java/com/giantan/data/tasks/repository/DynamicTaskRepository.java

@@ -1,9 +1,22 @@
 package com.giantan.data.tasks.repository;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.giantan.ai.util.id.IdGenerator;
 import com.giantan.ai.util.id.UuidGenerator;
+import com.giantan.data.tasks.TaskOperationsStatus;
+import com.giantan.data.tasks.TaskState;
+import com.giantan.data.tasks.TaskType;
 import org.springframework.jdbc.core.JdbcTemplate;
+import org.springframework.jdbc.core.RowMapper;
+
+import java.io.IOException;
+import java.sql.*;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
 
 public class DynamicTaskRepository {
 
@@ -23,7 +36,6 @@ public class DynamicTaskRepository {
                     id BIGSERIAL PRIMARY KEY,
                       task_id VARCHAR(64) NOT NULL,
                       task_type VARCHAR(32) NOT NULL,
-                      collection VARCHAR(64),
                       object_ids TEXT,
                       object_statuses JSONB,
                       params JSONB,
@@ -55,8 +67,6 @@ public class DynamicTaskRepository {
 
     //@Override
     public int createTable(String collId) {
-        String suffix = collId + "";
-
         String createTaskTable = TASK_TABLE_TEMPLATE.formatted(tableName(collId));
         jdbc.execute(createTaskTable);
 
@@ -77,4 +87,178 @@ public class DynamicTaskRepository {
         String sql = String.format("DELETE FROM %s ;", tableName(collId));
         jdbc.update(sql);
     }
+
+    public int save(String collId, TaskStatusHistory task) {
+        String sql1 = """
+                INSERT INTO %s (
+                    task_id, task_type, object_ids, object_statuses, 
+                    params, extra, status, error, retry_count, max_retries, retry_delay_millis,
+                    created_at, completed_at, is_current
+                ) VALUES (?, ?, ?, ?::jsonb, ?::jsonb, ?::jsonb, ?, ?, ?, ?, ?, ?, ?, ?)
+                """;
+        String sql = String.format(sql1, tableName(collId));
+
+        //KeyHolder keyHolder = new GeneratedKeyHolder();
+        int updated = jdbc.update(con -> {
+            PreparedStatement ps = con.prepareStatement(sql, Statement.RETURN_GENERATED_KEYS);
+            ps.setString(1, task.getTaskId());
+            ps.setString(2, task.getTaskType().name());
+            //ps.setString(3, task.getCollection());
+            ps.setString(3, toJson(task.getObjectIds()));
+            ps.setString(4, task.getObjectStatuses() == null ? "{}" : toJson(task.getObjectStatuses()));
+            ps.setString(5, task.getParams() == null ? "{}" : toJson(task.getParams()));
+            ps.setString(6, task.getExtra() == null ? "{}" : toJson(task.getExtra()));
+            ps.setString(7, task.getStatus().name());
+            ps.setString(8, task.getError());
+            ps.setInt(9, task.getRetryCount());
+            ps.setInt(10, task.getMaxRetries());
+            ps.setLong(11, task.getRetryDelayMillis());
+            ps.setTimestamp(12, Timestamp.from(task.getCreatedAt()));
+            ps.setTimestamp(13, task.getCompletedAt() != null ? Timestamp.from(task.getCompletedAt()) : null);
+            ps.setBoolean(14, task.isCurrent());
+            return ps;
+        });
+
+        return updated;
+    }
+
+    public int updateTaskStatus(String collId, TaskStatusHistory taskContext) {
+        String sql1 = """
+                UPDATE %s
+                SET object_ids = ?,
+                    status = ?,
+                    object_statuses = ?::jsonb,
+                    extra = ?::jsonb,
+                    error = ?,
+                    completed_at = ?,
+                    retry_count = ?,
+                    max_retries = ?,
+                    retry_delay_millis = ?,
+                    is_current = ?
+                WHERE task_id = ?
+                """;
+        String sql = String.format(sql1, tableName(collId));
+        try {
+            String objectIdsJson = taskContext.getObjectIds() == null ? null :
+                    toJson(taskContext.getObjectIds());
+
+            String objectStatusesJson = taskContext.getObjectStatuses() == null ? null :
+                    toJson(taskContext.getObjectStatuses());
+
+            String extraJson = taskContext.getExtra() == null ? null : toJson(taskContext.getExtra());
+
+            int updated = jdbc.update(sql,
+                    objectIdsJson,
+                    taskContext.getStatus() == null ? null : taskContext.getStatus().name(),
+                    objectStatusesJson,
+                    extraJson,
+                    taskContext.getError(),
+                    taskContext.getCompletedAt() == null ? null : Timestamp.from(taskContext.getCompletedAt()),
+                    taskContext.getRetryCount(),
+                    taskContext.getMaxRetries(),
+                    taskContext.getRetryDelayMillis(),
+                    taskContext.isCurrent(),
+                    taskContext.getTaskId()
+            );
+            return updated;
+        } catch (Exception e) {
+            throw new RuntimeException(e.getMessage(), e);
+        }
+    }
+
+    public List<TaskStatusHistory> getHistoryTasks(String collId, LocalDateTime startTime, LocalDateTime endTime, String status) {
+        String sql1 = String.format("SELECT * FROM %s WHERE 1=1", tableName(collId));
+        StringBuilder sql = new StringBuilder(sql1);
+
+        List<Object> params = new ArrayList<>();
+
+        if (startTime != null) {
+            sql.append(" AND created_at >= ?");
+            params.add(Timestamp.valueOf(startTime));
+        }
+        if (endTime != null) {
+            sql.append(" AND created_at <= ?");
+            params.add(Timestamp.valueOf(endTime));
+        }
+        if (status != null && !status.isBlank()) {
+            sql.append(" AND status = ?");
+            params.add(status);
+        }
+//        if (taskType != null && !taskType.isBlank()) {
+//            sql.append(" AND task_type = ?");
+//            params.add(taskType);
+//        }
+
+        return jdbc.query(sql.toString(), params.toArray(), new TaskStatusHistoryRowMapper());
+    }
+
+    public int deleteHistoryTasks(String collId, LocalDateTime startTime, LocalDateTime endTime, String status) {
+        String sql1 = String.format("DELETE FROM %s WHERE 1=1", tableName(collId));
+        StringBuilder sql = new StringBuilder(sql1);
+
+        List<Object> params = new ArrayList<>();
+
+        if (startTime != null) {
+            sql.append(" AND created_at >= ?");
+            params.add(Timestamp.valueOf(startTime));
+        }
+        if (endTime != null) {
+            sql.append(" AND created_at <= ?");
+            params.add(Timestamp.valueOf(endTime));
+        }
+        if (status != null && !status.isBlank()) {
+            sql.append(" AND status = ?");
+            params.add(status);
+        }
+        return jdbc.update(sql.toString(), params.toArray());
+    }
+
+    public class TaskStatusHistoryRowMapper implements RowMapper<TaskStatusHistory> {
+        @Override
+        public TaskStatusHistory mapRow(ResultSet rs, int rowNum) throws SQLException {
+            TaskStatusHistory task = new TaskStatusHistory();
+            task.setId(rs.getLong("id"));
+            task.setTaskId(rs.getString("task_id"));
+            task.setTaskType(TaskType.valueOf(rs.getString("task_type")));
+            //task.setCollection(rs.getString("collection"));
+
+            //task.setObjectIds(mapper.readValue(rs.getString("object_ids"), new TypeReference<>() {}));
+            task.setObjectIds(fromJson(rs.getString("object_ids"), new TypeReference<List>() {
+            }));
+            task.setObjectStatuses(fromJson(rs.getString("object_statuses"), new TypeReference<Map<String, TaskOperationsStatus>>() {
+            }));
+            task.setParams(fromJson(rs.getString("params"), new TypeReference<Map<String, Object>>() {
+            }));
+            task.setExtra(fromJson(rs.getString("extra"), new TypeReference<Map<String, Object>>() {
+            }));
+
+            task.setStatus(TaskState.valueOf(rs.getString("status")));
+            task.setError(rs.getString("error"));
+            task.setRetryCount(rs.getInt("retry_count"));
+            task.setMaxRetries(rs.getInt("max_retries"));
+            task.setRetryDelayMillis(rs.getLong("retry_delay_millis"));
+            task.setCreatedAt(rs.getTimestamp("created_at").toInstant());
+            Timestamp completed = rs.getTimestamp("completed_at");
+            if (completed != null) task.setCompletedAt(completed.toInstant());
+            task.setCurrent(rs.getBoolean("is_current"));
+            return task;
+        }
+    }
+
+    public String toJson(Object obj) {
+        try {
+            return mapper.writeValueAsString(obj);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public <T> T fromJson(String json, TypeReference<T> typeRef) {
+        try {
+            return mapper.readValue(json, typeRef);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
 }

+ 0 - 30
server/src/main/java/com/giantan/data/tasks/repository/JsonMapper2.java

@@ -1,30 +0,0 @@
-package com.giantan.data.tasks.repository;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import org.springframework.stereotype.Component;
-
-import java.io.IOException;
-
-@Component
-public class JsonMapper2 {
-
-    private final ObjectMapper objectMapper = new ObjectMapper();
-
-    public String toJson(Object obj) {
-        try {
-            return objectMapper.writeValueAsString(obj);
-        } catch (JsonProcessingException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    public <T> T fromJson(String json, TypeReference<T> typeRef) {
-        try {
-            return objectMapper.readValue(json, typeRef);
-        } catch (IOException e) {
-            throw new RuntimeException(e);
-        }
-    }
-}

+ 18 - 1
server/src/main/java/com/giantan/data/tasks/repository/PersistentTaskManager.java

@@ -5,13 +5,14 @@ import com.giantan.data.tasks.TaskContext;
 import com.giantan.data.tasks.TaskType;
 import com.google.common.eventbus.EventBus;
 
+import java.time.LocalDateTime;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
 public class PersistentTaskManager implements ITaskManager {
    // TODO(review): persistence-backed submit is not implemented yet — this stub
    // persists nothing and returns an empty task id.
    @Override
    public String submit(String collection, TaskType type, Map<String, Object> params, List<String> operations) {
        return "";
    }
 
@@ -80,6 +81,22 @@ public class PersistentTaskManager implements ITaskManager {
         return List.of();
     }
 
    // TODO(review): no-op stub — task status updates are not persisted yet.
    @Override
    public void updateTaskStatus(TaskContext context) {

    }
+
    // TODO(review): stub — always returns an empty, immutable list.
    @Override
    public List<TaskStatusHistory> getHistoryTasks(String coll, LocalDateTime startTime, LocalDateTime endTime, String status) {
        return List.of();
    }
+
    // TODO(review): stub — reports zero deletions without touching storage.
    @Override
    public int deleteHistoryTasks(String collId, LocalDateTime startTime, LocalDateTime endTime, String status) {
        return 0;
    }
+
+
 //    private final TaskStatusHistoryRepository repository;
 //    private final EventBus eventBus;
 //

+ 153 - 0
server/src/main/java/com/giantan/data/tasks/repository/TaskConverter.java

@@ -0,0 +1,153 @@
+package com.giantan.data.tasks.repository;
+
+import com.giantan.data.tasks.*;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class TaskConverter {
+
+    public static TaskStatusHistory toHistory(TaskContext context) {
+        TaskStatusHistory history = new TaskStatusHistory();
+        history.setTaskId(context.getTaskId());
+        history.setCollection(context.getCollection());
+        history.setTaskType(context.getType());
+        history.setObjectIds(context.getObjectIds());
+        history.setObjectStatuses(context.getObjectStatus());
+        //if (context.getObjectStatus() != null){
+        //    history.setObjectStatuses(toMap(context.getObjectStatus()));
+        //}
+
+        history.setParams(context.getParams() != null ? new HashMap<>(context.getParams()) : null);
+        history.setExtra(context.getExtra() != null ? new HashMap<>(context.getExtra()) : null);
+
+        history.setStatus(context.getStatus());
+        history.setError(context.getError());
+        history.setRetryCount(context.getRetryCount());
+        history.setMaxRetries(context.getMaxRetries());
+        history.setRetryDelayMillis(context.getRetryDelayMillis());
+
+        history.setCreatedAt(context.getCreatedAt());
+        history.setCompletedAt(context.getCompletedAt());
+        history.setCurrent(!context.isTerminal());// 默认 true
+
+        return history;
+    }
+
+    public static TaskContext toContext(TaskStatusHistory history) {
+        TaskContext context = new TaskContext(
+                history.getTaskId(),
+                history.getCollection(),
+                history.getTaskType(),
+                history.getParams() != null ? new HashMap<>(history.getParams()) : new HashMap<>()
+        );
+
+        if (history.getObjectIds() != null) {
+            context.setObjectIds(new ArrayList<>(history.getObjectIds())); // List<String> → List<Object>
+        }
+        Map<String, TaskOperationsStatus> objectStatuses = history.getObjectStatuses();
+        if (objectStatuses != null) {
+            objectStatuses.forEach((key, value) -> {
+                context.putOperationsStatus(key, value);
+            });
+        }
+//        if (objectStatuses != null) {
+//            // ⚠️ 这里要判断 history 中的 objectStatuses 是否能转成 TaskOperationsStatus
+//            //Map<String, TaskOperationsStatus> statusMap = new HashMap<>();
+//            objectStatuses.forEach((key, value) -> {
+//                TaskOperationsStatus status = toTaskOperationsStatus((Map<String, Object>) value);
+//                context.putOperationsStatus(key, status);
+//            });
+//        }
+
+        context.setExtra(history.getExtra() != null ? new HashMap<>(history.getExtra()) : new HashMap<>());
+        context.setStatus(history.getStatus());
+        context.setError(history.getError());
+        context.setRetryCount(history.getRetryCount());
+        context.setMaxRetries(history.getMaxRetries());
+        context.setRetryDelayMillis(history.getRetryDelayMillis());
+
+        context.setCreatedAt(history.getCreatedAt());
+        context.setCompletedAt(history.getCompletedAt());
+
+        return context;
+    }
+
+
+    private static TaskOperationsStatus toTaskOperationsStatus(Map<String, Object> m) {
+        TaskOperationsStatus r = null;
+        Object o = m.get("states");
+        if (o != null && o instanceof List<?>) {
+            r = new TaskOperationsStatus();
+            List<Object> ls = (List<Object>) o;
+            for (Object l : ls) {
+                r.addState(toTaskObjectStatus((Map) l));
+            }
+        }
+        return r;
+    }
+
+    private static TaskObjectStatus toTaskObjectStatus(Map<String, Object> m) {
+        TaskObjectStatus r = new TaskObjectStatus();
+        Object o = m.get("operation");
+        if (o != null) {
+            r.setOperation((String) o);
+        }
+        o = m.get("status");
+        if (o != null) {
+            r.setStatus(TaskState.valueOf((String) o));
+        }
+        o = m.get("error");
+        if (o != null) {
+            r.setError((String) o);
+        }
+        o = m.get("attempt");
+        if (o != null) {
+            r.setAttempt(Integer.parseInt((String) o));
+        }
+        return r;
+
+
+    }
+
+    private static Map<String, Object> toMap(Map<String, TaskOperationsStatus> taskDetails) {
+        Map<String, Object> result = new HashMap<>();
+        taskDetails.forEach((k, v) -> {
+            result.put(k, toMap(v));
+        });
+        return result;
+    }
+
+
+    //private final Map<String, TaskOperationsStatus> objectStatus
+    private static Map<String, Object> toMap(TaskOperationsStatus status) {
+        Map<String, Object> map = new HashMap<>();
+        String key = "states";
+        List<Map> value = new ArrayList<>();
+        List<TaskObjectStatus> states = status.getStates();
+        for (TaskObjectStatus s : states) {
+            Map<String, Object> ms = toObjectStateMap(s);
+            value.add(ms);
+        }
+        map.put(key, value);
+        return map;
+    }
+
+    private static Map<String, Object> toObjectStateMap(TaskObjectStatus s) {
+        //    String operation;
+        //    private TaskState status;
+        //    private String error;
+        //    private int attempt;
+        Map<String, Object> map = new HashMap<>();
+        map.put("operation", s.getOperation());
+        map.put("status", s.getStatus().name());
+        if (s.getError() != null) {
+            map.put("error", s.getError());
+        }
+        map.put("attempt", s.getAttempt());
+        return map;
+    }
+
+}

+ 5 - 3
server/src/main/java/com/giantan/data/tasks/repository/TaskStatusHistory.java

@@ -1,7 +1,9 @@
 package com.giantan.data.tasks.repository;
 
 import com.giantan.data.tasks.TaskObjectStatus;
+import com.giantan.data.tasks.TaskOperationsStatus;
 import com.giantan.data.tasks.TaskState;
+import com.giantan.data.tasks.TaskType;
 import lombok.Data;
 
 import java.time.Instant;
@@ -32,9 +34,9 @@ public class TaskStatusHistory {
     private Long id;
     private String taskId;
     private String collection;
-    private String taskType;
-    private List<String> objectIds;
-    private Map<String, TaskObjectStatus> objectStatuses;
+    private TaskType taskType;
+    private List<Object> objectIds;
+    private Map<String, TaskOperationsStatus> objectStatuses;
     private Map<String, Object> params;
     private Map<String, Object> extra; // ✅ 新增字段
 

+ 95 - 95
server/src/main/java/com/giantan/data/tasks/repository/TaskStatusHistoryRepository.java

@@ -13,99 +13,99 @@ import java.util.Map;
 //@Repository
 public class TaskStatusHistoryRepository {
 
-    private final JdbcTemplate jdbcTemplate;
-    private final JsonMapper2 jsonMapper;
-
-    public TaskStatusHistoryRepository(JdbcTemplate jdbcTemplate, JsonMapper2 jsonMapper) {
-        this.jdbcTemplate = jdbcTemplate;
-        this.jsonMapper = jsonMapper;
-    }
-
-    public void save(TaskStatusHistory task) {
-        String sql = """
-            INSERT INTO task_status_history (
-                task_id, task_type, object_ids, object_statuses, params,
-                extra, status, error, retry_count, max_retries, retry_delay_millis,
-                created_at, completed_at, is_current
-            ) VALUES (?, ?, ?, ?::jsonb, ?::jsonb, ?::jsonb, ?, ?, ?, ?, ?, ?, ?, ?)
-            ON CONFLICT (task_id, is_current)
-            DO UPDATE SET
-              object_statuses = EXCLUDED.object_statuses,
-              params = EXCLUDED.params,
-              extra = EXCLUDED.extra,
-              status = EXCLUDED.status,
-              error = EXCLUDED.error,
-              retry_count = EXCLUDED.retry_count,
-              completed_at = EXCLUDED.completed_at
-        """;
-
-        jdbcTemplate.update(sql,
-                task.getTaskId(),
-                task.getTaskType(),
-                jsonMapper.toJson(task.getObjectIds()),
-                jsonMapper.toJson(task.getObjectStatuses()),
-                jsonMapper.toJson(task.getParams()),
-                jsonMapper.toJson(task.getExtra()),   // ✅ 序列化 extra
-                task.getStatus().name(),
-                task.getError(),
-                task.getRetryCount(),
-                task.getMaxRetries(),
-                task.getRetryDelayMillis(),
-                Timestamp.from(task.getCreatedAt()),
-                task.getCompletedAt() != null ? Timestamp.from(task.getCompletedAt()) : null,
-                task.isCurrent()
-        );
-    }
-
-    public TaskStatusHistory findCurrentByTaskId(String taskId) {
-        String sql = "SELECT * FROM task_status_history WHERE task_id = ? AND is_current = true";
-        return jdbcTemplate.queryForObject(sql, new Object[]{taskId}, mapRow());
-    }
-
-    public List<TaskStatusHistory> findAllCurrent() {
-        return jdbcTemplate.query("SELECT * FROM task_status_history WHERE is_current = true", mapRow());
-    }
-
-    public void updateObjectStatus(String taskId, String objectId, TaskObjectStatus newStatus) {
-        TaskStatusHistory task = findCurrentByTaskId(taskId);
-        Map<String, TaskObjectStatus> statuses = task.getObjectStatuses();
-        statuses.put(objectId, newStatus);
-
-        // 这里只更新 object_statuses 字段
-        String sql = "UPDATE task_status_history SET object_statuses = ?::jsonb WHERE id = ?";
-        jdbcTemplate.update(sql, jsonMapper.toJson(statuses), task.getId());
-    }
-
-    public void updateExtra(String taskId, Map<String, Object> extra) {
-        String sql = """
-        UPDATE task_status_history
-        SET extra = ?::jsonb
-        WHERE task_id = ? AND is_current = true
-    """;
-
-        jdbcTemplate.update(sql, jsonMapper.toJson(extra), taskId);
-    }
-
-    private RowMapper<TaskStatusHistory> mapRow() {
-        return (rs, rowNum) -> {
-            TaskStatusHistory task = new TaskStatusHistory();
-            task.setId(rs.getLong("id"));
-            task.setTaskId(rs.getString("task_id"));
-            task.setTaskType(rs.getString("task_type"));
-            task.setObjectIds(jsonMapper.fromJson(rs.getString("object_ids"), new TypeReference<>() {}));
-            task.setObjectStatuses(jsonMapper.fromJson(rs.getString("object_statuses"), new TypeReference<>() {}));
-            task.setParams(jsonMapper.fromJson(rs.getString("params"), new TypeReference<>() {}));
-            task.setExtra(jsonMapper.fromJson(rs.getString("extra"), new TypeReference<>() {})); // ✅ 反序列化 extra
-            task.setStatus(TaskState.valueOf(rs.getString("status")));
-            task.setError(rs.getString("error"));
-            task.setRetryCount(rs.getInt("retry_count"));
-            task.setMaxRetries(rs.getInt("max_retries"));
-            task.setRetryDelayMillis(rs.getLong("retry_delay_millis"));
-            task.setCreatedAt(rs.getTimestamp("created_at").toInstant());
-            Timestamp completed = rs.getTimestamp("completed_at");
-            if (completed != null) task.setCompletedAt(completed.toInstant());
-            task.setCurrent(rs.getBoolean("is_current"));
-            return task;
-        };
-    }
+//    private final JdbcTemplate jdbcTemplate;
+//    private final JsonMapper2 jsonMapper;
+//
+//    public TaskStatusHistoryRepository(JdbcTemplate jdbcTemplate, JsonMapper2 jsonMapper) {
+//        this.jdbcTemplate = jdbcTemplate;
+//        this.jsonMapper = jsonMapper;
+//    }
+//
+//    public void save(TaskStatusHistory task) {
+//        String sql = """
+//            INSERT INTO task_status_history (
+//                task_id, task_type, object_ids, object_statuses, params,
+//                extra, status, error, retry_count, max_retries, retry_delay_millis,
+//                created_at, completed_at, is_current
+//            ) VALUES (?, ?, ?, ?::jsonb, ?::jsonb, ?::jsonb, ?, ?, ?, ?, ?, ?, ?, ?)
+//            ON CONFLICT (task_id, is_current)
+//            DO UPDATE SET
+//              object_statuses = EXCLUDED.object_statuses,
+//              params = EXCLUDED.params,
+//              extra = EXCLUDED.extra,
+//              status = EXCLUDED.status,
+//              error = EXCLUDED.error,
+//              retry_count = EXCLUDED.retry_count,
+//              completed_at = EXCLUDED.completed_at
+//        """;
+//
+//        jdbcTemplate.update(sql,
+//                task.getTaskId(),
+//                task.getTaskType(),
+//                jsonMapper.toJson(task.getObjectIds()),
+//                jsonMapper.toJson(task.getObjectStatuses()),
+//                jsonMapper.toJson(task.getParams()),
+//                jsonMapper.toJson(task.getExtra()),   // ✅ 序列化 extra
+//                task.getStatus().name(),
+//                task.getError(),
+//                task.getRetryCount(),
+//                task.getMaxRetries(),
+//                task.getRetryDelayMillis(),
+//                Timestamp.from(task.getCreatedAt()),
+//                task.getCompletedAt() != null ? Timestamp.from(task.getCompletedAt()) : null,
+//                task.isCurrent()
+//        );
+//    }
+//
+//    public TaskStatusHistory findCurrentByTaskId(String taskId) {
+//        String sql = "SELECT * FROM task_status_history WHERE task_id = ? AND is_current = true";
+//        return jdbcTemplate.queryForObject(sql, new Object[]{taskId}, mapRow());
+//    }
+//
+//    public List<TaskStatusHistory> findAllCurrent() {
+//        return jdbcTemplate.query("SELECT * FROM task_status_history WHERE is_current = true", mapRow());
+//    }
+//
+//    public void updateObjectStatus(String taskId, String objectId, TaskObjectStatus newStatus) {
+//        TaskStatusHistory task = findCurrentByTaskId(taskId);
+//        Map<String, TaskObjectStatus> statuses = task.getObjectStatuses();
+//        statuses.put(objectId, newStatus);
+//
+//        // 这里只更新 object_statuses 字段
+//        String sql = "UPDATE task_status_history SET object_statuses = ?::jsonb WHERE id = ?";
+//        jdbcTemplate.update(sql, jsonMapper.toJson(statuses), task.getId());
+//    }
+//
+//    public void updateExtra(String taskId, Map<String, Object> extra) {
+//        String sql = """
+//        UPDATE task_status_history
+//        SET extra = ?::jsonb
+//        WHERE task_id = ? AND is_current = true
+//    """;
+//
+//        jdbcTemplate.update(sql, jsonMapper.toJson(extra), taskId);
+//    }
+//
+//    private RowMapper<TaskStatusHistory> mapRow() {
+//        return (rs, rowNum) -> {
+//            TaskStatusHistory task = new TaskStatusHistory();
+//            task.setId(rs.getLong("id"));
+//            task.setTaskId(rs.getString("task_id"));
+//            task.setTaskType(rs.getString("task_type"));
+//            task.setObjectIds(jsonMapper.fromJson(rs.getString("object_ids"), new TypeReference<>() {}));
+//            task.setObjectStatuses(jsonMapper.fromJson(rs.getString("object_statuses"), new TypeReference<>() {}));
+//            task.setParams(jsonMapper.fromJson(rs.getString("params"), new TypeReference<>() {}));
+//            task.setExtra(jsonMapper.fromJson(rs.getString("extra"), new TypeReference<>() {})); // ✅ 反序列化 extra
+//            task.setStatus(TaskState.valueOf(rs.getString("status")));
+//            task.setError(rs.getString("error"));
+//            task.setRetryCount(rs.getInt("retry_count"));
+//            task.setMaxRetries(rs.getInt("max_retries"));
+//            task.setRetryDelayMillis(rs.getLong("retry_delay_millis"));
+//            task.setCreatedAt(rs.getTimestamp("created_at").toInstant());
+//            Timestamp completed = rs.getTimestamp("completed_at");
+//            if (completed != null) task.setCompletedAt(completed.toInstant());
+//            task.setCurrent(rs.getBoolean("is_current"));
+//            return task;
+//        };
+//    }
 }

+ 105 - 0
server/src/test/java/com/giantan/data/mds/MapDoubleToInt.java

@@ -0,0 +1,105 @@
+package com.giantan.data.mds;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.giantan.ai.common.util.JsonUtil;
+import com.giantan.data.tasks.TaskObjectStatus;
+import com.giantan.data.tasks.TaskOperationsStatus;
+import com.giantan.data.tasks.TaskState;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class MapDoubleToInt {
+
+
+    public static Object toIntOrLong(Double d){
+        if (d > Integer.MAX_VALUE || d < Integer.MIN_VALUE){
+            return d.intValue();
+        }
+        return d.longValue();
+    }
+
+    @SuppressWarnings("unchecked")
+    public static void convertDoubleToInt(Object obj) {
+        if (obj instanceof Map<?, ?> map) {
+            Map<String, Object> typedMap = (Map<String, Object>) map;
+            typedMap.forEach((k, v) -> {
+                if (v instanceof Double d) {
+                    if (d % 1 == 0) {
+                        typedMap.put((String) k, toIntOrLong(d)); // 转换为 Long
+                    }
+                } else if (v instanceof Map<?, ?> || v instanceof List<?>) {
+                    convertDoubleToInt(v); // 递归处理
+                }
+            });
+        } else if (obj instanceof List<?> list) {
+            for (int i = 0; i < list.size(); i++) {
+                Object v = list.get(i);
+                if (v instanceof Double d) {
+                    if (d % 1 == 0) {
+                        List<Object> typedList = (List<Object>)list;
+                        typedList.set(i, toIntOrLong(d)); // 转换为 Long
+                    }
+                } else if (v instanceof Map<?, ?> || v instanceof List<?>) {
+                    convertDoubleToInt(v);
+                }
+            }
+        }
+    }
+
+    @SuppressWarnings("unchecked")
+    public static Object deepCopyAndConvert(Object obj) {
+        if (obj instanceof Map<?, ?> map) {
+            Map<String, Object> newMap = new HashMap<>();
+            for (Map.Entry<?, ?> entry : map.entrySet()) {
+                String key = (String) entry.getKey();
+                Object value = entry.getValue();
+                newMap.put(key, deepCopyAndConvert(value));
+            }
+            return newMap;
+        } else if (obj instanceof List<?> list) {
+            List<Object> newList = new ArrayList<>();
+            for (Object item : list) {
+                newList.add(deepCopyAndConvert(item));
+            }
+            return newList;
+        } else if (obj instanceof Double d) {
+            if (d % 1 == 0) {
+                return toIntOrLong(d); // 转换为 Long or Integer
+            } else {
+                return d; // 保持原 Double
+            }
+        } else {
+            return obj; // 其他类型保持不变
+        }
+    }
+
+public static void test3(){
+    Map<String, TaskOperationsStatus> map = new HashMap<>();
+    TaskOperationsStatus to = new TaskOperationsStatus();
+    to.addState(new TaskObjectStatus("slice", TaskState.SUCCESS));
+    map.put("201",to);
+
+    String json = JsonUtil.toJson(map);
+    System.out.println(json);
+    Map<String, TaskOperationsStatus> temp = JsonUtil.fromJson(json, new TypeReference<Map<String, TaskOperationsStatus>>() {
+    });
+    System.out.println(temp);
+}
+
+    public static void main(String[] args) {
+//        Map<String, Object> data = new HashMap<>();
+//        data.put("a", 1.0);
+//        data.put("b", 2.5);
+//        data.put("c", List.of(3.0, 4.1));
+//        data.put("d", Map.of("x", 5.0));
+//
+//        Object o = deepCopyAndConvert(data);
+//        System.out.println(data);
+//        System.out.println(o);
+
+test3();
+    }
+}

+ 23 - 10
server/src/test/java/com/giantan/data/mds/MdsApplicationTests.java

@@ -1,21 +1,14 @@
 package com.giantan.data.mds;
 
-import com.giantan.ai.common.util.JsonUtil;
-import com.giantan.ai.util.id.IdGenerator;
-import com.giantan.ai.util.id.UlidGenerator;
 import com.giantan.data.mds.bot.GChatClient;
-import com.giantan.data.mds.service.impl.DocReq;
-import com.giantan.data.mds.service.impl.HybridSearch;
+import com.giantan.data.index.HybridSearch;
 import com.giantan.data.mds.service.impl.MdChunksService;
 import com.giantan.data.mds.service.impl.Vectorization;
-import com.github.f4b6a3.ulid.Ulid;
-import com.github.f4b6a3.ulid.UlidCreator;
 import org.junit.jupiter.api.Test;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 
 import java.io.IOException;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -34,10 +27,30 @@ class MdsApplicationTests {
 	@Autowired
 	HybridSearch hybridSearch;
 
+
	// Manual smoke check: feeds a sample Chinese NLP passage to the DeepSeek client
	// and prints the extracted keywords/questions.
	// NOTE(review): relies on a `deepseek2` field declared elsewhere in this test class — confirm it is wired.
	private void testDeepseek(){
		String s = """
GliNER使用的预训练模型:
GLiNER 使用的是 DeBERTA v3 large,它对原始 DeBERTA 模型进行了更有效的维护改进,用替换标记检测(RTD)取代了掩码语言建模(MLM)。
模型架构流程:
经过deberta后,将token输入lstm:通过 DeBERTA后,GLiNER提取每个label和token本身的嵌入。此外,token嵌入还会通过双向 LSTM 模型。这种方法加快了模型的训练速度,因此在数据量较少的情况下很有帮助;此外,它还能限制negative tokenization和位置编码人工痕迹的影响。
获取embedding后,送入评分模块:在获得label和token的表征后,我们会将其传递给一个评分模块,该模块会预测token在实体中的位置(开始、内部、结束)及其类别。
首先,每个token表征和label表征都被投影到一个高维空间中。让 T ∈ R B × L × H 
				""";
		Map<String, Object> kws = deepseek2.getKeywordsAndQuestions(s, Map.of());
		System.out.println(kws);
	}
 	@Test
 	void contextLoads() throws IOException, InterruptedException {
 		System.out.println("Hello World");
+
+		testDeepseek();
+
 		//fetchInfo();
+//		boolean demo12 = hybridSearch.deleteAll("demo11");
+//		System.out.println();
+//		List<String> demo11 = hybridSearch.getAllIds("demo11");
+//		System.out.println(demo11);
 
 //		String s="发烧";
 //		long t= System.currentTimeMillis();
@@ -51,8 +64,8 @@ class MdsApplicationTests {
 //		List<Map<String,Object>> demo11 = mdChunksService.getKeywordsByMdId("demo11", 2);
 //		System.out.println(demo11);
 
-		int i = mdChunksService.updateKeywordsOrMetadata("demo11", 316l, List.of("demo1", "测试"), Map.of("m1", "11"));
-		System.out.println(i);
+//		int i = mdChunksService.updateKeywordsOrMetadata("demo11", 316l, List.of("demo1", "测试"), Map.of("m1", "11"));
+//		System.out.println(i);
 
 //		List<MdChunk> chunks = mdChunksService.findAll("demo11");
 //		System.out.println(chunks);