|
|
@@ -4,12 +4,13 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
import com.giantan.ai.util.id.IdGenerator;
|
|
|
import com.giantan.ai.util.id.UuidGenerator;
|
|
|
+import com.giantan.data.kvs.kvstore.GBaseKeyValue;
|
|
|
+import com.giantan.data.util.JdbcUtils;
|
|
|
import org.springframework.jdbc.core.JdbcTemplate;
|
|
|
|
|
|
-import java.sql.Array;
|
|
|
-import java.sql.ResultSet;
|
|
|
-import java.sql.SQLException;
|
|
|
+import java.sql.*;
|
|
|
import java.util.ArrayList;
|
|
|
+import java.util.HashMap;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
|
|
|
@@ -87,6 +88,16 @@ public class DynamicChunkRepository {
|
|
|
jdbc.update(sql);
|
|
|
}
|
|
|
|
|
|
+ public List<MdChunk> findAll(String collId) { // Loads every row of the collection's chunk table, ordered by chunk_index.
|
|
|
+ String sql = String.format("SELECT * FROM %s ORDER BY chunk_index",tableName(collId)); // table name is derived from collId via tableName()
|
|
|
+ return jdbc.query(sql, this::mapRow); // each result row is converted by mapRow
|
|
|
+ }
|
|
|
+
|
|
|
+ public MdChunk findById(String collId, Long id) { // Fetches the chunk whose id matches, or null when no row is found.
|
|
|
+ String sql = String.format("SELECT * FROM %s WHERE id = ?",tableName(collId)); // only the table name is formatted in; id is bound as a parameter
|
|
|
+ List<MdChunk> rets = jdbc.query(sql, new Object[]{id}, this::mapRow); // NOTE(review): the Object[]-argument query overload is deprecated in newer Spring versions - consider query(sql, this::mapRow, id)
|
|
|
+ return rets.isEmpty() ? null : rets.get(0); // first match wins; an empty result maps to null
|
|
|
+ }
|
|
|
|
|
|
public long deleteByMdId(String collId, Integer mdId) {
|
|
|
//String sql = "DELETE FROM %s WHERE md_id = ?";
|
|
|
@@ -94,35 +105,91 @@ public class DynamicChunkRepository {
|
|
|
return jdbc.update(sql, mdId);
|
|
|
}
|
|
|
|
|
|
+// public Integer save(String collId, MdChunk chunk) {
|
|
|
+// String sql1 = """
|
|
|
+// INSERT INTO %s (
|
|
|
+// md_id, chunk_index, content, plain_text, embedding, chunk_type,
|
|
|
+// paragraph_start, paragraph_end, offset_start, offset_end, section_path,
|
|
|
+// keywords, metadata, extra
|
|
|
+// ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?::jsonb, ?::jsonb)
|
|
|
+// """;
|
|
|
+// String sql = String.format(sql1, tableName(collId));
|
|
|
+//
|
|
|
+// int updated = jdbc.update(sql,
|
|
|
+// chunk.getMdId(),
|
|
|
+// chunk.getChunkIndex(),
|
|
|
+// chunk.getContent(),
|
|
|
+// chunk.getPlainText(),
|
|
|
+// chunk.getEmbedding(),
|
|
|
+// chunk.getChunkType(),
|
|
|
+// chunk.getParagraphStart(),
|
|
|
+// chunk.getParagraphEnd(),
|
|
|
+// chunk.getOffsetStart(),
|
|
|
+// chunk.getOffsetEnd(),
|
|
|
+// chunk.getSectionPath(),
|
|
|
+// toSqlArray(chunk.getKeywords()),
|
|
|
+// toJson(chunk.getMetadata()),
|
|
|
+// toJson(chunk.getExtra())
|
|
|
+// );
|
|
|
+// return updated;
|
|
|
+// }
|
|
|
+
|
|
|
+
|
|
|
public Integer save(String collId, MdChunk chunk) {
|
|
|
- String sql1 = """
|
|
|
- INSERT INTO %s (
|
|
|
- md_id, chunk_index, content, plain_text, embedding, chunk_type,
|
|
|
- paragraph_start, paragraph_end, offset_start, offset_end, section_path,
|
|
|
- keywords, metadata, extra
|
|
|
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?::jsonb, ?::jsonb)
|
|
|
- """;
|
|
|
- String sql = String.format(sql1, tableName(collId));
|
|
|
+ String sqlTemplate = """
|
|
|
+ INSERT INTO %s (
|
|
|
+ md_id, chunk_index, content, plain_text, embedding, chunk_type,
|
|
|
+ paragraph_start, paragraph_end, offset_start, offset_end, section_path,
|
|
|
+ keywords, metadata, extra
|
|
|
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?::jsonb, ?::jsonb)
|
|
|
+ """;
|
|
|
+ String sql = String.format(sqlTemplate, tableName(collId)); // target table comes from tableName(collId); all column values are bound as parameters
|
|
|
|
|
|
- int updated = jdbc.update(sql,
|
|
|
- chunk.getMdId(),
|
|
|
- chunk.getChunkIndex(),
|
|
|
- chunk.getContent(),
|
|
|
- chunk.getPlainText(),
|
|
|
- chunk.getEmbedding(),
|
|
|
- chunk.getChunkType(),
|
|
|
- chunk.getParagraphStart(),
|
|
|
- chunk.getParagraphEnd(),
|
|
|
- chunk.getOffsetStart(),
|
|
|
- chunk.getOffsetEnd(),
|
|
|
- chunk.getSectionPath(),
|
|
|
- toSqlArray(chunk.getKeywords()),
|
|
|
- toJson(chunk.getMetadata()),
|
|
|
- toJson(chunk.getExtra())
|
|
|
- );
|
|
|
- return updated;
|
|
|
+ return jdbc.update(con -> { // statement and keyword array are both created on the same connection `con`; the result of jdbc.update is returned
|
|
|
+ PreparedStatement ps = con.prepareStatement(sql);
|
|
|
+
|
|
|
+ ps.setLong(1, chunk.getMdId()); // NOTE(review): setLong/setInt take primitives - if these getters return boxed types, a null value would NPE on unboxing; confirm MdChunk field types
|
|
|
+ ps.setInt(2, chunk.getChunkIndex());
|
|
|
+ ps.setString(3, chunk.getContent());
|
|
|
+ ps.setString(4, chunk.getPlainText());
|
|
|
+ ps.setObject(5, chunk.getEmbedding()); // assumed to be bytea or vector
|
|
|
+ ps.setString(6, chunk.getChunkType());
|
|
|
+ ps.setInt(7, chunk.getParagraphStart());
|
|
|
+ ps.setInt(8, chunk.getParagraphEnd());
|
|
|
+ ps.setInt(9, chunk.getOffsetStart());
|
|
|
+ ps.setInt(10, chunk.getOffsetEnd());
|
|
|
+ ps.setString(11, chunk.getSectionPath());
|
|
|
+
|
|
|
+ // keywords → text[]
|
|
|
+ if (chunk.getKeywords() != null) {
|
|
|
+ Array sqlArray = con.createArrayOf("text", chunk.getKeywords().toArray(new String[0]));
|
|
|
+ ps.setArray(12, sqlArray);
|
|
|
+ } else {
|
|
|
+ ps.setNull(12, Types.ARRAY); // absent keyword list is stored as SQL NULL
|
|
|
+ }
|
|
|
+
|
|
|
+ // metadata → jsonb
|
|
|
+ String metadataJson = toJson(chunk.getMetadata());
|
|
|
+ if (metadataJson != null) {
|
|
|
+ ps.setString(13, metadataJson); // bound as text; the ?::jsonb cast in the SQL converts it
|
|
|
+ } else {
|
|
|
+ ps.setNull(13, Types.VARCHAR);
|
|
|
+ }
|
|
|
+
|
|
|
+ // extra → jsonb
|
|
|
+ String extraJson = toJson(chunk.getExtra());
|
|
|
+ if (extraJson != null) {
|
|
|
+ ps.setString(14, extraJson); // bound as text; the ?::jsonb cast in the SQL converts it
|
|
|
+ } else {
|
|
|
+ ps.setNull(14, Types.VARCHAR);
|
|
|
+ }
|
|
|
+
|
|
|
+ return ps;
|
|
|
+ });
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+
|
|
|
public List<Integer> saveAll(String collId, List<MdChunk> chunks) {
|
|
|
String sql1 = """
|
|
|
INSERT INTO %s (
|
|
|
@@ -151,9 +218,11 @@ public class DynamicChunkRepository {
|
|
|
ps.setObject(9, chunk.getOffsetStart());
|
|
|
ps.setObject(10, chunk.getOffsetEnd());
|
|
|
ps.setObject(11, chunk.getSectionPath());
|
|
|
- ps.setArray(12, toSqlArray(chunk.getKeywords()));
|
|
|
+ //ps.setArray(12, toSqlArray(chunk.getKeywords()));
|
|
|
ps.setObject(13, toJson(chunk.getMetadata()));
|
|
|
ps.setObject(14, toJson(chunk.getExtra()));
|
|
|
+
|
|
|
+ JdbcUtils.setStringArray(ps,12,chunk.getKeywords());
|
|
|
});
|
|
|
for (int j = 0; j < batched.length; j++) {
|
|
|
for (int k = 0; k < batched[j].length; k++) {
|
|
|
@@ -164,16 +233,120 @@ public class DynamicChunkRepository {
|
|
|
return rets;
|
|
|
}
|
|
|
|
|
|
- private Array toSqlArray(List<String> ls) {
|
|
|
- if (ls == null) {
|
|
|
- return null;
|
|
|
- }
|
|
|
- try {
|
|
|
- // 使用 JdbcTemplate 连接的 DataSource 创建 Array
|
|
|
- return jdbc.getDataSource().getConnection().createArrayOf("TEXT", ls.toArray(new String[0]));
|
|
|
- } catch (SQLException e) {
|
|
|
- throw new RuntimeException("Error creating SQL Array", e);
|
|
|
- }
|
|
|
+
|
|
|
+// private Array toSqlArray(List<String> ls) {
|
|
|
+// if (ls == null) {
|
|
|
+// return null;
|
|
|
+// }
|
|
|
+// try (Connection conn = jdbc.getDataSource().getConnection()) {
|
|
|
+// return conn.createArrayOf("TEXT", ls.toArray(new String[0]));
|
|
|
+// } catch (SQLException e) {
|
|
|
+// throw new RuntimeException("Error creating SQL Array", e);
|
|
|
+// }
|
|
|
+// }
|
|
|
+
|
|
|
+ public List<Map<String,Object>> getKeywordsByMdId(String collId, Integer mdId) { // Returns one map per row with md_id = mdId, holding the keys "id" and "keywords".
|
|
|
+ String sql1 = "SELECT id,keywords FROM %s WHERE md_id = ?";
|
|
|
+ String sql = String.format(sql1, tableName(collId)); // table name is derived from collId via tableName()
|
|
|
+ List<Map<String,Object>> keywordsList = jdbc.query(
|
|
|
+ sql,
|
|
|
+ new Object[]{mdId}, // binds mdId to the single ? placeholder
|
|
|
+ (rs, rowNum) -> {
|
|
|
+ Map<String,Object> map = new HashMap<>();
|
|
|
+ long id = rs.getLong("id");
|
|
|
+ map.put("id", id);
|
|
|
+ Array array = rs.getArray("keywords");
|
|
|
+ if (array != null) {
|
|
|
+ map.put("keywords",(String[]) array.getArray()); // NOTE(review): cast assumes the driver materializes this column as String[] - confirm
|
|
|
+ }else{
|
|
|
+ map.put("keywords",null); // the key is still present, mapped to null, when the column is NULL
|
|
|
+ }
|
|
|
+ //return new String[0];
|
|
|
+ return map;
|
|
|
+ }
|
|
|
+ );
|
|
|
+ return keywordsList;
|
|
|
+ }
|
|
|
+
|
|
|
+// public int updateKeywords(String collId, Long id, List<String> keywords, Map<String, Object> metadata) {
|
|
|
+// String sql1 = """
|
|
|
+// UPDATE %s
|
|
|
+// SET
|
|
|
+// keywords = COALESCE(?, keywords),
|
|
|
+// metadata = CASE
|
|
|
+// WHEN ?::jsonb IS NOT NULL
|
|
|
+// THEN metadata || ?::jsonb
|
|
|
+// ELSE metadata
|
|
|
+// END
|
|
|
+// WHERE id = ?
|
|
|
+// """;
|
|
|
+//
|
|
|
+// String sql = String.format(sql1, tableName(collId));
|
|
|
+// String json = toJson(metadata);
|
|
|
+// int updated = jdbc.update(sql,
|
|
|
+// toSqlArray(keywords), // may be null
|
|
|
+// json, // may be null
|
|
|
+// json, // placeholder repeated; used for the merge
|
|
|
+// id
|
|
|
+// );
|
|
|
+//
|
|
|
+// return updated;
|
|
|
+// }
|
|
|
+
|
|
|
+
|
|
|
+ public int updateKeywords(String collId, Long id, List<String> keywords, Map<String, Object> metadata) { // Updates keywords and merges metadata on the row with the given id; a null binding leaves that column unchanged (COALESCE / CASE in the SQL below).
|
|
|
+ String sqlTemplate = """
|
|
|
+ UPDATE %s
|
|
|
+ SET
|
|
|
+ keywords = COALESCE(?, keywords),
|
|
|
+ metadata = CASE
|
|
|
+ WHEN ?::jsonb IS NOT NULL
|
|
|
+ THEN metadata || ?::jsonb
|
|
|
+ ELSE metadata
|
|
|
+ END
|
|
|
+ WHERE id = ?
|
|
|
+ """;
|
|
|
+
|
|
|
+ String sql = String.format(sqlTemplate, tableName(collId));
|
|
|
+ String json = toJson(metadata); // may be null
|
|
|
+
|
|
|
+ return jdbc.update(con -> { // statement and keyword array are both created on the same connection `con`
|
|
|
+ PreparedStatement ps = con.prepareStatement(sql);
|
|
|
+
|
|
|
+ // keywords → text[]
|
|
|
+ if (keywords != null) {
|
|
|
+ Array sqlArray = con.createArrayOf("text", keywords.toArray(new String[0]));
|
|
|
+ ps.setArray(1, sqlArray);
|
|
|
+ } else {
|
|
|
+ ps.setNull(1, Types.ARRAY); // NULL here makes COALESCE fall back to the existing keywords
|
|
|
+ }
|
|
|
+
|
|
|
+ // metadata → jsonb
|
|
|
+ if (json != null) {
|
|
|
+ ps.setString(2, json);
|
|
|
+ ps.setString(3, json); // same JSON bound twice: once for the IS NOT NULL test, once for the || merge
|
|
|
+ } else {
|
|
|
+ ps.setNull(2, Types.VARCHAR); // still matches ?::jsonb
|
|
|
+ ps.setNull(3, Types.VARCHAR);
|
|
|
+ }
|
|
|
+
|
|
|
+ ps.setLong(4, id); // NOTE(review): id is a boxed Long - a null id would NPE on unboxing here
|
|
|
+ return ps;
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ public int setEmbedding(String collId, Long id, String es) { // Overwrites the embedding column of the row with the given id using the value es.
|
|
|
+ String sql1 = """
|
|
|
+ UPDATE %s
|
|
|
+ SET
|
|
|
+ embedding = ?
|
|
|
+ WHERE id = ?
|
|
|
+ """;
|
|
|
+
|
|
|
+ String sql = String.format(sql1, tableName(collId)); // table name is derived from collId via tableName()
|
|
|
+ int updated = jdbc.update(sql, es, id); // NOTE(review): es is bound as a plain String - confirm the embedding column accepts that without an explicit cast
|
|
|
+ return updated; // value reported by jdbc.update
|
|
|
}
|
|
|
|
|
|
private MdChunk mapRow(ResultSet rs, int rowNum) throws SQLException {
|
|
|
@@ -190,7 +363,7 @@ public class DynamicChunkRepository {
|
|
|
c.setOffsetStart(rs.getInt("offset_start"));
|
|
|
c.setOffsetEnd(rs.getInt("offset_end"));
|
|
|
c.setSectionPath(rs.getString("section_path"));
|
|
|
- c.setKeywords(SqlArrayUtils.fromStringArray(rs.getArray("keywords")));
|
|
|
+ c.setKeywords(JdbcUtils.fromStringArray(rs.getArray("keywords")));
|
|
|
c.setMetadata(fromJson(rs.getString("metadata")));
|
|
|
c.setExtra(fromJson(rs.getString("extra")));
|
|
|
c.setCreatedAt(rs.getTimestamp("created_at").toInstant());
|
|
|
@@ -214,4 +387,5 @@ public class DynamicChunkRepository {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+
|
|
|
}
|