package org.cnnlp.data.splitter; import org.cnnlp.data.document.GDocument; import org.cnnlp.data.util.BaseParameters; import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; import java.util.stream.Stream; public class SplitTools { public SplitTools() { } public void splitFaq(Path path) throws IOException { System.out.println("Process " + path.toString()); String json = path.toString() + ".json"; FaqMdSplitter splitter = new FaqMdSplitter(); BaseParameters params = BaseParameters.defaultParams(); List docs = splitter.split(path, params); SplitUtils.toJsonFile(new File(json), docs); } public static boolean isMd(Path p) { if (Files.isRegularFile(p)) { boolean b = p.toString().endsWith(".md"); return b; } return false; } public void splitSimple(Path path) throws IOException { System.out.println("Process " + path.toString()); String json = path.toString() + ".json"; SimpleMdSplitter splitter = new SimpleMdSplitter(); splitter.setCommentProcessor(KvCommentProcessor.build()); BaseParameters params = BaseParameters.defaultParams(); // Path path = Paths.get(md); //String baseName = SplitUtils.getFileBaseName(path); //System.out.println("baseName=" + baseName); List docs = splitter.split(path, params); SplitUtils.toJsonFile(new File(json), docs); } public void splitSimpleDir(Path p) throws IOException { Stream fl = Files.list(p); fl.forEach(p1 -> { try { // if (isMd(f)) { // tools.splitSimple(f); // } //System.out.println(p1); File file = p1.toFile(); if (file.isDirectory()) { splitSimpleDir(p1); } else { System.out.println(p1); splitSimple(p1); } } catch (Exception e) { throw new RuntimeException(e); } }); } public static void main(String[] args) throws IOException { String md = "D:\\testdata\\md\\官网银行卡知识.1.md"; md = "D:\\testdata\\md\\海底捞员工手册.md"; md = "D:\\testdata\\md\\列表1.md"; md = "D:\\data\\乙烯\\乙烯装置技术与运行测试.md"; md = "D:\\data\\乙烯\\乙烯1.md"; md = "D:\\data\\乙烯\\催化裂化操作规程2024.4.28.md"; // String json = md + ".json"; SplitTools tools = new SplitTools(); // tools.splitSimple(Paths.get(md)); // String p = "D:\\data\\乙烯\\target"; // tools.splitSimpleDir(Paths.get(p)); String p = "D:\\data\\乙烯\\规章制度\\中国石油四川石化有限责任公司原油采购管理办法.md"; tools.splitSimple(Paths.get(p)); // String dir = "D:\\data\\乙烯\\0624\\安全技术说明书完善\\"; // tools.splitSimpleDir(Paths.get(dir)); // String p = "D:\\data\\乙烯\\2\\"; // Stream fl = Files.list(Path.of(p)); // fl.forEach(f->{ // try { // if (isMd(f)){ // tools.splitSimple(f); // } // } catch (IOException e) { // throw new RuntimeException(e); // } // }); // // String p2 = "D:\\data\\乙烯\\2\\faq\\"; // Stream fl2 = Files.list(Path.of(p2)); // fl2.forEach(f->{ // try { // if (isMd(f)){ // tools.splitFaq(f); // } // } catch (IOException e) { // throw new RuntimeException(e); // } // }); } }