IMdChunking.java 731 B

123456789101112131415161718192021222324
  1. package org.cnnlp.data.splitter;
  2. import org.cnnlp.data.document.GDocument;
  3. import org.cnnlp.data.util.BaseParameters;
  4. import java.io.IOException;
  5. import java.util.List;
  6. public interface IMdChunking {
  7. // simple/faq/tagged
  8. //public static final String MD_TYPE = "mdType";
  9. //输出的是 章节
  10. List<GDocument> splitSimple(String text, BaseParameters params) throws IOException;
  11. //输出的是 章节
  12. List<GDocument> splitFaq(String text, BaseParameters params) throws IOException;
  13. //输出的是 章节
  14. List<GDocument> split(String text, BaseParameters params) throws IOException;
  15. // 输出的是 chunk
  16. List<GDocument> chunking(String text, BaseParameters params) throws IOException;
  17. }