Bladeren bron

initial commit

dwp 9 maanden geleden
commit
b823ca122c
100 gewijzigde bestanden met toevoegingen van 20730 en 0 verwijderingen
  1. 38 0
      .gitignore
  2. 8 0
      .idea/.gitignore
  3. 15 0
      .idea/encodings.xml
  4. 14 0
      .idea/misc.xml
  5. 124 0
      .idea/uiDesigner.xml
  6. 6 0
      .idea/vcs.xml
  7. 90 0
      gfs/pom.xml
  8. 42 0
      gfs/src/main/java/com/giantan/gfs/service/FileStorageService.java
  9. 64 0
      gfs/src/main/java/com/giantan/gfs/service/IGkbService.java
  10. 17 0
      gfs/src/main/java/com/giantan/gfs/service/PermissionsFilter.java
  11. 410 0
      gfs/src/main/java/com/giantan/gfs/service/impl/GStorageService.java
  12. 53 0
      gfs/src/main/java/com/giantan/gfs/service/impl/PermissionsFilterImpl.java
  13. 10 0
      gfs/src/main/java/com/giantan/gfs/service/impl/S3Constants.java
  14. 706 0
      gfs/src/main/java/com/giantan/gfs/service/impl/S3GkbService.java
  15. 295 0
      gfs/src/main/java/com/giantan/gfs/service/impl/S3Utils.java
  16. 177 0
      gfs/src/main/java/com/giantan/gfs/storer/FileItem.java
  17. 36 0
      gfs/src/main/java/com/giantan/gfs/storer/FileType.java
  18. 7 0
      gfs/src/main/java/com/giantan/gfs/storer/ReadableFileSystem.java
  19. 250 0
      gfs/src/main/java/com/giantan/gfs/storer/Storer.java
  20. 221 0
      gfs/src/main/java/com/giantan/gfs/storer/impl/AbstractStorer.java
  21. 373 0
      gfs/src/main/java/com/giantan/gfs/storer/impl/FSStorer.java
  22. 548 0
      gfs/src/main/java/com/giantan/gfs/storer/impl/S3Storer.java
  23. 833 0
      gfs/src/main/java/com/giantan/gfs/storer/util/FileUtil.java
  24. 142 0
      gfs/src/main/java/com/giantan/gfs/storer/util/J7Zip.java
  25. 21 0
      gfs/src/main/java/com/giantan/gfs/storer/util/ObjectUtil.java
  26. 229 0
      gfs/src/main/java/com/giantan/gfs/storer/util/OpenMultipartArchiveRar.java
  27. 316 0
      gfs/src/main/java/com/giantan/gfs/storer/util/StringUtil.java
  28. 176 0
      gfs/src/main/java/com/giantan/gfs/storer/util/SystemUtil.java
  29. 73 0
      gfs/src/main/java/com/giantan/gfs/storer/util/ZipUtil2.java
  30. 78 0
      gfs/src/test/java/com/giantan/gfs/service/impl/S3GkbServiceTest.java
  31. 133 0
      gtbook/pom.xml
  32. 61 0
      gtbook/src/main/java/com/vladsch/flexmark/ext/obs/comments/Comments.java
  33. 42 0
      gtbook/src/main/java/com/vladsch/flexmark/ext/obs/comments/CommentsExtension.java
  34. 64 0
      gtbook/src/main/java/com/vladsch/flexmark/ext/obs/comments/internal/CommentsDelimiterProcessor.java
  35. 80 0
      gtbook/src/main/java/com/vladsch/flexmark/ext/obs/comments/internal/CommentsNodeRenderer.java
  36. 45 0
      gtbook/src/main/java/opennlp/tools/svm/IOneClassModel.java
  37. 303 0
      gtbook/src/main/java/opennlp/tools/svm/OneClassModel.java
  38. 260 0
      gtbook/src/main/java/opennlp/tools/svm/data/evaluators/BinaryClassifierEvaluator.java
  39. 245 0
      gtbook/src/main/java/opennlp/tools/svm/data/evaluators/ClassifierEvaluator.java
  40. 68 0
      gtbook/src/main/java/opennlp/tools/svm/data/evaluators/ConfusionMatrix.java
  41. 91 0
      gtbook/src/main/java/opennlp/tools/svm/data/evaluators/RegressionEvaluator.java
  42. 8 0
      gtbook/src/main/java/opennlp/tools/svm/data/exceptions/NotImplementedException.java
  43. 316 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/BasicDataFrame.java
  44. 321 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/BasicDataRow.java
  45. 30 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/DataColumn.java
  46. 10 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/DataFileType.java
  47. 52 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/DataFrame.java
  48. 298 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/DataQuery.java
  49. 60 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/DataRow.java
  50. 80 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/InputDataColumn.java
  51. 68 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/OutputDataColumn.java
  52. 95 0
      gtbook/src/main/java/opennlp/tools/svm/data/frame/Sampler.java
  53. 60 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/CollectionUtils.java
  54. 206 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/CountRepository.java
  55. 137 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/CsvUtils.java
  56. 16 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/Mean.java
  57. 56 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/NumberUtils.java
  58. 127 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/Scaler.java
  59. 10 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/StdDev.java
  60. 27 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/StringUtils.java
  61. 45 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/TupleTwo.java
  62. 20 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/Variance.java
  63. 14 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/discretizers/AttributeValueDiscretizer.java
  64. 136 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/discretizers/KMeansDiscretizer.java
  65. 140 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/discretizers/KMeansFilter.java
  66. 104 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/transforms/ComplementaryCoding.java
  67. 147 0
      gtbook/src/main/java/opennlp/tools/svm/data/utils/transforms/Standardization.java
  68. 2878 0
      gtbook/src/main/java/opennlp/tools/svm/libsvm/SupportVectorMachine.java
  69. 17 0
      gtbook/src/main/java/opennlp/tools/svm/libsvm/SupportVectorMachineNode.java
  70. 57 0
      gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_model.java
  71. 63 0
      gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_parameter.java
  72. 197 0
      gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_predict.java
  73. 5 0
      gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_print_interface.java
  74. 7 0
      gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_problem.java
  75. 324 0
      gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_train.java
  76. 14 0
      gtbook/src/main/java/opennlp/tools/svm/svmext/Learner.java
  77. 221 0
      gtbook/src/main/java/opennlp/tools/svm/svmext/classifiers/BinarySVC.java
  78. 236 0
      gtbook/src/main/java/opennlp/tools/svm/svmext/classifiers/OneVsOneSVC.java
  79. 174 0
      gtbook/src/main/java/opennlp/tools/svm/svmext/oneclass/OneClassSVM.java
  80. 205 0
      gtbook/src/main/java/opennlp/tools/svm/svmext/regression/SVR.java
  81. 98 0
      gtbook/src/main/java/org/cnnlp/data/Main.java
  82. 44 0
      gtbook/src/main/java/org/cnnlp/data/ReadMe.java
  83. 1627 0
      gtbook/src/main/java/org/cnnlp/data/book/GTBook.java
  84. 19 0
      gtbook/src/main/java/org/cnnlp/data/book/GTBookConstants.java
  85. 262 0
      gtbook/src/main/java/org/cnnlp/data/book/GTBookHelper.java
  86. 191 0
      gtbook/src/main/java/org/cnnlp/data/book/GTBookUtil.java
  87. 480 0
      gtbook/src/main/java/org/cnnlp/data/book/GTNode.java
  88. 5 0
      gtbook/src/main/java/org/cnnlp/data/book/IBook.java
  89. 12 0
      gtbook/src/main/java/org/cnnlp/data/book/IElement.java
  90. 28 0
      gtbook/src/main/java/org/cnnlp/data/book/INode.java
  91. 9 0
      gtbook/src/main/java/org/cnnlp/data/book/IVisitor.java
  92. 121 0
      gtbook/src/main/java/org/cnnlp/data/book/MDElement.java
  93. 115 0
      gtbook/src/main/java/org/cnnlp/data/book/PrettyPrinter.java
  94. 11 0
      gtbook/src/main/java/org/cnnlp/data/document/GDocConstants.java
  95. 332 0
      gtbook/src/main/java/org/cnnlp/data/document/GDocument.java
  96. 479 0
      gtbook/src/main/java/org/cnnlp/data/html/HtmlRender.java
  97. 135 0
      gtbook/src/main/java/org/cnnlp/data/html/StrUtil.java
  98. 1687 0
      gtbook/src/main/java/org/cnnlp/data/md/DocRender.java
  99. 1018 0
      gtbook/src/main/java/org/cnnlp/data/md/DocRender2.java
  100. 312 0
      gtbook/src/main/java/org/cnnlp/data/md/DocTree.java

+ 38 - 0
.gitignore

@@ -0,0 +1,38 @@
+target/
+!.mvn/wrapper/maven-wrapper.jar
+!**/src/main/**/target/
+!**/src/test/**/target/
+
+### IntelliJ IDEA ###
+.idea/modules.xml
+.idea/jarRepositories.xml
+.idea/compiler.xml
+.idea/libraries/
+*.iws
+*.iml
+*.ipr
+
+### Eclipse ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+build/
+!**/src/main/**/build/
+!**/src/test/**/build/
+
+### VS Code ###
+.vscode/
+
+### Mac OS ###
+.DS_Store

+ 8 - 0
.idea/.gitignore

@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

+ 15 - 0
.idea/encodings.xml

@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Encoding">
+    <file url="file://$PROJECT_DIR$/gfs/src/main/java" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/gfs/src/main/resources" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/gtbook/src/main/java" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/gtbook/src/main/resources" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/server/src/main/java" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/server/src/main/resources" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/tools/src/main/java" charset="UTF-8" />
+    <file url="file://$PROJECT_DIR$/tools/src/main/resources" charset="UTF-8" />
+  </component>
+</project>

+ 14 - 0
.idea/misc.xml

@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ExternalStorageConfigurationManager" enabled="true" />
+  <component name="MavenProjectsManager">
+    <option name="originalFiles">
+      <list>
+        <option value="$PROJECT_DIR$/pom.xml" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_17" default="true" project-jdk-name="17" project-jdk-type="JavaSDK">
+    <output url="file://$PROJECT_DIR$/out" />
+  </component>
+</project>

+ 124 - 0
.idea/uiDesigner.xml

@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Palette2">
+    <group name="Swing">
+      <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
+      </item>
+      <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
+      </item>
+      <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
+      </item>
+      <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
+        <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
+      </item>
+      <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
+        <initial-values>
+          <property name="text" value="Button" />
+        </initial-values>
+      </item>
+      <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
+        <initial-values>
+          <property name="text" value="RadioButton" />
+        </initial-values>
+      </item>
+      <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
+        <initial-values>
+          <property name="text" value="CheckBox" />
+        </initial-values>
+      </item>
+      <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
+        <initial-values>
+          <property name="text" value="Label" />
+        </initial-values>
+      </item>
+      <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
+          <preferred-size width="150" height="-1" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
+          <preferred-size width="150" height="-1" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
+          <preferred-size width="150" height="-1" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
+      </item>
+      <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
+          <preferred-size width="200" height="200" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
+          <preferred-size width="200" height="200" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
+      </item>
+      <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
+      </item>
+      <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
+      </item>
+      <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
+      </item>
+      <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
+          <preferred-size width="-1" height="20" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
+      </item>
+      <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
+      </item>
+    </group>
+  </component>
+</project>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

+ 90 - 0
gfs/pom.xml

@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>com.giantan.mds</groupId>
+        <artifactId>mds</artifactId>
+        <version>1.0.0</version>
+    </parent>
+
+    <groupId>com.giantan.gfs</groupId>
+    <artifactId>gfs</artifactId>
+
+    <properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>2.0.17</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <version>2.0.17</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>2.15.0</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/software.amazon.awssdk/s3 -->
+        <dependency>
+            <groupId>software.amazon.awssdk</groupId>
+            <artifactId>s3</artifactId>
+            <!--            <version>2.20.162</version>-->
+            <version>2.31.66</version>
+
+            <exclusions>
+                <!--                <exclusion>-->
+                <!--                    <groupId>org.slf4j</groupId>-->
+                <!--                    <artifactId>slf4j-simple</artifactId>-->
+                <!--                </exclusion>-->
+
+                <exclusion>
+                    <groupId>commons-logging</groupId>
+                    <artifactId>commons-logging</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-web</artifactId>
+            <version>6.2.7</version>
+            <scope>compile</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>net.sf.sevenzipjbinding</groupId>
+            <artifactId>sevenzipjbinding</artifactId>
+            <version>16.02-2.01</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/net.sf.sevenzipjbinding/sevenzipjbinding-all-platforms -->
+        <dependency>
+            <groupId>net.sf.sevenzipjbinding</groupId>
+            <artifactId>sevenzipjbinding-all-platforms</artifactId>
+            <version>16.02-2.01</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-compress</artifactId>
+            <version>1.23.0</version>
+        </dependency>
+
+    </dependencies>
+</project>

+ 42 - 0
gfs/src/main/java/com/giantan/gfs/service/FileStorageService.java

@@ -0,0 +1,42 @@
+package com.giantan.gfs.service;
+
+import com.giantan.gfs.storer.FileItem;
+import com.giantan.gfs.storer.ReadableFileSystem;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+
+public interface FileStorageService extends ReadableFileSystem {
+
+    void init();
+
+    void save(MultipartFile multipartFile);
+
+    InputStream download(String filename) throws IOException;
+
+    List<FileItem> list() throws Exception;
+
+    List<FileItem> getFileItems(String path) throws Exception;
+
+    FileItem getFileItem(String filename) throws IOException;
+
+    void clear();
+
+    public void uploadZip(InputStream is, String fileName,String charSet,String basePath) throws IOException;
+
+    String delete(String filename) throws Exception;
+
+    boolean rename(String src,String dest) throws Exception;
+
+    boolean copy(String src,String dest) throws Exception;
+
+    boolean createDirectory(String uri) throws Exception;
+
+    boolean writeFile(String uri,MultipartFile multipartFile);
+
+    boolean writeFile(String uri,InputStream stream);
+
+}

+ 64 - 0
gfs/src/main/java/com/giantan/gfs/service/IGkbService.java

@@ -0,0 +1,64 @@
+package com.giantan.gfs.service;
+
+import com.giantan.gfs.storer.FileItem;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+public interface IGkbService {
+
+    void storeDirect(InputStream is, String repository, String fullName) throws Exception;
+
+    String storeDirect(InputStream is, String repository, String fileName, String uri) throws Exception;
+
+    public void backupAndStore(InputStream is, String repository, String objectKey) throws Exception;
+
+    public String backupAndStore(InputStream is, String repository, String fileName, String uri) throws Exception;
+
+    String getObjectKey(String repository, String resourceType, String fullName);
+
+    public InputStream download(String repository, String fullName) throws IOException;
+
+    public void upload(File dir, String tempBase, String basePath, String repository, String root) throws IOException;
+
+    public String getEtag(String repository, String objectKey);
+
+    public List<FileItem> getFileItems(String repository) throws Exception;
+
+    // 取出的是所有文件(包括子目录下的文件,但不包括目录)
+    public List<FileItem> getAllFileItems(String repository, String path) throws Exception;
+
+    public int delete(String repository, String path) throws Exception;
+
+    public int delete(String path) throws Exception;
+
+    public String getBucket();
+
+    public String getEndpoint();
+
+    //////////////////
+    // 取出的是所有文件(包括子目录下的文件,但不包括目录)
+    public List<FileItem> getAllFileItems(String repository, String resourceType, String path) throws Exception;
+
+    public InputStream download(String repository, String resourceType, String fullName) throws IOException;
+
+    public void uploadFiles(File dir, String sourceRoot, String bucket, String repository, String resourceType, String path) throws IOException;
+
+    public String uploadFile(File dir, String sourceRoot, String bucket, String repository, String resourceType, String path) throws IOException;
+
+    public String copyFile(String fromBucket, String fromObjectKey, String toBucket, String toObjectKey);
+
+    public void copyFolder(String srcBucket, String srcDir, String destBucket, String destDir) throws Exception;
+
+    public String renameFile(String repository, String fromObjectKey, String toObjectKey);
+
+    public int renameFolder(String repository, String srcDir, String destDir) throws Exception;
+
+
+    // 列出当前目录下的文件和目录
+    public List<FileItem> listObjects(String bucket, String path) throws Exception;
+
+    //void uploadZip(InputStream is, String fileName, String charSet, String objBasePath) throws IOException;
+}

+ 17 - 0
gfs/src/main/java/com/giantan/gfs/service/PermissionsFilter.java

@@ -0,0 +1,17 @@
+package com.giantan.gfs.service;
+
+import com.giantan.gfs.storer.FileItem;
+
+import java.util.List;
+
+public interface PermissionsFilter {
+
+    public void init();
+
+    public int getPermissions(FileItem fi);
+
+    public void filter(FileItem fi);
+
+    public void filter(List<FileItem> fis);
+
+}

+ 410 - 0
gfs/src/main/java/com/giantan/gfs/service/impl/GStorageService.java

@@ -0,0 +1,410 @@
+package com.giantan.gfs.service.impl;
+
+//import com.giantan.baiying.g1.api.ObjectUtil;
+
+import com.giantan.gfs.service.FileStorageService;
+import com.giantan.gfs.storer.FileItem;
+import com.giantan.gfs.storer.Storer;
+import com.giantan.gfs.storer.util.ObjectUtil;
+import com.giantan.gfs.storer.util.StringUtil;
+import com.giantan.gfs.storer.util.ZipUtil2;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+//@Service("fileStorageService")
+public class GStorageService implements FileStorageService {
+
+    protected static Logger log = LoggerFactory.getLogger(GStorageService.class);
+
+    String backupDir = "/_backup/";
+    String clazzName;
+
+    Storer storer;
+
+    public GStorageService(){
+
+    }
+    @Override
+    public void init() {
+        init(com.giantan.gfs.storer.impl.FSStorer.class.getName());
+    }
+
+    public void init(String className) {
+        this.clazzName = className;
+        //String className = Context.getProperty("storer.class");
+        log.info("storer.class=" + className);
+
+        Class clazz = null;
+        try {
+            // Try to instantiate the builder
+            clazz = Class.forName(className);
+            Object obj = clazz.getDeclaredConstructor().newInstance();
+            storer = (Storer) obj;
+            storer.init();
+        } catch (Exception e) {
+            log.error(e.getMessage());
+        }
+    }
+
+    public Storer getStorer() {
+        return storer;
+    }
+
+    public void setStorer(Storer storer) {
+        this.storer = storer;
+    }
+
+    @Override
+    public void save(MultipartFile multipartFile) {
+        try {
+            //Files.copy(multipartFile.getInputStream(),this.path.resolve(multipartFile.getOriginalFilename()));
+            storer.store(multipartFile.getInputStream(), multipartFile.getOriginalFilename());
+        } catch (IOException e) {
+            log.error(e.getMessage());
+            throw new RuntimeException("Could not store the file. Error:" + e.getMessage());
+        }
+    }
+
+    @Override
+    public InputStream download(String filename) throws IOException {
+        try {
+            InputStream is = storer.getStream(filename);
+            return is;
+        } catch (IOException e) {
+            //e.printStackTrace();
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+    @Override
+    public List<FileItem> list() throws Exception {
+        //return storer.load();
+        try {
+            List<FileItem> ls = storer.getFileItems("/");
+            return ls;
+        } catch (Exception e) {
+            //e.printStackTrace();
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+    @Override
+    public List<FileItem> getFileItems(String path) throws Exception {
+        try {
+            return storer.getFileItems(path);
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+    @Override
+    public FileItem getFileItem(String filename) throws IOException {
+        try {
+            FileItem ls = storer.getFileItem(filename);
+            return ls;
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+    @Override
+    public void clear() {
+        if (storer != null) {
+            //storer.clear();
+        }
+    }
+
+    @Override
+    public void uploadZip(InputStream is, String fileName, String charSet, String objBasePath) throws IOException {
+
+        if (fileName.endsWith(".zip")) {  //&& importZip
+            log.debug("zip file = {}", fileName);
+        } else {
+            return;
+        }
+
+        //        // 存本地文件方式,s3用了这种方式
+//        String objName = computeBlobPath(docId, resource);
+        File file = null;
+        Path temp = null;
+        try {
+            file = File.createTempFile("zip-", null);
+            temp = Files.createTempDirectory("unzip-");
+
+            BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
+            try {
+                IOUtils.copy(is, bos);
+            } finally {
+                bos.flush();
+                bos.close();
+            }
+
+//            if (temp.exists()) {
+//                try {
+//                    FileUtils.deleteDirectory(temp);
+//                } catch (IOException e) {
+//                }
+//            }
+//
+//            try {
+//                FileUtils.forceMkdir(temp);
+//            } catch (IOException e) {
+//            }
+
+            final File zipFile = file;
+            final File dir = temp.toFile();
+
+            // Prepare the import thread
+            Thread zipImporter = new Thread(new Runnable() {
+                public void run() {
+                    /*
+                     * Prepare the Master document used to create the
+                     * new one
+                     */
+//                    try {
+//                        InMemoryZipImport importer = new InMemoryZipImport(destFile, charSet);
+//                        importer.process(destFile,storer,basePath);
+//                    } catch (Throwable e) {
+//                        log.error("Unable to delete {}", destFile, e);
+//                    } finally {
+//                        FileUtil.strongDelete(destFile);
+//                    }
+
+//                    ZipUtil zipUtil = new ZipUtil();
+//                    zipUtil.unzip(zipFile.getPath(), dir.getPath());
+
+                    ZipUtil2.unzip(zipFile, dir.getPath());
+
+                    File[] files = dir.listFiles();
+
+                    String tempPath = dir.getPath();
+                    upload(dir, tempPath, objBasePath);
+
+                    try {
+                        FileUtils.forceDelete(zipFile);
+                        FileUtils.deleteDirectory(dir);
+                    } catch (IOException e) {
+                    }
+                }
+            });
+
+            // And launch it
+            zipImporter.start();
+
+        } catch (Exception e) {
+            //e.printStackTrace();
+            log.error("Error occurred: " + e);
+            throw e;
+        } finally {
+//            if (file != null)
+//                FileUtil.strongDelete(file);
+        }
+
+    }
+
+
+    private void upload(File dir, String tempBase, String basePath) {
+        if (dir.isDirectory()) {
+            File[] files = dir.listFiles();
+
+            for (int i = 0; i < files.length; i++) {
+                if (StringUtil.isNotEmpty(files[i].getName())
+                        || StringUtil.isNotEmpty(FilenameUtils.getBaseName(files[i].getName())))
+                    try {
+                        upload(files[i], tempBase, basePath);
+                    } catch (Exception e) {
+                        log.error("Error adding entry " + files[i].getName(), e);
+                    }
+            }
+        } else {
+            String path = dir.getPath();
+            String relativePath = path.substring(tempBase.length() + 1);
+            relativePath = relativePath.replace("\\", "/");
+            String objName = basePath + relativePath;
+            try {
+                storer.store(dir, objName);
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+    @Override
+    public String delete(String filename) throws Exception {
+//        try {
+//            storer.delete(filename);
+//        } catch (Exception e) {
+//            //e.printStackTrace();
+//            return e.getMessage();
+//        }
+        storer.delete(filename);
+        return filename;
+    }
+
+    @Override
+    public boolean rename(String src, String dest) throws Exception {
+        return storer.move(src, dest);
+    }
+
+    @Override
+    public boolean copy(String src, String dest) throws Exception {
+        return storer.copy(src, dest);
+    }
+
+    @Override
+    public boolean createDirectory(String uri) throws Exception {
+        return storer.createDirectory(uri);
+    }
+
+    @Override
+    public boolean writeFile(String uri, MultipartFile multipartFile) {
+        try {
+            //Files.copy(multipartFile.getInputStream(),this.path.resolve(multipartFile.getOriginalFilename()));
+            //storer.store(multipartFile.getInputStream(),multipartFile.getOriginalFilename());
+            storer.store(multipartFile.getInputStream(), uri);
+            return true;
+        } catch (IOException e) {
+            log.error(e.getMessage());
+            throw new RuntimeException("Could not store the file. Error:" + e.getMessage());
+        }
+    }
+
+    @Override
+    public boolean writeFile(String uri, InputStream stream) {
+        try {
+            //Files.copy(multipartFile.getInputStream(),this.path.resolve(multipartFile.getOriginalFilename()));
+            //storer.store(multipartFile.getInputStream(),multipartFile.getOriginalFilename());
+            storer.store(stream, uri);
+            return true;
+        } catch (IOException e) {
+            log.error(e.getMessage());
+            throw new RuntimeException("Could not store the file. Error:" + e.getMessage());
+        }
+    }
+
+    @Override
+    public String getContent(String uri) throws IOException {
+        try {
+            InputStream is = storer.getStream(uri);
+
+            String ss = getStringFromInputStream(is);
+            return ss;
+        } catch (IOException e) {
+            log.error(e.getMessage());
+            throw e;
+        }
+
+    }
+
+
+    private String getBachupFile(String path){
+        String p2 = backupDir+path+"."+System.currentTimeMillis()+".bak";
+        return p2;
+    }
+
+    public void backupAndStore(InputStream is,String path) throws Exception {
+        if (storer.exists(path)){
+            String bachupFile = getBachupFile(path);
+            storer.copy(path,bachupFile);
+        }
+        storer.store(is,path);
+    }
+
+    public static String getStringFromInputStream(InputStream is) throws IOException {
+        BufferedReader br = null;
+        StringBuilder sb = new StringBuilder();
+
+        String line;
+        try {
+            br = new BufferedReader(new InputStreamReader(is, "utf-8"));
+            while ((line = br.readLine()) != null) {
+                if (sb.length() > 0)
+                    sb.append("\n");
+                sb.append(line);
+            }
+        } finally {
+            if (br != null) {
+                try {
+                    br.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+        return line;
+    }
+
+    public static String normalizePath(String path) {
+        // 将反斜杠转换为正斜杠
+        String normalizedPath = path.replace('\\', '/');
+        return normalizedPath;
+    }
+    public static boolean isEndsWithSeparator(String path) {
+        if (path.endsWith("/") || path.endsWith("\\")) {
+            return true;
+        }
+        return false;
+    }
+
+    public static String endsWithSeparator(String path) {
+        if (path.endsWith("/") || path.endsWith("\\")) {
+            return path;
+        }
+        return path+File.separator;
+    }
+
+    public static String concatPath(String prefix, String path) {
+        if (prefix == null || prefix.length() == 0) {
+            return GStorageService.normalizePath(path);
+        }
+        String p1 = GStorageService.normalizePath(prefix);
+        String p2 = GStorageService.normalizePath(path);
+        String p = null;
+
+        if (!p1.endsWith("/") && !p2.startsWith("/")) {
+            p = p1 + "/" + p2;
+        } else if (p1.endsWith("/") && p2.startsWith("/")) {
+            p = p1 + p2.substring(1);
+        } else {
+            p = p1 + p2;
+        }
+        return p;
+    }
+
+    public static String getPhysicalPath(String repository, String uri, String original) {
+        String ws = repository;
+        if (ObjectUtil.isEmpty(uri)) {
+            uri = original;
+        }else{
+            uri = concatPath(uri,original);
+        }
+        if (ObjectUtil.isEmpty(ws)) {
+
+        } else {
+            //System.out.println(uri);
+//            String uri2 = null;
+//            if (uri.startsWith("/") || uri.startsWith("\\")) {
+//                uri2 = File.separator + ws + uri;
+//            } else {
+//                uri2 = File.separator + ws + File.separator + uri;
+//            }
+//            uri2 = normalizePath(uri2);
+            uri = concatPath(ws,uri);
+            //return uri2;
+        }
+        return uri;
+    }
+}

+ 53 - 0
gfs/src/main/java/com/giantan/gfs/service/impl/PermissionsFilterImpl.java

@@ -0,0 +1,53 @@
+package com.giantan.gfs.service.impl;
+
+import com.giantan.gfs.service.PermissionsFilter;
+import com.giantan.gfs.storer.FileItem;
+
+import java.util.List;
+
+//@Service("permissions")
+public class PermissionsFilterImpl implements PermissionsFilter {
+
+    public static final int READ_ONLY = 1;
+
+    public PermissionsFilterImpl(){
+
+    }
+
+
+    @Override
+    public void init() {
+
+    }
+
+    @Override
+    public int getPermissions(FileItem fi) {
+        String fn = fi.getName();
+        int p = 0;
+        if (fn.endsWith(".model")){
+            p = READ_ONLY;
+        }
+        return p;
+    }
+
+    @Override
+    public void filter(FileItem fi) {
+        if (fi!= null) {
+            String fn = fi.getName();
+            int p = 0;
+            if (fn.endsWith(".model")) {
+                p = READ_ONLY;
+            }
+            fi.setPermissions(p);
+        }
+    }
+
+    @Override
+    public void filter(List<FileItem> fis) {
+        if (fis != null) {
+            for (FileItem fi : fis) {
+                filter(fi);
+            }
+        }
+    }
+}

+ 10 - 0
gfs/src/main/java/com/giantan/gfs/service/impl/S3Constants.java

@@ -0,0 +1,10 @@
+package com.giantan.gfs.service.impl;
+
/**
 * Shared naming constants for the S3-backed storage layout:
 * bucket-name suffixes and top-level resource-type folder names.
 */
public class S3Constants {
    /** Suffix appended to a bucket name to form its backup bucket. */
    public static final String BACKUP_SUFFIX = "2bak";
    /** Suffix appended to a bucket name to form its web bucket. */
    public static final String WEB_SUFFIX = "2web";
    /** Resource-type folder holding original (source) files. */
    public static final String SOURCE = "source";
    /** Resource-type folder name "gmd". */
    public static final String GMD = "gmd";
    /** Resource-type folder name "gbook". */
    public static final String GBOOK = "gbook";

    private S3Constants() {
        // Constants holder — not instantiable.
    }
}

+ 706 - 0
gfs/src/main/java/com/giantan/gfs/service/impl/S3GkbService.java

@@ -0,0 +1,706 @@
+package com.giantan.gfs.service.impl;
+
+//import com.giantan.ai.common.reponse.ObjectUtil;
+
+import com.giantan.gfs.service.IGkbService;
+import com.giantan.gfs.storer.FileItem;
+import com.giantan.gfs.storer.FileType;
+import com.giantan.gfs.storer.impl.S3Storer;
+import com.giantan.gfs.storer.util.FileUtil;
+import com.giantan.gfs.storer.util.ObjectUtil;
+import com.giantan.gfs.storer.util.StringUtil;
+import org.apache.commons.io.FilenameUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.*;
+
+import java.io.*;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.List;
+
+// 用于多bucket的存储
+public class S3GkbService implements IGkbService {
+
    protected static Logger log = LoggerFactory.getLogger(S3GkbService.class);
    // Local aliases for the shared layout constants (bucket suffixes, resource folders).
    public static final String BACKUP_SUFFIX = S3Constants.BACKUP_SUFFIX;
    public static final String WEB_SUFFIX = S3Constants.WEB_SUFFIX;
    public static final String SOURCE = S3Constants.SOURCE;
    public static final String GMD = S3Constants.GMD;
    public static final String GBOOK = S3Constants.GBOOK;

    // Endpoint URL of the S3-compatible service (used when building direct URLs).
    String endpoint;
    // Primary bucket backing this service.
    String bucket;

    private S3Client s3Client;

    // Storer facade over the primary bucket; created in init().
    private S3Storer kbStorer;

    //private S3Storer bakStorer;
    public S3GkbService() {

    }

    public S3GkbService(S3Client s3client, String bucket) {
        this.s3Client = s3client;
        this.bucket = bucket;
    }

    public S3GkbService(S3Client s3client, String endpoint, String bucket) {
        this.s3Client = s3client;
        this.endpoint = endpoint;
        this.bucket = bucket;
    }

    // Must be called after construction and before use; builds the backing storer.
    public void init() {
        kbStorer = new S3Storer(s3Client, endpoint, bucket);
    }

    public String getBucket() {
        return bucket;
    }

    @Override
    public String getEndpoint() {
        return endpoint;
    }

    public void setBucket(String bucket) {
        this.bucket = bucket;
    }

    public S3Client getS3Client() {
        return s3Client;
    }

    public void setS3Client(S3Client s3Client) {
        this.s3Client = s3Client;
    }
+
    // Object key for a file in the repository's "source" area: {repo}/source/{fullName}.
    private String getSourceKey(String repository, String fullName) {
        String f1 = GStorageService.normalizePath(fullName);
        String sep = "/";
        if (f1.startsWith("/")) {
            sep = "";
        }
        String key = repository + "/" + SOURCE + sep + f1;
        return key;
    }

    // Timestamped backup key for a source file (stored in the backup bucket).
    private String getBackupKey(String repository, String fullName) {
        String f1 = GStorageService.normalizePath(fullName);
        String sep = "/";
        if (f1.startsWith("/")) {
            sep = "";
        }
        String key = repository + "/" + SOURCE + sep + f1 + "." + System.currentTimeMillis() + ".bak";
        return key;
    }

    /**
     * Object key for {repo}/{resourceType}/{fullName}, with any leading
     * separator stripped (S3 keys do not start with '/').
     */
    @Override
    public String getObjectKey(String repository, String resourceType, String fullName) {
        String path = GStorageService.getPhysicalPath(repository, resourceType, fullName);
        path = StringUtil.removeFirstSeparator(path);
        return path;
    }

    // Backup bucket name: primary bucket + "2bak".
    private String getBackupBucket() {
        return bucket + BACKUP_SUFFIX;
    }

    /**
     * Resolves the object path for an upload from a target uri and the original filename.
     * NOTE(review): when {@code uri} is non-empty and does NOT end with a separator,
     * {@code original} is ignored and {@code uri} is used as the complete path —
     * confirm this is the intended contract.
     */
    public static String getObjectPath(String uri, String original) throws Exception {
        String fn = original;
        if (ObjectUtil.isEmpty(uri)) {
            //fn = original;
        } else {
            if (FileUtil.isEndsWithSeparator(uri)) {
                fn = uri + original;
            } else {
                fn = uri;
            }
        }
        return fn;
    }
+
    /** Stores under the path resolved from {@code uri}/{@code fileName}, backing up any existing object first. */
    @Override
    public String backupAndStore(InputStream is, String repository, String fileName, String uri) throws Exception {
        String objectPath = getObjectPath(uri, fileName);
        backupAndStore(is, repository, objectPath);
        return objectPath;
    }

    /**
     * Stores the stream at the repository's source key; if the object already
     * exists it is first copied to a timestamped key in the backup bucket.
     */
    @Override
    public void backupAndStore(InputStream is, String repository, String fullName) throws Exception {
        String sourceKey = getSourceKey(repository, fullName);
        if (kbStorer.exists(sourceKey)) {
            String toObjectKey = getBackupKey(repository, fullName);
            copyFile(bucket, sourceKey, getBackupBucket(), toObjectKey);
        }
        kbStorer.store(is, sourceKey);
    }

    /** Stores under the path resolved from {@code uri}/{@code fileName} without taking a backup. */
    @Override
    public String storeDirect(InputStream is, String repository, String fileName, String uri) throws Exception {
        String objectPath = getObjectPath(uri, fileName);
        storeDirect(is, repository, objectPath);
        return objectPath;
    }

    /** Stores the stream at the repository's source key, overwriting without backup. */
    @Override
    public void storeDirect(InputStream is, String repository, String fullName) throws Exception {
        String sourceKey = getSourceKey(repository, fullName);
        kbStorer.store(is, sourceKey);
    }
+
+    @Override
+    public String copyFile(String fromBucket, String fromObjectKey, String toBucket, String toObjectKey) {
+//        CopyObjectRequest copyReq = CopyObjectRequest.builder()
+//                .sourceBucket(fromBucket)
+//                .sourceKey(fromObjectKey)
+//                .destinationBucket(toBucket)
+//                .destinationKey(toObjectKey)
+//                .build();
+//
+//        try {
+//            CopyObjectResponse copyRes = s3Client.copyObject(copyReq);
+//            return copyRes.copyObjectResult().toString();
+//
+//        } catch (S3Exception e) {
+//            log.error(e.awsErrorDetails().errorMessage());
+//        }
+        String fn = S3Utils.copyFile(s3Client, fromBucket, fromObjectKey, toBucket, toObjectKey);
+
+        return fn;
+    }
+
+//    public void store(InputStream is, String repository, String fullName) throws Exception {
+//        String sourceKey = getSourceKey(repository, fullName);
+//        kbStorer.store(is, sourceKey);
+//    }
+
    /** Opens a read stream for a file in the repository's source area; the caller must close it. */
    public InputStream download(String repository, String fullName) throws IOException {
        String sourceKey = getSourceKey(repository, fullName);
        try {
            InputStream is = kbStorer.getStream(sourceKey);
            return is;
        } catch (IOException e) {
            log.error(e.getMessage());
            throw e;
        }
    }

    /** Opens a read stream for {repo}/{resourceType}/{fullName}; the caller must close it. */
    @Override
    public InputStream download(String repository, String resourceType, String fullName) throws IOException {
        String sourceKey = getObjectKey(repository, resourceType, fullName);
        try {
            InputStream is = kbStorer.getStream(sourceKey);
            return is;
        } catch (IOException e) {
            log.error(e.getMessage());
            throw e;
        }
    }
+
+
+//    protected void storeDirect(Path path, String repository, String root, String fullName) throws Exception {
+//        String sourceKey = getObjectKey(repository, root, fullName);
+//        //kbStorer.store(is, sourceKey);
+//        storeFile(path, bucket, sourceKey);
+//    }
+
+
    /**
     * Uploads the file at {@code path}, first copying any existing object to the
     * backup bucket.
     * NOTE(review): unlike {@link #backupAndStore}, the backup reuses the SAME
     * object key, so successive uploads overwrite the previous backup — confirm
     * this is intended.
     */
    public void backupAndstoreDirect(Path path, String repository, String root, String fullName) throws Exception {
        String sourceKey = getObjectKey(repository, root, fullName);
        if (kbStorer.exists(sourceKey)) {
            String toObjectKey = sourceKey;
            copyFile(bucket, sourceKey, getBackupBucket(), toObjectKey);
        }
        storeFile(path, bucket, sourceKey);
    }
+
+
+    @Override
+    public void upload(File dir, String tempBase, String basePath, String repository, String root) throws IOException {
+        if (dir.isDirectory()) {
+            File[] files = dir.listFiles();
+
+            for (int i = 0; i < files.length; i++) {
+                if (StringUtil.isNotEmpty(files[i].getName())
+                        || StringUtil.isNotEmpty(FilenameUtils.getBaseName(files[i].getName())))
+                    try {
+                        upload(files[i], tempBase, basePath, repository, root);
+                    } catch (Exception e) {
+                        log.error("Error adding entry " + files[i].getName(), e);
+                    }
+            }
+        } else {
+            String path = dir.getPath();
+            String relativePath = path.substring(tempBase.length() + 1);
+            relativePath = relativePath.replace("\\", "/");
+            // String objName = basePath + relativePath;
+            String objName = GStorageService.concatPath(basePath, relativePath);
+            //InputStream is = Files.newInputStream(Path.of(path));
+            try {
+                //storeDirect(Path.of(path), repository, root, objName);
+                backupAndstoreDirect(Path.of(path), repository, root, objName);
+            } catch (Exception e) {
+                //e.printStackTrace();
+                log.error(e.getMessage());
+            }
+        }
+    }
+
+
+    public String getEtag(String repository, String objectKey) {
+        String objectName = getSourceKey(repository, objectKey);
+
+        // 获取文件的元数据
+        HeadObjectRequest headObjectRequest = HeadObjectRequest.builder()
+                .bucket(bucket)
+                .key(objectName)
+                .build();
+
+        HeadObjectResponse headObjectResponse = s3Client.headObject(headObjectRequest);
+
+        // 获取 ETag
+        String eTag = headObjectResponse.eTag();
+        return eTag;
+    }
+
    /** Lists the entries at the root of the repository's source area. */
    @Override
    public List<FileItem> getFileItems(String repository) throws Exception {
        String sourceKey = getSourceKey(repository, "");
        try {
            return kbStorer.getFileItems(sourceKey);
        } catch (Exception e) {
            log.error(e.getMessage());
            throw e;
        }
    }

    // Returns all files (including files in subdirectories, but not the
    // directories themselves) under the repository's source area.
    @Override
    public List<FileItem> getAllFileItems(String repository, String path) throws Exception {
        return getAllFileItems(repository, SOURCE, path);
    }

    /** Deletes everything under {@code path} in the repository's source area. */
    @Override
    public int delete(String repository, String path) throws Exception {
        String objectKey = getObjectKey(repository, SOURCE, path);
        return delete(objectKey);
    }
+
+    @Override
+    public int delete(String path) throws Exception {
+        String dir = path;
+//        try {
+//            List<String> ls = listObjectNames(bucket, dir, false);
+//            if (ls == null || ls.size() <= 0) return;
+//
+//            List<DeleteObject> objects = new LinkedList<>();
+//            for (int i = 0; i < ls.size(); i++) {
+//                objects.add(new DeleteObject(ls.get(i)));
+//            }
+//
+//            Iterable<Result<DeleteError>> results =
+//                    getMinioClient().removeObjects(
+//                            RemoveObjectsArgs.builder().bucket(bucket).objects(objects).build());
+//            for (Result<DeleteError> result : results) {
+//                DeleteError error = result.get();
+//                log.error(
+//                        "Error in deleting object " + error.objectName() + "; " + error.message());
+//            }
+//        } catch (MinioException | InvalidKeyException | IOException | NoSuchAlgorithmException e) {
+//            log.error("Error occurred: " + e);
+//            throw e;
+//        }
+
+        try {
+//            DeleteObjectsRequest deleteObjectsRequest = DeleteObjectsRequest.builder()
+//                    .bucket(bucket)
+//                    .delete(Delete.builder().objects(ObjectIdentifier.builder().key(path+"/").build()).build())
+//                    .build();
+//            DeleteObjectsResponse res = s3Client.deleteObjects(deleteObjectsRequest);
+//            return res.deleted().size();
+
+            boolean versioningEnabled = true;
+            // 检查是否启用了版本控制
+            try {
+                GetBucketVersioningResponse versioning = s3Client.getBucketVersioning(
+                        GetBucketVersioningRequest.builder().bucket(bucket).build()
+                );
+                versioningEnabled = "Enabled".equalsIgnoreCase(versioning.statusAsString());
+            } catch (Exception e) {
+                //throw new RuntimeException(e);
+                versioningEnabled = false;
+            }
+
+            if (versioningEnabled) {
+
+                int deleted = S3Utils.strongDeleteFilesRecursively(s3Client, bucket, path);
+                //int deleted = S3Utils.deleteFilesRecursively(s3Client,bucket,path);
+                return deleted;
+            } else {
+                int deleted = S3Utils.deleteFilesRecursively(s3Client, bucket, path);
+                //int deleted = S3Utils.deleteFilesRecursively(s3Client,bucket,path);
+                return deleted;
+            }
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+
+    @Override
+    public List<FileItem> getAllFileItems(String repository, String resourceType, String path) throws Exception {
+        String directoryPath = getObjectKey(repository, resourceType, path);
+
+        ListObjectsRequest listObjectsRequest = ListObjectsRequest.builder()
+                .bucket(bucket)
+                .prefix(directoryPath)
+                .build();
+
+        ListObjectsResponse res = s3Client.listObjects(listObjectsRequest);
+
+        List<FileItem> fileItemList = new ArrayList<>();
+        // 遍历结果集
+        for (S3Object s3Object : res.contents()) {
+            FileItem fi = toFileItem(s3Object);
+            //FileItem fi = toFileItem(s3Object, path);
+            fileItemList.add(fi);
+        }
+        return fileItemList;
+    }
+
+
+    protected String getDirectUrl(String fullPath) {
+//        String url = new StringBuilder("https://").append(bucket)
+//                .append(".")
+//                .append(this.obsProperties.getEndpoint())
+//                .append("/")
+//                .append(ossObjectName)
+//                .toString();
+
+        String url1 = null;
+        if (fullPath.startsWith("/")) {
+            url1 = endpoint + "/" + bucket + fullPath;
+        } else {
+            url1 = endpoint + "/" + bucket + "/" + fullPath;
+        }
+        return url1;
+    }
+
+    //    public void uploadFiles(File dir, String bucket, String repository, String resourceType, String path) throws IOException {
+//        String baseRoot = dir.toString();
+//        uploadFiles(dir, baseRoot,bucket, repository, resourceType, path);
+//    }
+    @Override
+    public void uploadFiles(File dir, String sourceRoot, String bucket, String repository, String resourceType, String path) throws IOException {
+        if (dir.isDirectory()) {
+            File[] files = dir.listFiles();
+
+            for (int i = 0; i < files.length; i++) {
+                if (StringUtil.isNotEmpty(files[i].getName())
+                        || StringUtil.isNotEmpty(FilenameUtils.getBaseName(files[i].getName())))
+                    try {
+                        //upload(files[i], tempBase, basePath, repository, root);
+                        uploadFiles(files[i], sourceRoot, bucket, repository, resourceType, path);
+                    } catch (Exception e) {
+                        log.error("Error adding entry " + files[i].getName(), e);
+                    }
+            }
+        } else {
+            uploadFile(dir, sourceRoot, bucket, repository, resourceType, path);
+        }
+    }
+
    /**
     * Uploads one file, keyed by its path relative to {@code sourceRoot} joined
     * under {@code path}. Failures are logged and swallowed (best-effort).
     *
     * @return the object's ETag, or {@code null} on failure
     */
    public String uploadFile(File dir, String sourceRoot, String bucket, String repository, String resourceType, String path) throws IOException {
        String fpath = dir.getPath();
        String relativePath = fpath.substring(sourceRoot.length() + 1);
        relativePath = relativePath.replace("\\", "/");

        String objPath = GStorageService.concatPath(path, relativePath);
        String objectKey = getObjectKey(repository, resourceType, objPath);

        try {
            String etag = storeFile(dir.toPath(), bucket, objectKey);
            return etag;
        } catch (Exception e) {
            log.error(e.getMessage());
        }
        return null;
    }
+
    /**
     * Uploads the file at {@code path} to {@code bucket}/{@code objectKey}.
     *
     * @return the stored object's ETag
     */
    protected String storeFile(Path path, String bucket, String objectKey) throws IOException {
        PutObjectRequest putOb = PutObjectRequest.builder()
                .bucket(bucket)
                .key(objectKey)
                //.metadata(metadata)
                //.contentType("text/html")
                .build();

        PutObjectResponse res = getS3Client().putObject(putOb, path);
        log.info(objectKey + " is successfully uploaded " + " to bucket " + bucket + ".");
        return res.eTag();
    }
+
+
+    protected String storeFile(InputStream is, String bucket, String objectKey) throws IOException {
+        try {
+            PutObjectRequest putOb = PutObjectRequest.builder()
+                    .bucket(bucket)
+                    .key(objectKey)
+                    //.metadata(metadata)
+                    //.contentType("text/html")
+                    .build();
+
+            PutObjectResponse res = getS3Client().putObject(putOb, RequestBody.fromInputStream(is, is.available()));
+            log.info(objectKey + " is successfully uploaded " + " to bucket " + bucket + ".");
+            return res.eTag();
+        } catch (IOException e) {
+            //e.printStackTrace();
+            log.error(e.getMessage());
+            throw e;
+        } finally {
+            if (is != null) {
+                try {
+                    is.close();
+                } catch (IOException e) {
+
+                }
+            }
+        }
+
+    }
+
+    @Override
+    public void copyFolder(String srcBucket, String srcDir, String destBucket, String destDir) throws Exception {
+
+//        List<String> files = listAllFileObjects(srcBucket, srcDir);
+//        for (String fileObject : files) {
+//            copyFile(srcBucket,fileObject,destBucket,destDir + fileObject.substring(srcDir.length()));
+//        }
+        S3Utils.copyFolder(s3Client, srcBucket, srcDir, destBucket, destDir);
+        log.info("Folder copy from {} to {} completed.", srcBucket + "/" + srcDir, destBucket);
+    }
+
+    @Override
+    public String renameFile(String repository, String fromObjectKey, String toObjectKey) {
+        String oldKey = getSourceKey(repository, fromObjectKey);
+        String newKey = getSourceKey(repository, toObjectKey);
+        String s = S3Utils.renameFile(s3Client, bucket, oldKey, bucket, newKey);
+        return s;
+    }
+
+    @Override
+    public int renameFolder(String repository, String srcDir, String destDir) throws Exception {
+        String oldKey = getSourceKey(repository, srcDir);
+        String newKey = getSourceKey(repository, destDir);
+        int count = S3Utils.renameFolder(s3Client, bucket, oldKey, bucket, newKey);
+        return count;
+    }
+
+//    private FileItem toFileItem(S3Object so, String path) {
+//
+//        FileItem fi = new FileItem();
+//        String s = so.key();
+//        s = StringUtil.removePrefix(s, path);
+//        s = StringUtil.removeFirstSeparator(s);
+//        s = StringUtil.removeLastSeparator(s);
+//        fi.setName(s);
+//        fi.setSize(so.size());
+//        String et1 = so.eTag();
+//        fi.setEtag(et1.substring(1, et1.length() - 1));
+//
+//        fi.setType(FileType.FILE);
+//        Instant instant = so.lastModified();
+//
+//        // Convert Instant to Date.
+//        // Date modified = Date.from(instant);
+//
+//        fi.setMtime(instant.toEpochMilli());
+//
+//        fi.setPath(path);
+//
+//        String fullPathAndName = StringUtil.concatUrl(path, fi.getName());
+//        String directlink = getDirectUrl(fullPathAndName);
+//        fi.setUrl(directlink);
+//        return fi;
+//    }
+//
+//
+//    private FileItem toFileItem(CommonPrefix cp, String path) {
+//
+//        FileItem fi = new FileItem();
+//        String s = cp.prefix();
+//        s = StringUtil.removePrefix(s, path);
+//        s = StringUtil.removeFirstSeparator(s);
+//        s = StringUtil.removeLastSeparator(s);
+//        fi.setName(s);
+//        //fi.setSize(ii.size());
+//
+//        fi.setType(FileType.FOLDER);
+//
+//        fi.setPath(path);
+//
+//        String fullPathAndName = StringUtil.concatUrl(path, fi.getName());
+//        String directlink = getDirectUrl(fullPathAndName);
+//        fi.setUrl(directlink);
+//        return fi;
+//    }
+
+    private FileItem toFileItem(S3Object so) {
+
+        FileItem fi = new FileItem();
+        String s = so.key();
+//        s = StringUtil.removePrefix(s, path);
+//        s = StringUtil.removeFirstSeparator(s);
+//        s = StringUtil.removeLastSeparator(s);
+
+        fi.setName(FilenameUtils.getName(s));
+        fi.setSize(so.size());
+        String et1 = so.eTag();
+        fi.setEtag(et1.substring(1, et1.length() - 1));
+
+        fi.setType(FileType.FILE);
+        Instant instant = so.lastModified();
+
+        fi.setMtime(instant.toEpochMilli());
+
+        String path = FilenameUtils.getPath(s);
+        fi.setPath(path);
+
+        String fullPathAndName = StringUtil.concatUrl(path, fi.getName());
+        String directlink = getDirectUrl(fullPathAndName);
+        fi.setUrl(directlink);
+        return fi;
+    }
+
+
    /**
     * Converts an S3 common prefix (a "folder") into a {@link FileItem}.
     * A common prefix always ends with the '/' delimiter, e.g. "a/b/".
     */
    private FileItem toFileItem(CommonPrefix cp) {

        FileItem fi = new FileItem();
        String s = cp.prefix();
        // Search for the previous '/' starting just before the trailing one,
        // to split the prefix into parent path and folder name.
        int i = s.lastIndexOf("/", s.length() - 2);
        fi.setType(FileType.FOLDER);
        String directlink = getDirectUrl(s);
        fi.setUrl(directlink);
        if (i <= 0) {
            // Top-level folder: the name is the prefix minus the trailing '/'.
            fi.setName(s.substring(0, s.length() - 1));
            fi.setPath("");
        } else {
            // Nested folder: name sits between the last two '/' characters;
            // the parent path keeps its trailing '/'.
            fi.setName(s.substring(i + 1, s.length() - 1));
            String path = s.substring(0, i + 1);
            fi.setPath(path);
        }
        return fi;
    }
+
+    @Override
+    public List<FileItem> listObjects(String bucket, String path) throws Exception {
+        path = StringUtil.removeFirstSeparator(path);
+        path = StringUtil.appendLastSeparator(path);
+        //String fullPath = StringUtil.removeFirstSeparator(StringUtil.getFullPath(basePath, path));
+
+        List<FileItem> fileItemList = new ArrayList<>();
+
+        ListObjectsRequest listObjects = ListObjectsRequest
+                .builder().prefix(path)
+                .bucket(bucket)
+                //.maxKeys(2)
+                .delimiter("/")
+                .build();
+
+        ListObjectsResponse res = getS3Client().listObjects(listObjects);
+
+        //System.out.println("------directory");
+        List<CommonPrefix> commonPrefixes = res.commonPrefixes();
+        for (CommonPrefix cp : commonPrefixes) {
+            //System.out.println("=="+cp.prefix());
+            //FileItem fi = toFileItem(cp, path);
+            FileItem fi = toFileItem(cp);
+            fileItemList.add(fi);
+        }
+
+        List<S3Object> objects = res.contents();
+        for (S3Object so : objects) {
+            //FileItem fi = toFileItem(so, path);
+            FileItem fi = toFileItem(so);
+            fileItemList.add(fi);
+        }
+        return fileItemList;
+    }
+
+    public List<String> listAllFileObjects(String bucket, String path) throws Exception {
+        path = StringUtil.removeFirstSeparator(path);
+        path = StringUtil.appendLastSeparator(path);
+        //String fullPath = StringUtil.removeFirstSeparator(StringUtil.getFullPath(basePath, path));
+
+        List<String> fileList = new ArrayList<>();
+
+        ListObjectsRequest listObjects = ListObjectsRequest
+                .builder().prefix(path)
+                .bucket(bucket)
+                //.maxKeys(2)
+                .delimiter("/")
+                .build();
+
+        ListObjectsResponse res = getS3Client().listObjects(listObjects);
+
+        //System.out.println("------directory");
+        //List<CommonPrefix> commonPrefixes = res.commonPrefixes();
+
+        List<S3Object> objects = res.contents();
+        for (S3Object so : objects) {
+            //FileItem fi = toFileItem(so, path);
+            //fileItemList.add(fi);
+            fileList.add(so.key());
+        }
+
+        for (CommonPrefix commonPrefix : res.commonPrefixes()) {
+            List<String> files1 = listAllFileObjects(bucket, commonPrefix.prefix());
+            //System.out.println("Folder: " + commonPrefix.prefix());
+            fileList.addAll(files1);
+        }
+        return fileList;
+    }
+
+
+}
+

+ 295 - 0
gfs/src/main/java/com/giantan/gfs/service/impl/S3Utils.java

@@ -0,0 +1,295 @@
+package com.giantan.gfs.service.impl;
+
+//import com.giantan.baiying.tasks.ToGbookTask;
+//import lombok.extern.slf4j.Slf4j;
+
+import com.giantan.gfs.storer.util.StringUtil;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.*;
+import software.amazon.awssdk.services.s3.paginators.ListObjectVersionsIterable;
+import software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+//@Slf4j
+public class S3Utils {
+    private static final org.slf4j.Logger log
+            = org.slf4j.LoggerFactory.getLogger(S3Utils.class);
+
+    //  删除目录下的所有文件(包括子目录)
+    public static int deleteFilesRecursively(S3Client s3, String bucket, String prefix) {
+        ListObjectsRequest listObjectsRequest = ListObjectsRequest.builder()
+                .bucket(bucket)
+                .prefix(prefix)
+                .build();
+        ListObjectsResponse res = s3.listObjects(listObjectsRequest);
+        int count = 0;
+        while (true) {
+            ArrayList<ObjectIdentifier> objects = new ArrayList<>();
+
+            for (Iterator<?> it = res.contents().iterator(); it.hasNext(); ) {
+                S3Object s3Object = (S3Object) it.next();
+                objects.add(ObjectIdentifier.builder().key(s3Object.key()).build());
+            }
+            count = count + objects.size();
+            if (objects.size() > 0) {
+                s3.deleteObjects(
+                        DeleteObjectsRequest.builder().bucket(bucket).delete(Delete.builder().objects(objects).build()).build()
+                );
+            }
+            if (res.isTruncated()) {
+                res = s3.listObjects(listObjectsRequest);
+                continue;
+            }
+            break;
+        }
+        log.debug("Delete folder recursively--->" + prefix);
+        ;
+        return count;
+    }
+
+
+    // 删除所有版本 Delete all versions of an object S3 using java
+    public static int strongDeleteFilesRecursively(S3Client s3, String bucket, String prefix) {
+        ListObjectsRequest listObjectsRequest = ListObjectsRequest.builder()
+                .bucket(bucket)
+                .prefix(prefix)
+                .build();
+        ListObjectsResponse res = s3.listObjects(listObjectsRequest);
+        int count = 0;
+        while (true) {
+            for (Iterator<?> it = res.contents().iterator(); it.hasNext(); ) {
+                S3Object s3Object = (S3Object) it.next();
+                strongDeleteFilesRecursively1(s3, bucket, s3Object.key());
+                count++;
+            }
+            if (res.isTruncated()) {
+                res = s3.listObjects(listObjectsRequest);
+                continue;
+            }
+            break;
+        }
+        log.debug("Strong delete folder recursively--->" + prefix);
+        ;
+        return count;
+    }
+
+    // 删除所有版本 Delete all versions of an object S3 using java
+    public static int strongDeleteFilesRecursively1(S3Client s3, String bucket, String objectKey) {
+        // List all versions of the object
+        ListObjectVersionsRequest listVersions = ListObjectVersionsRequest.builder()
+                .bucket(bucket)
+                .prefix(objectKey)
+                .build();
+
+        ListObjectVersionsIterable responses = s3.listObjectVersionsPaginator(listVersions);
+
+        for (ObjectVersion objectVersion : responses.versions()) {
+            //Filter pre-filtered result
+            if (objectVersion.key().equals(objectKey)) {
+                DeleteObjectRequest deleteRequest = DeleteObjectRequest.builder().bucket(bucket).key(objectKey)
+                        .versionId(objectVersion.versionId()).build();
+                DeleteObjectResponse response = s3.deleteObject(deleteRequest);
+            }
+        }
+        log.debug("Deleted all versions of: " + objectKey);
+        return 1;
+    }
+
+
+    // 删除当前目录下的文件
+    public static int deleteFilesInDirectory(S3Client s3, String bucketName, String folderPath) {
+        ArrayList<ObjectIdentifier> toDeletes = new ArrayList<>();
+        List<String> objects = listFilesInDirectory(s3, bucketName, folderPath);
+        if (null == objects || objects.size() == 0) {
+            return 0;
+        }
+        int count = 0;
+        for (String k : objects) {
+            toDeletes.add(ObjectIdentifier.builder().key(k).build());
+        }
+        try {
+            DeleteObjectsRequest dor = DeleteObjectsRequest.builder()
+                    .bucket(bucketName)
+                    .delete(Delete.builder().objects(toDeletes).build())
+                    .build();
+            DeleteObjectsResponse response = s3.deleteObjects(dor);
+//            while (!response.sdkHttpResponse().isSuccessful()) {
+//                Thread.sleep(100);
+//            }
+            List<DeletedObject> deleted = response.deleted();
+            if (deleted != null) {
+                count = deleted.size();
+            }
+        } catch (S3Exception e) {
+            log.error(e.getMessage());
+        }
+        log.debug("Delete folder successfully--->" + folderPath);
+        return count;
+    }
+
+    public static List<String> listFilesInDirectory(S3Client s3, String bucket, String prefix) {
+        String delimiter = "/";
+        if (!prefix.endsWith(delimiter)) {
+            prefix += delimiter;
+        }
+        // Build the list objects request
+        ListObjectsV2Request listReq = ListObjectsV2Request.builder()
+                .bucket(bucket)
+                .prefix(prefix)
+                .delimiter(delimiter)
+                .maxKeys(1000)
+                .build();
+
+        ListObjectsV2Iterable listRes = s3.listObjectsV2Paginator(listReq);
+        List<String> keyList = new ArrayList<>();
+        final String flolder = prefix;
+        listRes.contents().stream()
+                .forEach(content -> {
+                    if (!flolder.equals(content.key())) {
+                        keyList.add(content.key());
+                    }
+                });
+        return keyList;
+    }
+
+    // 包括子目录的文件
+    public static List<String> listAllFilesInDirectory(S3Client s3, String bucket, String prefix) {
+        String delimiter = "/";
+        if (!prefix.endsWith(delimiter)) {
+            prefix += delimiter;
+        }
+        // Build the list objects request
+        ListObjectsV2Request listReq = ListObjectsV2Request.builder()
+                .bucket(bucket)
+                .prefix(prefix)
+                //.delimiter(delimiter)  // 列出所有子目录的文件就要去掉delimiter
+                .maxKeys(1000)
+                .build();
+
+        ListObjectsV2Iterable listRes = s3.listObjectsV2Paginator(listReq);
+        List<String> keyList = new ArrayList<>();
+
+        listRes.contents().stream()
+                .forEach(content -> keyList.add(content.key()));
+        return keyList;
+    }
+
+
+    public static String copyFile(S3Client s3, String fromBucket, String fromObjectKey, String toBucket, String toObjectKey) {
+        CopyObjectRequest copyReq = CopyObjectRequest.builder()
+                .sourceBucket(fromBucket)
+                .sourceKey(fromObjectKey)
+                .destinationBucket(toBucket)
+                .destinationKey(toObjectKey)
+                .build();
+
+        CopyObjectResponse copyRes = s3.copyObject(copyReq);
+        return copyRes.copyObjectResult().toString();
+    }
+
+
+    public static int copyFolder(S3Client s3, String srcBucket, String srcDir, String destBucket, String destDir) throws Exception {
+        // 源 bucket 名称和目录路径
+        //String srcBucket = "your-source-bucket";
+        //String sourceDirectoryPath = "source/directory/";
+
+        // 目标 bucket 名称和目录路径
+        //String destinationBucketName = "your-destination-bucket";
+        //String destinationDirectoryPath = "destination/directory/";
+
+        List<String> files = listAllFilesInDirectory(s3, srcBucket, srcDir);
+        //srcDir + "/" + fileObject
+        // 遍历源 bucket 下的所有对象,并复制到目标 bucket 中
+        for (String fileObject : files) {
+            copyFile(s3, srcBucket, fileObject, destBucket, destDir + fileObject.substring(srcDir.length()));
+        }
+
+        //log.info("Folder copy from {} to {} completed.", srcBucket + "/" + srcDir, destBucket);
+        return files.size();
+    }
+
+    public static String renameFile(S3Client s3, String fromBucket, String fromObjectKey, String toBucket, String toObjectKey) {
+        CopyObjectRequest copyReq = CopyObjectRequest.builder()
+                .sourceBucket(fromBucket)
+                .sourceKey(fromObjectKey)
+                .destinationBucket(toBucket)
+                .destinationKey(toObjectKey)
+                .build();
+
+        CopyObjectResponse copyRes = s3.copyObject(copyReq);
+        //copyRes.copyObjectResult().toString();
+        DeleteObjectRequest deleteReq = DeleteObjectRequest.builder()
+                .bucket(fromBucket)
+                .key(fromObjectKey)
+                .build();
+        DeleteObjectResponse deleteObjectResponse = s3.deleteObject(deleteReq);
+        return copyRes.copyObjectResult().toString();
+    }
+
+
+    private static String striptPrefix(String src, String prefix) {
+        if (src.startsWith(prefix)) {
+            return src.substring(prefix.length());
+        }
+        if (prefix.startsWith("/")) {
+            prefix = prefix.substring(1);
+        }
+        if (src.startsWith("/")) {
+            src = src.substring(1);
+        }
+        return src.substring(prefix.length());
+    }
+
+    public static String concatPath(String domain, String path) {
+        if (path != null && path.length() > 1 && path.charAt(0) != '/') {
+            path = '/' + path;
+        }
+
+        if (domain != null && domain.charAt(domain.length() - 1) == '/') {
+            domain = domain.substring(0, domain.length() - 1);
+        }
+
+        return domain + path;
+    }
+
+    public static int renameFolder(S3Client s3, String srcBucket, String srcDir, String destBucket, String destDir) throws Exception {
+        ListObjectsV2Request listRequest = ListObjectsV2Request.builder()
+                .bucket(srcBucket)
+                .prefix(srcDir)
+                .build();
+
+        ListObjectsV2Response listResponse = s3.listObjectsV2(listRequest);
+
+        int count = listResponse.contents().size();
+
+        for (S3Object obj : listResponse.contents()) {
+            String oldKey = obj.key();
+
+            // striptPrefix(srcDir,oldKey)
+            //String fileName = oldKey.substring(srcDir.length());
+            String fileName = striptPrefix(oldKey, srcDir);
+            String newKey = concatPath(destDir, fileName);
+
+            CopyObjectRequest copyReq = CopyObjectRequest.builder()
+                    .sourceBucket(srcBucket)
+                    .sourceKey(oldKey)
+                    .destinationBucket(destBucket)
+                    .destinationKey(newKey)
+                    .build();
+            s3.copyObject(copyReq);
+
+            DeleteObjectRequest deleteReq = DeleteObjectRequest.builder()
+                    .bucket(srcBucket)
+                    .key(oldKey)
+                    .build();
+            s3.deleteObject(deleteReq);
+
+            //System.out.println("Renamed: " + oldKey + " -> " + newKey);
+        }
+
+        return count;
+    }
+}

+ 177 - 0
gfs/src/main/java/com/giantan/gfs/storer/FileItem.java

@@ -0,0 +1,177 @@
+package com.giantan.gfs.storer;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
/**
 * A file-system entry (file or folder) returned by storer listings.
 * Plain mutable bean; timestamps are epoch milliseconds.
 */
public class FileItem implements Serializable {

    // Entry name without its parent path.
    private String name;

    // Creation time, epoch milliseconds.
    private long ctime;
    // Last-modified time, epoch milliseconds.
    private long mtime;

    // Size in bytes; may be null for folders.
    private Long size;

    // Entry kind; the visible convention is 1 = file, 2 = folder
    // (mirrors FileType.FILE / FileType.FOLDER).
    private int type;
    // Parent path, typically with a trailing separator.
    private String path;
    // Direct-access URL for the entry, if available.
    private String url;

    private int permissions;
    // ETag as reported by the backing store, without surrounding quotes.
    private String etag;
    // Free-form extra metadata. NOTE(review): kept public for backward
    // compatibility with existing direct-field access.
    public Map<String, String> attributes;

    public FileItem() {
        attributes = new HashMap<>();
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getEtag() {
        return etag;
    }

    public void setEtag(String etag) {
        this.etag = etag;
    }

    public long getCtime() {
        return ctime;
    }

    public void setCtime(long ctime) {
        this.ctime = ctime;
    }

    public long getMtime() {
        return mtime;
    }

    public void setMtime(long mtime) {
        this.mtime = mtime;
    }

    public Long getSize() {
        return size;
    }

    public void setSize(Long size) {
        this.size = size;
    }

    public int getType() {
        return type;
    }

    public void setType(int type) {
        this.type = type;
    }

    /** @return true when this entry is a regular file (type == 1). */
    public boolean isFile() {
        return type == 1;
    }

    /** @return true when this entry is a folder (type == 2). */
    public boolean isDirectory() {
        return type == 2;
    }

    public String getPath() {
        return path;
    }

    public void setPath(String path) {
        this.path = path;
    }

    /**
     * @return the parent path concatenated with the name; assumes the path
     *         already carries its trailing separator
     */
    public String getFullPath() {
        return path + name;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public int getPermissions() {
        return permissions;
    }

    public void setPermissions(int permissions) {
        this.permissions = permissions;
    }

    public Map<String, String> getAttributes() {
        return attributes;
    }

    public void setAttributes(Map<String, String> attributes) {
        this.attributes = attributes;
    }

    @Override
    public String toString() {
        return "FileItem{" +
                "name='" + name + '\'' +
                ", ctime=" + ctime +
                ", mtime=" + mtime +
                ", size=" + size +
                ", type=" + type +
                ", path='" + path + '\'' +
                ", url='" + url + '\'' +
                ", permissions=" + permissions +
                ", attributes=" + attributes +
                '}';
    }
}

+ 36 - 0
gfs/src/main/java/com/giantan/gfs/storer/FileType.java

@@ -0,0 +1,36 @@
+package com.giantan.gfs.storer;
+
/**
 * Numeric file-type codes used by {@code FileItem#getType()}.
 * The values mirror the JS-side convention:
 * Unknown: 0, File: 1, Directory: 2, SymbolicLink: 64.
 */
public interface FileType {

    /** Type could not be determined. */
    int UNKNOWN = 0;

    /**
     * @deprecated misspelling kept for backward compatibility;
     *             use {@link #UNKNOWN} instead.
     */
    @Deprecated
    int UNKNOEN = 0;

    /** Regular file. */
    int FILE = 1;

    /** Directory / folder. */
    int FOLDER = 2;

    /** Symbolic link. */
    int SYMBOLICLINK = 64;

}

+ 7 - 0
gfs/src/main/java/com/giantan/gfs/storer/ReadableFileSystem.java

@@ -0,0 +1,7 @@
+package com.giantan.gfs.storer;
+
+import java.io.IOException;
+
/**
 * Minimal read-only view of a file system: resolve a path to its textual
 * content.
 */
public interface ReadableFileSystem {

    /**
     * Reads the content of the resource at {@code path} as a string.
     *
     * @param path the resource path
     * @return the resource content
     * @throws IOException if the resource cannot be read
     */
    public String getContent(String path) throws IOException;
}

+ 250 - 0
gfs/src/main/java/com/giantan/gfs/storer/Storer.java

@@ -0,0 +1,250 @@
+package com.giantan.gfs.storer;
+
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * The Storer manages the repository where document files are maintained and all
+ * general resources are stored.
+ */
+//public interface Storer extends Comparable<Storer> {
public interface Storer extends ReadableFileSystem {

    /**
     * The unique identifier
     *
     * @return the storer identifier
     */
    public int getId();

    /**
     * Sets the unique identifier
     *
     * @param id the storer identifier
     */
    public void setId(int id);

    /**
     * This method has to store a resource in the document's container. The
     * location where (DBMS, Filesystem, other) the document should be stored is
     * defined by the concrete implementation. It is possible to store a new
     * document or a new version of an existing document.
     *
     * @param stream Document as InputStream
     * @param path Name of the resource to be stored
     *
     * @throws IOException the content cannot be stored
     */
    public void store(InputStream stream, String path) throws IOException;

    /**
     * Stores a file
     *
     * @param file the file to store
     * @param path name of the resource
     *
     * @throws IOException the content cannot be stored
     */
    public void store(File file, String path) throws IOException;

    /**
     * Deletes a specific resource of a document from the storage.
     * @param path Name of the resource to be deleted
     *
     * @throws Exception the resource cannot be deleted
     */
    public void delete(String path) throws Exception;

    /**
     * Lists all resources in the document's container
     * @param path If specified, lists the resources for that specific
     *        file version only
     *
     * @return list of file names
     */
    public List<String> getFileNames(String path);

    /**
     * Gets the files and folders directly under the given path.
     *
     * @param path the path to list
     * @return the files and folders found under {@code path}
     * @throws Exception if the listing fails
     */
    public List<FileItem> getFileItems(String path) throws Exception;

    /**
     * Copies a resource to a new location.
     *
     * @param source the resource to copy
     * @param target the destination
     * @return true if the copy succeeded
     * @throws Exception if the copy fails
     */
    public boolean copy(String source,String target) throws Exception;

    /**
     * Moves a resource to a new location.
     *
     * @param source the resource to move
     * @param target the destination
     * @return true if the move succeeded
     * @throws Exception if the move fails
     */
    public boolean move(String source,String target) throws Exception;

    /**
     * Gets the descriptor of a single resource.
     *
     * @param path the resource path
     * @return the resource's file item
     * @throws IOException if the resource cannot be inspected
     */
    public FileItem getFileItem(String path) throws IOException;

    /**
     * Computed the size of a specific resource.
     * @param path The resource
     *
     * @return the size in bytes
     */
    public long size(String path);

    /**
     * Checks if the passed resource exists in the document's container
     * @param path Name of the resource
     * @return true only if the resource already exists
     */
    public boolean exists(String path);

    /**
     * Writes the specified resource in a file
     *
     * @param resource Name of the resource
     * @param out File that will receive the resource's content
     *
     * @throws IOException error writing the file or reading the resource
     */
    public void writeToFile(String resource, File out) throws IOException;

    /**
     * Writes the specified resource in an output stream
     *
     * @param path Name of the resource
     * @param output The output stream
     * @param start Index of the starting byte
     * @param length Total packet length
     *
     * @throws IOException error writing the stream or reading the resource
     */
    public void writeToStream(String path, OutputStream output, long start, long length)
            throws IOException;

    /**
     * Writes the specified resource in an output stream
     *
     * @param path Name of the resource
     * @param output The output stream
     *
     * @throws IOException error writing the stream or reading the resource
     */
    public void writeToStream(String path, OutputStream output) throws IOException;

    /**
     * Obtains the document's content for the specified resource
     *
     * @param path Name of the resource
     *
     * @return The document file's content
     *
     * @throws IOException cannot open the stream
     */
    public InputStream getStream(String path) throws IOException;

    /**
     * Obtains the document's raw bytes for the specified resource
     *
     * @param path Name of the resource
     *
     * @return The document file's bytes
     *
     * @throws IOException cannot open the resource to get the bytes
     */
    public byte[] getBytes(String path) throws IOException;

    /**
     * Obtains the document's raw bytes for the specified resource
     *
     * @param path Name of the resource
     * @param start Index of the starting byte
     * @param length Total packet length
     *
     * @return The document file's bytes
     *
     * @throws IOException cannot open the resource to get the bytes
     */
    public byte[] getBytes(String path, long start, long length) throws IOException;


    /**
     * Obtains the document's content as string for the specified resource
     *
     * @param path Name of the resource
     *
     * @return The document file's as string representation
     */
    public String getContent(String path);

    /**
     * Computes the total size of the documents repository(in bytes)
     *
     * @return sum of the sizes of all the documents expressed in bytes
     */
    public long getTotalSize();



    /**
     * Implementations should return the list of the required parameters. A
     * parameter is stored in the context as storer.<b>id</b>.parameter = value
     *
     * @return list of parameter names
     */
    public List<String> getParameterNames();

    /**
     * Returns the map of parameters
     *
     * @return a map with settings <b>setting_name</b> - <b>setting_value</b>
     */
    public Map<String, String> getParameters();

    /**
     * Tests if the storer can read and write
     *
     * @return if the storer can read and write
     */
    public boolean test();

    /**
     * Tests if the storer is enabled
     *
     * @return if the storer is enabled
     */
    public boolean isEnabled();

    /**
     * Initialization method
     */
    public void init();

    /**
     * Destroy method
     */
    public void destroy();


    /**
     * Creates a directory at the given location.
     *
     * @param uri the directory location to create
     * @return true if the directory was created
     * @throws Exception if the directory cannot be created
     */
    boolean createDirectory(String uri) throws Exception;

}

+ 221 - 0
gfs/src/main/java/com/giantan/gfs/storer/impl/AbstractStorer.java

@@ -0,0 +1,221 @@
+package com.giantan.gfs.storer.impl;
+
+
+import com.giantan.gfs.storer.util.FileUtil;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import com.giantan.gfs.storer.Storer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
+
+public abstract class AbstractStorer implements Storer {
+    protected static final int DEFAULT_BUFFER_SIZE = 1024;
+
+    protected static Logger log = LoggerFactory.getLogger(AbstractStorer.class);
+
+
+    protected int id = 1;
+
+    protected Map<String, String> parameters = new HashMap<String, String>();
+
+    public AbstractStorer() {
+    }
+
+
+    public int getId() {
+        return id;
+    }
+
+    public void setId(int id) {
+        this.id = id;
+    }
+
+    public int compareTo(Storer o) {
+        return Integer.valueOf(id).compareTo(o.getId());
+    }
+
+    @Override
+    public int hashCode() {
+        return Integer.valueOf(id).hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        return id == ((Storer) obj).getId();
+    }
+
+    @Override
+    public void store(File file, String path) throws IOException {
+        if (!isEnabled()) {
+            log.warn("Storer not enabled");
+            throw new IOException("Storer not enabled");
+        }
+
+        InputStream is = new BufferedInputStream(new FileInputStream(file), DEFAULT_BUFFER_SIZE);
+
+        store(is, path);
+    }
+
+    protected String computeRelativePath(String path) {
+//		StringBuffer tmp = new StringBuffer(computeRelativePath(docId));
+//		if (StringUtils.isNotEmpty(path)) {
+//			if (!tmp.toString().endsWith("/"))
+//				tmp.append("/");
+//			tmp.append(resource.startsWith("/") ? resource.substring(1) : resource);
+//		} else if (!tmp.toString().endsWith("/"))
+//			tmp.append("/");
+        StringBuilder sb = new StringBuilder();
+        if (path != null && path.length() > 0) {
+            sb.append(path);
+            if (!(path.endsWith("/") || path.endsWith("\\"))) {
+                sb.append("/");
+            }
+        } else {
+            sb.append("/");
+        }
+        return sb.toString();
+    }
+
+    @Override
+    public byte[] getBytes(String path) throws IOException {
+        InputStream is = null;
+        try {
+            is = getStream(path);
+            byte[] bytes = IOUtils.toByteArray(is);
+            return bytes;
+        } finally {
+            if (is != null)
+                try {
+                    is.close();
+                } catch (IOException e) {
+
+                }
+        }
+    }
+
+    @Override
+    public void writeToStream(String path, OutputStream output, long start, long length)
+            throws IOException {
+        try {
+            IOUtils.copyLarge(getStream(path), output, start, length);
+        } catch (IOException ioe) {
+            log.error(ioe.getMessage(), ioe);
+            throw ioe;
+        }
+    }
+
+    @Override
+    public void writeToStream(String resource, OutputStream output) throws IOException {
+        try {
+            IOUtils.copyLarge(getStream(resource), output);
+        } catch (IOException ioe) {
+            log.error(ioe.getMessage(), ioe);
+            throw ioe;
+        }
+    }
+
+    @Override
+    public void writeToFile(String resource, File out) throws IOException {
+        OutputStream os = null;
+        InputStream is = null;
+        try {
+            os = new BufferedOutputStream(new FileOutputStream(out, false), DEFAULT_BUFFER_SIZE);
+            is = getStream(resource);
+            FileUtils.copyInputStreamToFile(is, out);
+        } catch (IOException ioe) {
+            log.error(ioe.getMessage(), ioe);
+            throw ioe;
+        } catch (Throwable e) {
+            log.error("Error writing document {} into {}", out.getPath());
+            log.error(e.getMessage(), e);
+        } finally {
+            if (os != null) {
+                try {
+                    os.flush();
+                    os.close();
+                } catch (Throwable e) {
+                }
+            }
+            if (is != null)
+                try {
+                    is.close();
+                } catch (Throwable e) {
+                }
+        }
+    }
+
+    @Override
+    public String getContent(String resource) {
+        StringWriter writer = new StringWriter();
+        try {
+            IOUtils.copy(getStream(resource), writer, "UTF-8");
+            return writer.toString();
+        } catch (Throwable e) {
+            log.error(e.getMessage());
+            return null;
+        }
+    }
+
+    /**
+     * Retrieves the setting "store." + id + ".dir"
+     */
+    protected String getDir() {
+        //return getConfig().getPropertyWithSubstitutions("store." + id + ".dir");
+        return "/";
+    }
+
+    @Override
+    public Map<String, String> getParameters() {
+        return parameters;
+    }
+
+    @Override
+    public boolean test() {
+        String resource = "1.0";
+        File tmpFile = null;
+        try {
+            tmpFile = File.createTempFile("st-test", ".txt");
+            FileUtils.write(tmpFile, "test");
+            //FileUtil.writeFile("test", tmpFile.getAbsolutePath());
+            store(tmpFile, resource);
+            return exists(resource);
+        } catch (Throwable e) {
+            log.error(e.getMessage(), e);
+            return false;
+        } finally {
+            if (tmpFile.exists())
+                FileUtil.strongDelete(tmpFile);
+            //FileUtils.deleteQuietly(tmpFile);
+            try {
+                if (exists(resource))
+                    delete(resource);
+            } catch (Throwable t) {
+
+            }
+        }
+    }
+
+    @Override
+    public boolean isEnabled() {
+        return true;
+    }
+
+    @Override
+    public String toString() {
+        return this.getClass().getSimpleName() + " #" + id;
+    }
+
+    @Override
+    public void init() {
+
+    }
+
+    @Override
+    public void destroy() {
+
+    }
+}

+ 373 - 0
gfs/src/main/java/com/giantan/gfs/storer/impl/FSStorer.java

@@ -0,0 +1,373 @@
+package com.giantan.gfs.storer.impl;
+
+import com.giantan.gfs.storer.FileItem;
+import com.giantan.gfs.storer.FileType;
+import com.giantan.gfs.storer.util.FileUtil;
+import com.giantan.gfs.storer.util.StringUtil;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.FileTime;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class is an implementation of the Storer interface to persist documents
+ * in the filesystem. From the root of the documents storage, this
+ * implementation saves all document's files into a defined directory using the
+ * following logic. The document's id is tokenized by three chars tokens, than
+ * the doc/ dir is appended, so if the docId=12345, the document's path will
+ * be:123/45/doc.
+ *
+ * @author Marco Meschieri - LogicalDOC
+ * @since 4.5
+ */
+public class FSStorer extends AbstractStorer {
+
+    protected static Logger log = LoggerFactory.getLogger(FSStorer.class);
+
+    private String baseDir = "/filestore";
+    private String baseDownloadPath = "/filedownload/";
+
+    public FSStorer() {
+        super();
+    }
+
+    public void init() {
+//        baseDir = Context.getProperty("storer.baseDir");
+//        if (baseDir == null){
+//            baseDir = "/docs";
+//        }
+//
+//        baseDownloadPath = Context.getProperty("storer.baseDownloadPath");
+//        if (baseDownloadPath == null){
+//            baseDownloadPath = "/file/";
+//        }
+    }
+
+    public void init(String baseDir) {
+
+        String dp = "filedownload";
+        try {
+            Path fss = Files.createTempDirectory("fss");
+            dp = fss.toString();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        init(baseDir,dp);
+    }
+
+    public void init(String baseDir,String baseDownloadPath) {
+        this.baseDir = baseDir;
+        this.baseDownloadPath = baseDownloadPath;
+    }
+
+    public File getContainer(String path) {
+        String relativePath = FilenameUtils.getFullPathNoEndSeparator(path);
+        //String relativePath = computeRelativePath(docId);
+        String dir = getRoot().getPath() + "/" + relativePath;
+        return new File(dir);
+    }
+
+    public void setRoot(String baseDir) {
+        this.baseDir = baseDir;
+    }
+
+    public File getRoot() {
+//        if (baseDir == null){
+//            baseDir = Context.getProperty("storer.baseDir");
+//        }
+        return new File(baseDir);
+    }
+
+    public String getBaseDownloadPath() {
+//        if (baseDownloadPath == null){
+//            baseDownloadPath = Context.getProperty("storer.baseDownloadPath");
+//        }
+        return baseDownloadPath;
+    }
+
+    public void setBaseDownloadPath(String baseDownloadPath) {
+        this.baseDownloadPath = baseDownloadPath;
+    }
+
+    @Override
+    public void store(File file, String resource) throws IOException {
+        // Do not store 0 byte files
+        if (file.length() == 0L)
+            throw new IOException("Do not store 0 byte file");
+
+        if (!isEnabled())
+            throw new IOException("Storer not enabled");
+
+        File dir = getContainer(resource);
+        FileUtils.forceMkdir(dir);
+        String fileName = FilenameUtils.getName(resource);
+        File dest = new File(dir.getPath()+"/"+fileName);
+        FileUtils.copyFile(file, dest);
+    }
+
+    @Override
+    public void store(InputStream stream, String resource) throws IOException {
+        if (!isEnabled())
+            throw new IOException("Storer not enabled");
+
+        File file = null;
+        try {
+            File dir = getContainer(resource);
+            FileUtils.forceMkdir(dir);
+            String fileName = FilenameUtils.getName(resource);
+            file = new File(dir.getPath() + "/" + fileName);
+            FileUtils.copyInputStreamToFile(stream,file);
+        } catch (IOException e) {
+            throw e;
+        } catch (Throwable e) {
+            throw new IOException(e.getMessage(), e);
+        } finally {
+            try {
+                stream.close();
+            } catch (IOException e) {
+            }
+        }
+    }
+
+    @Override
+    public void writeToFile(String resource, File out) throws IOException {
+        File container = getContainer(resource);
+        String fileName = FilenameUtils.getName(resource);
+        File file = new File(container, fileName);
+        FileUtils.copyFile(file, out);
+    }
+
+    @Override
+    public InputStream getStream(String resource) throws IOException {
+        File container = getContainer(resource);
+        String fileName = FilenameUtils.getName(resource);
+        File file = new File(container, fileName);
+
+        try {
+            return new BufferedInputStream(new FileInputStream(file), DEFAULT_BUFFER_SIZE);
+        } catch (IOException e) {
+            throw e;
+        } catch (Throwable e) {
+            throw new IOException(e.getMessage(), e);
+        }
+    }
+
+    @Override
+    public long getTotalSize() {
+        long size = 0;
+        File docDir = getRoot();
+        if (docDir.exists())
+            size = FileUtils.sizeOfDirectory(docDir);
+
+        return size;
+    }
+
+    @Override
+    public byte[] getBytes( String resource, long start, long length) throws IOException {
+        File container = getContainer(resource);
+        String fileName = FilenameUtils.getName(resource);
+        File file = new File(container, fileName);
+
+        return FileUtil.toByteArray(file, start, length);
+    }
+
+    @Override
+    public void delete(String resource) throws IOException {
+        String fileName = FilenameUtils.getName(resource);
+        File file = new File(getContainer(resource), fileName);
+        if (file.exists())
+            FileUtils.forceDelete(file);
+
+//            try {
+//                FileUtils.forceDelete(file);
+//            } catch (IOException e) {
+//                log.error(e.getMessage());
+//            }
+    }
+
+    @Override
+    public List<String> getFileNames(String path) {
+        List<String> resources = new ArrayList<String>();
+        //File container = getContainer(resource);
+        File container =new File(getRoot(),path);
+                File[] buf = container.listFiles(new FilenameFilter() {
+            @Override
+            public boolean accept(File dir, String name) {
+                if (name.startsWith("."))
+                    return false;
+//                else if (StringUtils.isNotEmpty(fileVersion)) {
+//                    return name.startsWith(fileVersion);
+//                }
+                return true;
+            }
+        });
+        if (buf != null)
+            for (File file : buf) {
+                resources.add(file.getName());
+            }
+        return resources;
+    }
+
+    @Override
+    public List<FileItem> getFileItems(String path) throws Exception {
+        List<FileItem> fileItemList = new ArrayList<>();
+
+        //String fullPath = StringUtil.removeDuplicateSeparator(filePath + path);
+        //File file = new File(fullPath);
+        File file =new File(getRoot(),path);
+
+        if (!file.exists()) {
+            throw new FileNotFoundException("File not exist !");
+        }
+
+        File[] files = file.listFiles();
+
+        if (files == null) {
+            return fileItemList;
+        }
+        for (File f : files) {
+            FileItem item = new FileItem();
+            item.setType(f.isDirectory() ? FileType.FOLDER : FileType.FILE);
+            item.setMtime(file.lastModified());
+
+            try {
+                FileTime creationTime = (FileTime) Files.getAttribute(file.toPath(), "creationTime");
+                item.setCtime(creationTime.toMillis());
+            } catch (IOException ex) {
+                // handle exception
+            }
+            item.setSize(f.length());
+            item.setName(f.getName());
+            item.setPath(path);
+            ///QQQ 这是下载地址,暂时用不上
+//            if (f.isFile()) {
+//                item.setUrl(getDownloadUrl(StringUtil.concatUrl(path, f.getName())));
+//            }
+            fileItemList.add(item);
+        }
+
+        return fileItemList;
+    }
+
+    @Override
+    public boolean copy(String source, String target) throws Exception {
+        File container = getContainer(source);
+        String fileName = FilenameUtils.getName(source);
+        File src = new File(container, fileName);
+
+        File container2 = getContainer(target);
+        String fileName2 = FilenameUtils.getName(target);
+        File dest = new File(container2, fileName2);
+
+        try {
+            FileUtils.copyFile(src, dest);
+            return true;
+        }catch (Exception e){
+            throw e;
+        }
+    }
+
+    @Override
+    public boolean move(String source, String target) throws Exception {
+        File container = getContainer(source);
+        String fileName = FilenameUtils.getName(source);
+        File src = new File(container, fileName);
+
+        File container2 = getContainer(target);
+        String fileName2 = FilenameUtils.getName(target);
+        File dest = new File(container2, fileName2);
+
+        if (src.isDirectory()) {
+            src.renameTo(dest);
+            return true;
+        }else{
+            try {
+                FileUtils.moveFile(src, dest);
+                return true;
+            } catch (Exception e) {
+                throw e;
+            }
+        }
+    }
+
+    @Override
+    public FileItem getFileItem(String resource) throws IOException {
+        File container = getContainer(resource);
+
+        String fileName = FilenameUtils.getName(resource);
+        File file = new File(container, fileName);
+
+        if (!file.exists()) {
+            //throw new IOException();
+            return null;
+        }
+
+        FileItem item = new FileItem();
+        item.setType(file.isDirectory() ? FileType.FOLDER : FileType.FILE);
+        item.setMtime(file.lastModified());
+
+        try {
+            FileTime creationTime = (FileTime) Files.getAttribute(file.toPath(), "creationTime");
+            item.setCtime(creationTime.toMillis());
+        } catch (IOException ex) {
+            // handle exception
+            log.error(ex.getMessage());
+            throw ex;
+        }
+        item.setSize(file.length());
+        item.setName(file.getName());
+        item.setPath(resource);
+//        if (file.isFile()) {
+//            item.setUrl(getDownloadUrl(resource));
+//        }
+
+        return item;
+    }
+
+
+    @Override
+    public boolean createDirectory(String uri) throws Exception {
+        String dir = getRoot().getPath() + "/" +uri;
+        // File dir = getContainer(uri);
+        try {
+            FileUtils.forceMkdir(new File(dir));
+            return true;
+        }catch (Exception e){
+            throw e;
+        }
+    }
+
+    private String getDownloadUrl(String path) {
+        return StringUtil.removeDuplicateSeparator( getBaseDownloadPath()+ StringUtil.PATH_SEPARATOR + path);
+    }
+
+    @Override
+    public long size(String resource) {
+        File file = getContainer(resource);
+        String fileName = FilenameUtils.getName(resource);
+        file = new File(file, fileName);
+        return file.length();
+    }
+
+    @Override
+    public boolean exists(String resource) {
+        File file = getContainer(resource);
+        String fileName = FilenameUtils.getName(resource);
+        file = new File(file, fileName);
+        return file.exists();
+    }
+
+    @Override
+    public List<String> getParameterNames() {
+        return new ArrayList<String>();
+    }
+
+
+}

+ 548 - 0
gfs/src/main/java/com/giantan/gfs/storer/impl/S3Storer.java

@@ -0,0 +1,548 @@
+package com.giantan.gfs.storer.impl;
+
+import com.giantan.gfs.storer.FileItem;
+import com.giantan.gfs.storer.FileType;
+import com.giantan.gfs.storer.util.StringUtil;
+import org.apache.commons.io.FilenameUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.core.ResponseInputStream;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.*;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class S3Storer extends AbstractStorer {
+
+    protected static Logger log = LoggerFactory.getLogger(S3Storer.class);
+
+    String PATH_SEPARATOR = "/";
+
+    //protected static final String BUCKET = "bucket";
+    private String endpoint;
+
+    //@Value("${minio.server.accessKey:admin}")
+    private String accessKey;
+
+    //@Value("${minio.server.secretKey:12345678}")
+    private String secretKey;
+
+    private String bucket;
+
+    private S3Client s3;
+
    /** Creates an S3 storer; endpoint/credentials must be configured later. */
    public S3Storer() {
        super();
    }
+
    /**
     * Creates a storer backed by an existing S3 client.
     *
     * @param s3client pre-built S3 client
     * @param bucket   target bucket name
     */
    public S3Storer(S3Client s3client, String bucket) {
        this.s3 = s3client;
        this.bucket = bucket;
    }
+
    /**
     * Creates a storer backed by an existing S3 client, also recording the
     * endpoint so that direct URLs can be built.
     *
     * @param s3client pre-built S3 client
     * @param endpoint service endpoint used for direct links
     * @param bucket   target bucket name
     */
    public S3Storer(S3Client s3client, String endpoint,String bucket) {
        this.s3 = s3client;
        this.endpoint = endpoint;
        this.bucket = bucket;
    }
+
    /**
     * Creates a storer from raw connection settings; the S3 client itself
     * is built lazily on first use (see getS3Client()).
     *
     * @param endpoint  service endpoint
     * @param accessKey access key id
     * @param secretKey secret access key
     * @param bucket    target bucket name
     */
    public S3Storer(String endpoint, String accessKey, String secretKey, String bucket) {
        this.endpoint = endpoint;
        this.accessKey = accessKey;
        this.secretKey = secretKey;
        this.bucket = bucket;
    }
+
    /** Always enabled in this implementation (license check disabled). */
    public boolean isEnabled() {
        //return LicenseManager.getInstance().isEnabled("Feature_46");
        return true;
    }
+
    /** Returns the bucket this storer reads and writes. */
    public String getBucket() {
        return bucket;
    }
+
    /** Sets the bucket this storer reads and writes. */
    public void setBucket(String bucket) {
        this.bucket = bucket;
    }
+
+
    //protected synchronized S3Client getS3Client() {
    /**
     * Lazily builds the S3 client from endpoint/credentials on first use.
     * Synchronized so concurrent callers share a single client instance.
     */
    public synchronized S3Client getS3Client() {
        if (s3 == null) {
           s3 = create(endpoint,accessKey,secretKey);
        }
        return s3;
    }
+
    /** Injects a pre-built S3 client, bypassing lazy construction. */
    public void setS3Client(S3Client s3) {
        this.s3 = s3;

    }
+
+
+    @Override
+    public void store(InputStream is, String path) throws IOException {
+        String objName = path;
+
+        try {
+            PutObjectRequest putOb = PutObjectRequest.builder()
+                    .bucket(bucket)
+                    .key(objName)
+                    //.metadata(metadata)
+                    .build();
+
+            PutObjectResponse res = getS3Client().putObject(putOb, RequestBody.fromInputStream(is, is.available()));
+            log.info(objName + " is successfully uploaded " + " to bucket " + bucket + ".");
+        } catch (IOException e) {
+            //e.printStackTrace();
+            log.error(e.getMessage());
+            throw e;
+        } finally {
+            if (is != null) {
+                try {
+                    is.close();
+                } catch (IOException e) {
+
+                }
+            }
+        }
+
+    }
+
+    @Override
+    public void delete(String path) throws Exception {
+        String dir = path;
+//        try {
+//            List<String> ls = listObjectNames(bucket, dir, false);
+//            if (ls == null || ls.size() <= 0) return;
+//
+//            List<DeleteObject> objects = new LinkedList<>();
+//            for (int i = 0; i < ls.size(); i++) {
+//                objects.add(new DeleteObject(ls.get(i)));
+//            }
+//
+//            Iterable<Result<DeleteError>> results =
+//                    getMinioClient().removeObjects(
+//                            RemoveObjectsArgs.builder().bucket(bucket).objects(objects).build());
+//            for (Result<DeleteError> result : results) {
+//                DeleteError error = result.get();
+//                log.error(
+//                        "Error in deleting object " + error.objectName() + "; " + error.message());
+//            }
+//        } catch (MinioException | InvalidKeyException | IOException | NoSuchAlgorithmException e) {
+//            log.error("Error occurred: " + e);
+//            throw e;
+//        }
+
+        try {
+//            DeleteObjectRequest deleteObjectRequest = DeleteObjectRequest.builder()
+//                    .bucket(bucket)
+//                    .key(path)
+//                    .build();
+//            s3.deleteObject(deleteObjectRequest);
+            DeleteObjectsRequest deleteObjectsRequest = DeleteObjectsRequest.builder()
+                    .bucket(bucket)
+                    .delete(Delete.builder().objects(ObjectIdentifier.builder().key(path).build()).build())
+                    .build();
+            DeleteObjectsResponse res = s3.deleteObjects(deleteObjectsRequest);
+            //res.deleted().size()
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+    @Override
+    public List<String> getFileNames(String path) {
+        String dir = path;
+        try {
+            List<String> ls = listObjectNames(bucket, dir, false);
+            return ls;
+        } catch (Exception e) {
+            log.error(e.getMessage());
+        }
+        return null;
+    }
+
+    /**
+     * 获取对象文件名称列表
+     *
+     * @param bucketName 存储桶名称
+     * @param prefix     对象名称前缀
+     * @param sort       是否排序(升序)
+     * @return objectNames
+     */
+    public List<String> listObjectNames(String bucketName, String prefix, Boolean sort) throws Exception {
+        ListObjectsRequest listObjectsArgs;
+        if (null == prefix) {
+            listObjectsArgs = ListObjectsRequest.builder()
+                    .bucket(bucketName)
+                    //.recursive(true)
+                    .build();
+        } else {
+            listObjectsArgs = ListObjectsRequest.builder()
+                    .bucket(bucketName)
+                    .prefix(prefix)
+                    //.recursive(true)
+                    .build();
+        }
+
+//        Iterable<Result<Item>> chunks = getS3Client().listObjects(listObjectsArgs);
+//        List<String> chunkPaths = new ArrayList<>();
+//        for (Result<Item> item : chunks) {
+//            chunkPaths.add(item.get().objectName());
+//        }
+//        if (sort) {
+//            return chunkPaths.stream().distinct().collect(Collectors.toList());
+//        }
+//        return chunkPaths;
+
+        ListObjectsResponse res = getS3Client().listObjects(listObjectsArgs);
+        List<String> chunkPaths = new ArrayList<>();
+        //System.out.println("------directory");
+        List<CommonPrefix> commonPrefixes = res.commonPrefixes();
+        for (CommonPrefix cp : commonPrefixes) {
+            chunkPaths.add(cp.prefix());
+        }
+
+        List<S3Object> objects = res.contents();
+        for (S3Object so : objects) {
+            chunkPaths.add(so.key());
+        }
+        if (sort) {
+            return chunkPaths.stream().distinct().collect(Collectors.toList());
+        }
+        return chunkPaths;
+
+    }
+
+    @Override
+    public List<FileItem> getFileItems(String path) throws Exception {
+        path = StringUtil.removeFirstSeparator(path);
+        path = StringUtil.appendLastSeparator(path);
+        //String fullPath = StringUtil.removeFirstSeparator(StringUtil.getFullPath(basePath, path));
+
+        List<FileItem> fileItemList = new ArrayList<>();
+
+        ListObjectsRequest listObjects = ListObjectsRequest
+                .builder().prefix(path)
+                .bucket(bucket)
+                //.maxKeys(2)
+                .build();
+
+        ListObjectsResponse res = getS3Client().listObjects(listObjects);
+
+        //System.out.println("------directory");
+        List<CommonPrefix> commonPrefixes = res.commonPrefixes();
+        for (CommonPrefix cp : commonPrefixes) {
+            //System.out.println("=="+cp.prefix());
+            FileItem fi = toFileItem(cp, path);
+            fileItemList.add(fi);
+        }
+
+        List<S3Object> objects = res.contents();
+        for (S3Object so : objects) {
+            FileItem fi = toFileItem(so, path);
+            fileItemList.add(fi);
+        }
+        return fileItemList;
+    }
+
+    private FileItem toFileItem(CommonPrefix cp, String path) {
+
+        FileItem fi = new FileItem();
+        String s = cp.prefix();
+        s = StringUtil.removePrefix(s, path);
+        s = StringUtil.removeFirstSeparator(s);
+        s = StringUtil.removeLastSeparator(s);
+        fi.setName(s);
+        //fi.setSize(ii.size());
+
+        fi.setType(FileType.FOLDER);
+
+        fi.setPath(path);
+
+        String fullPathAndName = StringUtil.concatUrl(path, fi.getName());
+        String directlink = getDirectUrl(fullPathAndName);
+        fi.setUrl(directlink);
+        return fi;
+    }
+
+
+    private FileItem toFileItem(S3Object so, String path) {
+
+        FileItem fi = new FileItem();
+        String s = so.key();
+        s = StringUtil.removePrefix(s, path);
+        s = StringUtil.removeFirstSeparator(s);
+        s = StringUtil.removeLastSeparator(s);
+        fi.setName(s);
+        fi.setSize(so.size());
+
+
+        fi.setType(FileType.FILE);
+        Instant instant = so.lastModified();
+
+        // Convert Instant to Date.
+        // Date modified = Date.from(instant);
+
+        fi.setMtime(instant.toEpochMilli());
+
+        fi.setPath(path);
+
+        String fullPathAndName = StringUtil.concatUrl(path, fi.getName());
+        String directlink = getDirectUrl(fullPathAndName);
+        fi.setUrl(directlink);
+        return fi;
+    }
+
+
+    protected String getDirectUrl(String fullPath) {
+//        String url = new StringBuilder("https://").append(bucket)
+//                .append(".")
+//                .append(this.obsProperties.getEndpoint())
+//                .append("/")
+//                .append(ossObjectName)
+//                .toString();
+
+        String url1 = null;
+        if (fullPath.startsWith("/")) {
+            url1 = endpoint + "/" + bucket + fullPath;
+        } else {
+            url1 = endpoint + "/" + bucket + "/" + fullPath;
+        }
+        return url1;
+    }
+
+    @Override
+    public boolean copy(String source, String target) throws Exception {
+        try {
+            CopyObjectRequest copyReq = CopyObjectRequest.builder()
+                    //.copySourceIfMatch(encodedUrl)
+                    .sourceBucket(bucket)
+                    .sourceKey(source)
+                    .destinationBucket(bucket)
+                    .destinationKey(target)
+                    .build();
+
+            CopyObjectResponse copyRes = s3.copyObject(copyReq);
+            return true;
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+    @Override
+    public boolean move(String source, String target) throws Exception {
+        if (copy(source, target)) {
+            delete(source);
+            return true;
+        }
+        return false;
+    }
+
    /**
     * Looks up a single path and classifies it as a file or a folder.
     *
     * Lists at most two keys under the prefix: exactly one match is treated
     * as a concrete file, more than one means the path is a folder prefix.
     * Returns {@code null} when nothing is stored under the path.
     *
     * @param path object key or folder prefix
     *
     * @return descriptor of the entry, or {@code null} if absent
     *
     * @throws IOException if the listing request fails
     */
    @Override
    public FileItem getFileItem(String path) throws IOException {

        try {
            ListObjectsRequest request = ListObjectsRequest.builder()
                    .bucket(bucket)
                    .prefix(path)
                    .maxKeys(2)
                    .delimiter(PATH_SEPARATOR)
                    .build();
            ListObjectsResponse res = getS3Client().listObjects(request);

            List<S3Object> objects = res.contents();
            if (objects!= null && objects.size()>0){
                if (objects.size() == 1) {
                    // Single key: a regular file; its parent dir becomes the item path
                    String fullPath = FilenameUtils.getFullPath(path);
                    FileItem fi = toFileItem(objects.get(0), fullPath);
                    return fi;
                }else if (objects.size() >1){
                    // Multiple keys share this prefix: treat the path as a folder
                    FileItem fi = new FileItem();
                    fi.setName(path);
                    fi.setType(FileType.FOLDER);
                    fi.setPath(path);

                    String fullPathAndName = path; //StringUtil.concatUrl(path, fi.getName());
                    String directlink = getDirectUrl(fullPathAndName);
                    fi.setUrl(directlink);
                    return fi;
                }
            }
        } catch (Exception e) {
            log.error(e.getMessage());
            throw new IOException(e.getMessage());
        }
        return null;

    }
+
+    @Override
+    public long size(String path) {
+        try {
+            FileItem fi = getFileItem(path);
+            if (fi != null) {
+                return fi.getSize();
+            }
+        } catch (IOException e) {
+           log.error(e.getMessage());
+        }
+        return 0;
+    }
+
+    @Override
+    public boolean exists(String path) {
+        try {
+            FileItem fi = getFileItem(path);
+            if (fi != null) {
+                return true;
+            }
+        } catch (IOException e) {
+            log.error(e.getMessage());
+        }
+        return false;
+    }
+
+    @Override
+    public InputStream getStream(String path) throws IOException {
+        // Returns the raw S3 object body stream; the caller must close it.
+        try {
+            GetObjectRequest request = GetObjectRequest.builder()
+                    .bucket(bucket)
+                    .key(path)
+                    .build();
+            return getS3Client().getObject(request);
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw new IOException(e.getMessage());
+        }
+    }
+
+    @Override
+    public byte[] getBytes(String path, long start, long length) throws IOException {
+        // Reads the byte range [start, start + length) of the object into memory.
+        // Returns fewer bytes if the object ends before the range does.
+        try {
+            GetObjectRequest request = GetObjectRequest.builder()
+                    .bucket(bucket)
+                    .key(path)
+                    .build();
+
+            // try-with-resources: the old code leaked the response stream when
+            // an exception occurred before res.close() was reached.
+            try (ResponseInputStream<GetObjectResponse> res = getS3Client().getObject(request)) {
+                // InputStream.skip() may skip fewer bytes than requested; the old
+                // single unchecked call could silently start the copy too early.
+                long toSkip = start;
+                while (toSkip > 0) {
+                    long skipped = res.skip(toSkip);
+                    if (skipped <= 0) {
+                        break;
+                    }
+                    toSkip -= skipped;
+                }
+
+                ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+                byte[] buffer = new byte[4096];
+                long copied = 0;
+                int bytesRead;
+                while (copied < length && (bytesRead = res.read(buffer)) != -1) {
+                    int take = (int) Math.min(bytesRead, length - copied);
+                    outputStream.write(buffer, 0, take);
+                    copied += take;
+                }
+                return outputStream.toByteArray();
+            }
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw new IOException(e.getMessage());
+        }
+    }
+
+    @Override
+    public long getTotalSize() {
+        // Not implemented for S3: computing a bucket-wide total would require a
+        // full object listing. Always returns 0.
+        return 0;
+    }
+
+    @Override
+    public List<String> getParameterNames() {
+        // Configuration keys this storer expects (see the create(...) factories).
+        // Diamond operator instead of the old raw-type ArrayList.
+        List<String> names = new ArrayList<>();
+        names.add("url");
+        names.add("accessKey");
+        names.add("secretKey");
+        names.add("bucket");
+        return names;
+    }
+
+    @Override
+    public boolean createDirectory(String path) throws Exception {
+        // S3 has no real directories: create a zero-byte object whose key ends
+        // with '/' (the conventional "folder" marker).
+        path = StringUtil.appendLastSeparator(path);
+        try {
+            PutObjectRequest request = PutObjectRequest.builder()
+                    .bucket(bucket)
+                    .key(path)
+                    .build();
+            // The response carries nothing we need; putObject throws on failure.
+            getS3Client().putObject(request, RequestBody.empty());
+            return true;
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+
+    /////////////////
+    /** Convenience factory mirroring the four-argument constructor. */
+    public static S3Storer create(String endpoint, String accessKey, String secretKey, String bucket) {
+        return new S3Storer(endpoint, accessKey, secretKey, bucket);
+    }
+
+    /**
+     * Builds an S3 client with static credentials against the given endpoint.
+     * The region is pinned to US_EAST_1 (the SDK requires one even for
+     * S3-compatible services reached via endpointOverride).
+     */
+    public static S3Client create(String endpoint, String accessKey, String secretKey) {
+        try {
+            // AwsCredentialsProvider is a functional interface — a lambda replaces
+            // the old anonymous class.
+            AwsCredentialsProvider provider =
+                    () -> AwsBasicCredentials.create(accessKey, secretKey);
+            return S3Client.builder()
+                    .region(Region.US_EAST_1)
+                    .credentialsProvider(provider)
+                    .endpointOverride(URI.create(endpoint))
+                    .build();
+        } catch (Exception e) {
+            log.error(e.getMessage());
+            throw e;
+        }
+    }
+
+}

+ 833 - 0
gfs/src/main/java/com/giantan/gfs/storer/util/FileUtil.java

@@ -0,0 +1,833 @@
+package com.giantan.gfs.storer.util;
+
+//import com.giantan.baiying.tasks.ToGbookTask;
+//import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.IOUtils;
+
+import java.io.*;
+import java.nio.Buffer;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.security.MessageDigest;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+//strongDelete 值得借鉴
+//@Slf4j
+public class FileUtil {
+	private static final org.slf4j.Logger log
+			= org.slf4j.LoggerFactory.getLogger(FileUtil.class);
+	static final int BUFF_SIZE = 8192;
+
+	static final byte[] buffer = new byte[BUFF_SIZE];
+
+	//protected static Logger log = LoggerFactory.getLogger(FileUtil.class);
+
+	/**
+	 * Copies the whole input stream to the file at {@code filepath}.
+	 * Both streams are closed when done — the input stream too; callers rely on this.
+	 *
+	 * @param in source stream (closed by this method)
+	 * @param filepath destination file path
+	 * @throws Exception if the copy fails
+	 */
+	public static void writeFile(InputStream in, String filepath) throws Exception {
+		OutputStream os = null;
+		try {
+			os = new FileOutputStream(filepath);
+
+			// Use a per-call buffer: the previous code synchronized on a shared
+			// static buffer, serializing every concurrent writer in the JVM.
+			byte[] buf = new byte[BUFF_SIZE];
+			int amountRead;
+			while ((amountRead = in.read(buf)) != -1) {
+				os.write(buf, 0, amountRead);
+			}
+		} finally {
+			if (os != null)
+				os.flush();
+			try {
+				if (in != null)
+					in.close();
+				if (os != null)
+					os.close();
+			} catch (IOException e) {
+				logError(e.getMessage());
+			}
+		}
+	}
+
+	/** Writes the given byte array to the file at {@code filepath}. */
+	public static void writeFile(byte[] in, String filepath) throws Exception {
+		InputStream source = null;
+		try {
+			source = new ByteArrayInputStream(in);
+			writeFile(source, filepath);
+		} finally {
+			try {
+				if (source != null)
+					source.close();
+			} catch (IOException e) {
+				logError(e.getMessage());
+			}
+		}
+	}
+
+	/**
+	 * Writes {@code text} to {@code filepath} encoded as UTF-8.
+	 * Errors are logged and swallowed (best-effort write).
+	 */
+	public static void writeFile(String text, String filepath) {
+		BufferedOutputStream bos = null;
+
+		try {
+			bos = new BufferedOutputStream(new FileOutputStream(filepath));
+			bos.write(text.getBytes("UTF-8"));
+		} catch (Throwable e) {
+			logError(e.getLocalizedMessage());
+		} finally {
+			if (bos != null) {
+				try {
+					bos.flush();
+					bos.close();
+				} catch (Throwable ioe) {
+					// best-effort close; nothing useful to report
+				}
+			}
+		}
+	}
+
+	/** Reads the whole file as UTF-8 text. */
+	public static String readFile(File file) throws IOException {
+		FileInputStream input = null;
+		try {
+			input = new FileInputStream(file);
+			return IOUtils.toString(input, "UTF-8");
+		} finally {
+			// Throwable catch also absorbs the NPE when the open itself failed.
+			try {
+				input.close();
+			} catch (Throwable ignored) {
+			}
+		}
+	}
+
+	/** Reads the whole file at {@code filePath} as UTF-8 text. */
+	public static String readFile(String filePath) throws IOException {
+		return readFile(new File(filePath));
+	}
+
+	/**
+	 * Appends {@code text} to the file at {@code filepath}; errors are logged
+	 * and swallowed.
+	 * NOTE(review): uses the platform default charset (getBytes() with no
+	 * explicit encoding) while writeFile forces UTF-8 — confirm this is intended.
+	 */
+	public static void appendFile(String text, String filepath) {
+		OutputStream bos = null;
+
+		try {
+			bos = new FileOutputStream(filepath, true);
+			bos.write(text.getBytes());
+		} catch (Exception e) {
+			logError(e.getLocalizedMessage());
+		} finally {
+			if (bos != null) {
+				try {
+					bos.close();
+				} catch (IOException ioe) {
+					;
+				}
+			}
+		}
+	}
+
+	// Central error-logging hook; kept as a separate method from the days this
+	// class used commons-logging (see the commented-out lines).
+	private static void logError(String message) {
+		//Log logger = LogFactory.getLog(FileUtil.class);
+		//logger.error(message);
+		log.error(message);
+	}
+
+	/**
+	 * Computes the SHA-1 digest of the stream content as a lowercase hex string.
+	 * The stream is always closed. Returns null when {@code is} is null or on error.
+	 *
+	 * @param is content to digest (closed by this method)
+	 * @return hex digest, or null
+	 */
+	public static String computeDigest(InputStream is) {
+		// Early return fixes the old NPE: with a null stream the previous code
+		// still reached is.close() in its finally block.
+		if (is == null)
+			return null;
+		try {
+			MessageDigest sha = MessageDigest.getInstance("SHA-1");
+			byte[] message = new byte[BUFF_SIZE];
+			int len;
+			while ((len = is.read(message)) != -1) {
+				sha.update(message, 0, len);
+			}
+			byte[] messageDigest = sha.digest();
+
+			// Render each byte as exactly two lowercase hex characters.
+			StringBuilder buf = new StringBuilder(messageDigest.length * 2);
+			for (byte b : messageDigest) {
+				int unsignedValue = b & 0xff;
+				if (unsignedValue < 0x10)
+					buf.append('0');
+				buf.append(Integer.toHexString(unsignedValue));
+			}
+			String digest = buf.toString();
+			log.debug("Computed Digest: {}", digest);
+			return digest;
+		} catch (IOException io) {
+			log.error("Error generating digest: ", io);
+		} catch (Throwable t) {
+			log.error("Error generating digest: ", t);
+		} finally {
+			try {
+				is.close();
+			} catch (IOException e) {
+				// ignore close failure
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * This method calculates the digest of a file using the algorithm SHA-1.
+	 * 
+	 * @param file The file for which will be computed the digest
+	 * @return digest
+	 */
+	public static String computeDigest(File file) {
+		InputStream is;
+		try {
+			is = new BufferedInputStream(new FileInputStream(file), BUFF_SIZE);
+			// computeDigest(InputStream) closes the stream.
+			return computeDigest(is);
+		} catch (FileNotFoundException e) {
+			log.error(e.getMessage());
+		}
+		return null;
+	}
+
+	/**
+	 * This method calculates the digest of a string using the algorithm SHA-1.
+	 * 
+	 * @param src The string for which will be computed the digest
+	 * 
+	 * @return digest
+	 */
+	public static String computeDigest(String src) {
+		// Digest the UTF-8 bytes of the string; the stream-based overload
+		// closes the stream it is handed.
+		return computeDigest(IOUtils.toInputStream(src, "UTF-8"));
+	}
+
+	/**
+	 * This method calculates the digest of a file using the algorithm SHA-1.
+	 * 
+	 * @param file The file for which will be computed the digest
+	 * @return digest
+	 */
+	public static byte[] computeSha1Hash(File file) {
+		InputStream is = null;
+		try {
+			is = new BufferedInputStream(new FileInputStream(file), BUFF_SIZE);
+			return computeSha1Hash(is);
+		} catch (IOException io) {
+			log.error(io.getMessage(), io);
+		} finally {
+			// Null guard fixes the old NPE: when the file failed to open, the
+			// previous finally block called close() on a null stream.
+			if (is != null) {
+				try {
+					is.close();
+				} catch (IOException e) {
+					// ignore close failure
+				}
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * This method calculates the digest of a inputStram content using the
+	 * algorithm SHA-1.
+	 * 
+	 * @param is The content of which will be computed the digest
+	 * @return digest
+	 */
+	public static byte[] computeSha1Hash(InputStream is) {
+		// NOTE: unlike computeDigest(InputStream), this overload does NOT close
+		// the stream; callers are responsible for closing it.
+		MessageDigest sha = null;
+		try {
+			if (is != null) {
+				sha = MessageDigest.getInstance("SHA-1");
+				byte[] message = new byte[BUFF_SIZE];
+				int len = 0;
+				while ((len = is.read(message)) != -1) {
+					sha.update(message, 0, len);
+				}
+				byte[] messageDigest = sha.digest();
+				return messageDigest;
+			}
+		} catch (IOException io) {
+			log.error("Error generating SHA-1: ", io);
+		} catch (Throwable t) {
+			log.error("Error generating SHA-1: ", t);
+		}
+		// Null when the input was null or an error occurred.
+		return null;
+	}
+
+	/**
+	 * Writes the specified classpath resource into a file
+	 * 
+	 * @param resourceName Fully qualified resource name
+	 * @param out The output file
+	 * 
+	 * @throws IOException if the copy caused an error
+	 */
+	public static void copyResource(String resourceName, File out) throws IOException {
+		InputStream is = null;
+		OutputStream os = null;
+		try {
+			// Try this class's own loader first, then the context class loader.
+			try {
+				is = new BufferedInputStream(FileUtil.class.getResource(resourceName).openStream());
+			} catch (Exception e) {
+				is = new BufferedInputStream(
+						Thread.currentThread().getContextClassLoader().getResource(resourceName).openStream());
+			}
+			os = new BufferedOutputStream(new FileOutputStream(out));
+
+			// Copy in chunks: the old loop moved one byte per call.
+			byte[] buf = new byte[BUFF_SIZE];
+			int read;
+			while ((read = is.read(buf)) != -1) {
+				os.write(buf, 0, read);
+			}
+		} finally {
+			if (is != null)
+				is.close();
+			if (os != null)
+				os.close();
+		}
+	}
+
+	/**
+	 * Computes the folder size as the sum of all files directly and indirectly
+	 * contained.
+	 * 
+	 * @param folder the folder to calculate
+	 * 
+	 * @return the sum of the sizes of all contained files expressed in bytes
+	 */
+	public static long getFolderSize(File folder) {
+		long foldersize = 0;
+
+		// listFiles() returns null for a non-directory or unreadable path; the
+		// old code threw NullPointerException in that case.
+		File[] files = folder.listFiles();
+		if (files == null)
+			return 0;
+		for (File f : files) {
+			if (f.isDirectory()) {
+				foldersize += getFolderSize(f);
+			} else {
+				foldersize += f.length();
+			}
+		}
+		return foldersize;
+	}
+
+	/**
+	 * Renders a file size in a more readable behaviour taking into account the
+	 * user locale. Depending on the size, the result will be presented in the
+	 * following measure units: GB, MB, KB or Bytes
+	 * 
+	 * @param size Size to be rendered
+	 * @param language The language for the format symbols
+	 * 
+	 * @return the size as human readable text
+	 */
+	public static String getDisplaySize(long size, String language) {
+		// NOTE(review): the thresholds are decimal (1000^x) but the divisors are
+		// binary (1024^x), so values near a boundary can render below 1.0 of the
+		// chosen unit (e.g. "0.98 MB") — confirm this mix is intended.
+		String displaySize = "";
+		Locale locale = new Locale("en");
+		if (isNotEmpty(language))
+			locale = new Locale(language);
+		NumberFormat nf = new DecimalFormat("###,###,###.0", new DecimalFormatSymbols(locale));
+		if (size > 1000000000) {
+			displaySize = nf.format((double) size / 1024 / 1024 / 1024) + " GB";
+		} else if (size > 1000000) {
+			displaySize = nf.format((double) size / 1024 / 1024) + " MB";
+		} else if (size > 1000) {
+			displaySize = nf.format((double) size / 1024) + " KB";
+		} else {
+			displaySize = size + " Bytes";
+		}
+		return displaySize;
+	}
+
+	/**
+	 * Renders a file size in a more readable behaviour taking into account the
+	 * user locale. The size is always rendered in the KB(kilobyte) measure
+	 * unit.
+	 * 
+	 * @param size Size to be rendered
+	 * @param language The language for the format symbols
+	 * 
+	 * @return the size in KB as human readable text
+	 */
+	public static String getDisplaySizeKB(long size, String language) {
+		// Always render in KB, honoring the locale's grouping/decimal symbols.
+		Locale locale = isNotEmpty(language) ? new Locale(language) : new Locale("en");
+		NumberFormat nf = new DecimalFormat("###,###,##0.0", new DecimalFormatSymbols(locale));
+		return nf.format((double) size / 1024) + " KB";
+	}
+
+//	/**
+//	 * Check if a given filename matches the <code>includes</code> and not the
+//	 * <code>excludes</code>
+//	 *
+//	 * @param filename The filename to consider
+//	 * @param includes list of includes expressions (eg. *.doc,*dummy*)
+//	 * @param excludes list of excludes expressions (eg. *.doc,*dummy*)
+//	 *
+//	 * @return true only if the passed filename matches the includes and not the
+//	 *         excludes
+//	 */
+//	public static boolean matches(String filename, String[] includes, String[] excludes) {
+//		// First of all check if the filename must be excluded
+//		if (excludes != null && excludes.length > 0)
+//			for (String s : excludes)
+//				if (StringUtils.isNotEmpty(s) && SelectorUtils.match(s, filename, false))
+//					return false;
+//
+//		// Then check if the filename must can be included
+//		if (includes != null && includes.length > 0)
+//			for (String s : includes)
+//				if (StringUtils.isNotEmpty(s) && SelectorUtils.match(s, filename, false))
+//					return true;
+//
+//		if (includes == null || includes.length == 0)
+//			return true;
+//		else
+//			return false;
+//	}
+
+//	/**
+//	 * Check if a given filename matches the <code>includes</code> and not the
+//	 * <code>excludes</code>
+//	 *
+//	 * @param filename The filename to consider
+//	 * @param includes comma-separated list of includes expressions (eg.
+//	 *        *.doc,*dummy*)
+//	 * @param excludes comma-separated list of excludes expressions (eg.
+//	 *        *.doc,*dummy*)
+//	 * @return true only if the passed filename matches the includes and not the
+//	 *         excludes
+//	 */
+//	public static boolean matches(String filename, String includes, String excludes) {
+//		List<String> inc = new ArrayList<String>();
+//		List<String> exc = new ArrayList<String>();
+//
+//		StringTokenizer st;
+//
+//		if (StringUtils.isNotEmpty(excludes)) {
+//			st = new StringTokenizer(excludes, ",", false);
+//			while (st.hasMoreTokens())
+//				exc.add(st.nextToken().trim());
+//		}
+//
+//		if (StringUtils.isNotEmpty(includes)) {
+//			st = new StringTokenizer(includes, ",", false);
+//			while (st.hasMoreTokens())
+//				inc.add(st.nextToken().trim());
+//		}
+//
+//		return matches(filename, inc.toArray(new String[0]), exc.toArray(new String[0]));
+//	}
+
+	/** Writes {@code content} to {@code file} as UTF-8, optionally appending. */
+	public static void writeUTF8(String content, File file, boolean append) {
+		BufferedWriter out = null;
+		try {
+			out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, append), "UTF8"));
+			out.write(content);
+		} catch (Exception e) {
+			// Log through SLF4J instead of printing the stack trace to stderr.
+			log.error(e.getMessage(), e);
+		} finally {
+			if (out != null)
+				try {
+					out.close();
+				} catch (IOException e) {
+					// ignore close failure
+				}
+		}
+	}
+
+	public static byte[] toByteArray(File file) {
+		InputStream is = null;
+		try {
+			is = new BufferedInputStream(new FileInputStream(file), 2048);
+			return IOUtils.toByteArray(is);
+		} catch (IOException e) {
+			log.error(e.getMessage());
+		} finally {
+			if (is != null)
+				try {
+					is.close();
+				} catch (IOException e) {
+				}
+		}
+		return null;
+	}
+
+	/**
+	 * Reads {@code length} bytes starting at {@code start} from {@code input}.
+	 * NOTE: the RandomAccessFile is always closed by this method, even on error.
+	 */
+	public static byte[] toByteArray(RandomAccessFile input, long start, long length) throws IOException {
+		ByteArrayOutputStream output = null;
+		try {
+			// Open streams.
+			output = new ByteArrayOutputStream();
+			copy(input, output, start, length);
+			output.flush();
+			return output.toByteArray();
+		} finally {
+			try {
+				input.close();
+			} catch (Throwable e) {
+			}
+			try {
+				output.close();
+			} catch (Throwable e) {
+			}
+		}
+	}
+
+	/** Reads {@code length} bytes starting at {@code start} from {@code file}. */
+	public static byte[] toByteArray(File file, long start, long length) throws IOException {
+		return toByteArray(new RandomAccessFile(file, "r"), start, length);
+	}
+
+	/**
+	 * Copies the input file into the output at the given offset
+	 * 
+	 * @param input the file to copy
+	 * @param output the target file to copy to
+	 * @param offset an offset to use in copying from the input expressed in
+	 *        number of bytes
+	 * 
+	 * @throws IOException raised in case of error
+	 */
+	public static void copy(File input, File output, long offset) throws IOException {
+		RandomAccessFile inputRa = new RandomAccessFile(input, "r");
+		RandomAccessFile outputRa = new RandomAccessFile(output, "rw");
+		FileChannel sourceChannel = inputRa.getChannel();
+		FileChannel targetChannel = outputRa.getChannel();
+		try {
+			// transferFrom writes the whole input into the target channel
+			// starting at byte 'offset' of the output.
+			targetChannel.transferFrom(sourceChannel, offset, input.length());
+		} finally {
+			// Close each resource independently so one failure cannot prevent
+			// the others from being released.
+			try {
+				sourceChannel.close();
+			} catch (Throwable e) {
+
+			}
+
+			try {
+				inputRa.close();
+			} catch (Throwable e) {
+
+			}
+
+			try {
+				targetChannel.close();
+			} catch (Throwable e) {
+
+			}
+
+			try {
+				outputRa.close();
+			} catch (Throwable e) {
+
+			}
+		}
+	}
+
+	/**
+	 * Copy the given byte range of the given input to the given output.
+	 * 
+	 * @param input The input to copy the given range to the given output for.
+	 * @param output The output to copy the given range from the given input
+	 *        for.
+	 * @param start Start of the byte range.
+	 * @param length Length of the byte range.
+	 * @throws IOException If something fails at I/O level.
+	 */
+	private static void copy(RandomAccessFile input, OutputStream output, long start, long length) throws IOException {
+		byte[] buffer = new byte[BUFF_SIZE];
+		int read;
+
+		if (input.length() == length) {
+			// Write full range.
+			// NOTE(review): this branch does not seek to 'start'; it is only
+			// correct when start == 0 (which is implied when length equals the
+			// whole file) — confirm callers never pass start > 0 here.
+			while ((read = input.read(buffer)) > 0) {
+				output.write(buffer, 0, read);
+			}
+		} else {
+			// Write partial range.
+			input.seek(start);
+			long toRead = length;
+
+			while ((read = input.read(buffer)) > 0) {
+				if ((toRead -= read) > 0) {
+					output.write(buffer, 0, read);
+				} else {
+					// Last chunk: toRead is now <= 0, so (toRead + read) is the
+					// number of bytes still owed from this buffer.
+					output.write(buffer, 0, (int) toRead + read);
+					break;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Replaces all occurrences of {@code token} with {@code newValue} in the file
+	 * at {@code sourcePath}, rewriting through a ".tmp" sibling file.
+	 * NOTE(review): token is interpreted as a regular expression (replaceAll) —
+	 * confirm callers expect regex semantics rather than literal replacement.
+	 */
+	public static void replaceInFile(String sourcePath, String token, String newValue) throws Exception {
+		BufferedReader reader = null;
+		Writer writer = null;
+
+		File tmp = new File(sourcePath + ".tmp");
+		File file = new File(sourcePath);
+
+		try {
+			reader = new BufferedReader(new InputStreamReader(new FileInputStream(sourcePath), "UTF-8"));
+
+			// Accumulate with StringBuilder: the old String concatenation in the
+			// loop was O(n^2) in file size.
+			StringBuilder oldContent = new StringBuilder();
+			String line = reader.readLine();
+			while (line != null) {
+				oldContent.append(line).append(System.lineSeparator());
+				line = reader.readLine();
+			}
+
+			// Replacing oldString with newString in the oldContent
+			String newContent = oldContent.toString().replaceAll(token, newValue);
+
+			// Rewriting the input text file with newContent
+			writer = new OutputStreamWriter(new FileOutputStream(tmp), "UTF-8");
+			writer.write(newContent);
+		} catch (IOException ioe) {
+			log.error(ioe.getMessage(), ioe);
+		} finally {
+			try {
+				// Null-safe close: if opening the reader failed, the old code
+				// threw NullPointerException from this finally block.
+				if (reader != null)
+					reader.close();
+				if (writer != null)
+					writer.close();
+			} catch (IOException e) {
+				log.error(e.getMessage(), e);
+			}
+		}
+
+		file.delete();
+		tmp.renameTo(file);
+	}
+
+	/**
+	 * Quickest way to copy a file in Java, makes use of NIO buffer.
+	 * 
+	 * @param source the source file to copy
+	 * @param target the target file
+	 * 
+	 * @throws IOException if the copy resulted in an error
+	 */
+	@SuppressWarnings("resource")
+	public static void copyFile(File source, File target) throws IOException {
+		// NOTE(review): despite the 'throws IOException' declaration, copy errors
+		// are caught, logged as warnings and swallowed — callers cannot tell a
+		// failed copy from a successful one. Confirm this is intended.
+		FileChannel in = null;
+		FileChannel out = null;
+
+		try {
+			in = new FileInputStream(source).getChannel();
+			out = new FileOutputStream(target).getChannel();
+
+			ByteBuffer buffer = ByteBuffer.allocateDirect(BUFF_SIZE);
+			while (in.read(buffer) != -1) {
+				// Cast to Buffer for Java 8 compatibility (flip()/clear() return
+				// types changed in Java 9).
+				((Buffer) buffer).flip();
+
+				while (buffer.hasRemaining()) {
+					out.write(buffer);
+				}
+				// For Java8 compatibility
+				((Buffer) buffer).clear();
+			}
+		} catch (IOException e) {
+			log.warn(e.getMessage());
+		} finally {
+			close(in);
+			close(out);
+		}
+	}
+
+	/** Closes the resource quietly, logging (not printing) any failure. */
+	private static void close(Closeable closable) {
+		if (closable != null) {
+			try {
+				closable.close();
+			} catch (IOException e) {
+				// Log through SLF4J instead of printStackTrace.
+				log.warn(e.getMessage(), e);
+			}
+		}
+	}
+
+	/**
+	 * Deletes a file or directory, falling back to an OS command when the
+	 * JVM-level delete fails (e.g. lingering handles on Windows).
+	 */
+	public static void strongDelete(File file) {
+		if (file == null || !file.exists())
+			return;
+
+		try {
+			FileUtils.forceDelete(file);
+		} catch (IOException e) {
+			log.warn(e.getMessage());
+		}
+
+		if (file.exists())
+			try {
+				log.debug("Delete file {} using OS command", file.getAbsolutePath());
+				// Pass the path as its own argv element: Runtime.exec(String)
+				// tokenizes on whitespace, so the old embedded quotes were sent
+				// literally and paths containing spaces never matched.
+				if (SystemUtil.isUnix() || SystemUtil.isMac() || SystemUtil.isSolaris())
+					Runtime.getRuntime().exec(new String[] { "rm", "-rf", file.getAbsolutePath() });
+				else
+					Runtime.getRuntime().exec(new String[] { "cmd", "/C", "del", "/F", "/Q", file.getAbsolutePath() });
+			} catch (IOException e) {
+				log.warn(e.getMessage(), e);
+			}
+	}
+
+	/** Returns true when {@code directory} contains no entries at all. */
+	public static boolean isDirEmpty(final Path directory) throws IOException {
+		try (DirectoryStream<Path> entries = Files.newDirectoryStream(directory)) {
+			return !entries.iterator().hasNext();
+		}
+	}
+
+	/** Concatenates f1 then f2 into {@code merged} (merged is overwritten). */
+	public static void merge(File f1, File f2, File merged) throws IOException {
+		copyFile(f1, merged);
+		Files.write(merged.toPath(), Files.readAllBytes(f2.toPath()), StandardOpenOption.APPEND);
+	}
+
+	/**
+	 * Concatenates all {@code files}, in order, into {@code merged}.
+	 * Works by repeatedly merging the accumulator with the next chunk into a
+	 * "tmp" sibling file and renaming it back over the accumulator.
+	 */
+	public static void merge(List<File> files, File merged) throws IOException {
+		merged.createNewFile();
+		File tmp = new File(merged.getParent(), "tmp");
+		tmp.createNewFile();
+
+		try {
+			for (File chunk : files) {
+				// tmp = merged + chunk, then replace merged with tmp.
+				FileUtil.merge(merged, chunk, tmp);
+				FileUtils.forceDelete(merged);
+				tmp.renameTo(merged);
+				tmp = new File(merged.getParent(), "tmp");
+				tmp.createNewFile();
+			}
+		} finally {
+			// The final loop iteration leaves an empty "tmp" behind; remove it.
+			if (tmp != null && tmp.exists())
+				FileUtils.forceDelete(tmp);
+				//FileUtils.deleteQuietly(tmp);
+		}
+
+	}
+
+	/**
+	 * Splits {@code file} into sequentially numbered chunk files of
+	 * {@code chunkSize} bytes inside {@code destDir} (names are zero-padded
+	 * 14-digit indexes starting at 1); a final smaller chunk holds the
+	 * remainder. Returns the chunks in order.
+	 */
+	public static List<File> split(File file, long chunkSize, File destDir) throws IOException {
+		RandomAccessFile raf = null;
+		List<File> chunks = new ArrayList<File>();
+		try {
+			raf = new RandomAccessFile(file, "r");
+			long numSplits = file.length() / chunkSize;
+			long sourceSize = raf.length();
+			long remainingBytes = sourceSize - (numSplits * chunkSize);
+
+			NumberFormat nf = new DecimalFormat("00000000000000");
+			int maxReadBufferSize = 8 * 1024; // 8KB
+			for (int destIx = 1; destIx <= numSplits; destIx++) {
+				File chunkFile = new File(destDir, nf.format(destIx));
+				BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream(chunkFile));
+				try {
+					// Copy one chunk, in at most 8KB reads.
+					if (chunkSize > maxReadBufferSize) {
+						long numReads = chunkSize / maxReadBufferSize;
+						long numRemainingRead = chunkSize % maxReadBufferSize;
+						for (int i = 0; i < numReads; i++) {
+							readWrite(raf, bw, maxReadBufferSize);
+						}
+						if (numRemainingRead > 0) {
+							readWrite(raf, bw, numRemainingRead);
+						}
+					} else {
+						readWrite(raf, bw, chunkSize);
+					}
+					chunks.add(chunkFile);
+				} finally {
+					bw.close();
+				}
+			}
+
+			// Trailing partial chunk, if the size is not an exact multiple.
+			if (remainingBytes > 0) {
+				File chunkFile = new File(destDir, nf.format(numSplits + 1));
+				BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream(chunkFile));
+				readWrite(raf, bw, remainingBytes);
+				bw.close();
+				chunks.add(chunkFile);
+			}
+		} finally {
+			if (raf != null)
+				try {
+					raf.close();
+				} catch (Throwable t) {
+				}
+		}
+		return chunks;
+	}
+
+	/**
+	 * Reads up to {@code numBytes} from {@code raf} and writes only the bytes
+	 * actually read to {@code bw}.
+	 */
+	static private void readWrite(RandomAccessFile raf, BufferedOutputStream bw, long numBytes) throws IOException {
+		byte[] buf = new byte[(int) numBytes];
+		int val = raf.read(buf);
+		if (val > 0) {
+			// Write only 'val' bytes: on a short read the old code emitted the
+			// whole buffer, padding the chunk with zero bytes.
+			bw.write(buf, 0, val);
+		}
+	}
+
+	/**
+	 * Counts '\n' characters in the file; a non-empty file with no newline at
+	 * all is reported as one line.
+	 */
+	public static long countLines(File file) throws IOException {
+		InputStream is = new BufferedInputStream(new FileInputStream(file));
+		try {
+			byte[] c = new byte[1024];
+			long count = 0;
+			int readChars = 0;
+			boolean empty = true;
+			while ((readChars = is.read(c)) != -1) {
+				empty = false;
+				for (int i = 0; i < readChars; ++i) {
+					if (c[i] == '\n') {
+						++count;
+					}
+				}
+			}
+			return (count == 0 && !empty) ? 1 : count;
+		} finally {
+			is.close();
+		}
+	}
+
+	/**
+	 * Checks if <b>file</b> is in side <b>folder</b> at any level.
+	 *
+	 * @param folder the folder to inspect
+	 * @param file the file to check
+	 * 
+	 * @return true if <b>file</b> is in side <b>folder</b>
+	 */
+	public static boolean isInsideFolder(File folder, File file) {
+		try {
+			return file.getCanonicalPath().startsWith(folder.getCanonicalPath());
+		} catch (IOException e) {
+			log.error(e.getMessage(), e);
+			return false;
+		}
+	}
+
+	/** Returns true when {@code s} is non-null and non-empty. */
+	public static boolean isNotEmpty(String s) {
+		return s != null && !s.isEmpty();
+	}
+
+
+	/** Returns the path without its extension (directory part + base name). */
+	public static String getFileNoExt(String fn) {
+		return FilenameUtils.getFullPath(fn) + FilenameUtils.getBaseName(fn);
+	}
+
+	/** Returns true when the path ends with a slash or backslash separator. */
+	public static boolean isEndsWithSeparator(String path) {
+		return path.endsWith("/") || path.endsWith("\\");
+	}
+}

+ 142 - 0
gfs/src/main/java/com/giantan/gfs/storer/util/J7Zip.java

@@ -0,0 +1,142 @@
+package com.giantan.gfs.storer.util;
+
+import net.sf.sevenzipjbinding.*;
+import net.sf.sevenzipjbinding.impl.RandomAccessFileInStream;
+import net.sf.sevenzipjbinding.simple.ISimpleInArchive;
+import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.Arrays;
+import java.util.Date;
+
+public class J7Zip {
+    protected static Logger log = LoggerFactory.getLogger(J7Zip.class);
+//    static {
+//        try {
+//            SevenZip.initSevenZipFromPlatformJAR();
+//            System.out.println("7-Zip-JBinding library was initialized");
+//        } catch (SevenZipNativeInitializationException e) {
+//            e.printStackTrace();
+//        }
+//    }
+
+    /**
+     * Extracts an archive into {@code targetFilePath}.
+     * Multipart RAR archives (*.part1.rar) are delegated to
+     * OpenMultipartArchiveRar; .tgz archives are unpacked in two passes
+     * (gzip -> tar, then tar -> files) and the intermediate tar is removed.
+     */
+    public static void un7z(String filepath, String targetFilePath) {
+        // TODO: also handle multipart 7z archives (*.7z.001).
+        if (filepath.endsWith(".part1.rar")){
+            OpenMultipartArchiveRar.un7z(filepath,targetFilePath);
+        }else {
+            un7z1(filepath, targetFilePath);
+            if (filepath.endsWith(".tgz")) {
+                String tarName = FilenameUtils.getBaseName(filepath) + ".tar";
+                File tarFile = new File(targetFilePath, tarName);
+                un7z1(tarFile.getPath(), targetFilePath);
+                FileUtils.deleteQuietly(tarFile);
+            }
+        }
+    }
+
+    public static void un7z1(String filepath, String targetFilePath) {
+        final File file = new File(targetFilePath);
+        if (!file.exists()) {
+            file.mkdirs();
+        }
+        RandomAccessFile randomAccessFile = null;
+        IInArchive inArchive = null;
+
+        try {
+            randomAccessFile = new RandomAccessFile(filepath, "r");
+            inArchive = SevenZip.openInArchive(null,
+                    new RandomAccessFileInStream(randomAccessFile));
+
+            ISimpleInArchive simpleInArchive = inArchive.getSimpleInterface();
+            //System.out.println(inArchive.getNumberOfItems());
+
+            for (final ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
+                final int[] hash = new int[]{0};
+                if (!item.isFolder()) {
+                    ExtractOperationResult result;
+
+                    final long[] sizeArray = new long[1];
+                    //System.out.println("path="+item.getPath());
+                    String archName = item.getPath();
+                    if (archName == null || archName.isEmpty()){
+                        String ext = FilenameUtils.getExtension(filepath);
+                        archName = FilenameUtils.getBaseName(filepath)+".tar";
+                    }
+                    File tarFile = new File(file + File.separator + archName);
+                    if (!tarFile.getParentFile().exists()) {
+                        tarFile.getParentFile().mkdirs();
+                    }
+
+                    final FileOutputStream fos = new FileOutputStream(tarFile.getAbsolutePath());
+
+                    result = item.extractSlow(new ISequentialOutStream() {
+                        public int write(byte[] data) throws SevenZipException {
+                            try {
+                                //System.out.println(tarFile.getName()+"  "+data.length);
+                                fos.write(data);
+                            } catch (Exception e) {
+                               log.error(e.getMessage());
+                            }
+
+                            hash[0] ^= Arrays.hashCode(data);
+                            sizeArray[0] += data.length;
+                            return data.length;
+                        }
+                    });
+                    fos.close();
+
+                    Date lastWriteTime = item.getLastWriteTime();
+                    if ( lastWriteTime!= null) {
+                        tarFile.setLastModified(lastWriteTime.getTime());
+                    }
+//                    if (result == ExtractOperationResult.OK) {
+//                        // System.out.println(String.format("%9X | %10s | %s", //
+//                        //  hash[0], sizeArray[0], item.getPath()));
+//                    } else {
+//                        // System.err.println("Error extracting item: " + result);
+//                    }
+
+                    if (result == ExtractOperationResult.OK) {
+                        log.debug(String.format("%9X | %10s | %s",
+                                hash[0], sizeArray[0], item.getPath()));
+                    } else {
+                        log.error("Error extracting item: " + result);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            log.error(e.getMessage());
+        } finally {
+            if (inArchive != null) {
+                try {
+                    inArchive.close();
+                } catch (SevenZipException e) {
+                    //e.printStackTrace();
+                    log.error(e.getMessage());
+                }
+            }
+            if (randomAccessFile != null) {
+                try {
+                    randomAccessFile.close();
+                } catch (IOException e) {
+                    //e.printStackTrace();
+                    log.error(e.getMessage());
+                }
+            }
+        }
+    }
+
+//    public static void main(String[] args) {
+//        String fn = "D:\\data\\语料\\石油化工\\规章制度\\CS01-BGJD-001-2015长庆石化公司公务接待管理规定.zip";
+//        String dst = "d:\\tmp\\cq";
+//        un7z(fn, dst);
+//    }
+}

+ 21 - 0
gfs/src/main/java/com/giantan/gfs/storer/util/ObjectUtil.java

@@ -0,0 +1,21 @@
+
+package com.giantan.gfs.storer.util;
+
+import org.springframework.lang.Nullable;
+
+/**
+ * 对象工具类
+ *
+ */
+public class ObjectUtil extends org.springframework.util.ObjectUtils {
+
+	/**
+	 * 判断元素不为空
+	 * @param obj object
+	 * @return boolean
+	 */
+	public static boolean isNotEmpty(@Nullable Object obj) {
+		return !ObjectUtil.isEmpty(obj);
+	}
+
+}

+ 229 - 0
gfs/src/main/java/com/giantan/gfs/storer/util/OpenMultipartArchiveRar.java

@@ -0,0 +1,229 @@
+package com.giantan.gfs.storer.util;
+
+//import lombok.extern.slf4j.Slf4j;
+import net.sf.sevenzipjbinding.*;
+import net.sf.sevenzipjbinding.impl.RandomAccessFileInStream;
+import net.sf.sevenzipjbinding.simple.ISimpleInArchive;
+import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
+
+import java.io.*;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+//@Slf4j
+public class OpenMultipartArchiveRar {
+    private static final org.slf4j.Logger log
+            = org.slf4j.LoggerFactory.getLogger(OpenMultipartArchiveRar.class);
+
+    private static class ArchiveOpenVolumeCallback
+            implements IArchiveOpenVolumeCallback, IArchiveOpenCallback {
+
+        /**
+         * Cache for opened file streams
+         */
+        private Map<String, RandomAccessFile> openedRandomAccessFileList =
+                new HashMap<String, RandomAccessFile>();
+
+        /**
+         * Name of the last volume returned by {@link #getStream(String)}
+         */
+        private String name ;
+
+        /**
+         * This method should at least provide the name of the last
+         * opened volume (propID=PropID.NAME).
+         *
+         * @see IArchiveOpenVolumeCallback#getProperty(PropID)
+         */
+        public Object getProperty(PropID propID) throws SevenZipException {
+            switch (propID) {
+                case NAME:
+                    return name ;
+            }
+            return null;
+        }
+
+        /**
+         * The name of the required volume will be calculated out of the
+         * name of the first volume and a volume index. In case of RAR file,
+         * the substring ".partNN." in the name of the volume file will
+         * indicate a volume with id NN. For example:
+         * <ul>
+         * <li>test.rar - single part archive or multi-part archive with
+         * a single volume</li>
+         * <li>test.part23.rar - 23-th part of a multi-part archive</li>
+         * <li>test.part001.rar - first part of a multi-part archive.
+         * "00" indicates, that at least 100 volumes must exist.</li>
+         * </ul>
+         */
+        public IInStream getStream(String filename) throws SevenZipException {
+            try {
+                // We use caching of opened streams, so check cache first
+                RandomAccessFile randomAccessFile = openedRandomAccessFileList
+                        .get(filename);
+                if (randomAccessFile != null) { // Cache hit.
+                    // Move the file pointer back to the beginning
+                    // in order to emulating new stream
+                    randomAccessFile.seek(0);
+
+                    // Save current volume name in case getProperty() will be called
+                    name = filename;
+
+                    return new RandomAccessFileInStream(randomAccessFile);
+                }
+
+                // Nothing useful in cache. Open required volume.
+                randomAccessFile = new RandomAccessFile(filename, "r");
+
+                // Put new stream in the cache
+                openedRandomAccessFileList.put(filename, randomAccessFile);
+
+                // Save current volume name in case getProperty() will be called
+                name = filename;
+                return new RandomAccessFileInStream(randomAccessFile);
+            } catch (FileNotFoundException fileNotFoundException) {
+                // Required volume doesn't exist. This happens if the volume:
+                // 1. never exists. 7-Zip doesn't know how many volumes should
+                //    exist, so it have to try each volume.
+                // 2. should be there, but doesn't. This is an error case.
+
+                // Since normal and error cases are possible,
+                // we can't throw an error message
+                return null; // We return always null in this case
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        /**
+         * Close all opened streams
+         */
+        void close() throws IOException {
+            for (RandomAccessFile file : openedRandomAccessFileList.values()) {
+                file.close();
+            }
+        }
+
+        public void setCompleted(Long files, Long bytes) throws SevenZipException {
+        }
+
+        public void setTotal(Long files, Long bytes) throws SevenZipException {
+        }
+    }
+
+    public static void un7z(String filepath, String targetFilePath) {
+
+        final File file = new File(targetFilePath);
+        if (!file.exists()) {
+            file.mkdirs();
+        }
+        ArchiveOpenVolumeCallback archiveOpenVolumeCallback = null;
+        IInArchive inArchive = null;
+        try {
+
+            archiveOpenVolumeCallback = new ArchiveOpenVolumeCallback();
+            IInStream inStream = archiveOpenVolumeCallback.getStream(filepath);
+            inArchive = SevenZip.openInArchive(ArchiveFormat.RAR, inStream,
+                    archiveOpenVolumeCallback);
+
+
+            ISimpleInArchive simpleInArchive = inArchive.getSimpleInterface();
+
+            for (final ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
+                final int[] hash = new int[]{0};
+                if (!item.isFolder()) {
+                    ExtractOperationResult result;
+
+                    final long[] sizeArray = new long[1];
+                    //System.out.println("path="+item.getPath());
+                    String archName = item.getPath();
+//                    if (archName == null || archName.isEmpty()) {
+//                        String ext = FilenameUtils.getExtension(filepath);
+//                        archName = FilenameUtils.getBaseName(filepath) + ".tar";
+//                    }
+                    File tarFile = new File(file + File.separator + archName);
+                    if (!tarFile.getParentFile().exists()) {
+                        tarFile.getParentFile().mkdirs();
+                    }
+
+                    final FileOutputStream fos = new FileOutputStream(tarFile.getAbsolutePath());
+
+                    result = item.extractSlow(new ISequentialOutStream() {
+                        public int write(byte[] data) throws SevenZipException {
+                            try {
+                                //System.out.println(tarFile.getName()+"  "+data.length);
+                                fos.write(data);
+                            } catch (Exception e) {
+                                log.error(e.getMessage());
+                                //e.printStackTrace();
+                            }
+
+                            hash[0] ^= Arrays.hashCode(data);
+                            sizeArray[0] += data.length;
+                            return data.length;
+                        }
+                    });
+                    fos.close();
+
+                    Date lastWriteTime = item.getLastWriteTime();
+                    if (lastWriteTime != null) {
+                        tarFile.setLastModified(lastWriteTime.getTime());
+                    }
+//                    if (result == ExtractOperationResult.OK) {
+//                        // System.out.println(String.format("%9X | %10s | %s", //
+//                        //  hash[0], sizeArray[0], item.getPath()));
+//                    } else {
+//                        // System.err.println("Error extracting item: " + result);
+//                    }
+
+                    if (result == ExtractOperationResult.OK) {
+                        log.debug(String.format("%9X | %10s | %s",
+                                hash[0], sizeArray[0], item.getPath()));
+                    } else {
+                        log.error("Error extracting item: " + result);
+                    }
+                }
+            }
+
+//            System.out.println("   Size   | Compr.Sz. | Filename");
+//            System.out.println("----------+-----------+---------");
+//            int itemCount = inArchive.getNumberOfItems();
+//            for (int i = 0; i < itemCount; i++) {
+//                System.out.println(String.format("%9s | %9s | %s",
+//                        inArchive.getProperty(i, PropID.SIZE),
+//                        inArchive.getProperty(i, PropID.PACKED_SIZE),
+//                        inArchive.getProperty(i, PropID.PATH)));
+//            }
+        } catch (Exception e) {
+            log.error("Error occurs: " + e);
+        } finally {
+            if (inArchive != null) {
+                try {
+                    inArchive.close();
+                } catch (SevenZipException e) {
+                    log.error("Error closing archive: " + e);
+                }
+            }
+            if (archiveOpenVolumeCallback != null) {
+                try {
+                    archiveOpenVolumeCallback.close();
+                } catch (IOException e) {
+                    log.error("Error closing file: " + e);
+                }
+            }
+        }
+    }
+
+//    public static void main(String[] args) {
+////        if (args.length == 0) {
+////            System.out.println(
+////                    "Usage: java OpenMultipartArchiveRar <first-volume>");
+////            return;
+////        }
+//        String targetFilePath="f:\\test4\\";
+//        String fn = "F:\\企业文档管理\\其他\\吉林寰球\\施工图\\22373-1300-JCEC-LZSH-TL-0531高密装置系统、电气、电信、仪表、给排水、建筑、结构、应力、粉体、总图、暖通专业图纸(2020.10.30)\\应力.part1.rar";
+//        un7z(fn,targetFilePath);
+//    }
+}

+ 316 - 0
gfs/src/main/java/com/giantan/gfs/storer/util/StringUtil.java

@@ -0,0 +1,316 @@
+package com.giantan.gfs.storer.util;
+
+import java.io.*;
+import java.nio.charset.CharacterCodingException;
+import java.text.Normalizer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.stream.Collectors;
+
public class StringUtil {

    /**
     * Splits a string into fixed-size tokens and joins them with a separator.
     *
     * @param src       the source string
     * @param separator the separator character
     * @param tokenSize size of each token
     * @return the string with a separator inserted every {@code tokenSize} characters
     */
    public static String split(String src, char separator, int tokenSize) {
        StringBuilder sb = new StringBuilder();
        for (String token : split(src, tokenSize)) {
            if (sb.length() > 0)
                sb.append(separator);
            sb.append(token);
        }
        return sb.toString();
    }

    /**
     * Splits a string into an array of fixed-size tokens; the last token may be
     * shorter than {@code tokenSize}.
     *
     * @param src       the source string
     * @param tokenSize size of each token, must be positive
     * @return array of split tokens
     * @throws IllegalArgumentException if {@code tokenSize <= 0} (the original
     *                                  looped forever in that case)
     */
    public static String[] split(String src, int tokenSize) {
        if (tokenSize <= 0)
            throw new IllegalArgumentException("tokenSize must be positive: " + tokenSize);
        ArrayList<String> buf = new ArrayList<String>();
        for (int i = 0; i < src.length(); i += tokenSize) {
            buf.add(src.substring(i, Math.min(i + tokenSize, src.length())));
        }
        return buf.toArray(new String[0]);
    }

    /**
     * Reads the full content of {@code reader} into a UTF-8 encoded string.
     *
     * @param reader the reader; it is closed before returning
     * @return the content read from {@code reader}
     * @throws IOException if reading fails
     */
    public static String writeToString(Reader reader) throws IOException {
        return writeToString(reader, "UTF-8");
    }

    /**
     * Reads the full content of {@code reader} into a string encoded with
     * {@code targetEncoding}. Note: every line ending is normalized to the
     * platform line separator by {@link BufferedWriter#newLine()}.
     *
     * @param reader         the reader; ATTENTION, it is closed before returning
     * @param targetEncoding the output encoding; falls back to UTF-8 when empty
     * @return the encoded string
     * @throws IOException if reading fails
     */
    public static String writeToString(Reader reader, String targetEncoding) throws IOException {
        String enc = isNotEmpty(targetEncoding) ? targetEncoding : "UTF-8";

        // Fix: the original allocated ByteArrayOutputStream twice.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        OutputStreamWriter osw = null;
        try {
            osw = new OutputStreamWriter(baos, enc);
            BufferedWriter bw = new BufferedWriter(osw);
            BufferedReader br = new BufferedReader(reader);
            String inputLine;
            while ((inputLine = br.readLine()) != null) {
                bw.write(inputLine);
                bw.newLine();
            }
            bw.flush();
            osw.flush();
            return new String(baos.toByteArray(), enc);
        } finally {
            try {
                if (reader != null)
                    reader.close();
                if (osw != null)
                    osw.close();
            } catch (IOException ignored) {
                // Best-effort close; the content was already captured above.
            }
        }
    }

    /**
     * Reads the full content of {@code is} into a string decoded with
     * {@code targetEncoding}.
     *
     * @param is             the input stream; it is closed before returning
     * @param targetEncoding the decoding charset; falls back to UTF-8 when empty
     * @return the decoded string
     * @throws IOException if reading fails
     */
    public static String writeToString(InputStream is, String targetEncoding) throws IOException {
        String enc = isNotEmpty(targetEncoding) ? targetEncoding : "UTF-8";

        Writer writer = new StringWriter();
        char[] buffer = new char[1024];
        try {
            Reader reader = new BufferedReader(new InputStreamReader(is, enc));
            int n;
            while ((n = reader.read(buffer)) != -1) {
                writer.write(buffer, 0, n);
            }
        } finally {
            if (is != null)
                is.close();
        }
        return writer.toString();
    }

    /**
     * Joins an array's elements with the given separator.
     *
     * @param a         the array, must not be {@code null}
     * @param separator the separator string
     * @return the joined string, empty for an empty array
     */
    public static String arrayToString(Object[] a, String separator) {
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < a.length; i++) {
            if (i > 0)
                result.append(separator);
            result.append(a[i]);
        }
        return result.toString();
    }

    /**
     * Joins a collection's elements (via {@code toString()}) with the separator.
     */
    public static String collectionToString(Collection<?> collection, String separator) {
        return String.join(separator, collection.stream().map(o -> o.toString()).collect(Collectors.toList()));
    }

    /**
     * Replaces U+FFFF and the unpaired surrogate U+D835 with spaces.
     * NOTE(review): despite the name, only these two code units are handled.
     */
    public static String removeNonUtf8Chars(String src) throws CharacterCodingException {
        return src.replace('\uFFFF', ' ').replace('\uD835', ' ');
    }

    /**
     * Checks whether a string matches the {@code includes} patterns and none of
     * the {@code excludes} patterns.
     *
     * @param str      the string to consider
     * @param includes include regular expressions; when empty or {@code null},
     *                 every non-excluded string matches
     * @param excludes exclude regular expressions; these win over includes
     * @return {@code true} only if {@code str} matches the includes and not the
     *         excludes
     */
    public static boolean matches(String str, String[] includes, String[] excludes) {
        // Exclusions take precedence.
        if (excludes != null && excludes.length > 0)
            for (String s : excludes)
                if (isNotEmpty(s) && str.matches(s))
                    return false;

        // Then check whether the string is explicitly included.
        if (includes != null && includes.length > 0)
            for (String s : includes)
                if (isNotEmpty(s) && str.matches(s))
                    return true;

        // No include patterns at all means "include everything".
        return includes == null || includes.length == 0;
    }

    /**
     * Converts non-Latin characters to their nearest ASCII equivalent by
     * stripping combining marks after NFD normalization.
     *
     * @param src the source string to process
     * @return the unaccented string
     */
    public static String unaccent(String src) {
        return Normalizer.normalize(src, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
    }


    public static final char DELIMITER = '/';

    public static final String PATH_SEPARATOR = "/";

    public static final String HTTP_PROTOCAL = "http://";

    public static final String HTTPS_PROTOCAL = "https://";

    /**
     * Removes a leading '/' from the path.
     *
     * @return e.g. for path = '/folder1/file1', returns 'folder1/file1'
     */
    public static String removeFirstSeparator(String path) {
        if (path.length() > 0 && path.charAt(0) == DELIMITER) {
            path = path.substring(1);
        }
        return path;
    }

    /**
     * Removes a trailing '/' from the path.
     *
     * @return e.g. for path = '/folder1/file1/', returns '/folder1/file1'
     */
    public static String removeLastSeparator(String path) {
        if (path.length() > 0 && path.charAt(path.length() - 1) == DELIMITER) {
            path = path.substring(0, path.length() - 1);
        }
        return path;
    }

    /**
     * Ensures the path ends with exactly one '/', converting a trailing '\'
     * if present.
     */
    public static String appendLastSeparator(String path) {
        if (path.endsWith("/")) {
            // Already terminated correctly.
        } else if (path.endsWith("\\")) {
            path = path.substring(0, path.length() - 1) + "/";
        } else {
            path = path + "/";
        }
        return path;
    }

    /**
     * Joins a path and a name with '/', collapsing any duplicate separators.
     */
    public static String concatUrl(String path, String name) {
        return removeDuplicateSeparator(DELIMITER + path + DELIMITER + name);
    }

    /**
     * Joins a domain and a path into a URL, normalizing the '/' between them.
     *
     * @param domain the domain, e.g. 'http://host/'
     * @param path   the path, e.g. 'a/b'
     * @return the joined URL
     */
    public static String concatPath(String domain, String path) {
        if (path != null && path.length() > 1 && path.charAt(0) != DELIMITER) {
            path = DELIMITER + path;
        }

        if (domain != null && domain.charAt(domain.length() - 1) == DELIMITER) {
            // Fix: strip exactly the one trailing '/'. The original used
            // length() - 2, dropping an extra character (flagged by the
            // author's own comment).
            domain = domain.substring(0, domain.length() - 1);
        }

        return domain + path;
    }

    /**
     * Collapses consecutive '/' characters, preserving the "http://" or
     * "https://" protocol prefix if present.
     */
    public static String removeDuplicateSeparator(String path) {
        if (path == null || path.length() < 2) {
            return path;
        }

        StringBuilder sb = new StringBuilder();

        if (path.indexOf(HTTP_PROTOCAL) == 0) {
            sb.append(HTTP_PROTOCAL);
        } else if (path.indexOf(HTTPS_PROTOCAL) == 0) {
            sb.append(HTTPS_PROTOCAL);
        }

        // Keep a character unless it starts a '//' pair.
        for (int i = sb.length(); i < path.length() - 1; i++) {
            char current = path.charAt(i);
            char next = path.charAt(i + 1);
            if (!(current == DELIMITER && next == DELIMITER)) {
                sb.append(current);
            }
        }
        sb.append(path.charAt(path.length() - 1));
        return sb.toString();
    }

    public static boolean isNullOrEmpty(String s) {
        return s == null || "".equals(s);
    }

    public static boolean isNotNullOrEmpty(String s) {
        return !isNullOrEmpty(s);
    }

    /**
     * Returns the full path of basePath + path with duplicate '/' collapsed.
     *
     * @return the full basePath + path address
     */
    public static String getFullPath(String basePath, String path) {
        return removeDuplicateSeparator(basePath + PATH_SEPARATOR + path);
    }


    public static boolean isEmpty(final CharSequence cs) {
        return cs == null || cs.length() == 0;
    }

    public static boolean isNotEmpty(final CharSequence cs) {
        return !isEmpty(cs);
    }

    /**
     * Strips {@code prefix} from the start of {@code path} when present.
     */
    public static String removePrefix(String path, String prefix) {
        if (path.startsWith(prefix)) {
            path = path.substring(prefix.length());
        }
        return path;
    }


}

+ 176 - 0
gfs/src/main/java/com/giantan/gfs/storer/util/SystemUtil.java

@@ -0,0 +1,176 @@
+package com.giantan.gfs.storer.util;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.lang.management.ManagementFactory;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+
+public class SystemUtil {
+
+	private static Logger log = LoggerFactory.getLogger(SystemUtil.class);
+
+	public static final String SUN_JAVA_COMMAND = "sun.java.command";
+
+	private static String OS = System.getProperty("os.name").toLowerCase();
+
+	public static void main(String[] args) {
+		printEnvironment();
+		System.out.println(printStackTrace());
+	}
+
+	public static String printEnvironment() {
+		StringBuffer sb = new StringBuffer();
+		sb.append(printSystemEnvironment());
+		sb.append("\n\n");
+		sb.append(printJavaEnvironment());
+		return sb.toString();
+	}
+
+	public static String printStackTrace() {
+		StringBuffer sb = new StringBuffer();
+		StackTraceElement[] elements = Thread.currentThread().getStackTrace();
+		for (StackTraceElement element : elements) {
+			if(element.getClassName().equals(Thread.class.getName()) && element.getMethodName().equals("getStackTrace"))
+				continue;
+			if(element.getClassName().equals(SystemUtil.class.getName()) && element.getMethodName().equals("printStackTrace"))
+				continue;
+			sb.append(element.getClassName());
+			sb.append(".");
+			sb.append(element.getMethodName());
+			sb.append("(");
+			sb.append(element.getLineNumber());
+			sb.append(")\n");
+		}
+		return sb.toString();
+	}
+	
+	public static String printSystemEnvironment() {
+		StringBuffer sb = new StringBuffer();
+		Map<String, String> env = System.getenv();
+		for (String key : env.keySet()) {
+			if (sb.length() > 0)
+				sb.append("\n");
+			sb.append(key);
+			sb.append("=");
+			sb.append(env.get(key));
+		}
+		return "#System Environment\n#" + new Date() + "\n" + sb.toString();
+	}
+
+	public static String printJavaEnvironment() {
+		StringWriter writer = new StringWriter();
+		try {
+			System.getProperties().store(new PrintWriter(writer), "Java Environment");
+			return writer.getBuffer().toString();
+		} catch (IOException e) {
+
+		}
+		return "";
+	}
+
+	public static boolean isWindows() {
+		return (OS.indexOf("win") >= 0);
+	}
+
+	public static boolean isMac() {
+		return (OS.indexOf("mac") >= 0);
+	}
+
+	public static boolean isUnix() {
+		return (OS.indexOf("nix") >= 0 || OS.indexOf("nux") >= 0 || OS.indexOf("aix") > 0);
+	}
+
+	public static boolean isSolaris() {
+		return (OS.indexOf("sunos") >= 0);
+	}
+
+	public static String getOS() {
+		if (isWindows()) {
+			return "win";
+		} else if (isMac()) {
+			return "osx";
+		} else if (isUnix()) {
+			return "uni";
+		} else if (isSolaris()) {
+			return "sol";
+		} else {
+			return "err";
+		}
+	}
+
+	/**
+	 * Restarts the current Java application
+	 * 
+	 * @param runBeforeRestart some custom code to be run before restarting
+	 * 
+	 * @throws IOException raised if the application cannot be restarted 
+	 */
+	public static void restartApplication(Runnable runBeforeRestart) throws IOException {
+		try {
+			// java binary
+			String java = System.getProperty("java.home") + "/bin/java";
+			// vm arguments
+			List<String> vmArguments = ManagementFactory.getRuntimeMXBean().getInputArguments();
+			StringBuffer vmArgsOneLine = new StringBuffer();
+			for (String arg : vmArguments) {
+				// if it's the agent argument : we ignore it otherwise the
+				// address of the old application and the new one will be in
+				// conflict
+				if (!arg.contains("-agentlib")) {
+					vmArgsOneLine.append(arg);
+					vmArgsOneLine.append(" ");
+				}
+			}
+			// init the command to execute, add the vm args
+			final StringBuffer cmd = new StringBuffer("\"" + java + "\" " + vmArgsOneLine);
+
+			// program main and program arguments
+			String[] mainCommand = System.getProperty(SUN_JAVA_COMMAND).split(" ");
+			// program main is a jar
+			if (mainCommand[0].endsWith(".jar")) {
+				// if it's a jar, add -jar mainJar
+				cmd.append("-jar " + new File(mainCommand[0]).getPath());
+			} else {
+				// else it's a .class, add the classpath and mainClass
+				cmd.append("-cp \"" + System.getProperty("java.class.path") + "\" " + mainCommand[0]);
+			}
+			// finally add program arguments
+			for (int i = 1; i < mainCommand.length; i++) {
+				cmd.append(" ");
+				cmd.append(mainCommand[i]);
+			}
+
+			// execute the command in a shutdown hook, to be sure that all the
+			// resources have been disposed before restarting the application
+			Runtime.getRuntime().addShutdownHook(new Thread() {
+				@Override
+				public void run() {
+					try {
+						final String message = "Restarting java: " + cmd.toString();
+						log.warn(message);
+						System.out.println(message);
+						Runtime.getRuntime().exec(cmd.toString());
+					} catch (IOException e) {
+						e.printStackTrace();
+					}
+				}
+			});
+			// execute some custom code before restarting
+			if (runBeforeRestart != null)
+				runBeforeRestart.run();
+
+			// exit
+			System.exit(0);
+		} catch (Exception e) {
+			// something went wrong
+			throw new IOException("Error while trying to restart the application", e);
+		}
+	}
+}

+ 73 - 0
gfs/src/main/java/com/giantan/gfs/storer/util/ZipUtil2.java

@@ -0,0 +1,73 @@
+package com.giantan.gfs.storer.util;
+
+
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+
+//使用apache的commons-compress 来解压,避免文件名的中文问题
+public class ZipUtil2 {
+    protected static Logger log = LoggerFactory.getLogger(ZipUtil2.class);
+
+    public static void unzip(File zipFile, String descDir) {
+        ZipArchiveInputStream inputStream = null;
+        try  {
+            inputStream = getZipFile(zipFile);
+            File pathFile = new File(descDir);
+            if (!pathFile.exists()) {
+                pathFile.mkdirs();
+            }
+            ZipArchiveEntry entry = null;
+            while ((entry = inputStream.getNextZipEntry()) != null) {
+                if (entry.isDirectory()) {
+                    File directory = new File(descDir, entry.getName());
+                    directory.mkdirs();
+                } else {
+                    OutputStream os = null;
+                    try {
+                        os = new BufferedOutputStream(new FileOutputStream(new File(descDir, entry.getName())));
+                        //输出文件路径信息
+                        //log.info("解压文件的当前路径为:{}", descDir + entry.getName());
+                        IOUtils.copy(inputStream, os);
+                    } finally {
+                        IOUtils.closeQuietly(os);
+                    }
+                }
+            }
+            final File[] files = pathFile.listFiles();
+            if (files != null && files.length == 1 && files[0].isDirectory()) {
+                // 说明只有一个文件夹
+                FileUtils.copyDirectory(files[0], pathFile);
+                //免得删除错误, 删除的文件必须在/data/demand/目录下。
+                boolean isValid = files[0].getPath().contains("/data/www/");
+                if (isValid) {
+                    FileUtils.forceDelete(files[0]);
+                }
+            }
+            //log.info("******************解压完毕********************");
+            log.info("Unzip {} finished .",zipFile.getName());
+        } catch (Exception e) {
+            log.error("[unzip] ",zipFile.getName(), e.getMessage());
+        }finally {
+            if ( inputStream != null){
+                try {
+                    inputStream.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+    }
+
+    private static ZipArchiveInputStream getZipFile(File zipFile) throws Exception {
+        return new ZipArchiveInputStream(new BufferedInputStream(new FileInputStream(zipFile)));
+    }
+
+
+
+}

+ 78 - 0
gfs/src/test/java/com/giantan/gfs/service/impl/S3GkbServiceTest.java

@@ -0,0 +1,78 @@
+package com.giantan.gfs.service.impl;
+
+import com.giantan.gfs.storer.FileItem;
+import com.giantan.gfs.storer.impl.S3Storer;
+import software.amazon.awssdk.services.s3.S3Client;
+
+import java.util.List;
+
+public class S3GkbServiceTest {
+    //  endpoint: http://127.0.0.1:9000
+    //  accesskey: admin
+    //  secretkey: 12345678
+    //  bucket: gkb
+    //  bucketWeb: baiying
+    String endpoint = "http://127.0.0.1:9000";
+    String accesskey = "admin";
+    String secretkey = "12345678";
+
+    String bucket = "gkb";
+
+    S3Client client;
+
+    S3GkbService gkb;
+
+    public S3GkbServiceTest(){
+
+    }
+    public void init(){
+        S3Client s3 = S3Storer.create(endpoint, accesskey, secretkey);
+        this.client = s3;
+
+        S3GkbService gkb = new S3GkbService(client,endpoint,bucket);
+        gkb.init();
+        this.gkb = gkb;
+    }
+
+    public void renameTest() throws Exception {
+        List<FileItem> ls = gkb.getFileItems("demo11");
+        for (int i = 0; i < ls.size(); i++) {
+            System.out.println(ls.get(i));
+        }
+        String src = "/demo11/source/1000万吨年常减压蒸馏装置-应急操作卡3.md";
+        String dest = "/demo11/source/1000万吨年常减压蒸馏装置-应急操作卡.md";
+        String s = gkb.renameFile(bucket, src, dest);
+        System.out.println(s);
+        System.out.println("=====");
+
+        ls = gkb.getFileItems("demo11");
+        for (int i = 0; i < ls.size(); i++) {
+            System.out.println(ls.get(i));
+        }
+    }
+
+    public void renameTest2() throws Exception {
+        List<FileItem> ls = gkb.getFileItems("demo11");
+        for (int i = 0; i < ls.size(); i++) {
+            System.out.println(ls.get(i));
+        }
+        String src = "/demo11/a";
+        String dest = "/demo11/source/";
+//        src = "/demo11/sourc/";
+//        dest = "/demo11/";
+        int s = gkb.renameFolder(bucket, src, dest);
+        System.out.println(s);
+        System.out.println("=====");
+
+        ls = gkb.getFileItems("demo11");
+        for (int i = 0; i < ls.size(); i++) {
+            System.out.println(ls.get(i));
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        S3GkbServiceTest st = new S3GkbServiceTest();
+        st.init();
+        st.renameTest2();
+    }
+}

+ 133 - 0
gtbook/pom.xml

@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>com.giantan.mds</groupId>
+        <artifactId>mds</artifactId>
+        <version>1.0.0</version>
+    </parent>
+
+    <groupId>org.cnnlp.data.md</groupId>
+    <artifactId>gtbook</artifactId>
+
+    <properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <spring-ai.version>1.0.0</spring-ai.version>
+    </properties>
+
+
+    <dependencies>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>2.0.17</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <version>2.0.17</version>
+        </dependency>
+
+        <!-- https://search.maven.org/artifact/com.github.f4b6a3/ulid-creator -->
+        <dependency>
+            <groupId>com.github.f4b6a3</groupId>
+            <artifactId>ulid-creator</artifactId>
+            <version>5.2.3</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>2.15.0</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+            <version>2.18.3</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.18.3</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/com.github.cliftonlabs/json-simple -->
+        <dependency>
+            <groupId>com.github.cliftonlabs</groupId>
+            <artifactId>json-simple</artifactId>
+            <version>4.0.1</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/net.sf.trove4j/trove4j -->
+        <dependency>
+            <groupId>net.sf.trove4j</groupId>
+            <artifactId>trove4j</artifactId>
+            <version>2.1.0</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/org.apache.opennlp/opennlp-tools -->
+        <dependency>
+            <groupId>org.apache.opennlp</groupId>
+            <artifactId>opennlp-tools</artifactId>
+            <version>2.4.0</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.1</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/com.vladsch.flexmark/flexmark-all -->
+        <dependency>
+            <groupId>com.vladsch.flexmark</groupId>
+            <artifactId>flexmark-all</artifactId>
+            <version>0.64.8</version>
+        </dependency>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.38</version>
+            <scope>compile</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework.ai</groupId>
+            <artifactId>spring-ai-commons</artifactId>
+            <!--            <version>${SPRING_AI_VERSION}</version>-->
+            <!--            <scope>system</scope>-->
+            <!--            <systemPath>${project.basedir}/lib/spring-ai-core-1.0.0-M6.jar</systemPath>-->
+        </dependency>
+
+        <!--        <dependency>-->
+        <!--            <groupId>org.junit.jupiter</groupId>-->
+        <!--            <artifactId>junit-jupiter</artifactId>-->
+        <!--            <scope>test</scope>-->
+        <!--        </dependency>-->
+
+    </dependencies>
+
+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>org.springframework.ai</groupId>
+                <artifactId>spring-ai-bom</artifactId>
+                <version>${spring-ai.version}</version>
+                <type>pom</type>
+                <scope>import</scope>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
+
+</project>

+ 61 - 0
gtbook/src/main/java/com/vladsch/flexmark/ext/obs/comments/Comments.java

@@ -0,0 +1,61 @@
+package com.vladsch.flexmark.ext.obs.comments;
+
+import com.vladsch.flexmark.util.ast.DelimitedNode;
+import com.vladsch.flexmark.util.ast.Node;
+import com.vladsch.flexmark.util.sequence.BasedSequence;
+import org.jetbrains.annotations.NotNull;
+
+public class Comments  extends Node implements DelimitedNode {
+    protected BasedSequence openingMarker = BasedSequence.NULL;
+    protected BasedSequence text = BasedSequence.NULL;
+    protected BasedSequence closingMarker = BasedSequence.NULL;
+
+    @NotNull
+    @Override
+    public BasedSequence[] getSegments() {
+        return new BasedSequence[] { openingMarker, text, closingMarker };
+    }
+
+    @Override
+    public void getAstExtra(@NotNull StringBuilder out) {
+        delimitedSegmentSpan(out, openingMarker, text, closingMarker, "text");
+    }
+
+    public Comments() {
+    }
+
+    public Comments(BasedSequence chars) {
+        super(chars);
+    }
+
+    public Comments(BasedSequence openingMarker, BasedSequence text, BasedSequence closingMarker) {
+        super(openingMarker.baseSubSequence(openingMarker.getStartOffset(), closingMarker.getEndOffset()));
+        this.openingMarker = openingMarker;
+        this.text = text;
+        this.closingMarker = closingMarker;
+    }
+
+    public BasedSequence getOpeningMarker() {
+        return openingMarker;
+    }
+
+    public void setOpeningMarker(BasedSequence openingMarker) {
+        this.openingMarker = openingMarker;
+    }
+
+    public BasedSequence getText() {
+        return text;
+    }
+
+    public void setText(BasedSequence text) {
+        this.text = text;
+    }
+
+    public BasedSequence getClosingMarker() {
+        return closingMarker;
+    }
+
+    public void setClosingMarker(BasedSequence closingMarker) {
+        this.closingMarker = closingMarker;
+    }
+}

+ 42 - 0
gtbook/src/main/java/com/vladsch/flexmark/ext/obs/comments/CommentsExtension.java

@@ -0,0 +1,42 @@
+package com.vladsch.flexmark.ext.obs.comments;
+
+import com.vladsch.flexmark.ext.obs.comments.internal.CommentsDelimiterProcessor;
+import com.vladsch.flexmark.ext.obs.comments.internal.CommentsNodeRenderer;
+import com.vladsch.flexmark.html.HtmlRenderer;
+import com.vladsch.flexmark.parser.Parser;
+import com.vladsch.flexmark.util.data.MutableDataHolder;
+import com.vladsch.flexmark.util.data.NullableDataKey;
+import org.jetbrains.annotations.NotNull;
+
+public class CommentsExtension  implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension {
+    final public static NullableDataKey<String> COMMENTS_STYLE_HTML_OPEN = new NullableDataKey<>("COMMENTS_STYLE_HTML_OPEN");
+    final public static NullableDataKey<String> COMMENTS_STYLE_HTML_CLOSE = new NullableDataKey<>("COMMENTS_STYLE_HTML_CLOSE");
+    private CommentsExtension(){
+
+    }
+
+    public static CommentsExtension create(){
+        return new CommentsExtension();
+    }
+    @Override
+    public void parserOptions(MutableDataHolder mutableDataHolder) {
+
+    }
+
+    @Override
+    public void extend(Parser.Builder parserBuilder) {
+        parserBuilder.customDelimiterProcessor(new CommentsDelimiterProcessor());
+    }
+
+    @Override
+    public void rendererOptions(@NotNull MutableDataHolder mutableDataHolder) {
+
+    }
+
+    @Override
+    public void extend(HtmlRenderer.@NotNull Builder htmlRendererBuilder, @NotNull String s) {
+        if (htmlRendererBuilder.isRendererType("HTML")) {
+            htmlRendererBuilder.nodeRendererFactory(new CommentsNodeRenderer.Factory());
+        }
+    }
+}

+ 64 - 0
gtbook/src/main/java/com/vladsch/flexmark/ext/obs/comments/internal/CommentsDelimiterProcessor.java

@@ -0,0 +1,64 @@
+package com.vladsch.flexmark.ext.obs.comments.internal;
+
+
+import com.vladsch.flexmark.ext.obs.comments.Comments;
+import com.vladsch.flexmark.parser.InlineParser;
+import com.vladsch.flexmark.parser.core.delimiter.Delimiter;
+import com.vladsch.flexmark.parser.delimiter.DelimiterProcessor;
+import com.vladsch.flexmark.parser.delimiter.DelimiterRun;
+import com.vladsch.flexmark.util.ast.Node;
+import com.vladsch.flexmark.util.sequence.BasedSequence;
+public class CommentsDelimiterProcessor  implements DelimiterProcessor {
+
+    @Override
+    public char getOpeningCharacter() {
+        return '%';
+    }
+
+    @Override
+    public char getClosingCharacter() {
+        return '%';
+    }
+
+    @Override
+    public int getMinLength() {
+        return 2;
+    }
+
+    @Override
+    public boolean canBeOpener(String before, String after, boolean leftFlanking, boolean rightFlanking, boolean beforeIsPunctuation, boolean afterIsPunctuation, boolean beforeIsWhitespace, boolean afterIsWhiteSpace) {
+        return leftFlanking;
+    }
+
+    @Override
+    public boolean canBeCloser(String before, String after, boolean leftFlanking, boolean rightFlanking, boolean beforeIsPunctuation, boolean afterIsPunctuation, boolean beforeIsWhitespace, boolean afterIsWhiteSpace) {
+        return rightFlanking;
+    }
+
+    @Override
+    public Node unmatchedDelimiterNode(InlineParser inlineParser, DelimiterRun delimiter) {
+        return null;
+    }
+
+    @Override
+    public boolean skipNonOpenerCloser() {
+        return false;
+    }
+
+    @Override
+    public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) {
+        if (opener.length() >= 2 && closer.length() >= 2) {
+            // Use exactly two delimiters even if we have more, and don't care about internal openers/closers.
+            return 2;
+        } else {
+            return 0;
+        }
+    }
+
+    @Override
+    public void process(Delimiter opener, Delimiter closer, int delimitersUsed) {
+        // wrap nodes between delimiters in strikethrough.
+        Comments strikethrough = new Comments(opener.getTailChars(delimitersUsed), BasedSequence.NULL, closer.getLeadChars(delimitersUsed));
+        opener.moveNodesBetweenDelimitersTo(strikethrough, closer);
+    }
+}

+ 80 - 0
gtbook/src/main/java/com/vladsch/flexmark/ext/obs/comments/internal/CommentsNodeRenderer.java

@@ -0,0 +1,80 @@
+package com.vladsch.flexmark.ext.obs.comments.internal;
+
+
+import com.vladsch.flexmark.ext.obs.comments.Comments;
+import com.vladsch.flexmark.ext.obs.comments.CommentsExtension;
+import com.vladsch.flexmark.html.HtmlWriter;
+import com.vladsch.flexmark.html.renderer.NodeRenderer;
+import com.vladsch.flexmark.html.renderer.NodeRendererContext;
+import com.vladsch.flexmark.html.renderer.NodeRendererFactory;
+import com.vladsch.flexmark.html.renderer.NodeRenderingHandler;
+import com.vladsch.flexmark.util.data.DataHolder;
+import org.jetbrains.annotations.NotNull;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public class CommentsNodeRenderer implements NodeRenderer {
+
+    final private String strikethroughStyleHtmlOpen;
+    final private String strikethroughStyleHtmlClose;
+
+
+    public CommentsNodeRenderer(DataHolder options) {
+        strikethroughStyleHtmlOpen = CommentsExtension.COMMENTS_STYLE_HTML_OPEN.get(options);
+        strikethroughStyleHtmlClose = CommentsExtension.COMMENTS_STYLE_HTML_CLOSE.get(options);
+//        subscriptStyleHtmlOpen = StrikethroughSubscriptExtension.SUBSCRIPT_STYLE_HTML_OPEN.get(options);
+//        subscriptStyleHtmlClose = StrikethroughSubscriptExtension.SUBSCRIPT_STYLE_HTML_CLOSE.get(options);
+    }
+
+    @Override
+    public Set<NodeRenderingHandler<?>> getNodeRenderingHandlers() {
+        HashSet<NodeRenderingHandler<?>> set = new HashSet<>();
+        set.add(new NodeRenderingHandler<>(Comments.class, this::render));
+        //set.add(new NodeRenderingHandler<>(Subscript.class, this::render));
+        return set;
+    }
+
+    private void render(Comments node, NodeRendererContext context, HtmlWriter html) {
+        if (strikethroughStyleHtmlOpen == null || strikethroughStyleHtmlClose == null) {
+//            if (context.getHtmlOptions().sourcePositionParagraphLines) {
+//                html.withAttr().tag("<!-");
+//            } else {
+//                html.srcPos(node.getText()).withAttr().tag("<!-");
+//            }
+//            context.renderChildren(node);
+//            html.tag("->");
+            html.raw("<!-");
+            context.renderChildren(node);
+            html.raw("->");
+        } else {
+            html.raw(strikethroughStyleHtmlOpen);
+            context.renderChildren(node);
+            html.raw(strikethroughStyleHtmlClose);
+        }
+    }
+
+//    private void render(Subscript node, NodeRendererContext context, HtmlWriter html) {
+//        if (subscriptStyleHtmlOpen == null || subscriptStyleHtmlClose == null) {
+//            if (context.getHtmlOptions().sourcePositionParagraphLines) {
+//                html.withAttr().tag("sub");
+//            } else {
+//                html.srcPos(node.getText()).withAttr().tag("sub");
+//            }
+//            context.renderChildren(node);
+//            html.tag("/sub");
+//        } else {
+//            html.raw(subscriptStyleHtmlOpen);
+//            context.renderChildren(node);
+//            html.raw(subscriptStyleHtmlClose);
+//        }
+//    }
+
+    public static class Factory implements NodeRendererFactory {
+        @NotNull
+        @Override
+        public NodeRenderer apply(@NotNull DataHolder options) {
+            return new CommentsNodeRenderer(options);
+        }
+    }
+}

+ 45 - 0
gtbook/src/main/java/opennlp/tools/svm/IOneClassModel.java

@@ -0,0 +1,45 @@
+package opennlp.tools.svm;
+
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.svm.libsvm.svm_parameter;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Path;
+import java.util.function.Supplier;
+
+public interface IOneClassModel {
+    void setNu(double nu);
+
+    void setGamma(double gamma);
+
+    void setKernalType(int kernalType);
+
+    String[] getOutcomeNames();
+
+    void setOutcomeNames(String[] outcomeNames);
+
+    void setQuiet(boolean quiet);
+
+    void setThresholdSupplier(Supplier<Double> thresholdSupplier);
+
+    svm_parameter getParam();
+
+    void train(DataIndexer di) throws IOException;
+
+    double eval(String[] keys, float[] values);
+
+    boolean isAnomaly(String[] keys, float[] values);
+
+    void serialize(Path p) throws IOException;
+
+    void serialize(File f) throws IOException;
+
+    void serialize(OutputStream out) throws IOException;
+
+    void loadModel(File file) throws IOException;
+
+    void loadModel(InputStream in) throws IOException;
+}

+ 303 - 0
gtbook/src/main/java/opennlp/tools/svm/OneClassModel.java

@@ -0,0 +1,303 @@
+package opennlp.tools.svm;
+
+/*
+<dependency>
+  <groupId>com.github.chen0040</groupId>
+  <artifactId>java-libsvm</artifactId>
+  <version>1.0.4</version>
+</dependency>
+ */
+//import com.github.svm.data.frame.DataRow;
+//import com.github.svm.libsvm.*;
+import lombok.extern.slf4j.Slf4j;
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.svm.libsvm.*;
+
+import java.io.*;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Vector;
+import java.util.function.Supplier;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.*;
+
+@Slf4j
+public class OneClassModel implements Serializable, IOneClassModel {
+    public static final String MAGIC = "OneClassModel_v1";
+    /**
+     * Mapping between predicates/contexts and an integer representing them.
+     */
+    //protected Map<String, Context> pmap;
+    protected Map<String, Integer> pmap;
+
+    /**
+     * The names of the outcomes.
+     */
+    protected String[] outcomeNames;
+
+    // -g gamma : set gamma in kernel function (default 1/num_features)
+    // -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
+
+    private svm_parameter param;
+    private int cross_validation;
+    private svm_model model;
+    private boolean quiet;
+
+    private static svm_print_interface svm_print_null = new svm_print_interface() {
+        public void print(String s) {
+        }
+    };
+
+    public Supplier<Double> thresholdSupplier;
+
+    /*
+        svm_parameter param = new svm_parameter();
+        param.svm_type = svm_parameter.ONE_CLASS;
+        param.kernel_type = svm_parameter.RBF;
+        param.nu = 0.1;
+        param.gamma = 0.1;
+     */
+
+    public OneClassModel() {
+        pmap = new HashMap<>();
+        outcomeNames = new String[1];
+
+        svm_print_interface print_func = null;    // default printing to stdout
+
+        param = new svm_parameter();
+        // default values
+        param.svm_type = svm_parameter.ONE_CLASS;
+        param.kernel_type = svm_parameter.RBF;
+        param.degree = 3;
+        param.gamma = 0;    // 1/num_features
+        param.coef0 = 0;
+        param.nu = 0.5;
+        param.cache_size = 100;
+        param.C = 1;
+        param.eps = 1e-3;
+        param.p = 0.1;
+        param.shrinking = 1;
+        param.probability = 0;
+        param.nr_weight = 0;
+        param.weight_label = new int[0];
+        param.weight = new double[0];
+        cross_validation = 0;
+
+        svm_set_print_string_function(svm_print_null);
+        quiet = true;
+    }
+
+    private double threshold() {
+        if (thresholdSupplier == null) {
+            return 0;
+        } else {
+            return thresholdSupplier.get();
+        }
+    }
+
+    @Override
+    public void setNu(double nu) {
+        param.nu = nu;
+    }
+
+    @Override
+    public void setGamma(double gamma) {
+        param.gamma = gamma;
+    }
+
+    @Override
+    public void setKernalType(int kernalType) {
+        param.kernel_type = kernalType;
+    }
+
+    @Override
+    public String[] getOutcomeNames() {
+        return outcomeNames;
+    }
+
+    @Override
+    public void setOutcomeNames(String[] outcomeNames) {
+        this.outcomeNames = outcomeNames;
+    }
+
+    public boolean isQuiet() {
+        return quiet;
+    }
+
+    @Override
+    public void setQuiet(boolean quiet) {
+        this.quiet = quiet;
+    }
+
+    public Supplier<Double> getThresholdSupplier() {
+        return thresholdSupplier;
+    }
+
+    @Override
+    public void setThresholdSupplier(Supplier<Double> thresholdSupplier) {
+        this.thresholdSupplier = thresholdSupplier;
+    }
+
+    @Override
+    public svm_parameter getParam() {
+        return param;
+    }
+
+    public void setParam(svm_parameter param) {
+        this.param = param;
+    }
+
+    @Override
+    public void train(DataIndexer di) throws IOException {
+        if (this.quiet) {
+            svm_set_print_string_function(svm_print_null);
+        } else {
+            svm_set_print_string_function(null);
+        }
+
+        /* Incorporate all of the needed info *****/
+        log.info("Incorporating indexed data for training...");
+        int[][] contexts = di.getContexts();
+        float[][] values = di.getValues();
+        String[] predLabels = di.getPredLabels();
+
+        // 构造pmp
+        for (int i = 0; i < predLabels.length; i++) {
+            pmap.put(predLabels[i], i);
+        }
+
+        Vector<SupportVectorMachineNode[]> vx = new Vector<>();
+        int max_index = 0;
+
+        int m = contexts.length;
+
+        for (int i = 0; i < m; ++i) {
+            //DataRow tuple = dataFrame.row(i);
+            //double[] x0 = tuple.toArray();
+
+            double[] x0 = new double[predLabels.length];
+            int n = x0.length;
+
+            SupportVectorMachineNode[] x = new SupportVectorMachineNode[n];
+            for (int j = 0; j < n; j++) {
+                x[j] = new SupportVectorMachineNode();
+                x[j].index = j + 1;
+                //x[j].value = x0[j];
+            }
+            for (int j = 0; j < contexts[i].length; ++j) {
+                x[contexts[i][j]].value = values[i][j];
+            }
+            if (n > 0) max_index = Math.max(max_index, x[n - 1].index);
+
+            vx.addElement(x);
+        }
+
+        svm_problem prob = new svm_problem();
+        prob.l = m;
+        prob.x = new SupportVectorMachineNode[m][];
+        for (int i = 0; i < prob.l; i++)
+            prob.x[i] = vx.elementAt(i);
+        prob.y = new double[m];
+        for (int i = 0; i < prob.l; i++)
+            prob.y[i] = 0;
+
+        if (param.gamma == 0 && max_index > 0)
+            param.gamma = 1.0 / max_index;
+
+
+        model = svm_train(prob, param);
+    }
+
+    @Override
+    public double eval(String[] keys, float[] values) {
+        //double[] x0 = row.toArray();
+        double[] x0 = new double[pmap.size()];
+        int n = x0.length;
+
+        SupportVectorMachineNode[] x = new SupportVectorMachineNode[n];
+        for (int j = 0; j < n; j++) {
+            x[j] = new SupportVectorMachineNode();
+            x[j].index = j + 1;
+            //x[j].value = x0[j];
+        }
+        for (int j = 0; j < keys.length; j++) {
+            x[pmap.get(keys[j])].value = values[j];
+        }
+
+        double v = svm_predict(model, x);
+
+        return v;
+    }
+
+    @Override
+    public boolean isAnomaly(String[] keys, float[] values) {
+        double p = eval(keys, values);
+        ///QQQ dwp
+//			if(model.param.svm_type == svm_parameter.ONE_CLASS)
+//				return (sum>0)?1:-1;
+//			else
+//				return sum;
+
+        return p < threshold();
+    }
+
+
+
+    @Override
+    public final void serialize(Path p) throws IOException {
+        serialize(p.toFile());
+    }
+    @Override
+    public final void serialize(File f) throws IOException {
+        try (OutputStream out = new BufferedOutputStream(new FileOutputStream(f))) {
+            serialize(out);
+        }
+    }
+    @Override
+    public final void serialize(OutputStream out) throws IOException {
+        try {
+            GZIPOutputStream zout = new GZIPOutputStream(out);
+            ObjectOutputStream oos = new ObjectOutputStream(zout);
+            writeObject(oos);
+            oos.flush();
+            zout.close();
+        }catch (Exception e){
+            throw new IOException(e);
+        }
+    }
+
+    private void writeObject(ObjectOutputStream out) throws IOException {
+        out.writeUTF(MAGIC);
+        out.writeObject(pmap);
+        out.writeObject(model);
+    }
+
+    @Override
+    public void loadModel(File file) throws IOException {
+        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
+            loadModel(in);
+        }
+    }
+
+    @Override
+    public void loadModel(InputStream in) throws IOException {
+        try {
+            GZIPInputStream zip = new GZIPInputStream(in);
+            ObjectInputStream zin = new ObjectInputStream(new BufferedInputStream(zip));
+            readObject(zin);
+            zin.close();
+            zip.close();
+        } catch (Exception e) {
+            throw new IOException(e);
+        }
+    }
+
+    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
+        String m = in.readUTF();
+        pmap = (Map<String, Integer> )in.readObject();
+        model = (svm_model) in.readObject();
+    }
+}

+ 260 - 0
gtbook/src/main/java/opennlp/tools/svm/data/evaluators/BinaryClassifierEvaluator.java

@@ -0,0 +1,260 @@
+package opennlp.tools.svm.data.evaluators;
+
+import java.io.Serializable;
+
+
+/**
+ * Created by xschen on 10/9/2016.
+ */
/**
 * Accumulates binary-classification outcomes (TP/TN/FP/FN) and derives the
 * standard metrics: precision, recall/sensitivity, specificity, accuracy,
 * fall-out, mis-classification rate and F1 score.
 *
 * <p>Derived metrics are computed lazily and cached. Fixes over the original:
 * the cache is invalidated whenever counts change (previously {@link #evaluate}
 * and the count setters left stale cached metrics), zero-denominator ratios
 * return 0 instead of NaN (fall-out keeps its explicit infinity case),
 * {@link #getFallout()} refreshes like the other getters, and {@link #reset()}
 * clears every cached metric.
 *
 * Created by xschen on 10/9/2016.
 */
public class BinaryClassifierEvaluator implements Serializable {
   private static final long serialVersionUID = -6175902545631384642L;

   private int truePositive = 0;
   private int trueNegative = 0;
   private int falsePositive = 0;
   private int falseNegative = 0;

   private double precision;
   private double fallout; // false positive rate: FP / (FP + TN)
   private double accuracy;
   private double recall;
   private double specificity;
   private double sensitivity; // recall is the same as sensitivity by definition
   private double misclassificationRate;
   private double f1Score;

   private long startTime = 0L;
   private long endTime = 0L;

   // True while the cached metric fields reflect the current counts.
   private boolean isValid = false;

   public BinaryClassifierEvaluator(){
   }

   public BinaryClassifierEvaluator(int truePositive,
           int trueNegative,
           int falsePositive,
           int falseNegative){
      this.truePositive = truePositive;
      this.trueNegative = trueNegative;
      this.falsePositive = falsePositive;
      this.falseNegative = falseNegative;

      update();
   }

   /** Clears all counts and cached metrics. */
   public void reset(){
      trueNegative = 0;
      truePositive = 0;
      falsePositive = 0;
      falseNegative = 0;
      isValid = false;

      precision = 0;
      recall = 0;
      specificity = 0;
      sensitivity = 0;
      accuracy = 0;
      fallout = 0;
      misclassificationRate = 0;
      f1Score = 0;
   }

   /** Records one (actual, predicted) outcome pair. */
   public void evaluate(boolean actual, boolean predicted) {
      if(predicted){
         assertTruePositive(actual, predicted);
      } else {
         assertTrueNegative(actual, predicted);
      }
      // Fix: counts changed, so any previously computed metrics are stale.
      isValid = false;
   }

   private void assertTruePositive(boolean actual, boolean predicted){
      if(actual == predicted){
         truePositive++;
      } else {
         falsePositive++;
      }
   }

   private void assertTrueNegative(boolean actual, boolean predicted){
      if(actual == predicted){
         trueNegative++;
      } else {
         falseNegative++;
      }
   }

   // 0-safe ratio: returns 0 instead of NaN when the denominator is 0.
   private static double ratio(double numerator, double denominator) {
      return denominator == 0 ? 0 : numerator / denominator;
   }

   // Recomputes every derived metric from the current counts.
   private void update(){
      this.precision = ratio(truePositive, truePositive + falsePositive);

      this.sensitivity = ratio(truePositive, truePositive + falseNegative);
      this.specificity = ratio(trueNegative, trueNegative + falsePositive);

      // recall = sensitivity
      this.recall = this.sensitivity;

      int total = truePositive + trueNegative + falsePositive + falseNegative;
      this.accuracy = ratio(truePositive + trueNegative, total);

      // fallout = 1 - specificity; explicitly infinite when there are no negatives.
      if(falsePositive + trueNegative == 0){
         this.fallout = Double.POSITIVE_INFINITY;
      } else {
         this.fallout = (double) (falsePositive) / (falsePositive + trueNegative);
      }

      this.misclassificationRate = ratio(falsePositive + falseNegative, total);

      this.f1Score = ratio(2 * precision * recall, precision + recall);
      isValid = true;
   }

   public int getTruePositive() {
      return truePositive;
   }

   public void setTruePositive(int truePositive) {
      this.truePositive = truePositive;
      isValid = false; // counts changed
   }

   public int getTrueNegative() {
      return trueNegative;
   }

   public void setTrueNegative(int trueNegative) {
      this.trueNegative = trueNegative;
      isValid = false; // counts changed
   }

   public int getFalsePositive() {
      return falsePositive;
   }

   public void setFalsePositive(int falsePositive) {
      this.falsePositive = falsePositive;
      isValid = false; // counts changed
   }

   public int getFalseNegative() {
      return falseNegative;
   }

   public void setFalseNegative(int falseNegative) {
      this.falseNegative = falseNegative;
      isValid = false; // counts changed
   }

   public double getPrecision() {
      if(!isValid) {
         update();
      }
      return precision;
   }

   public void setPrecision(double precision) {
      this.precision = precision;
   }

   public double getAccuracy() {
      if(!isValid) {
         update();
      }
      return accuracy;
   }

   public void setAccuracy(double accuracy) {
      this.accuracy = accuracy;
   }

   public double getRecall() {
      if(!isValid) {
         update();
      }
      return recall;
   }

   public double getSpecificity() {
      if(!isValid) {
         update();
      }
      return specificity;
   }

   public double getF1Score() {
      if(!isValid) {
         update();
      }
      return f1Score;
   }

   public double getSensitivity() {
      if(!isValid) {
         update();
      }
      return sensitivity;
   }

   public void setSensitivity(double sensitivity) {
      this.sensitivity = sensitivity;
   }

   public double getMisclassificationRate() {
      if(!isValid) {
         update();
      }
      return misclassificationRate;
   }

   public void setMisclassificationRate(double misclassificationRate) {
      this.misclassificationRate = misclassificationRate;
   }

   public double getFallout() {
      // Fix: refresh like the other metric getters.
      if(!isValid) {
         update();
      }
      return fallout;
   }

   /** Prints {@link #getSummary()} to stdout. */
   public void report() {
      System.out.println(getSummary());
   }

   /** Multi-line human-readable summary of accuracy, error rate, F1 and timing. */
   public String getSummary() {
      StringBuilder sb = new StringBuilder();
      sb.append("Training accuracy: ").append(getAccuracy());
      sb.append("\nTraining mis-classification: ").append(getMisclassificationRate());
      sb.append("\nTraining f1-score: ").append(getF1Score());
      sb.append("\nDuration (seconds): ").append(durationInSeconds());

      return sb.toString();
   }

   public void startTimer() {
      startTime = System.currentTimeMillis();
   }

   public void stopTimer() {
      endTime = System.currentTimeMillis();
   }

   public long durationInSeconds() {
      return (endTime - startTime) / 1000;
   }
}

+ 245 - 0
gtbook/src/main/java/opennlp/tools/svm/data/evaluators/ClassifierEvaluator.java

@@ -0,0 +1,245 @@
+package opennlp.tools.svm.data.evaluators;
+
+
+//import com.github.svm.data.utils.NumberUtils;
+
+import opennlp.tools.svm.data.utils.NumberUtils;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * Created by xschen on 11/16/16.
+ */
+public class ClassifierEvaluator implements Serializable {
+   private static final long serialVersionUID = -6691826271325237852L;
+   private ConfusionMatrix confusionMatrix = new ConfusionMatrix();
+   public void evaluate(String actual, String predicted){
+      confusionMatrix.incCount(actual, predicted);
+   }
+
+   public List<String> classLabels() {
+      return confusionMatrix.getLabels();
+   }
+
+
+   public void reset() {
+      confusionMatrix.reset();
+   }
+
+
+   public ConfusionMatrix getConfusionMatrix() {
+      return confusionMatrix;
+   }
+
+
+   public void setConfusionMatrix(ConfusionMatrix confusionMatrix) {
+      this.confusionMatrix = confusionMatrix;
+   }
+
+   public double getAccuracy() {
+      double accuracy = 0;
+
+      List<String> list = confusionMatrix.getLabels();
+      int correctCount = 0;
+      int totalCount = 0;
+      for(int i=0; i < list.size(); ++i) {
+         String actual = list.get(i);
+         for(int j=0; j < list.size(); ++j) {
+            String predicted = list.get(j);
+            int value = confusionMatrix.getCount(actual, predicted);
+            correctCount += (i == j) ? value : 0;
+            totalCount += value;
+         }
+      }
+
+      if(totalCount > 0) {
+         accuracy = (double) correctCount / totalCount;
+      }
+
+      return accuracy;
+   }
+
+   public double getMisclassificationRate(){
+      return 1- getAccuracy();
+   }
+
+   public int getTruePositiveCount(String classLabel) {
+      return confusionMatrix.getCount(classLabel, classLabel);
+   }
+
+   public int getFalsePositiveCount(String classLabel) {
+      return confusionMatrix.getColumnSum(classLabel) - getTruePositiveCount(classLabel);
+   }
+
+   public double avgTruePositive() {
+      List<String> labels = classLabels();
+      if(labels.isEmpty())  return 0;
+
+      int sum = 0;
+      for(String label : labels) {
+         sum += getTruePositiveCount(label);
+      }
+      return (double)sum / labels.size();
+   }
+
+   public double avgFalsePositive() {
+      List<String> labels = classLabels();
+      if(labels.isEmpty())  return 0;
+
+      int sum = 0;
+      for(String label : labels) {
+         sum += getFalsePositiveCount(label);
+      }
+      return (double)sum / labels.size();
+   }
+
+   // Precision is the proportion of cases correctly identified as belonging to class c
+   // among all cases of which the classifier claims that they belong to class c
+   public Map<String, Double> getPrecisionByClass() {
+      Map<String, Double> result = new HashMap<>();
+      List<String> list = classLabels();
+      for(int i=0; i < list.size(); ++i) {
+         String label = list.get(i);
+         int correctCount = confusionMatrix.getCount(label, label);
+         int totalPredictedCount = confusionMatrix.getColumnSum(label);
+         double precision = 0;
+         if(totalPredictedCount > 0){
+            precision = (double)correctCount / totalPredictedCount;
+         }
+         result.put(label, precision);
+      }
+      return result;
+   }
+
+   // Recall is the proportion of cases correctly identified as belonging to class c among all
+   // cases that truely belong to class c.
+   public Map<String, Double> getRecallByClass(){
+
+      Map<String, Double> result = new HashMap<>();
+
+      List<String> list = classLabels();
+
+      for(int i=0; i < list.size(); ++i) {
+         String label = list.get(i);
+         int correctCount = confusionMatrix.getCount(label, label);
+         int totalTrueCount = confusionMatrix.getRowSum(label);
+         double recall = 0;
+         if(totalTrueCount > 0) {
+            recall = (double)correctCount / totalTrueCount;
+         }
+
+         result.put(label,recall);
+      }
+
+      return result;
+   }
+
+   // fallout is the proportion of cases incorrectly identified as belonging to class c among all
+   // cases that truely not belonging to class c.
+   // fallout is the false-positive rate.
+   public Map<String, Double> getFalloutByClass(){
+
+      Map<String, Double> result = new HashMap<>();
+
+      List<String> list = classLabels();
+
+      for(int i=0; i < list.size(); ++i) {
+         String label = list.get(i);
+
+         int totalNegativeCount = 0;
+
+         int falsePositiveCount = 0;
+         for(int j=0; j < list.size(); ++j) {
+            if(i==j) continue;
+            String notTrueLabel = list.get(j);
+            falsePositiveCount += confusionMatrix.getCount(notTrueLabel, label);
+            totalNegativeCount += confusionMatrix.getRowSum(notTrueLabel);
+         }
+         double fallout = 0;
+         if(totalNegativeCount > 0) {
+            fallout = (double)falsePositiveCount / totalNegativeCount;
+         }
+
+         result.put(label,fallout);
+      }
+
+      return result;
+   }
+
+   public Map<String, Double> getF1ScoreByClass() {
+      Map<String, Double> precisions = getPrecisionByClass();
+      Map<String, Double> recalls = getRecallByClass();
+
+      List<String> labels = classLabels();
+
+      Map<String, Double> result = new HashMap<>();
+
+      for(String label : labels) {
+         double precision = precisions.get(label);
+         double recall = recalls.get(label);
+         if(NumberUtils.isZero(precision+recall)){
+            continue;
+         }
+
+         double f1score = 2 * (precision * recall) / (precision + recall);
+         result.put(label, f1score);
+      }
+
+      return result;
+   }
+
+   // concept of macro-f1 score can be found here: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.8244&rep=rep1&type=pdf
+   public double getMacroF1Score() {
+      double sum = 0;
+      int count = 0;
+      Map<String, Double> data = getF1ScoreByClass();
+      for(Map.Entry<String, Double> entry : data.entrySet()) {
+         sum += entry.getValue();
+         count++;
+      }
+      if(count == 0) return 0;
+      return sum / count;
+   }
+
+   // concept of micro-f1 score can be found here: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.8244&rep=rep1&type=pdf
+   public double getMicroF1Score() {
+      Map<String, Double> precisions = getPrecisionByClass();
+      Map<String, Double> recalls = getRecallByClass();
+
+      List<String> labels = classLabels();
+
+      double precisionAvg = 0;
+      double recallAvg = 0;
+      for(String label : labels) {
+         double precision = precisions.get(label);
+         double recall = recalls.get(label);
+         precisionAvg += precision;
+         recallAvg += recall;
+      }
+
+      precisionAvg /= labels.size();
+      recallAvg /= labels.size();
+
+
+      return 2 * (precisionAvg * recallAvg) / (precisionAvg + recallAvg);
+   }
+
+   public String getSummary() {
+      StringBuilder sb = new StringBuilder();
+      sb.append("accuracy: ").append(getAccuracy());
+      sb.append("\nmis-classification: ").append(getMisclassificationRate());
+      sb.append("\nmacro f1-score: ").append(getMacroF1Score());
+      sb.append("\nmicro f1-score: ").append(getMicroF1Score());
+
+      return sb.toString();
+   }
+
+   public void report(){
+      System.out.println(getSummary());
+   }
+
+}

+ 68 - 0
gtbook/src/main/java/opennlp/tools/svm/data/evaluators/ConfusionMatrix.java

@@ -0,0 +1,68 @@
+package opennlp.tools.svm.data.evaluators;
+
+import opennlp.tools.svm.data.utils.TupleTwo;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+
+/**
+ * Created by xschen on 11/16/16.
+ */
+public class ConfusionMatrix {
+   private Map<TupleTwo<String, String>, Integer> matrix = new HashMap<>();
+   private Set<String> labels = new HashSet<>();
+
+   public void incCount(String actual, String predicted) {
+      labels.add(actual);
+      labels.add(predicted);
+      TupleTwo<String, String> key = new TupleTwo<>(actual, predicted);
+      matrix.put(key, matrix.getOrDefault(key, 0) + 1);
+   }
+
+   public List<String> getLabels(){
+      List<String> result = new ArrayList<>();
+
+      result.addAll(labels.stream().collect(Collectors.toList()));
+
+      return result;
+   }
+
+
+
+   // sum of a row representing class c, which is sum of cases that truely belong to class c
+   public int getRowSum(String actual) {
+      List<String> list = this.getLabels();
+      int sum = 0;
+      for(int i=0; i < list.size(); ++i) {
+         String predicted = list.get(i);
+         sum += getCount(actual, predicted);
+      }
+      return sum;
+   }
+
+
+   // sum of a column representing class c, which is sum of cases the classifiers claims to belong to class c
+   public int getColumnSum(String predicted) {
+      List<String> list = this.getLabels();
+      int sum = 0;
+      for(int i=0; i < list.size(); ++i) {
+         String actual = list.get(i);
+         sum += getCount(actual, predicted);
+      }
+      return sum;
+   }
+
+
+
+   public int getCount(String actual, String predicted) {
+      return matrix.getOrDefault(new TupleTwo<>(actual, predicted), 0);
+   }
+
+
+   public void reset() {
+      matrix.clear();
+   }
+
+
+}

+ 91 - 0
gtbook/src/main/java/opennlp/tools/svm/data/evaluators/RegressionEvaluator.java

@@ -0,0 +1,91 @@
+package opennlp.tools.svm.data.evaluators;
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+
/**
 * Created by xschen on 10/13/16.
 *
 * Accumulates (predicted, actual) pairs for a regression model and computes
 * MSE, RMSE, MAE and R-square. Feed pairs via {@link #evaluate(double, double)},
 * call {@link #update()}, then read the metrics; the getters return whatever
 * the most recent update() computed.
 */
public class RegressionEvaluator {

   private List<Double> predictedOutputValues = new ArrayList<>();
   private List<Double> actualOutputValues = new ArrayList<>();

   // mean square error penalize series that have values which has large error
   private double meanSquaredError = 0;
   private double rootMeanSquaredError = 0;

   // mean absolute error penalize series that have lots of values with small errors
   private double meanAbsoluteError = 0;

   // R-square describes the proportion of variance in the response variable explained by the regression model
   private double RSquare = 0;

   /**
    * Clears the accumulated pairs. The computed metrics keep their last
    * values until the next update().
    */
   public void reset(){
      predictedOutputValues.clear();
      actualOutputValues.clear();
   }

   /** Records one (predicted, actual) pair. */
   public void evaluate(double predicted,double actual){
      predictedOutputValues.add(predicted);
      actualOutputValues.add(actual);
   }

   /** Recomputes all metrics from the recorded pairs. No-op when empty. */
   public void update(){
      int size = predictedOutputValues.size();

      if(size == 0) return;

      // Fix: R-square's total sum of squares must be taken around the mean
      // of the ACTUAL response values; the original used the mean of the
      // predicted values, which is not the standard coefficient of
      // determination.
      double meanActual = 0;
      for(int i=0; i < size; ++i) {
         meanActual += actualOutputValues.get(i);
      }
      meanActual /= size;

      double squaredSum = 0;
      double absoluteSum = 0;
      double ssTotal = 0;
      double ssRes = 0;
      for(int i=0; i < size; ++i){
         double predicted = predictedOutputValues.get(i);
         double actual = actualOutputValues.get(i);

         double difference = predicted - actual;

         squaredSum += difference * difference;
         absoluteSum += Math.abs(difference);

         ssTotal += (actual - meanActual) * (actual - meanActual);
         ssRes += difference * difference;
      }

      meanSquaredError = squaredSum / size;
      meanAbsoluteError = absoluteSum / size;

      rootMeanSquaredError = Math.sqrt(meanSquaredError);

      // Fix: guard against division by zero when the actual values are all
      // identical (ssTotal == 0): report 1 for a perfect fit and 0 otherwise,
      // instead of NaN / -Infinity.
      if(ssTotal == 0) {
         RSquare = (ssRes == 0) ? 1 : 0;
      } else {
         RSquare = 1 - ssRes / ssTotal;
      }
   }

   public double getMeanSquaredError(){
      return meanSquaredError;
   }

   public double getRootMeanSquaredError(){
      return rootMeanSquaredError;
   }

   public double getMeanAbsoluteError() {
      return meanAbsoluteError;
   }

   public double getRSquare() {
      return RSquare;
   }
}

+ 8 - 0
gtbook/src/main/java/opennlp/tools/svm/data/exceptions/NotImplementedException.java

@@ -0,0 +1,8 @@
+package opennlp.tools.svm.data.exceptions;
+
+
/**
 * Created by xschen on 11/5/2017.
 *
 * Unchecked exception signalling that a code path has not been implemented.
 * Generalized: message and cause constructors added (backward compatible —
 * the original no-arg form still works), plus a serialVersionUID since
 * RuntimeException is serializable.
 */
public class NotImplementedException extends RuntimeException {
   private static final long serialVersionUID = 1L;

   /** No detail message (original behavior). */
   public NotImplementedException() {
   }

   /** @param message description of the missing functionality */
   public NotImplementedException(String message) {
      super(message);
   }

   /**
    * @param message description of the missing functionality
    * @param cause   underlying cause, preserved for diagnostics
    */
   public NotImplementedException(String message, Throwable cause) {
      super(message, cause);
   }
}

+ 316 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/BasicDataFrame.java

@@ -0,0 +1,316 @@
+package opennlp.tools.svm.data.frame;
+
+import opennlp.tools.svm.data.utils.CollectionUtils;
+import opennlp.tools.svm.data.utils.TupleTwo;
+
+import java.io.Serializable;
+import java.util.*;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+
+/**
+ * Created by xschen on 1/5/2017.
+ */
+public class BasicDataFrame implements DataFrame, Serializable {
+
+   private static final long serialVersionUID = 4096198912048757493L;
+   private final List<DataRow> rows = new ArrayList<>();
+   private final List<InputDataColumn> inputDataColumns = new ArrayList<>();
+   private final List<OutputDataColumn> outputDataColumns = new ArrayList<>();
+   private boolean locked = false;
+   private final Map<String, List<String>> levels = new HashMap<>();
+
+   @Override public int rowCount() {
+      return rows.size();
+   }
+
+
+   @Override public DataRow row(int i) {
+      return rows.get(i);
+   }
+
+
+   @Override public List<InputDataColumn> getInputColumns() {
+      return inputDataColumns;
+   }
+
+
+   @Override public List<OutputDataColumn> getOutputColumns() {
+      return outputDataColumns;
+   }
+
+
+   @Override public List<DataColumn> getAllColumns() {
+      List<DataColumn> result = new ArrayList<>();
+      result.addAll(inputDataColumns);
+      result.addAll(outputDataColumns);
+      return result;
+   }
+
+
+   @Override public List<String> rowArrayDescriptors() {
+      List<String> numericInputColumns = inputDataColumns.stream().filter(c -> !c.isCategorical()).map(InputDataColumn::getColumnName).collect(Collectors.toList());
+      List<String> categoricalInputColumns = inputDataColumns.stream().filter(InputDataColumn::isCategorical).map(InputDataColumn::getColumnName).collect(Collectors.toList());
+
+      List<String> result = new ArrayList<>();
+      result.addAll(numericInputColumns);
+      for(String c : categoricalInputColumns){
+         List<String> levelsInFactor = levels.get(c);
+         int count = levelsInFactor.size();
+         if(count == 2) count = 1;
+         for(int j=0; j < count;++j){
+            result.add(c + ":" + levelsInFactor.get(j));
+         }
+      }
+      return result;
+   }
+
+
+   @Override public void unlock(){
+      locked = false;
+   }
+
+   @Override
+   public boolean isLocked() {
+      return locked;
+   }
+
+
+   @Override public void lock() {
+
+      Map<String, Set<String>> inputLevels = new HashMap<>();
+      Map<String, Set<String>> outputLevels = new HashMap<>();
+
+      for(DataRow row : rows){
+         List<String> keys = row.getColumnNames();
+         for(String key: keys) {
+            Set<String> set;
+
+            if(!inputLevels.containsKey(key)){
+               set = new HashSet<>();
+               inputLevels.put(key, set);
+            }
+         }
+
+         keys = row.getCategoricalColumnNames();
+         for(String key: keys) {
+            Set<String> set;
+
+            if(inputLevels.containsKey(key)){
+               set = inputLevels.get(key);
+            } else {
+               set = new HashSet<>();
+               inputLevels.put(key, set);
+            }
+
+            set.add(row.getCategoricalCell(key));
+         }
+
+         keys = row.getTargetColumnNames();
+         for(String key: keys) {
+            Set<String> set;
+
+            if(!outputLevels.containsKey(key)){
+               set = new HashSet<>();
+               outputLevels.put(key, set);
+            }
+         }
+
+         keys = row.getCategoricalTargetColumnNames();
+         for(String key: keys) {
+            Set<String> set;
+
+            if(outputLevels.containsKey(key)){
+               set = outputLevels.get(key);
+            } else {
+               set = new HashSet<>();
+               outputLevels.put(key, set);
+            }
+
+            set.add(row.getCategoricalTargetCell(key));
+         }
+      }
+
+      inputDataColumns.clear();
+      for(Map.Entry<String, Set<String>> entry : inputLevels.entrySet()){
+         Set<String> set = entry.getValue();
+         InputDataColumn inputDataColumn = new InputDataColumn();
+         inputDataColumn.setColumnName(entry.getKey());
+
+         List<String> levels = set.stream().collect(Collectors.toList());
+         levels.sort(String::compareTo);
+         inputDataColumn.setLevels(levels);
+         inputDataColumns.add(inputDataColumn);
+      }
+
+      outputDataColumns.clear();
+      for(Map.Entry<String, Set<String>> entry : outputLevels.entrySet()){
+         Set<String> set = entry.getValue();
+         OutputDataColumn outputDataColumn = new OutputDataColumn();
+         outputDataColumn.setColumnName(entry.getKey());
+
+         List<String> levels = set.stream().collect(Collectors.toList());
+         levels.sort(String::compareTo);
+         outputDataColumn.setLevels(levels);
+         outputDataColumns.add(outputDataColumn);
+      }
+
+      inputDataColumns.sort((a, b) -> a.getColumnName().compareTo(b.getColumnName()));
+      outputDataColumns.sort((a, b) -> a.getColumnName().compareTo(b.getColumnName()));
+
+      List<String> numericInputColumns = inputDataColumns.stream().filter(c -> !c.isCategorical()).map(InputDataColumn::getColumnName).collect(Collectors.toList());
+      List<String> categoricalInputColumns = inputDataColumns.stream().filter(InputDataColumn::isCategorical).map(InputDataColumn::getColumnName).collect(Collectors.toList());
+      List<String> numericOutputColumns = outputDataColumns.stream().filter(c -> !c.isCategorical()).map(OutputDataColumn::getColumnName).collect(Collectors.toList());
+      List<String> categoricalOutputColumns = outputDataColumns.stream().filter(OutputDataColumn::isCategorical).map(OutputDataColumn::getColumnName).collect(Collectors.toList());
+
+      numericInputColumns.sort(String::compareTo);
+      categoricalInputColumns.sort(String::compareTo);
+      numericOutputColumns.sort(String::compareTo);
+      categoricalOutputColumns.sort(String::compareTo);
+
+      levels.clear();
+
+      for(Map.Entry<String, Set<String>> entry : inputLevels.entrySet()){
+         List<String> levelsInFactor = entry.getValue().stream().collect(Collectors.toList());
+         levelsInFactor.sort(String::compareTo);
+         levels.put(entry.getKey(), levelsInFactor);
+      }
+
+      for(Map.Entry<String, Set<String>> entry : outputLevels.entrySet()){
+         List<String> levelsInFactor = entry.getValue().stream().collect(Collectors.toList());
+         levelsInFactor.sort(String::compareTo);
+         levels.put(entry.getKey(), levelsInFactor);
+      }
+
+      for(int i=0; i < rowCount(); ++i) {
+         DataRow row = row(i);
+         row.setColumnNames(numericInputColumns);
+         row.setCategoricalColumnNames(categoricalInputColumns);
+         row.setTargetColumnNames(numericOutputColumns);
+         row.setCategoricalTargetColumnNames(categoricalOutputColumns);
+         row.setLevels(levels);
+      }
+
+      locked = true;
+   }
+
+
+   @Override public DataRow newRow() {
+      return new BasicDataRow();
+   }
+
+
+   @Override public void addRow(DataRow row) {
+      if(locked) {
+         throw new RuntimeException("Data frame is currently locked, please unlock first");
+      }
+      rows.add(row);
+   }
+
+
+   @Override public String head(int limit) {
+      StringBuilder sb = new StringBuilder();
+      int max = Math.min(limit, rowCount());
+      for(int i=0; i < max; ++i) {
+         if(i != 0){
+            sb.append("\n");
+         }
+         sb.append(row(i));
+      }
+      return sb.toString();
+   }
+
+
+   @Override public DataFrame shuffle() {
+      Random random = new Random(System.currentTimeMillis());
+      for(int i=1; i < rows.size(); ++i) {
+         int j = random.nextInt(i+1);
+         CollectionUtils.exchange(rows, i, j);
+      }
+      return this;
+   }
+
+
+   @Override public TupleTwo<DataFrame, DataFrame> split(double ratio) {
+      assert this.locked;
+
+      BasicDataFrame frame1 = new BasicDataFrame();
+      BasicDataFrame frame2 = new BasicDataFrame();
+
+      frame1.inputDataColumns.addAll(inputDataColumns.stream().map(InputDataColumn::makeCopy).collect(Collectors.toList()));
+      frame2.inputDataColumns.addAll(inputDataColumns.stream().map(InputDataColumn::makeCopy).collect(Collectors.toList()));
+
+      frame1.outputDataColumns.addAll(outputDataColumns.stream().map(OutputDataColumn::makeCopy).collect(Collectors.toList()));
+      frame2.outputDataColumns.addAll(outputDataColumns.stream().map(OutputDataColumn::makeCopy).collect(Collectors.toList()));
+
+      int split = (int)(rows.size() * ratio);
+      for(int i=0; i < split; ++i) {
+         frame1.addRow(rows.get(i).makeCopy());
+      }
+      for(int i=split; i < rows.size(); ++i){
+         frame2.addRow(rows.get(i).makeCopy());
+      }
+
+      return new TupleTwo<>(frame1, frame2);
+
+   }
+
+
+   @Override public Stream<DataRow> stream() {
+      return rows.stream();
+   }
+
+
+   @Override public DataFrame makeCopy() {
+      BasicDataFrame clone = new BasicDataFrame();
+      clone.copy(this);
+      return clone;
+   }
+
+   private void copy(DataFrame that){
+      rows.clear();
+      inputDataColumns.clear();
+      outputDataColumns.clear();
+      levels.clear();
+
+      unlock();
+
+      for(DataRow row : that.rows()){
+         DataRow newRow = newRow();
+         newRow.copy(row);
+         addRow(row);
+      }
+
+      lock();
+   }
+
+
+   @Override public DataFrame filter(Predicate<DataRow> predicate) {
+      DataFrame clone = new BasicDataFrame();
+      for(DataRow row : rows){
+         if(predicate.test(row)){
+            DataRow newRow = clone.newRow();
+            newRow.copy(row);
+            clone.addRow(newRow);
+         }
+      }
+      clone.lock();
+      return clone;
+   }
+
+
+   @Override public Iterable<? extends DataRow> rows() {
+      return rows;
+   }
+
+
+   @Override public Iterator<DataRow> iterator() {
+      return rows.iterator();
+   }
+
+   @Override public Map<String, List<String>> getLevels(){
+      return levels;
+   }
+}

+ 321 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/BasicDataRow.java

@@ -0,0 +1,321 @@
+package opennlp.tools.svm.data.frame;
+
+import opennlp.tools.svm.data.utils.CollectionUtils;
+import opennlp.tools.svm.data.utils.StringUtils;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+
+/**
+ * Created by xschen on 1/5/2017.
+ * A data row consists two types of columns:
+ *
+ * column: a column represents an input column for which values are numeric
+ * target column: a target column represents an output column for which values are numeric
+ */
+public class BasicDataRow implements DataRow, Serializable {
+
+   private static final long serialVersionUID = -1932292620125077757L;
+   private final Map<String, Double> targets = new HashMap<>();
+   private final Map<String, String> categoricalTargets = new HashMap<>();
+
+   private final Map<String, Double> values = new HashMap<>();
+   private final Map<String, String> categoricalValues = new HashMap<>();
+
+   private final List<String> columns = new ArrayList<>();
+   private final List<String> categoricalColumns = new ArrayList<>();
+
+   private final List<String> targetColumns = new ArrayList<>();
+   private final List<String> categoricalTargetColumns = new ArrayList<>();
+
+   private final Map<String, List<String>> levels = new HashMap<>();
+
+   @Override public double target() {
+      return getTargetCell(targetColumnName());
+   }
+
+   @Override public String categoricalTarget() {
+      return getCategoricalTargetCell(categoricalTargetColumnName());
+   }
+
+   @Override
+   public double getTargetCell(String columnName){
+      return targets.getOrDefault(columnName, 0.0);
+   }
+
+   @Override
+   public String getCategoricalTargetCell(String columnName){
+      return categoricalTargets.getOrDefault(columnName, "");
+   }
+
+   @Override
+   public void setTargetCell(String columnName, double value) {
+      if(value == 0.0) {
+         targets.remove(columnName);
+      }
+      targets.put(columnName, value);
+   }
+
+   @Override
+   public DataRow setCategoricalTargetCell(String columnName, String value) {
+      if(StringUtils.isEmpty(value)) {
+         categoricalTargets.remove(columnName);
+      }
+      categoricalTargets.put(columnName, value);
+      return this;
+   }
+
+   @Override public DataRow setColumnNames(List<String> inputColumns) {
+      columns.clear();
+      columns.addAll(inputColumns);
+      return this;
+   }
+
+   @Override public DataRow setCategoricalColumnNames(List<String> inputColumns) {
+      categoricalColumns.clear();
+      categoricalColumns.addAll(inputColumns);
+      return this;
+   }
+
+   @Override public DataRow setLevels(Map<String, List<String>> levels){
+      this.levels.clear();
+      this.levels.putAll(levels);
+      return this;
+   }
+
+
+
+   @Override public DataRow setTargetColumnNames(List<String> outputColumns) {
+      targetColumns.clear();
+      targetColumns.addAll(outputColumns);
+      return this;
+   }
+
+   @Override public DataRow setCategoricalTargetColumnNames(List<String> outputColumns) {
+      categoricalTargetColumns.clear();
+      categoricalTargetColumns.addAll(outputColumns);
+      return this;
+   }
+
+   @Override public DataRow makeCopy() {
+      DataRow clone = new BasicDataRow();
+      clone.copy(this);
+      return clone;
+   }
+
+
+   @Override public void copy(DataRow that) {
+
+      targets.clear();
+      categoricalTargets.clear();
+      values.clear();
+      categoricalValues.clear();
+      columns.clear();
+      categoricalColumns.clear();
+      targetColumns.clear();
+      categoricalTargetColumns.clear();
+
+      for(String c : that.getTargetColumnNames()){
+         targets.put(c, that.getTargetCell(c));
+      }
+
+      for(String c : that.getColumnNames()) {
+         values.put(c, that.getCell(c));
+      }
+
+      for(String c : that.getCategoricalColumnNames()){
+         categoricalValues.put(c, that.getCategoricalCell(c));
+      }
+
+      for(String c : that.getCategoricalTargetColumnNames()) {
+         categoricalTargets.put(c, that.getCategoricalTargetCell(c));
+      }
+
+      setColumnNames(that.getColumnNames());
+      setCategoricalColumnNames(that.getCategoricalColumnNames());
+      setTargetColumnNames(that.getTargetColumnNames());
+      setCategoricalTargetColumnNames(that.getCategoricalTargetColumnNames());
+
+      levels.clear();
+      for(Map.Entry<String, List<String>> entry : that.getLevels().entrySet()){
+         levels.put(entry.getKey(), CollectionUtils.clone(entry.getValue(), x-> x));
+      }
+   }
+
+
+   @Override public String targetColumnName() {
+      return getTargetColumnNames().get(0);
+   }
+
+   @Override public String categoricalTargetColumnName() {
+      return getCategoricalTargetColumnNames().get(0);
+   }
+
+
+   @Override public double[] toArray() {
+      List<String> cols = getColumnNames();
+
+      List<Double> result = new ArrayList<>();
+      for(int i=0; i < cols.size(); ++i) {
+         result.add(getCell(cols.get(i)));
+      }
+
+      cols = getCategoricalColumnNames();
+      for(int i=0; i < cols.size(); ++i) {
+         String name = cols.get(i);
+         String val = getCategoricalCell(name);
+         List<String> levelsInFactor = levels.get(name);
+         int index = levelsInFactor.indexOf(val);
+         int count = levelsInFactor.size();
+         if(count == 2) count = 1;
+         for(int j=0; j < count; ++j){
+            result.add(j == index ? 1.0 : 0.0);
+         }
+      }
+
+      return CollectionUtils.toDoubleArray(result);
+   }
+
+   private void buildColumns(){
+      List<String> cols = values.keySet().stream().collect(Collectors.toList());
+      cols.sort(String::compareTo);
+      columns.addAll(cols);
+   }
+
+   private void buildCategoricalColumns(){
+      List<String> cols = categoricalValues.keySet().stream().collect(Collectors.toList());
+      cols.sort(String::compareTo);
+      categoricalColumns.addAll(cols);
+   }
+
+   private void buildTargetColumns(){
+      List<String> cols = targets.keySet().stream().collect(Collectors.toList());
+      cols.sort(String::compareTo);
+      targetColumns.addAll(cols);
+   }
+
+   private void buildCategoricalTargetColumns(){
+      List<String> cols = categoricalTargets.keySet().stream().collect(Collectors.toList());
+      cols.sort(String::compareTo);
+      categoricalTargetColumns.addAll(cols);
+   }
+
+   @Override public DataRow setCell(String columnName, double value) {
+      if(value == 0.0) {
+         values.remove(columnName);
+      }
+
+      values.put(columnName, value);
+      return this;
+   }
+
+
+   @Override public DataRow setCategoricalCell(String columnName, String value) {
+      if(StringUtils.isEmpty(value)) {
+         categoricalValues.remove(columnName);
+      }
+
+      categoricalValues.put(columnName, value);
+      return this;
+   }
+
+   @Override public List<String> getColumnNames() {
+      if(columns.size() < values.size()) {
+         buildColumns();
+      }
+      return columns;
+   }
+
+   @Override public List<String> getCategoricalColumnNames() {
+      if(categoricalColumns.size() < categoricalValues.size()){
+         buildCategoricalColumns();
+      }
+      return categoricalColumns;
+   }
+
+   @Override
+   public List<String> getTargetColumnNames() {
+      if(targetColumns.size() < targets.size()){
+         buildTargetColumns();
+      }
+      return targetColumns;
+   }
+
+   @Override
+   public List<String> getCategoricalTargetColumnNames() {
+      if(categoricalTargetColumns.size() < categoricalTargets.size()){
+         buildCategoricalTargetColumns();
+      }
+      return categoricalTargetColumns;
+   }
+
+   @Override public double getCell(String key) {
+      return values.getOrDefault(key, 0.0);
+   }
+
+   @Override public String getCategoricalCell(String key) {
+      return categoricalValues.getOrDefault(key, "");
+   }
+
+
+   @Override public Map<String, List<String>> getLevels() {
+      return levels;
+   }
+
+
+   @Override
+   public String toString(){
+      StringBuilder sb = new StringBuilder();
+      List<String> keys = getColumnNames();
+      for(int i=0; i < keys.size(); ++i){
+         if(i != 0){
+            sb.append(", ");
+         }
+         sb.append(keys.get(i)).append(":").append(getCell(keys.get(i)));
+      }
+
+      List<String> keys2 = getCategoricalColumnNames();
+      if(!keys2.isEmpty() && !keys.isEmpty()){
+         sb.append(", ");
+      }
+      for(int i=0; i < keys2.size(); ++i){
+         if(i != 0){
+            sb.append(", ");
+         }
+         sb.append(keys2.get(i)).append(":").append(getCategoricalCell(keys2.get(i)));
+      }
+      sb.append(" =>");
+
+      keys = getTargetColumnNames();
+      if(!keys.isEmpty()) {
+         sb.append(" (");
+         for (int i = 0; i < keys.size(); ++i) {
+            if (i != 0) {
+               sb.append(", ");
+            }
+            sb.append(keys.get(i)).append(":").append(getTargetCell(keys.get(i)));
+         }
+         sb.append(")");
+      }
+
+      keys = getCategoricalTargetColumnNames();
+      if(!keys.isEmpty()){
+         sb.append(" (");
+         for(int i=0; i < keys.size(); ++i){
+            if(i != 0){
+               sb.append(", ");
+            }
+            sb.append(keys.get(i)).append(":").append(getCategoricalTargetCell(keys.get(i)));
+         }
+         sb.append(")");
+      }
+
+
+      return sb.toString();
+   }
+}

+ 30 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/DataColumn.java

@@ -0,0 +1,30 @@
+package opennlp.tools.svm.data.frame;
+
+
+import java.util.List;
+
+
/**
 * Created by xschen on 21/5/2017.
 *
 * Metadata for a single data-frame column: its name, whether it is categorical
 * (has discrete levels) or numerical, and whether it serves as a model input
 * (feature) or output (target).
 */
public interface DataColumn {
   /** Name of the column. */
   String getColumnName();

   void setColumnName(String columnName);

   /** True when the column holds discrete levels rather than numeric values. */
   boolean isCategorical();

   /** Replaces the list of distinct categorical values (levels) of this column. */
   void setLevels(List<String> levels);

   /** The distinct categorical values of this column; empty for numeric columns. */
   List<String> getLevels();

   /** True for target (output) columns, false for feature (input) columns. */
   boolean isOutputColumn();

   /** Convenience: a column is an input column iff it is not an output column. */
   default boolean isInputColumn(){
      return !isOutputColumn();
   }

   /** Convenience: a column is numerical iff it is not categorical. */
   default boolean isNumerical(){
      return !isCategorical();
   }
}

+ 10 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/DataFileType.java

@@ -0,0 +1,10 @@
+package opennlp.tools.svm.data.frame;
+
+
/**
 * Created by xschen on 2/5/2017.
 *
 * Source formats a DataFrame can be built from:
 * Csv — delimited text; Memory — an empty in-memory frame whose rows are added
 * programmatically; HeartScale — libsvm-style "label index:value ..." sparse
 * text (named after the heart_scale sample file).
 */
public enum DataFileType {
   Csv,
   Memory, HeartScale
}

+ 52 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/DataFrame.java

@@ -0,0 +1,52 @@
+package opennlp.tools.svm.data.frame;
+
+import opennlp.tools.svm.data.utils.TupleTwo;
+
+import java.util.List;
+import java.util.Map;
+import java.util.function.Predicate;
+import java.util.stream.Stream;
+
+
/**
 * Created by xschen on 28/4/2017.
 *
 * A tabular data set of {@link DataRow}s with named input (feature) and output
 * (target) columns. Frames are mutated while unlocked (addRow) and sealed with
 * {@link #lock()}.
 */
public interface DataFrame extends Iterable<DataRow> {
   /** Number of rows currently in the frame. */
   int rowCount();

   /** Returns the i-th row (0-based). */
   DataRow row(int i);

   /** Columns used as model inputs (features). */
   List<InputDataColumn> getInputColumns();

   /** Columns used as model outputs (targets). */
   List<OutputDataColumn> getOutputColumns();

   /** All input and output columns together. */
   List<DataColumn> getAllColumns();

   /** Descriptors for the positions of a row's array form (see DataRow#toArray). */
   List<String> rowArrayDescriptors();

   /** Re-opens the frame for modification. */
   void unlock();

   /** Whether the frame is currently sealed against modification. */
   boolean isLocked();

   /** Seals the frame after rows have been added. */
   void lock();

   /** Creates a new, empty row compatible with this frame's columns. */
   DataRow newRow();

   /** Appends a row; the frame should be unlocked. */
   void addRow(DataRow row);

   /** Human-readable preview of up to {@code limit} rows. */
   String head(int limit);

   /** Returns a frame with the same rows in randomized order. */
   DataFrame shuffle();

   /** Splits into two frames by the given row ratio (e.g. train/test). */
   TupleTwo<DataFrame, DataFrame> split(double ratio);

   /** Stream over the frame's rows. */
   Stream<DataRow> stream();

   /** Deep copy of the frame. */
   DataFrame makeCopy();

   /** Returns a frame containing only the rows matching the predicate. */
   DataFrame filter(Predicate<DataRow> predicate);

   /** Iterable view over the rows. */
   Iterable<? extends DataRow> rows();

   /** Known levels per categorical column, keyed by column name. */
   Map<String, List<String>> getLevels();
}

+ 298 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/DataQuery.java

@@ -0,0 +1,298 @@
+package opennlp.tools.svm.data.frame;
+
+import opennlp.tools.svm.data.utils.CsvUtils;
+import opennlp.tools.svm.data.utils.NumberUtils;
+import opennlp.tools.svm.data.utils.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+
+
/**
 * Created by xschen on 1/5/2017.
 *
 * Fluent entry point for building a {@link DataFrame}. A query starts from one
 * of the static factory methods ({@link #csv()}, {@link #libsvm()},
 * {@link #blank()}), is bound to a source stream, configures how selected
 * columns are parsed and whether they are inputs or outputs, and is finished
 * with {@code build()}.
 */



public class DataQuery {

   /** Stage after the source is bound: select columns, skip header rows, build. */
   public interface DataFrameQueryBuilder {
      DataFrameQueryBuilder skipRows(int skippedRowCount);
      DataColumnBuilder selectColumn(int columnIndex);
      DataFrame build();
   }

   /** Stage that configures how the currently selected source column is parsed. */
   public interface DataColumnBuilder {
      DataColumnBuilder transform(Function<String, Object> columnTransformer);
      // parse the raw text as a double -> the cell becomes numeric
      default DataColumnBuilder asNumeric(){
         return transform(StringUtils::parseDouble);
      }
      // keep the trimmed text -> the cell stays a String, i.e. categorical
      default DataColumnBuilder asCategory(){
         return transform(String::trim);
      }
      DataFrameQueryBuilder asInput(String columnName);
      DataFrameQueryBuilder asOutput(String columnName);
   }

   /** First stage: choose the source format. */
   public interface FormatBuilder {
      @Deprecated
      SourceBuilder csv(String splitter, boolean skipFirstLine);
      SourceBuilder csv(String splitter);
      default SourceBuilder csv() {
         return csv("\\s");
      }

      SourceBuilder libsvm();
      DataTableBuilder blank();
   }

   /** Stage used by blank(): declare the columns of an empty in-memory frame. */
   public interface  DataTableBuilder {
      DataTableBuilder newInput(String columnName);
      DataTableBuilder newOutput(String columnName);
      DataFrameQueryBuilder end();
   }

   /** Stage that binds the query to its input stream. */
   public interface SourceBuilder {
      DataFrameQueryBuilder from(InputStream inputStream);
   }

   /** Internal record of one selected column: source index, parser, target name. */
   private static class DataFrameColumn {
      private int index;
      private Function<String, Object> transformer;
      private String columnName;

      public DataFrameColumn(String columnName, int index, Function<String, Object> transformer){
         this.columnName = columnName;
         this.index = index;
         this.transformer = transformer;
      }
   }

   // Single mutable builder that implements every stage interface of the fluent API.
   private static class DataFrameBuilderX implements SourceBuilder, DataFrameQueryBuilder, DataColumnBuilder, FormatBuilder, DataTableBuilder {

      private final List<DataFrameColumn> inputColumns = new ArrayList<>();
      private final List<DataFrameColumn> outputColumns = new ArrayList<>();
      private InputStream dataInputStream;
      private String csvSplitter = "\\s";
      private DataFileType fileType;

      @Deprecated
      private boolean skipFirstLine = false;
      private int skippedRowCount = 0;

      private static final Logger logger = LoggerFactory.getLogger(DataFrameBuilderX.class);

      // column being configured between selectColumn() and asInput()/asOutput()
      private DataFrameColumn selected = null;

      @Override public DataColumnBuilder selectColumn(int columnIndex) {
         // default transformer is identity (raw String), i.e. categorical until
         // asNumeric()/transform() says otherwise
         selected = new DataFrameColumn("", columnIndex, x -> x);
         return this;
      }

      @Override public DataFrameQueryBuilder skipRows(int skippedRowCount) {
         this.skippedRowCount = skippedRowCount;
         return this;
      }

      /**
       * Materializes the frame from the configured source. For Csv and
       * HeartScale the stream is consumed and the frame is locked; for Memory
       * only the declared columns are registered and the frame stays unlocked.
       */
      @Override public DataFrame build() {
         final BasicDataFrame dataFrame = new BasicDataFrame();

         if(fileType == DataFileType.Csv) {
            if(inputColumns.isEmpty()){
               throw new RuntimeException("data frame should not have empty input columns");
            }

            // honour both the deprecated skipFirstLine flag and skipRows()
            int skippedLines = Math.max(this.skipFirstLine ? 1 : 0, this.skippedRowCount);

            CsvUtils.csv(dataInputStream, csvSplitter, skippedLines, (words) -> {
               DataRow row = dataFrame.newRow();

               for (int i = 0; i < words.length; ++i) {
                  for (DataFrameColumn c : inputColumns) {
                     if (c.index == i) {
                        Object data = c.transformer.apply(words[i]);
                        // a String result marks the cell as categorical
                        if(data instanceof String){
                           row.setCategoricalCell(c.columnName, (String)data);
                        } else {
                           row.setCell(c.columnName, NumberUtils.toDouble(data));
                        }
                     }
                  }
                  for (DataFrameColumn c : outputColumns) {
                     if (c.index == i) {
                        Object target = c.transformer.apply(words[i]);
                        if(target instanceof String) {
                           row.setCategoricalTargetCell(c.columnName, (String)target);
                        } else {
                           row.setTargetCell(c.columnName, NumberUtils.toDouble(target));
                        }
                     }
                  }
               }

               dataFrame.addRow(row);
               return true;
            }, (e) -> logger.error("Failed to read csv file", e));
         } else if(fileType == DataFileType.HeartScale) {
            // libsvm format: each parsed row maps feature index -> value text,
            // with the label stored under key 0 (see CsvUtils.readHeartScale)
            List<Map<Integer, String>> rows = CsvUtils.readHeartScale(dataInputStream);
            if(inputColumns.isEmpty() && outputColumns.isEmpty()) {
               // no explicit selection: every feature index becomes a numeric
               // input named after its index; the label becomes target "label"
               for(Map<Integer, String> row : rows) {
                  DataRow newRow = dataFrame.newRow();
                  for(Map.Entry<Integer, String> entry : row.entrySet()){

                     int columnIndex = entry.getKey();
                     if(columnIndex != 0) {
                        newRow.setCell("" + columnIndex, StringUtils.parseDouble(entry.getValue()));
                     } else {
                        newRow.setTargetCell("label", StringUtils.parseDouble(entry.getValue()));
                     }
                  }
                  dataFrame.addRow(newRow);
               }
            } else if(inputColumns.isEmpty() || outputColumns.isEmpty()) {
               throw new RuntimeException("data frame should not have either empty input columns or empty output columns");
            } else {
               // explicit selection: apply the registered transformers per column index
               for (Map<Integer, String> row : rows) {
                  DataRow newRow = dataFrame.newRow();
                  for (DataFrameColumn c : inputColumns) {
                     Object data = c.transformer.apply(row.get(c.index));
                     if(data instanceof String) {
                        newRow.setCategoricalCell(c.columnName, (String)data);
                     } else {
                        newRow.setCell(c.columnName, NumberUtils.toDouble(data));
                     }
                  }
                  for (DataFrameColumn c : outputColumns) {
                     Object target = c.transformer.apply(row.get(c.index));
                     if(target instanceof String) {
                        newRow.setCategoricalTargetCell(c.columnName, (String) target);
                     } else {
                        newRow.setTargetCell(c.columnName, NumberUtils.toDouble(target));
                     }
                  }
                  dataFrame.addRow(newRow);
               }
            }
         } else if(fileType == DataFileType.Memory) {
            // blank frame: register the declared columns; rows are added by the caller
            dataFrame.getInputColumns().clear();
            dataFrame.getOutputColumns().clear();

            for(DataFrameColumn c : inputColumns) {
               dataFrame.getInputColumns().add(new InputDataColumn(c.columnName));
            }
            for(DataFrameColumn c : outputColumns) {
               dataFrame.getOutputColumns().add(new OutputDataColumn(c.columnName));
            }
         }

         if(fileType != DataFileType.Memory) {
            dataFrame.lock();
         }

         return dataFrame;
      }

      @Deprecated
      @Override public SourceBuilder csv(String splitter, boolean skipFirstLine) {
         this.skipFirstLine = skipFirstLine;
         csvSplitter = splitter;
         fileType = DataFileType.Csv;
         return this;
      }

      @Override public SourceBuilder csv(String splitter){
         csvSplitter = splitter;
         fileType = DataFileType.Csv;
         return this;
      }

      @Override public DataFrameQueryBuilder from(InputStream inputStream) {
         dataInputStream = inputStream;
         return this;
      }


      @Override public SourceBuilder libsvm() {
         fileType = DataFileType.HeartScale;
         return this;
      }


      @Override public DataTableBuilder blank() {
         fileType = DataFileType.Memory;
         return this;
      }


      @Override public DataColumnBuilder transform(Function<String, Object> columnTransformer) {
         selected.transformer = columnTransformer;
         return this;
      }


      @Override public DataFrameQueryBuilder asInput(String columnName) {
         selected.columnName = columnName;
         inputColumns.add(selected);
         selected = null;
         return this;
      }


      @Override public DataFrameQueryBuilder asOutput(String columnName) {
         selected.columnName = columnName;
         outputColumns.add(selected);
         selected = null;
         return this;
      }


      @Override public DataTableBuilder newInput(String columnName) {
         inputColumns.add(new DataFrameColumn(columnName, -1, StringUtils::parseDouble));
         return this;
      }


      @Override public DataTableBuilder newOutput(String columnName) {
         outputColumns.add(new DataFrameColumn(columnName, -1, StringUtils::parseDouble));
         return this;
      }


      @Override public DataFrameQueryBuilder end() {
         if(inputColumns.isEmpty()){
            throw new RuntimeException("input columns cannot be empty!");
         }
         if(outputColumns.isEmpty()) {
            throw new RuntimeException("output columns cannot be empty!");
         }
         return this;
      }
   }


   /** Starts a query over libsvm ("heart scale") formatted text. */
   public static SourceBuilder libsvm() {
      return new DataFrameBuilderX().libsvm();
   }

   /** @deprecated use {@link #csv(String)} together with skipRows() instead. */
   @Deprecated
   public static SourceBuilder csv(String splitter, boolean skipFirstLine) {
      return new DataFrameBuilderX().csv(splitter, skipFirstLine);
   }

   /** Starts a query over delimited text split by the given regex. */
   public static SourceBuilder csv(String splitter) {
      return new DataFrameBuilderX().csv(splitter);
   }

   /** Starts a query over whitespace-delimited text. */
   public static SourceBuilder csv(){
      return new DataFrameBuilderX().csv();
   }

   /** Starts a builder for an empty in-memory frame with declared columns. */
   public static DataTableBuilder blank() {
      return new DataFrameBuilderX().blank();
   }
}

+ 60 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/DataRow.java

@@ -0,0 +1,60 @@
+package opennlp.tools.svm.data.frame;
+
+
+import java.util.List;
+import java.util.Map;
+
+
/**
 * Created by xschen on 28/4/2017.
 *
 * A single observation in a {@link DataFrame}: named numeric and categorical
 * input cells plus numeric and categorical target (output) cells.
 */
public interface DataRow {
   /** The row's numeric target value. */
   double target();
   /** The row's categorical target value. */
   String categoricalTarget();

   /** The row's input cells flattened into a numeric array. */
   double[] toArray();

   /** Sets a numeric input cell; returns this row for chaining. */
   DataRow setCell(String columnName, double value);

   /** Sets a categorical input cell; returns this row for chaining. */
   DataRow setCategoricalCell(String columnName, String value);

   /** Names of the numeric input columns. */
   List<String> getColumnNames();

   /** Names of the categorical input columns. */
   List<String> getCategoricalColumnNames();

   /** Names of the numeric target columns. */
   List<String> getTargetColumnNames();

   /** Names of the categorical target columns. */
   List<String> getCategoricalTargetColumnNames();

   /** Numeric input cell value for the given column name. */
   double getCell(String key);

   /** Numeric target cell value for the given column name. */
   double getTargetCell(String columnName);

   /** Categorical target cell value for the given column name. */
   String getCategoricalTargetCell(String columnName);

   /** Sets a numeric target cell. */
   void setTargetCell(String columnName, double value);

   /** Sets a categorical target cell; returns this row for chaining. */
   DataRow setCategoricalTargetCell(String columnName, String label);

   /** Declares the numeric input column names; returns this row for chaining. */
   DataRow setColumnNames(List<String> inputColumns);

   /** Declares the categorical input column names; returns this row for chaining. */
   DataRow setCategoricalColumnNames(List<String> inputColumns);

   /** Sets the known levels per categorical column. */
   DataRow setLevels(Map<String, List<String>> levels);

   /** Declares the numeric target column names; returns this row for chaining. */
   DataRow setTargetColumnNames(List<String> outputColumns);

   /** Declares the categorical target column names; returns this row for chaining. */
   DataRow setCategoricalTargetColumnNames(List<String> outputColumns);

   /** Deep copy of this row. */
   DataRow makeCopy();

   /** Copies the state of {@code that} into this row. */
   void copy(DataRow that);

   /** Name of the numeric target column. */
   String targetColumnName();

   /** Name of the categorical target column. */
   String categoricalTargetColumnName();

   /** Categorical input cell value for the given column name. */
   String getCategoricalCell(String key);

   /** Known levels per categorical column, keyed by column name. */
   Map<String,List<String>> getLevels();
}

+ 80 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/InputDataColumn.java

@@ -0,0 +1,80 @@
+package opennlp.tools.svm.data.frame;
+
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Created by xschen on 29/4/2017.
+ */
+public class InputDataColumn implements Serializable, DataColumn {
+
+   private static final long serialVersionUID = -3355436850471047863L;
+   private int sourceColumnIndex;
+   private String columnName;
+   private final List<String> levels = new ArrayList<>();
+
+   public InputDataColumn(){
+
+   }
+
+   public InputDataColumn(String columnName) {
+      this.columnName = columnName;
+   }
+
+   public InputDataColumn makeCopy() {
+      InputDataColumn clone = new InputDataColumn();
+
+      clone.copy(this);
+      return clone;
+   }
+
+   public void copy(InputDataColumn that) {
+      this.sourceColumnIndex = that.sourceColumnIndex;
+      this.columnName = that.columnName;
+      this.levels.clear();
+      this.levels.addAll(that.levels);
+   }
+
+   public boolean isCategorical(){
+      return !levels.isEmpty();
+   }
+
+   public void setSourceColumnIndex(int key) {
+      this.sourceColumnIndex = key;
+   }
+
+   public void setColumnName(String columnName) {
+      this.columnName = columnName;
+   }
+
+   public String getColumnName() {
+      return columnName;
+   }
+
+   public void setLevels(List<String> levels) {
+      this.levels.clear();
+      this.levels.addAll(levels);
+   }
+
+   public List<String> getLevels(){
+      return levels;
+   }
+
+   @Override
+   public String toString(){
+      return columnName;
+   }
+
+
+   public String summary() {
+      return columnName + ":discrete=" + levels.size();
+   }
+
+   @Override
+   public boolean isOutputColumn(){
+      return false;
+   }
+}

+ 68 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/OutputDataColumn.java

@@ -0,0 +1,68 @@
+package opennlp.tools.svm.data.frame;
+
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Created by xschen on 5/5/2017.
+ */
+public class OutputDataColumn implements Serializable, DataColumn {
+   private static final long serialVersionUID = -3482432625139406828L;
+   private String columnName;
+   private final List<String> levels = new ArrayList<>();
+
+   public OutputDataColumn(){
+
+   }
+
+   public OutputDataColumn(String columnName) {
+      this.columnName = columnName;
+   }
+
+   @Override
+   public String getColumnName(){
+      return columnName;
+   }
+
+   @Override
+   public void setColumnName(String columnName) {
+      this.columnName = columnName;
+   }
+
+   @Override
+   public boolean isCategorical(){
+      return !levels.isEmpty();
+   }
+
+
+   public OutputDataColumn makeCopy() {
+      OutputDataColumn clone = new OutputDataColumn(columnName);
+      clone.copy(this);
+      return clone;
+   }
+
+   @Override
+   public void setLevels(List<String> levels) {
+      this.levels.clear();
+      this.levels.addAll(levels);
+   }
+
+   @Override
+   public List<String> getLevels(){
+      return levels;
+   }
+
+   public void copy(OutputDataColumn that){
+      columnName = that.columnName;
+      levels.clear();
+      levels.addAll(that.levels);
+   }
+
+   @Override
+   public boolean isOutputColumn(){
+      return true;
+   }
+}

+ 95 - 0
gtbook/src/main/java/opennlp/tools/svm/data/frame/Sampler.java

@@ -0,0 +1,95 @@
+package opennlp.tools.svm.data.frame;
+
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.BiFunction;
+import java.util.stream.Collectors;
+
+
+/**
+ * Created by xschen on 5/5/2017.
+ */
+public class Sampler {
+
+   public Sampler(){
+
+   }
+
+   public ColumnBuilder forColumn(String column){
+      return new SampleBuilder().forColumn(column);
+   }
+
+   public interface RowBuilder {
+      ColumnBuilder forColumn(String columnName);
+      DataSampleBuilder end();
+   }
+
+   public interface ColumnBuilder {
+      RowBuilder generate(BiFunction<String, Integer, Double> generator);
+   }
+
+   public interface DataSampleBuilder {
+      DataFrame sample(DataFrame dataFrame, int count);
+   }
+
+   private static class SampleBuilder implements RowBuilder, ColumnBuilder, DataSampleBuilder {
+
+      private int count;
+      private String currentColumnName;
+
+      private Map<String, BiFunction<String, Integer, Double>> generators = new HashMap<>();
+
+      public SampleBuilder() {
+
+      }
+
+
+      @Override public RowBuilder generate(BiFunction<String, Integer, Double> generator) {
+         generators.put(currentColumnName, generator);
+         return this;
+      }
+
+
+      @Override public ColumnBuilder forColumn(String columnName) {
+         currentColumnName = columnName;
+         return this;
+      }
+
+
+      @Override public DataSampleBuilder end() {
+         return this;
+      }
+
+
+      @Override public DataFrame sample(DataFrame dataFrame, int count) {
+         if(generators.isEmpty()) {
+            throw new RuntimeException("No column generators are designed to build a row!");
+         }
+
+         this.count = count;
+         currentColumnName = null;
+         dataFrame.unlock();
+         Set<String> outputColumns = dataFrame.getOutputColumns().stream().map(OutputDataColumn::getColumnName).collect(Collectors.toSet());
+
+         for(int i=0; i < count; ++i) {
+            DataRow newRow = dataFrame.newRow();
+            for(Map.Entry<String, BiFunction<String, Integer, Double>> entry : generators.entrySet()) {
+               String columnName = entry.getKey();
+               BiFunction<String, Integer, Double> generator = entry.getValue();
+               if(outputColumns.contains(columnName)){
+                  newRow.setTargetCell(columnName, generator.apply(columnName, i));
+               } else {
+                  newRow.setCell(columnName, generator.apply(columnName, i));
+               }
+            }
+
+            dataFrame.addRow(newRow);
+         }
+         dataFrame.lock();
+
+         return dataFrame;
+      }
+   }
+}

+ 60 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/CollectionUtils.java

@@ -0,0 +1,60 @@
+package opennlp.tools.svm.data.utils;
+
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Function;
+
+
/**
 * Created by xschen on 1/5/2017.
 *
 * Small helpers for copying and converting between lists and arrays.
 */
public class CollectionUtils {
   /** Returns a new list holding {@code transformer} applied to each element of {@code that}. */
   public static <T> List<T> clone(List<T> that, Function<T, T> transformer) {
      List<T> copies = new ArrayList<>();
      for(T item : that){
         copies.add(transformer.apply(item));
      }
      return copies;
   }


   /** Returns a list holding {@code transformer} applied to each element of the array. */
   public static <T> List<T> toList(T[] that, Function<T, T> transformer) {
      List<T> converted = new ArrayList<>();
      for(T item : that){
         converted.add(transformer.apply(item));
      }
      return converted;
   }

   /** Boxes a primitive double array into a List&lt;Double&gt;. */
   public static List<Double> toList(double[] that) {
      List<Double> boxed = new ArrayList<>();
      for(double value : that){
         boxed.add(value);
      }
      return boxed;
   }

   /** Swaps the elements at positions i and j in place. */
   public static <T> void exchange(List<T> a, int i, int j) {
      T held = a.get(i);
      a.set(i, a.get(j));
      a.set(j, held);
   }


   /** Unboxes a List&lt;Double&gt; into a primitive double array. */
   public static double[] toDoubleArray(List<Double> list) {
      int size = list.size();
      double[] unboxed = new double[size];
      for(int i=0; i < size; ++i) {
         unboxed[i] = list.get(i);
      }
      return unboxed;
   }

   /** Copies a List&lt;String&gt; into a String array. */
   public static String[] toArray(List<String> list) {
      int size = list.size();
      String[] copied = new String[size];
      for(int i=0; i < size; ++i) {
         copied[i] = list.get(i);
      }
      return copied;
   }
}

+ 206 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/CountRepository.java

@@ -0,0 +1,206 @@
+package opennlp.tools.svm.data.utils;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
/**
 * Created by xschen on 18/8/15.
 *
 * Hierarchical counter: each node stores a value and a map of child counters
 * keyed by event name, so a varargs path of event names identifies a compound
 * event. Supports support counts, joint/conditional probabilities, and
 * arbitrary stored values.
 */
public class CountRepository {
    // child counters keyed by event name
    private final Map<String, CountRepository> chain = new HashMap<>();
    private String eventName;
    private double storedValue;

    public CountRepository(){
        storedValue = 0;
    }

    public CountRepository(String evt){
        this.eventName = evt;
    }

    /** Deep-copies the state of {@code rhs} into this repository. */
    public void copy(CountRepository rhs){
        storedValue = rhs.storedValue;
        eventName = rhs.eventName;
        chain.clear();

        for(Map.Entry<String, CountRepository> entry : rhs.chain.entrySet()){
            chain.put(entry.getKey(), entry.getValue().makeCopy());
        }
    }

    /** Returns a deep copy of this repository. */
    public CountRepository makeCopy(){
        CountRepository clone = new CountRepository();
        clone.copy(this);

        return clone;
    }

    public String getEventName(){
        return eventName;
    }

    public double getStoredValue(){
        return storedValue;
    }

    public Map<String, CountRepository> getChain(){
        return chain;
    }

    /** Returns the names of the direct sub-events under the given event path. */
    public List<String> getSubEventNames(String... eventNames){
        return getSubEventNames(this, eventNames);
    }

    private List<String> getSubEventNames(CountRepository repo, String... eventNames){
        if(eventNames.length == 0){
            return new ArrayList<>(repo.chain.keySet());
        }

        CountRepository child = repo.chain.get(eventNames[0]);
        if(child == null){
            // unknown event path: no sub-events
            return new ArrayList<>();
        }
        return getSubEventNames(child, tail(eventNames));
    }

    /** Increments the support count of the given event path by 1. */
    public void addSupportCount(String... events){
        addSupportCount(1, events);
    }

    /**
     * Increments the support count of the given event path by {@code increment},
     * creating intermediate nodes as needed. Only the leaf node's value changes.
     */
    public void addSupportCount(double increment, String... events){
        if(events.length == 0){
            storedValue += increment;
            return;
        }
        CountRepository child = chain.computeIfAbsent(events[0], CountRepository::new);
        child.addSupportCount(increment, tail(events));
    }

    /** Sets the stored value at the given event path, creating nodes as needed. */
    public void setValue(double value, String... events){
        if(events.length == 0){
            storedValue = value;
            return;
        }
        CountRepository child = chain.computeIfAbsent(events[0], CountRepository::new);
        child.setValue(value, tail(events));
    }

    /**
     * Probability of the event relative to this node's own stored count;
     * returns 0 when this node's count is 0.
     */
    public double getProbability(String eventName){
        if(storedValue ==0) return 0;
        double count = getSupportCount(eventName);
        return count / storedValue;
    }

    /** Conditional probability of eventB given that eventA happened; 0 when eventA is unseen. */
    public double getConditionalProbability(String eventA, String eventB){
        double givenCount = getSupportCount(eventA);
        if(givenCount == 0) return 0;
        return getSupportCount(eventA, eventB) / givenCount;
    }

    /** Support count stored at the given event path, or 0 for an unknown path. */
    public double getSupportCount(String... events){
        if(events.length == 0){
            return storedValue;
        }
        CountRepository child = chain.get(events[0]);
        if(child == null) return 0;
        return child.getSupportCount(tail(events));
    }

    /** Value stored at the given event path, or 0 for an unknown path. */
    public double getValue(String... events){
        if(events.length == 0){
            return storedValue;
        }
        CountRepository child = chain.get(events[0]);
        if(child == null) return 0;
        // BUG FIX: recurse through getValue — the original recursed through
        // getSupportCount (copy-paste slip). Today the two are numerically
        // identical, but the slip would silently break if either diverged.
        return child.getValue(tail(events));
    }

    // Drops the first element of the event path (varargs recursion helper).
    private static String[] tail(String[] events){
        String[] rest = new String[events.length - 1];
        for(int j = 1; j < events.length; ++j){
            rest[j - 1] = events[j];
        }
        return rest;
    }
}

+ 137 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/CsvUtils.java

@@ -0,0 +1,137 @@
+package opennlp.tools.svm.data.utils;
+
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.*;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+
+/**
+ * Created by xschen on 1/5/2017.
+ */
+public class CsvUtils {
+   public static final String quoteSplitPM = "(?=([^\"]*\"[^\"]*\")*[^\"]*$)";
+   private static final Logger logger = LoggerFactory.getLogger(CsvUtils.class);
+
+
+
+   public static int atoi(String s)
+   {
+      int value = 0;
+      try {
+         value = Integer.parseInt(s);
+      }catch(NumberFormatException ex){
+         value = 0;
+      }
+      return value;
+   }
+
+   public static List<Map<Integer, String>> readHeartScale(InputStream inputStream){
+      try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))){
+         List<String> lines = reader.lines().collect(Collectors.toList());
+         return lines.stream()
+                 .filter(line -> !StringUtils.isEmpty(line))
+                 .map(line -> {
+
+                    StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:");
+
+                    String label = st.nextToken();
+                    Map<Integer, String> row = new HashMap<>();
+
+                    int m = st.countTokens() / 2;
+                    for (int j = 0; j < m; j++) {
+                       int index = atoi(st.nextToken());
+                       String value = st.nextToken();
+
+                       row.put(index, value);
+                    }
+
+
+                    row.put(0, label);
+                    return row;
+                 })
+                 .collect(Collectors.toList());
+      }
+      catch (IOException e) {
+         logger.error("Failed to read the heartScale data", e);
+      }
+
+      return new ArrayList<>();
+
+
+   }
+
+   public static boolean csv(InputStream inputStream, String cvsSplitBy, int skippedLineCount, Function<String[], Boolean> onLineReady, Consumer<Exception> onFailed){
+
+      String line;
+      if(cvsSplitBy==null) cvsSplitBy = ",";
+
+
+      boolean success = true;
+      try(BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) {
+         int lineCount = 0;
+         while ((line = br.readLine()) != null) {
+
+            lineCount++;
+
+            if(lineCount <= skippedLineCount) {
+               continue;
+            }
+
+            line = line.trim();
+
+            if(line.equals("")) continue;
+
+            boolean containsQuote = false;
+            if(line.contains("\"")){
+               containsQuote = true;
+               cvsSplitBy = cvsSplitBy + quoteSplitPM;
+            }
+
+            String[] values = filterEmpty(line.split(cvsSplitBy));
+
+            if(containsQuote){
+               for(int i=0; i < values.length; ++i){
+                  values[i] = StringUtils.stripQuote(values[i]);
+               }
+            }
+
+            if(onLineReady != null){
+               onLineReady.apply(values);
+            }
+
+         }
+
+      }
+      catch (IOException e) {
+         success = false;
+         if(onFailed != null) onFailed.accept(e);
+         else e.printStackTrace();
+      }
+
+      return success;
+   }
+
+   private static String[] filterEmpty(String[] a) {
+      List<String> result = new ArrayList<>();
+      for(int i=0; i < a.length; ++i){
+         String v = a[i].trim();
+         if(StringUtils.isEmpty(v)){
+            continue;
+         }
+         result.add(v);
+      }
+
+      return CollectionUtils.toArray(result);
+
+   }
+
+}

+ 16 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/Mean.java

@@ -0,0 +1,16 @@
+package opennlp.tools.svm.data.utils;
+
/**
 * Created by xschen on 14/8/15.
 *
 * Arithmetic mean of a double array; NaN for an empty array.
 */
public class Mean {
    /** Returns the average of {@code values}, or NaN when the array is empty. */
    public static double apply(double[] values){
        if(values.length == 0) return Double.NaN;
        double total = 0;
        for(double value : values){
            total += value;
        }
        return total / values.length;
    }
}

+ 56 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/NumberUtils.java

@@ -0,0 +1,56 @@
+package opennlp.tools.svm.data.utils;
+
+import opennlp.tools.svm.data.exceptions.NotImplementedException;
+
+import java.math.BigInteger;
+
+
+/**
+ * Created by xschen on 1/5/2017.
+ */
+public class NumberUtils {
+   public static int toInt(double value){
+      return (int)value;
+   }
+
+   public static boolean isZero(Double val) {
+      return Math.abs(val) < 0.0000000000000000000001;
+   }
+
+   public static double toDouble(Object obj) {
+      if(obj instanceof Double){
+         return (Double)obj;
+      } else if(obj instanceof String) {
+         return Double.parseDouble((String)obj);
+      } else if(obj instanceof Float) {
+         return ((Float)obj).doubleValue();
+      } else if(obj instanceof Integer) {
+         return ((Integer)obj).doubleValue();
+      } else if(obj instanceof Long) {
+         return ((Long)obj).doubleValue();
+      } else if(obj instanceof Boolean) {
+         return (Boolean) obj ? 1.0 : 0.0;
+      } else if(obj instanceof BigInteger) {
+         return ((BigInteger)obj).doubleValue();
+      } else if(obj == null) {
+         return 0;
+      } else {
+
+         throw new NotImplementedException();
+      }
+   }
+
+   public static double[] toDoubleArray( float[] array) {
+      if (array == null) {
+         return null;
+      }
+      if (array.length == 0) {
+         return new double[0];
+      }
+      final double[] result = new double[array.length];
+      for (int i = 0; i < array.length; i++) {
+         result[i] = array[i];
+      }
+      return result;
+   }
+}

+ 127 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/Scaler.java

@@ -0,0 +1,127 @@
+package opennlp.tools.svm.data.utils;
+
+//import com.github.svm.data.frame.DataFrame;
+//import com.github.svm.data.frame.DataRow;
+import opennlp.tools.svm.data.frame.InputDataColumn;
+import opennlp.tools.svm.data.frame.OutputDataColumn;
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+
+/**
+ * Created by xschen on 5/5/2017.
+ */
+public class Scaler {
+   private Map<String, Double> means = new HashMap<>();
+   private Map<String, Double> sds = new HashMap<>();
+
+   public Scaler makeCopy() {
+      Scaler clone = new Scaler();
+      clone.copy(this);
+      return clone;
+   }
+
+   public void copy(Scaler that) {
+      means.clear();
+      sds.clear();
+      means.putAll(that.means);
+      sds.putAll(that.sds);
+   }
+
+   public void fit(DataFrame frame) {
+
+      means.clear();
+      sds.clear();
+
+      List<String> inputColumns = frame.getInputColumns().stream().map(InputDataColumn::getColumnName).collect(Collectors.toList());
+      List<String> outputColumns = frame.getOutputColumns().stream().map(OutputDataColumn::getColumnName).collect(Collectors.toList());
+
+
+      for(String c : inputColumns){
+
+         double[] values = new double[frame.rowCount()];
+         for(int i=0; i < frame.rowCount(); ++i){
+            double value = frame.row(i).getCell(c);
+            values[i] = value;
+         }
+
+         double mean = Mean.apply(values);
+         means.put(c, mean);
+
+         double sd = StdDev.apply(values, mean);
+         sds.put(c, sd);
+      }
+
+      for(String c : outputColumns){
+
+         double[] values = new double[frame.rowCount()];
+         for(int i=0; i < frame.rowCount(); ++i){
+            double value = frame.row(i).getTargetCell(c);
+            values[i] = value;
+         }
+
+         double mean = Mean.apply(values);
+         means.put(c, mean);
+
+         double sd = StdDev.apply(values, mean);
+         sds.put(c, sd);
+      }
+   }
+
+   public double transform(String columnName, double value) {
+      double mean = means.getOrDefault(columnName, 0.0);
+      double sd = sds.getOrDefault(columnName, 0.0);
+
+      if(sd != 0){
+         return (value - mean) / sd;
+      } else {
+         return value;
+      }
+   }
+
+   public double inverseTransform(String columnName, double value) {
+      double mean = means.getOrDefault(columnName, 0.0);
+      double sd = sds.getOrDefault(columnName, 0.0);
+
+      if(sd != 0){
+         return value * sd + mean;
+      } else {
+         return value;
+      }
+   }
+
+   public DataRow transform(DataRow row) {
+      DataRow scaled = row.makeCopy();
+      List<String> inputColumns = scaled.getColumnNames();
+      for(String c : inputColumns){
+         scaled.setCell(c, transform(c, scaled.getCell(c)));
+      }
+
+      List<String> outputColumns = scaled.getTargetColumnNames();
+      for(String c : outputColumns) {
+         scaled.setTargetCell(c, transform(c, scaled.getTargetCell(c)));
+      }
+
+      return scaled;
+   }
+
+   public DataRow inverseTransform(DataRow row) {
+      DataRow scaled = row.makeCopy();
+      List<String> inputColumns = scaled.getColumnNames();
+      for(String c : inputColumns){
+         scaled.setCell(c, inverseTransform(c, scaled.getCell(c)));
+      }
+
+      List<String> outputColumns = scaled.getTargetColumnNames();
+      for(String c : outputColumns) {
+         scaled.setTargetCell(c, inverseTransform(c, scaled.getTargetCell(c)));
+      }
+
+      return scaled;
+   }
+}

+ 10 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/StdDev.java

@@ -0,0 +1,10 @@
+package opennlp.tools.svm.data.utils;
+
+/**
+ * Created by xschen on 14/8/15.
+ */
+public class StdDev {
+    public static double apply(double[] values, double mu){
+        return Math.sqrt(Variance.apply(values, mu));
+    }
+}

+ 27 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/StringUtils.java

@@ -0,0 +1,27 @@
+package opennlp.tools.svm.data.utils;
+
+
+/**
+ * Created by xschen on 1/5/2017.
+ */
/**
 * Small string helpers for CSV parsing.
 *
 * Created by xschen on 1/5/2017.
 */
public class StringUtils {
   /** Parses {@code text} as a double, returning 0 for unparseable input. */
   public static double parseDouble(String text) {
      try {
         return Double.parseDouble(text);
      } catch(NumberFormatException ex) {
         return 0;
      }
   }

   /**
    * Removes one pair of surrounding double quotes, if present.
    *
    * <p>BUG FIX: a single-character input consisting of just {@code "} both
    * starts and ends with a quote, so the original code called
    * {@code substring(1, 0)} and threw StringIndexOutOfBoundsException.
    * A minimum length of 2 is now required before stripping.
    */
   public static String stripQuote(String sentence){
      if(sentence.length() >= 2 && sentence.startsWith("\"") && sentence.endsWith("\"")){
         return sentence.substring(1, sentence.length()-1);
      }
      return sentence;
   }

   /** True when {@code line} is null or the empty string (whitespace is NOT empty). */
   public static boolean isEmpty(String line) {
      return line == null || line.equals("");
   }
}

+ 45 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/TupleTwo.java

@@ -0,0 +1,45 @@
+package opennlp.tools.svm.data.utils;
+
+
+/**
+ * Created by xschen on 4/5/2017.
+ */
+public class TupleTwo<T, T2> {
+   private final T v1;
+   private final T2 v2;
+
+   public TupleTwo(T v1, T2 v2) {
+      this.v1 = v1;
+      this.v2 = v2;
+   }
+
+   public T _1(){
+      return v1;
+   }
+
+   public T2 _2(){
+      return v2;
+   }
+
+
+   @Override public boolean equals(Object o) {
+      if (this == o)
+         return true;
+      if (o == null || getClass() != o.getClass())
+         return false;
+
+      TupleTwo<?, ?> tupleTwo = (TupleTwo<?, ?>) o;
+
+      if (v1 != null ? !v1.equals(tupleTwo.v1) : tupleTwo.v1 != null)
+         return false;
+      return v2 != null ? v2.equals(tupleTwo.v2) : tupleTwo.v2 == null;
+
+   }
+
+
+   @Override public int hashCode() {
+      int result = v1 != null ? v1.hashCode() : 0;
+      result = 31 * result + (v2 != null ? v2.hashCode() : 0);
+      return result;
+   }
+}

+ 20 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/Variance.java

@@ -0,0 +1,20 @@
+package opennlp.tools.svm.data.utils;
+
+/**
+ * Created by xschen on 14/8/15.
+ */
/**
 * Sample variance helper.
 *
 * Created by xschen on 14/8/15.
 */
public class Variance {
    /**
     * Sample variance of {@code values} about the mean {@code mu}
     * (sum of squared deviations divided by n-1). Returns
     * {@code Double.NaN} when fewer than two samples are given.
     */
    public static double apply(double[] values, double mu) {
        if (values.length <= 1) {
            return Double.NaN;
        }

        double sumSq = 0.0;
        for (double v : values) {
            double d = v - mu;
            sumSq += d * d;
        }
        return sumSq / (values.length - 1);
    }
}

+ 14 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/discretizers/AttributeValueDiscretizer.java

@@ -0,0 +1,14 @@
+package opennlp.tools.svm.data.utils.discretizers;
+
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+
+
+/**
+ * Created by xschen on 18/8/15.
+ */
+// Converts continuous column values into discrete bin indices / categorical labels.
+public interface AttributeValueDiscretizer  {
+    // Maps a continuous value from the named column ("index" is the column name) to a bin index.
+    int discretize(double value, String index);
+    // Returns a new row with numeric cells replaced by their categorical bin labels.
+    DataRow transform(DataRow tuple);
+    // Fits the discretizer on the frame, then returns the transformed (discretized) frame.
+    DataFrame fitAndTransform(DataFrame frame);
+}

+ 136 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/discretizers/KMeansDiscretizer.java

@@ -0,0 +1,136 @@
+package opennlp.tools.svm.data.utils.discretizers;
+
+import lombok.AccessLevel;
+import lombok.Getter;
+import lombok.Setter;
+import opennlp.tools.svm.data.frame.*;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * Created by xschen on 18/8/15.
+ */
+@Getter
+@Setter
+public class KMeansDiscretizer implements AttributeValueDiscretizer, Serializable {
+
+    private static final long serialVersionUID = 2193706516691610469L;
+    @Setter(AccessLevel.NONE)
+    private final Map<String, KMeansFilter> filters = new HashMap<>();
+
+    private int maxLevelCount = 10;
+
+    private int maxIters = 500;
+
+    public void copy(KMeansDiscretizer that){
+        maxLevelCount = that.maxLevelCount;
+
+        filters.clear();
+        for(String index : that.filters.keySet()){
+            filters.put(index, that.filters.get(index).makeCopy());
+        }
+    }
+
+    public KMeansDiscretizer makeCopy(){
+        KMeansDiscretizer clone = new KMeansDiscretizer();
+        clone.copy(this);
+        return clone;
+    }
+
+    public KMeansDiscretizer(){
+
+    }
+
+    @Override
+    public int discretize(double value, String columnName) {
+        if(filters.containsKey(columnName)){
+            return filters.get(columnName).discretize(value);
+        }else{
+            return (int)value;
+        }
+    }
+
+
+    @Override public DataRow transform(DataRow tuple) {
+        DataRow newRow = new BasicDataRow();
+
+        for(String columnName : tuple.getCategoricalColumnNames()){
+            newRow.setCategoricalCell(columnName, tuple.getCategoricalCell(columnName));
+        }
+
+        for(String columnName: tuple.getColumnNames()){
+            int value = discretize(tuple.getCell(columnName), columnName);
+            newRow.setCategoricalCell(columnName, "" + value);
+        }
+
+        for(String columnName : tuple.getCategoricalTargetColumnNames()){
+            newRow.setCategoricalTargetCell(columnName, tuple.getCategoricalTargetCell(columnName));
+        }
+
+        for(String columnName : tuple.getTargetColumnNames()){
+            int value = discretize(tuple.getTargetCell(columnName), columnName);
+            newRow.setCategoricalTargetCell(columnName, "" + value);
+        }
+
+        return newRow;
+
+    }
+
+
+    @Override public DataFrame fitAndTransform(DataFrame frame) {
+        fit(frame);
+
+        DataFrame newFrame = new BasicDataFrame();
+
+        for(int rowIndex = 0; rowIndex < frame.rowCount(); ++rowIndex){
+            newFrame.addRow(transform(frame.row(rowIndex)));
+        }
+
+        newFrame.lock();
+
+        return newFrame;
+    }
+
+    public void fit(DataFrame frame) {
+
+        int m = frame.rowCount();
+
+        filters.clear();
+        for(DataColumn c : frame.getAllColumns()){
+            if(!c.isCategorical()){
+                KMeansFilter f = new KMeansFilter(c.getColumnName(), maxLevelCount);
+                f.setMaxIters(maxIters);
+                filters.put(c.getColumnName(), f);
+            }
+        }
+
+        Map<String, List<Double>> values = new HashMap<>();
+        for(String columnName : filters.keySet()){
+            values.put(columnName, new ArrayList<>());
+        }
+
+        for(int i=0; i < m; ++i){
+            DataRow tuple = frame.row(i);
+            for(DataColumn c : frame.getAllColumns()) {
+                if(!c.isCategorical()) {
+                    if(c.isOutputColumn()) {
+                        values.get(c.getColumnName()).add(tuple.getTargetCell(c.getColumnName()));
+                    } else {
+                        values.get(c.getColumnName()).add(tuple.getCell(c.getColumnName()));
+                    }
+                }
+            }
+        }
+
+        for(Map.Entry<String, KMeansFilter> filter : filters.entrySet()){
+            filter.getValue().build(values.get(filter.getKey()));
+        }
+    }
+
+
+}

+ 140 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/discretizers/KMeansFilter.java

@@ -0,0 +1,140 @@
+package opennlp.tools.svm.data.utils.discretizers;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+
+
+/**
+ * Created by xschen on 18/8/15.
+ */
+// One-dimensional k-means clusterer used to bin a single column's continuous
+// values into at most clusterCount discrete levels.
+public class KMeansFilter implements Serializable {
+
+    private static final long serialVersionUID = -5850969045455896812L;
+    // Shared RNG for picking the initial cluster centers in build().
+    private static Random random = new Random();
+    private int clusterCount;
+    // Cluster centers learned by build(); null until build() has run.
+    private double[] clusters;
+    private int maxIters = 500;
+    private String columnName;
+
+    public KMeansFilter makeCopy(){
+        KMeansFilter clone = new KMeansFilter();
+        clone.copy(this);
+        return clone;
+    }
+
+    public void copy(KMeansFilter rhs)
+    {
+        clusterCount = rhs.clusterCount;
+        columnName = rhs.columnName;
+        clusters = rhs.clusters == null ? null : rhs.clusters.clone();
+        maxIters = rhs.maxIters;
+    }
+
+    public KMeansFilter(String columnName, int k) {
+        this.columnName = columnName;
+        clusterCount = k;
+    }
+
+    public KMeansFilter(){
+
+        clusterCount = 10;
+    }
+
+    public int getClusterCount() {
+        return clusterCount;
+    }
+
+    public void setClusterCount(int clusterCount) {
+        this.clusterCount = clusterCount;
+    }
+
+    public int getMaxIters() {
+        return maxIters;
+    }
+
+    public void setMaxIters(int maxIters) {
+        this.maxIters = maxIters;
+    }
+
+    // Runs Lloyd's algorithm on the 1-D values for a fixed number of
+    // iterations (maxIters; there is no convergence test). May shrink
+    // clusterCount when there are too few samples.
+    public void build(List<Double> values) {
+        int m = values.size();
+        HashSet<Integer> initialCenters = new HashSet<Integer>();
+        if(clusterCount * 3 > m) {
+            // Too few samples for random seeding: take the first min(k, m)
+            // samples as deterministic initial centers.
+            clusterCount = Math.min(clusterCount, m);
+            for(int i=0; i < clusterCount; ++i){
+                initialCenters.add(i);
+            }
+        }
+        else{
+            // Sample clusterCount distinct indices as initial centers.
+            while (initialCenters.size() < clusterCount) {
+                int r = random.nextInt(m);
+                if (!initialCenters.contains(r)) {
+                    initialCenters.add(r);
+                }
+            }
+        }
+
+        clusters = new double[clusterCount];
+
+        int centerIndex = 0;
+        for(Integer index : initialCenters){
+            clusters[centerIndex] = values.get(index);
+            centerIndex++;
+        }
+
+        // cluster_groups[i] holds the indices of the values currently
+        // assigned to cluster i; rebuilt on every iteration.
+        List<List<Integer>> cluster_groups = new ArrayList<List<Integer>>();
+
+        for(int i=0; i< clusterCount; ++i){
+            cluster_groups.add(new ArrayList<Integer>());
+        }
+
+        for(int iter= 0; iter < maxIters; ++iter) {
+            for(int i=0; i < clusterCount; ++i){
+                cluster_groups.get(i).clear();
+            }
+
+            // Assignment step: each value goes to its nearest center.
+            for (int i = 0; i < m; ++i) {
+                int clusterIndex = closestClusterIndex(values.get(i));
+
+                cluster_groups.get(clusterIndex).add(i);
+            }
+
+            // Update step: each center moves to the mean of its group.
+            for(int i=0; i < clusterCount; ++i){
+                clusters[i] = calcCenter(values, cluster_groups.get(i), clusters[i]);
+            }
+
+        }
+    }
+
+    // Mean of the values assigned to one cluster; an empty cluster keeps its
+    // previous center unchanged.
+    private double calcCenter(List<Double> values, List<Integer> cluster, double center){
+        double newCenter = 0;
+        int m = cluster.size();
+        for(int i=0; i < m; ++i){
+            newCenter += values.get(cluster.get(i));
+        }
+        if(m==0) return center;
+        newCenter /= m;
+        return newCenter;
+    }
+
+    // Bin index for a value = index of its nearest cluster center.
+    // NOTE(review): requires build() to have been called first; clusters is
+    // null otherwise — confirm callers always fit before discretizing.
+    public int discretize(double value) {
+        return closestClusterIndex(value);
+    }
+
+    // Linear scan for the center with the smallest squared distance to value.
+    private int closestClusterIndex(double value){
+        double min_distance = Double.MAX_VALUE;
+        int closest_cluster_index = -1;
+        double distance;
+        for(int i=0; i < clusters.length; ++i){
+            distance = (clusters[i] - value) * (clusters[i] - value);
+            if(distance < min_distance){
+                min_distance = distance;
+                closest_cluster_index = i;
+            }
+        }
+        return closest_cluster_index;
+    }
+}

+ 104 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/transforms/ComplementaryCoding.java

@@ -0,0 +1,104 @@
+package opennlp.tools.svm.data.utils.transforms;
+
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+
+import java.util.List;
+
+
+/**
+ * Created by xschen on 21/8/15.
+ */
+public class ComplementaryCoding implements Cloneable {
+    private double[] minValues;
+    private double[] maxValues;
+
+    public void copy(ComplementaryCoding rhs){
+        minValues = rhs.minValues.clone();
+        maxValues = rhs.maxValues.clone();
+    }
+
+    @Override
+    public Object clone() throws CloneNotSupportedException {
+        ComplementaryCoding clone = (ComplementaryCoding)super.clone();
+        clone.copy(this);
+
+        return clone;
+    }
+
+    public ComplementaryCoding(){
+
+    }
+
+    public ComplementaryCoding(DataFrame batch) {
+        query(batch);
+    }
+
+    public ComplementaryCoding(List<double[]> batch){
+        query(batch);
+    }
+
+    public double[] revert(double[] x){
+        int m = x.length / 2;
+        double[] y = new double[m];
+        for(int i = 0; i < m ; ++i){
+            y[i] = x[i] * (maxValues[i] - minValues[i]) + minValues[i];
+        }
+        return y;
+    }
+
+    public double[] normalize(double[] x){
+        double[] y = new double[x.length * 2];
+        for(int i = 0; i < x.length; ++i){
+            y[i] = (x[i] - minValues[i]) / (maxValues[i] - minValues[i]);
+        }
+        for(int i=x.length; i < x.length * 2; ++i){
+            y[i] = 1 - y[i-x.length];
+        }
+        return y;
+    }
+
+    protected void query(List<double[]> batch){
+        int dimension = batch.get(0).length;
+        int m = batch.size();
+
+        //normalization
+        minValues = new double[dimension];
+        maxValues = new double[dimension];
+        for (int i = 0; i < dimension; ++i) {
+            minValues[i] = Double.MAX_VALUE;
+            maxValues[i] = Double.MIN_VALUE;
+        }
+
+        for (int i = 0; i < m; ++i) {
+            double[] x = batch.get(i);
+            for (int j = 0; j < dimension; ++j) {
+                maxValues[j] = Math.max(x[j], maxValues[j]);
+                minValues[j] = Math.min(x[j], minValues[j]);
+            }
+        }
+    }
+
+    protected void query(DataFrame batch) {
+
+        int dimension = batch.row(0).toArray().length;
+        int m = batch.rowCount();
+
+        //normalization
+        minValues = new double[dimension];
+        maxValues = new double[dimension];
+        for (int i = 0; i < dimension; ++i) {
+            minValues[i] = Double.MAX_VALUE;
+            maxValues[i] = Double.MIN_VALUE;
+        }
+
+        for (int i = 0; i < m; ++i) {
+            DataRow tuple = batch.row(i);
+            double[] x = tuple.toArray();
+            for (int j = 0; j < dimension; ++j) {
+                maxValues[j] = Math.max(x[j], maxValues[j]);
+                minValues[j] = Math.min(x[j], minValues[j]);
+            }
+        }
+    }
+}

+ 147 - 0
gtbook/src/main/java/opennlp/tools/svm/data/utils/transforms/Standardization.java

@@ -0,0 +1,147 @@
+package opennlp.tools.svm.data.utils.transforms;
+
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+
+import java.util.Arrays;
+import java.util.List;
+
+
+/**
+ * Created by xschen on 21/8/15.
+ */
+public class Standardization implements Cloneable {
+    private double[] mu;
+    private double[] std;
+
+    public void copy(Standardization rhs){
+        mu = rhs.mu.clone();
+        std = rhs.std.clone();
+    }
+
+    @Override
+    public Object clone() throws CloneNotSupportedException {
+        Standardization clone = (Standardization)super.clone();
+        clone.copy(this);
+
+        return clone;
+    }
+
+    public Standardization(){
+
+    }
+
+    public Standardization(DataFrame batch) {
+        query(batch);
+    }
+
+    public Standardization(List<double[]> batch){
+        query(batch);
+    }
+
+    public void fit(List<double[]> batch) {
+        query(batch);
+    }
+
+    public void fit(DataFrame batch) {
+        query(batch);
+    }
+
+
+    public double[] revert(double[] x){
+        double[] y = new double[x.length];
+        for(int i = 0; i < x.length; ++i){
+            y[i] = x[i] * std[i] + mu[i];
+        }
+        return y;
+    }
+
+    public double[] standardize(double[] x){
+        double[] y = new double[x.length];
+        for(int i = 0; i < x.length; ++i){
+            y[i] = (x[i] - mu[i]) / std[i];
+        }
+        return y;
+    }
+
+    protected void query(List<double[]> batch){
+        int dimension = batch.get(0).length;
+        int m = batch.size();
+
+        //normalization
+        mu = new double[dimension];
+        std = new double[dimension];
+        for (int i = 0; i < dimension; ++i) {
+            mu[i] = 0;
+            std[i] = 0;
+        }
+
+        for (int i = 0; i < m; ++i) {
+            double[] x = batch.get(i);
+            for (int j = 0; j < dimension; ++j) {
+                mu[j] += x[j];
+            }
+        }
+
+        for (int i = 0; i < dimension; ++i) {
+            mu[i] /= m;
+        }
+
+        for (int i = 0; i < m; ++i) {
+            double[] x = batch.get(i);
+            for (int j = 0; j < dimension; ++j) {
+                std[j] += Math.pow(x[j] - mu[j], 2);
+            }
+        }
+
+        for (int i = 0; i < dimension; ++i) {
+            std[i] /= (m - 1);
+        }
+
+        for (int i = 0; i < dimension; ++i) {
+            std[i] = Math.sqrt(std[i]);
+        }
+    }
+
+    protected void query(DataFrame batch) {
+
+        int dimension = batch.row(0).toArray().length;
+        int m = batch.rowCount();
+
+        //normalization
+        mu = new double[dimension];
+        std = new double[dimension];
+        for (int i = 0; i < dimension; ++i) {
+            mu[i] = 0;
+            std[i] = 0;
+        }
+
+        for (int i = 0; i < m; ++i) {
+            DataRow tuple = batch.row(i);
+            double[] x = tuple.toArray();
+            for (int j = 0; j < dimension; ++j) {
+                mu[j] += x[j];
+            }
+        }
+
+        for (int i = 0; i < dimension; ++i) {
+            mu[i] /= m;
+        }
+
+        for (int i = 0; i < m; ++i) {
+            DataRow tuple = batch.row(i);
+            double[] x = tuple.toArray();
+            for (int j = 0; j < dimension; ++j) {
+                std[j] += Math.pow(x[j] - mu[j], 2);
+            }
+        }
+
+        for (int i = 0; i < dimension; ++i) {
+            std[i] /= (m - 1);
+        }
+
+        for (int i = 0; i < dimension; ++i) {
+            std[i] = Math.sqrt(std[i]);
+        }
+
+    }
+}

+ 2878 - 0
gtbook/src/main/java/opennlp/tools/svm/libsvm/SupportVectorMachine.java

@@ -0,0 +1,2878 @@
+
+
+
+
+
+package opennlp.tools.svm.libsvm;
+
+import java.io.*;
+import java.util.Random;
+import java.util.StringTokenizer;
+
+
+//
+// Kernel Cache
+//
+// l is the number of total data items
+// size is the cache size limit in bytes
+//
+// LRU cache of kernel-matrix columns (ported libsvm code). Each of the l
+// items may cache a float[] prefix of its Q column; "size" tracks the
+// remaining capacity in floats.
+class Cache {
+	private final int l;
+	private long size;
+	private static final class head_t
+	{
+		head_t prev, next;	// a circular list
+		float[] data;
+		int len;		// data[0,len) is cached in this entry
+	}
+	private final head_t[] head;
+	private head_t lru_head;
+
+	Cache(int l_, long size_)
+	{
+		l = l_;
+		size = size_;
+		head = new head_t[l];
+		for(int i=0;i<l;i++) head[i] = new head_t();
+		// Convert the byte budget to a float count and reserve the header overhead.
+		size /= 4;
+		size -= times4(l);
+		size = Math.max(size, 2* (long) l);  // cache must be large enough for two columns
+		lru_head = new head_t();
+		lru_head.next = lru_head.prev = lru_head;
+	}
+
+	private long times4(long val) {
+		return val  * 4;
+	}
+
+
+	private void lru_delete(head_t h)
+	{
+		// delete from current location
+		h.prev.next = h.next;
+		h.next.prev = h.prev;
+	}
+
+	private void lru_insert(head_t h)
+	{
+		// insert to last position
+		h.next = lru_head;
+		h.prev = lru_head.prev;
+		h.prev.next = h;
+		h.next.prev = h;
+	}
+
+	// request data [0,len)
+	// return some position p where [p,len) need to be filled
+	// (p >= len if nothing needs to be filled)
+	// java: simulate pointer using single-element array
+	int get_data(int index, float[][] data, int len)
+	{
+		head_t h = head[index];
+		if(h.len > 0) lru_delete(h);
+		int more = len - h.len;
+
+		if(more > 0)
+		{
+			// free old space: evict least-recently-used entries until "more" floats fit
+			while(size < more)
+			{
+				head_t old = lru_head.next;
+				lru_delete(old);
+				size += old.len;
+				old.data = null;
+				old.len = 0;
+			}
+
+			// allocate new space, keeping the already-cached prefix
+			float[] new_data = new float[len];
+			if(h.data != null) System.arraycopy(h.data,0,new_data,0,h.len);
+			h.data = new_data;
+			size -= more;
+			// swap h.len and len so the return value is the old cached length
+			do {int tmp = h.len; h.len=len; len = tmp;} while(false);
+		}
+
+		lru_insert(h);
+		data[0] = h.data;
+		return len;
+	}
+
+	// Keep cached columns consistent after training swaps items i and j.
+	void swap_index(int i, int j)
+	{
+		if(i==j) return;
+		
+		if(head[i].len > 0) lru_delete(head[i]);
+		if(head[j].len > 0) lru_delete(head[j]);
+		do {float[] tmp = head[i].data; head[i].data=head[j].data; head[j].data = tmp;} while(false);
+		do {int tmp = head[i].len; head[i].len=head[j].len; head[j].len = tmp;} while(false);
+		if(head[i].len > 0) lru_insert(head[i]);
+		if(head[j].len > 0) lru_insert(head[j]);
+
+		if(i>j) do {int tmp = i; i=j; j = tmp;} while(false);
+		for(head_t h = lru_head.next; h!=lru_head; h=h.next)
+		{
+			if(h.len > i)
+			{
+				if(h.len > j)
+					do {float tmp = h.data[i]; h.data[i]=h.data[j]; h.data[j] = tmp;} while(false);
+				else
+				{
+					// give up: the cached prefix covers i but not j, so evict it
+					lru_delete(h);
+					size += h.len;
+					h.data = null;
+					h.len = 0;
+				}
+			}
+		}
+	}
+}
+
+
+//
+// Kernel evaluation
+//
+// the static method k_function is for doing single kernel evaluation
+// the constructor of Kernel prepares to calculate the l*l kernel matrix
+// the member function get_Q is for getting one column from the Q Matrix
+//
+// Abstract view of the l*l Q matrix used by the SMO solver.
+abstract class QMatrix {
+	// Returns one column of Q, filled up to len entries.
+	abstract float[] get_Q(int column, int len);
+	// Returns the diagonal of Q.
+	abstract double[] get_QD();
+	// Swaps items i and j so cached columns stay consistent with the solver's reordering.
+	abstract void swap_index(int i, int j);
+}
+
+
+// Kernel evaluation over sparse SupportVectorMachineNode vectors (ported
+// libsvm code). The constructor precomputes squared norms for RBF; the
+// static k_function does a single standalone evaluation.
+abstract class Kernel extends QMatrix {
+	private SupportVectorMachineNode[][] x;
+	private final double[] x_square;
+
+	// svm_parameter
+	private final int kernel_type;
+	private final int degree;
+	private final double gamma;
+	private final double coef0;
+
+	abstract float[] get_Q(int column, int len);
+	abstract double[] get_QD();
+
+	void swap_index(int i, int j)
+	{
+		// Keep the data and cached norms aligned when the solver reorders items.
+		do {SupportVectorMachineNode[] tmp = x[i]; x[i]=x[j]; x[j] = tmp;} while(false);
+		if(x_square != null) do {double tmp = x_square[i]; x_square[i]=x_square[j]; x_square[j] = tmp;} while(false);
+	}
+
+	// Integer power by repeated squaring: base^times.
+	private static double powi(double base, int times)
+	{
+		double tmp = base, ret = 1.0;
+
+		for(int t = times; t>0; t/=2)
+		{
+			if( isOdd(t) ) ret *= tmp;
+			tmp = tmp * tmp;
+		}
+		return ret;
+	}
+
+	private static boolean isOdd(int t) {
+		return  Math.abs(t) % 2 == 1;
+	}
+
+	// K(x_i, x_j) for the configured kernel type.
+	double kernel_function(int i, int j)
+	{
+		switch(kernel_type)
+		{
+			case svm_parameter.LINEAR:
+				return dot(x[i],x[j]);
+			case svm_parameter.POLY:
+				return powi(gamma*dot(x[i],x[j])+coef0,degree);
+			case svm_parameter.RBF:
+				// ||xi - xj||^2 = xi.xi + xj.xj - 2 xi.xj, using cached norms
+				return Math.exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
+			case svm_parameter.SIGMOID:
+				return Math.tanh(gamma*dot(x[i],x[j])+coef0);
+			case svm_parameter.PRECOMPUTED:
+				// node 0 of x_j holds the column index into the precomputed matrix
+				return x[i][(int)(x[j][0].value)].value;
+			default:
+				return 0;	// java
+		}
+	}
+
+	Kernel(int l, SupportVectorMachineNode[][] x_, svm_parameter param)
+	{
+		this.kernel_type = param.kernel_type;
+		this.degree = param.degree;
+		this.gamma = param.gamma;
+		this.coef0 = param.coef0;
+
+		x = x_.clone();
+
+		if(kernel_type == svm_parameter.RBF)
+		{
+			// Cache ||x_i||^2 once; reused for every RBF evaluation.
+			x_square = new double[l];
+			for(int i=0;i<l;i++)
+				x_square[i] = dot(x[i],x[i]);
+		}
+		else x_square = null;
+	}
+
+	// Sparse dot product: merge-walk the two index-sorted node arrays.
+	static double dot(SupportVectorMachineNode[] x, SupportVectorMachineNode[] y)
+	{
+		double sum = 0;
+		int xlen = x.length;
+		int ylen = y.length;
+		int i = 0;
+		int j = 0;
+		while(i < xlen && j < ylen)
+		{
+			if(x[i].index == y[j].index)
+				sum += x[i++].value * y[j++].value;
+			else
+			{
+				if(x[i].index > y[j].index)
+					++j;
+				else
+					++i;
+			}
+		}
+		return sum;
+	}
+
+	// Standalone kernel evaluation used at prediction time (no cached norms).
+	static double k_function(SupportVectorMachineNode[] x, SupportVectorMachineNode[] y,
+					svm_parameter param)
+	{
+		switch(param.kernel_type)
+		{
+			case svm_parameter.LINEAR:
+				return dot(x,y);
+			case svm_parameter.POLY:
+				return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
+			case svm_parameter.RBF:
+			{
+				// Accumulate ||x - y||^2 with a sparse merge-walk; indices
+				// present in only one vector contribute their square.
+				double sum = 0;
+				int xlen = x.length;
+				int ylen = y.length;
+				int i = 0;
+				int j = 0;
+				while(i < xlen && j < ylen)
+				{
+					if(x[i].index == y[j].index)
+					{
+						double d = x[i++].value - y[j++].value;
+						sum += d*d;
+					}
+					else if(x[i].index > y[j].index)
+					{
+						sum += y[j].value * y[j].value;
+						++j;
+					}
+					else
+					{
+						sum += x[i].value * x[i].value;
+						++i;
+					}
+				}
+
+				while(i < xlen)
+				{
+					sum += x[i].value * x[i].value;
+					++i;
+				}
+
+				while(j < ylen)
+				{
+					sum += y[j].value * y[j].value;
+					++j;
+				}
+
+				return Math.exp(-param.gamma*sum);
+			}
+			case svm_parameter.SIGMOID:
+				return Math.tanh(param.gamma*dot(x,y)+param.coef0);
+			case svm_parameter.PRECOMPUTED:
+				return	x[(int)(y[0].value)].value;
+			default:
+				return 0;	// java
+		}
+	}
+}
+
+
+// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
+// Solves:
+//
+//	min 0.5(\alpha^T Q \alpha) + p^T \alpha
+//
+//		y^T \alpha = \delta
+//		y_i = +1 or -1
+//		0 <= alpha_i <= Cp for y_i = 1
+//		0 <= alpha_i <= Cn for y_i = -1
+//
+// Given:
+//
+//	Q, p, y, Cp, Cn, and an initial feasible point \alpha
+//	l is the size of vectors and matrices
+//	eps is the stopping tolerance
+//
+// solution will be put in \alpha, objective value will be put in obj
+//
class Solver {
	int active_size;	// number of variables still in the working (non-shrunk) set
	byte[] y;		// labels, +1 / -1
	double[] G;		// gradient of objective function
	static final byte LOWER_BOUND = 0;
	static final byte UPPER_BOUND = 1;
	static final byte FREE = 2;
	byte[] alpha_status;	// LOWER_BOUND, UPPER_BOUND, FREE
	double[] alpha;
	QMatrix Q;		// provider of Q-matrix rows and diagonal
	double[] QD;		// diagonal entries of Q
	double eps;		// stopping tolerance
	double Cp,Cn;		// upper bounds for y=+1 and y=-1 variables
	double[] p;		// linear term of the objective
	int[] active_set;	// active index -> original index mapping (for shrinking)
	double[] G_bar;		// gradient, if we treat free variables as 0
	int l;			// problem size
	boolean unshrink;	// XXX

	static final double INF = Double.POSITIVE_INFINITY;

	// Per-variable upper bound: Cp for positive labels, Cn for negative.
	double get_C(int i)
	{
		return (y[i] > 0)? Cp : Cn;
	}
	// Reclassify alpha[i] as at-upper-bound, at-lower-bound, or free.
	void update_alpha_status(int i)
	{
		if(alpha[i] >= get_C(i))
			alpha_status[i] = UPPER_BOUND;
		else if(alpha[i] <= 0)
			alpha_status[i] = LOWER_BOUND;
		else alpha_status[i] = FREE;
	}
	boolean is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
	boolean is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
	boolean is_free(int i) {  return alpha_status[i] == FREE; }

	// java: information about solution except alpha,
	// because we cannot return multiple values otherwise...
	static class SolutionInfo {
		double obj;
		double rho;
		double upper_bound_p;
		double upper_bound_n;
		double r;	// for Solver_NU
	}

	// Swap all per-variable state for indices i and j (used by shrinking).
	void swap_index(int i, int j)
	{
		Q.swap_index(i,j);
		do {byte tmp = y[i]; y[i]=y[j]; y[j] = tmp;} while(false);
		do {double tmp = G[i]; G[i]=G[j]; G[j] = tmp;} while(false);
		do {byte tmp = alpha_status[i]; alpha_status[i]=alpha_status[j]; alpha_status[j] = tmp;} while(false);
		do {double tmp = alpha[i]; alpha[i]=alpha[j]; alpha[j] = tmp;} while(false);
		do {double tmp = p[i]; p[i]=p[j]; p[j] = tmp;} while(false);
		do {int tmp = active_set[i]; active_set[i]=active_set[j]; active_set[j] = tmp;} while(false);
		do {double tmp = G_bar[i]; G_bar[i]=G_bar[j]; G_bar[j] = tmp;} while(false);
	}

	void reconstruct_gradient()
	{
		// reconstruct inactive elements of G from G_bar and free variables

		if(active_size == l) return;

		int i,j;
		int nr_free = 0;

		for(j=active_size;j<l;j++)
			G[j] = G_bar[j] + p[j];

		for(j=0;j<active_size;j++)
			if(is_free(j))
				nr_free++;

		if(2*nr_free < active_size)
			SupportVectorMachine.info("\nWARNING: using -h 0 may be faster\n");

		// Two equivalent reconstruction orders; pick the one touching fewer
		// kernel entries (cost heuristic from upstream LIBSVM).
		if (nr_free*l > 2*active_size*(l-active_size))
		{
			for(i=active_size;i<l;i++)
			{
				float[] Q_i = Q.get_Q(i,active_size);
				for(j=0;j<active_size;j++)
					if(is_free(j))
						G[i] += alpha[j] * Q_i[j];
			}	
		}
		else
		{
			for(i=0;i<active_size;i++)
				if(is_free(i))
				{
					float[] Q_i = Q.get_Q(i,l);
					double alpha_i = alpha[i];
					for(j=active_size;j<l;j++)
						G[j] += alpha_i * Q_i[j];
				}
		}
	}

	// Main SMO loop: solves the dual QP described in the header comment.
	// On return, alpha_ holds the solution and si carries obj/rho/bounds.
	void Solve(int l, QMatrix Q, double[] p_, byte[] y_,
		   double[] alpha_, double Cp, double Cn, double eps, SolutionInfo si, int shrinking)
	{
		this.l = l;
		this.Q = Q;
		QD = Q.get_QD();
		p = p_.clone();
		y = y_.clone();
		alpha = alpha_.clone();
		this.Cp = Cp;
		this.Cn = Cn;
		this.eps = eps;
		this.unshrink = false;

		// initialize alpha_status
		{
			alpha_status = new byte[l];
			for(int i=0;i<l;i++)
				update_alpha_status(i);
		}

		// initialize active set (for shrinking)
		{
			active_set = new int[l];
			for(int i=0;i<l;i++)
				active_set[i] = i;
			active_size = l;
		}

		// initialize gradient
		{
			G = new double[l];
			G_bar = new double[l];
			int i;
			for(i=0;i<l;i++)
			{
				G[i] = p[i];
				G_bar[i] = 0;
			}
			for(i=0;i<l;i++)
				if(!is_lower_bound(i))
				{
					float[] Q_i = Q.get_Q(i,l);
					double alpha_i = alpha[i];
					int j;
					for(j=0;j<l;j++)
						G[j] += alpha_i*Q_i[j];
					if(is_upper_bound(i))
						for(j=0;j<l;j++)
							G_bar[j] += get_C(i) * Q_i[j];
				}
		}

		// optimization step

		int iter = 0;
		// Iteration cap: 100*l, clamped to avoid int overflow, but at least 1e7.
		int max_iter = Math.max(10000000, l>Integer.MAX_VALUE/100 ? Integer.MAX_VALUE : 100*l);
		int counter = Math.min(l,1000)+1;
		int[] working_set = new int[2];

		while(iter < max_iter)
		{
			// show progress and do shrinking

			if(--counter == 0)
			{
				counter = Math.min(l,1000);
				if(shrinking!=0) do_shrinking();
				SupportVectorMachine.info(".");
			}

			if(select_working_set(working_set)!=0)
			{
				// reconstruct the whole gradient
				reconstruct_gradient();
				// reset active set size and check
				active_size = l;
				SupportVectorMachine.info("*");
				if(select_working_set(working_set)!=0)
					break;
				else
					counter = 1;	// do shrinking next iteration
			}
			
			int i = working_set[0];
			int j = working_set[1];

			++iter;

			// update alpha[i] and alpha[j], handle bounds carefully

			float[] Q_i = Q.get_Q(i,active_size);
			float[] Q_j = Q.get_Q(j,active_size);

			double C_i = get_C(i);
			double C_j = get_C(j);

			double old_alpha_i = alpha[i];
			double old_alpha_j = alpha[j];

			if(y[i]!=y[j])
			{
				double quad_coef = QD[i]+QD[j]+2*Q_i[j];
				if (quad_coef <= 0)
					quad_coef = 1e-12;	// guard against non-PSD / numeric noise
				double delta = (-G[i]-G[j])/quad_coef;
				double diff = alpha[i] - alpha[j];
				alpha[i] += delta;
				alpha[j] += delta;
			
				// Clip back into the feasible box [0,C_i] x [0,C_j]
				// while keeping alpha[i]-alpha[j] = diff.
				if(diff > 0)
				{
					if(alpha[j] < 0)
					{
						alpha[j] = 0;
						alpha[i] = diff;
					}
				}
				else
				{
					if(alpha[i] < 0)
					{
						alpha[i] = 0;
						alpha[j] = -diff;
					}
				}
				if(diff > C_i - C_j)
				{
					if(alpha[i] > C_i)
					{
						alpha[i] = C_i;
						alpha[j] = C_i - diff;
					}
				}
				else
				{
					if(alpha[j] > C_j)
					{
						alpha[j] = C_j;
						alpha[i] = C_j + diff;
					}
				}
			}
			else
			{
				double quad_coef = QD[i]+QD[j]-2*Q_i[j];
				if (quad_coef <= 0)
					quad_coef = 1e-12;	// guard against non-PSD / numeric noise
				double delta = (G[i]-G[j])/quad_coef;
				double sum = alpha[i] + alpha[j];
				alpha[i] -= delta;
				alpha[j] += delta;

				// Clip back into the box while keeping alpha[i]+alpha[j] = sum.
				if(sum > C_i)
				{
					if(alpha[i] > C_i)
					{
						alpha[i] = C_i;
						alpha[j] = sum - C_i;
					}
				}
				else
				{
					if(alpha[j] < 0)
					{
						alpha[j] = 0;
						alpha[i] = sum;
					}
				}
				if(sum > C_j)
				{
					if(alpha[j] > C_j)
					{
						alpha[j] = C_j;
						alpha[i] = sum - C_j;
					}
				}
				else
				{
					if(alpha[i] < 0)
					{
						alpha[i] = 0;
						alpha[j] = sum;
					}
				}
			}

			// update G

			double delta_alpha_i = alpha[i] - old_alpha_i;
			double delta_alpha_j = alpha[j] - old_alpha_j;

			for(int k=0;k<active_size;k++)
			{
				G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
			}

			// update alpha_status and G_bar

			{
				boolean ui = is_upper_bound(i);
				boolean uj = is_upper_bound(j);
				update_alpha_status(i);
				update_alpha_status(j);
				int k;
				// G_bar only changes when a variable crosses the upper bound.
				if(ui != is_upper_bound(i))
				{
					Q_i = Q.get_Q(i,l);
					if(ui)
						for(k=0;k<l;k++)
							G_bar[k] -= C_i * Q_i[k];
					else
						for(k=0;k<l;k++)
							G_bar[k] += C_i * Q_i[k];
				}

				if(uj != is_upper_bound(j))
				{
					Q_j = Q.get_Q(j,l);
					if(uj)
						for(k=0;k<l;k++)
							G_bar[k] -= C_j * Q_j[k];
					else
						for(k=0;k<l;k++)
							G_bar[k] += C_j * Q_j[k];
				}
			}

		}
		
		if(iter >= max_iter)
		{
			if(active_size < l)
			{
				// reconstruct the whole gradient to calculate objective value
				reconstruct_gradient();
				active_size = l;
				SupportVectorMachine.info("*");
			}
			System.err.print("\nWARNING: reaching max number of iterations\n");
		}

		// calculate rho

		si.rho = calculate_rho();

		// calculate objective value
		{
			double v = 0;
			int i;
			for(i=0;i<l;i++)
				v += alpha[i] * (G[i] + p[i]);

			si.obj = v/2;
		}

		// put back the solution (undo the shrinking permutation)
		{
			for(int i=0;i<l;i++)
				alpha_[active_set[i]] = alpha[i];
		}

		si.upper_bound_p = Cp;
		si.upper_bound_n = Cn;

		SupportVectorMachine.info("\noptimization finished, #iter = "+iter+"\n");
	}

	// return 1 if already optimal, return 0 otherwise
	int select_working_set(int[] working_set)
	{
		// return i,j such that
		// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
		// j: minimizes the decrease of obj value
		//    (if quadratic coefficient <= 0, replace it with tau)
		//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
		
		double Gmax = -INF;
		double Gmax2 = -INF;
		int Gmax_idx = -1;
		int Gmin_idx = -1;
		double obj_diff_min = INF;
	
		for(int t=0;t<active_size;t++)
			if(y[t]==+1)	
			{
				if(!is_upper_bound(t))
					if(-G[t] >= Gmax)
					{
						Gmax = -G[t];
						Gmax_idx = t;
					}
			}
			else
			{
				if(!is_lower_bound(t))
					if(G[t] >= Gmax)
					{
						Gmax = G[t];
						Gmax_idx = t;
					}
			}
	
		int i = Gmax_idx;
		float[] Q_i = null;
		if(i != -1) // null Q_i not accessed: Gmax=-INF if i=-1
			Q_i = Q.get_Q(i,active_size);
	
		// Second-order selection of j: maximize the objective decrease
		// paired with the already-chosen i (Fan et al., JMLR 2005).
		for(int j=0;j<active_size;j++)
		{
			if(y[j]==+1)
			{
				if (!is_lower_bound(j))
				{
					double grad_diff=Gmax+G[j];
					if (G[j] >= Gmax2)
						Gmax2 = G[j];
					if (grad_diff > 0)
					{
						double obj_diff; 
						double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
						if (quad_coef > 0)
							obj_diff = -(grad_diff*grad_diff)/quad_coef;
						else
							obj_diff = -(grad_diff*grad_diff)/1e-12;
	
						if (obj_diff <= obj_diff_min)
						{
							Gmin_idx=j;
							obj_diff_min = obj_diff;
						}
					}
				}
			}
			else
			{
				if (!is_upper_bound(j))
				{
					double grad_diff= Gmax-G[j];
					if (-G[j] >= Gmax2)
						Gmax2 = -G[j];
					if (grad_diff > 0)
					{
						double obj_diff; 
						double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
						if (quad_coef > 0)
							obj_diff = -(grad_diff*grad_diff)/quad_coef;
						else
							obj_diff = -(grad_diff*grad_diff)/1e-12;
	
						if (obj_diff <= obj_diff_min)
						{
							Gmin_idx=j;
							obj_diff_min = obj_diff;
						}
					}
				}
			}
		}

		// Maximal KKT violation below tolerance -> optimal.
		if(Gmax+Gmax2 < eps)
			return 1;

		working_set[0] = Gmax_idx;
		working_set[1] = Gmin_idx;
		return 0;
	}

	// True if variable i can be removed from the active set without
	// affecting the current maximal violating pair.
	private boolean be_shrunk(int i, double Gmax1, double Gmax2)
	{	
		if(is_upper_bound(i))
		{
			if(y[i]==+1)
				return(-G[i] > Gmax1);
			else
				return(-G[i] > Gmax2);
		}
		else if(is_lower_bound(i))
		{
			if(y[i]==+1)
				return(G[i] > Gmax2);
			else	
				return(G[i] > Gmax1);
		}
		else
			return(false);
	}

	void do_shrinking()
	{
		int i;
		double Gmax1 = -INF;		// max { -y_i * grad(f)_i | i in I_up(\alpha) }
		double Gmax2 = -INF;		// max { y_i * grad(f)_i | i in I_low(\alpha) }

		// find maximal violating pair first
		for(i=0;i<active_size;i++)
		{
			if(y[i]==+1)
			{
				if(!is_upper_bound(i))	
				{
					if(-G[i] >= Gmax1)
						Gmax1 = -G[i];
				}
				if(!is_lower_bound(i))
				{
					if(G[i] >= Gmax2)
						Gmax2 = G[i];
				}
			}
			else		
			{
				if(!is_upper_bound(i))	
				{
					if(-G[i] >= Gmax2)
						Gmax2 = -G[i];
				}
				if(!is_lower_bound(i))	
				{
					if(G[i] >= Gmax1)
						Gmax1 = G[i];
				}
			}
		}

		// Near convergence: unshrink everything once so the final
		// optimality check runs over all variables.
		if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 
		{
			unshrink = true;
			reconstruct_gradient();
			active_size = l;
		}

		// Compact the active set: swap shrinkable variables to the tail.
		for(i=0;i<active_size;i++)
			if (be_shrunk(i, Gmax1, Gmax2))
			{
				active_size--;
				while (active_size > i)
				{
					if (!be_shrunk(active_size, Gmax1, Gmax2))
					{
						swap_index(i,active_size);
						break;
					}
					active_size--;
				}
			}
	}

	// Bias term: average gradient over free SVs, or the midpoint of the
	// feasible interval when no variable is free.
	double calculate_rho()
	{
		double r;
		int nr_free = 0;
		double ub = INF, lb = -INF, sum_free = 0;
		for(int i=0;i<active_size;i++)
		{
			double yG = y[i]*G[i];

			if(is_lower_bound(i))
			{
				if(y[i] > 0)
					ub = Math.min(ub,yG);
				else
					lb = Math.max(lb,yG);
			}
			else if(is_upper_bound(i))
			{
				if(y[i] < 0)
					ub = Math.min(ub,yG);
				else
					lb = Math.max(lb,yG);
			}
			else
			{
				++nr_free;
				sum_free += yG;
			}
		}

		if(nr_free>0)
			r = sum_free/nr_free;
		else
			r = (ub+lb)/2;

		return r;
	}

}
+
+
+//
+// Solver for nu-svm classification and regression
+//
+// additional constraint: e^T \alpha = constant
+//
final class Solver_NU extends Solver
{
	private SolutionInfo si;

	// Same as Solver.Solve but keeps a reference to si so calculate_rho()
	// can also report r (needed by nu-SVM formulations).
	void Solve(int l, QMatrix Q, double[] p, byte[] y,
		   double[] alpha, double Cp, double Cn, double eps,
		   SolutionInfo si, int shrinking)
	{
		this.si = si;
		super.Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
	}

	// return 1 if already optimal, return 0 otherwise
	int select_working_set(int[] working_set)
	{
		// return i,j such that y_i = y_j and
		// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
		// j: minimizes the decrease of obj value
		//    (if quadratic coefficient <= 0, replace it with tau)
		//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
	
		// nu-SVM has the extra constraint e^T alpha = const per class,
		// so the pair is selected within each label class separately.
		double Gmaxp = -INF;
		double Gmaxp2 = -INF;
		int Gmaxp_idx = -1;
	
		double Gmaxn = -INF;
		double Gmaxn2 = -INF;
		int Gmaxn_idx = -1;
	
		int Gmin_idx = -1;
		double obj_diff_min = INF;
	
		for(int t=0;t<active_size;t++)
			if(y[t]==+1)
			{
				if(!is_upper_bound(t))
					if(-G[t] >= Gmaxp)
					{
						Gmaxp = -G[t];
						Gmaxp_idx = t;
					}
			}
			else
			{
				if(!is_lower_bound(t))
					if(G[t] >= Gmaxn)
					{
						Gmaxn = G[t];
						Gmaxn_idx = t;
					}
			}
	
		int ip = Gmaxp_idx;
		int in = Gmaxn_idx;
		float[] Q_ip = null;
		float[] Q_in = null;
		if(ip != -1) // null Q_ip not accessed: Gmaxp=-INF if ip=-1
			Q_ip = Q.get_Q(ip,active_size);
		if(in != -1)
			Q_in = Q.get_Q(in,active_size);
	
		for(int j=0;j<active_size;j++)
		{
			if(y[j]==+1)
			{
				if (!is_lower_bound(j))	
				{
					double grad_diff=Gmaxp+G[j];
					if (G[j] >= Gmaxp2)
						Gmaxp2 = G[j];
					if (grad_diff > 0)
					{
						double obj_diff; 
						double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
						if (quad_coef > 0)
							obj_diff = -(grad_diff*grad_diff)/quad_coef;
						else
							obj_diff = -(grad_diff*grad_diff)/1e-12;
	
						if (obj_diff <= obj_diff_min)
						{
							Gmin_idx=j;
							obj_diff_min = obj_diff;
						}
					}
				}
			}
			else
			{
				if (!is_upper_bound(j))
				{
					double grad_diff=Gmaxn-G[j];
					if (-G[j] >= Gmaxn2)
						Gmaxn2 = -G[j];
					if (grad_diff > 0)
					{
						double obj_diff; 
						double quad_coef = QD[in]+QD[j]-2*Q_in[j];
						if (quad_coef > 0)
							obj_diff = -(grad_diff*grad_diff)/quad_coef;
						else
							obj_diff = -(grad_diff*grad_diff)/1e-12;
	
						if (obj_diff <= obj_diff_min)
						{
							Gmin_idx=j;
							obj_diff_min = obj_diff;
						}
					}
				}
			}
		}

		if(Math.max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
			return 1;
	
		// Pair j with the maximal violator of its own label class.
		if(y[Gmin_idx] == +1)
			working_set[0] = Gmaxp_idx;
		else
			working_set[0] = Gmaxn_idx;
		working_set[1] = Gmin_idx;
	
		return 0;
	}

	// Shrinking test with per-class violation bounds (Gmax1/2 for y=+1,
	// Gmax3/4 for y=-1).
	private boolean be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
	{
		if(is_upper_bound(i))
		{
			if(y[i]==+1)
				return(-G[i] > Gmax1);
			else	
				return(-G[i] > Gmax4);
		}
		else if(is_lower_bound(i))
		{
			if(y[i]==+1)
				return(G[i] > Gmax2);
			else	
				return(G[i] > Gmax3);
		}
		else
			return(false);
	}

	void do_shrinking()
	{
		double Gmax1 = -INF;	// max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
		double Gmax2 = -INF;	// max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
		double Gmax3 = -INF;	// max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
		double Gmax4 = -INF;	// max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
 
		// find maximal violating pair first
		int i;
		for(i=0;i<active_size;i++)
		{
			if(!is_upper_bound(i))
			{
				if(y[i]==+1)
				{
					if(-G[i] > Gmax1) Gmax1 = -G[i];
				}
				else	if(-G[i] > Gmax4) Gmax4 = -G[i];
			}
			if(!is_lower_bound(i))
			{
				if(y[i]==+1)
				{	
					if(G[i] > Gmax2) Gmax2 = G[i];
				}
				else	if(G[i] > Gmax3) Gmax3 = G[i];
			}
		}

		// Near convergence: unshrink once so the final check sees everything.
		if(unshrink == false && Math.max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 
		{
			unshrink = true;
			reconstruct_gradient();
			active_size = l;
		}

		for(i=0;i<active_size;i++)
			if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
			{
				active_size--;
				while (active_size > i)
				{
					if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
					{
						swap_index(i,active_size);
						break;
					}
					active_size--;
				}
			}
	}
	
	// nu-SVM needs two bias terms (one per class); returns (r1-r2)/2 as rho
	// and stores (r1+r2)/2 in si.r.
	double calculate_rho()
	{
		int nr_free1 = 0,nr_free2 = 0;
		double ub1 = INF, ub2 = INF;
		double lb1 = -INF, lb2 = -INF;
		double sum_free1 = 0, sum_free2 = 0;

		for(int i=0;i<active_size;i++)
		{
			if(y[i]==+1)
			{
				if(is_lower_bound(i))
					ub1 = Math.min(ub1,G[i]);
				else if(is_upper_bound(i))
					lb1 = Math.max(lb1,G[i]);
				else
				{
					++nr_free1;
					sum_free1 += G[i];
				}
			}
			else
			{
				if(is_lower_bound(i))
					ub2 = Math.min(ub2,G[i]);
				else if(is_upper_bound(i))
					lb2 = Math.max(lb2,G[i]);
				else
				{
					++nr_free2;
					sum_free2 += G[i];
				}
			}
		}

		double r1,r2;
		if(nr_free1 > 0)
			r1 = sum_free1/nr_free1;
		else
			r1 = (ub1+lb1)/2;

		if(nr_free2 > 0)
			r2 = sum_free2/nr_free2;
		else
			r2 = (ub2+lb2)/2;

		si.r = (r1+r2)/2;
		return (r1-r2)/2;
	}
}
+
+
+//
+// Q matrices for various formulations
+//
// Q matrix for C-SVC / nu-SVC: Q_ij = y_i * y_j * K(x_i, x_j),
// with rows cached in an LRU kernel cache.
class SVC_Q extends Kernel
{
	private final byte[] y;
	private final Cache cache;
	private final double[] QD;	// precomputed diagonal Q_ii

	SVC_Q(svm_problem prob, svm_parameter param, byte[] y_)
	{
		super(prob.l, prob.x, param);
		y = y_.clone();
		// cache_size is in MB; convert to bytes for the Cache.
		cache = new Cache(prob.l,(long)(param.cache_size*(1<<20)));
		QD = new double[prob.l];
		for(int i=0;i<prob.l;i++)
			QD[i] = kernel_function(i,i);
	}

	// Returns the first `len` entries of row i, computing only the part
	// the cache does not already hold (columns start..len-1).
	float[] get_Q(int i, int len)
	{
		float[][] data = new float[1][];
		int start, j;
		if((start = cache.get_data(i,data,len)) < len)
		{
			for(j=start;j<len;j++)
				data[0][j] = (float)(y[i]*y[j]*kernel_function(i,j));
		}
		return data[0];
	}

	double[] get_QD()
	{
		return QD;
	}

	// Keep cache, kernel state, labels and diagonal consistent when the
	// solver permutes indices during shrinking.
	void swap_index(int i, int j)
	{
		cache.swap_index(i,j);
		super.swap_index(i,j);
		do {byte tmp = y[i]; y[i]=y[j]; y[j] = tmp;} while(false);
		do {double tmp = QD[i]; QD[i]=QD[j]; QD[j] = tmp;} while(false);
	}
}
+
+
// Q matrix for one-class SVM: Q_ij = K(x_i, x_j) (no label signs).
class ONE_CLASS_Q extends Kernel
{
	private final Cache cache;
	private final double[] QD;	// precomputed diagonal Q_ii

	ONE_CLASS_Q(svm_problem prob, svm_parameter param)
	{
		super(prob.l, prob.x, param);
		// cache_size is in MB; convert to bytes for the Cache.
		cache = new Cache(prob.l,(long)(param.cache_size*(1<<20)));
		QD = new double[prob.l];
		for(int i=0;i<prob.l;i++)
			QD[i] = kernel_function(i,i);
	}

	// Returns the first `len` entries of row i, filling only the columns
	// the cache does not already hold.
	float[] get_Q(int i, int len)
	{
		float[][] data = new float[1][];
		int start, j;
		if((start = cache.get_data(i,data,len)) < len)
		{
			for(j=start;j<len;j++)
				data[0][j] = (float)kernel_function(i,j);
		}
		return data[0];
	}

	double[] get_QD()
	{
		return QD;
	}

	void swap_index(int i, int j)
	{
		cache.swap_index(i,j);
		super.swap_index(i,j);
		do {double tmp = QD[i]; QD[i]=QD[j]; QD[j] = tmp;} while(false);
	}
}
+
+
// Q matrix for epsilon-SVR / nu-SVR. The regression dual doubles the
// problem to 2l variables (alpha+ and alpha-); sign[] and index[] map each
// doubled index back to its original sample and sign, so only l kernel
// rows are ever cached.
class SVR_Q extends Kernel
{
	private final int l;
	private final Cache cache;
	private final byte[] sign;	// +1 for first half, -1 for second half
	private final int[] index;	// doubled index -> original sample index
	private int next_buffer;	// alternates between the two output buffers
	private float[][] buffer;	// two buffers so two live get_Q rows don't clash
	private final double[] QD;

	SVR_Q(svm_problem prob, svm_parameter param)
	{
		super(prob.l, prob.x, param);
		l = prob.l;
		// cache_size is in MB; convert to bytes for the Cache.
		cache = new Cache(l,(long)(param.cache_size*(1<<20)));
		QD = new double[2*l];
		sign = new byte[2*l];
		index = new int[2*l];
		for(int k=0;k<l;k++)
		{
			sign[k] = 1;
			sign[k+l] = -1;
			index[k] = k;
			index[k+l] = k;
			QD[k] = kernel_function(k,k);
			QD[k+l] = QD[k];
		}
		buffer = new float[2][2*l];
		next_buffer = 0;
	}

	// Only the mapping arrays are swapped; the cached kernel rows stay
	// keyed by original sample index.
	void swap_index(int i, int j)
	{
		do {byte tmp = sign[i]; sign[i]=sign[j]; sign[j] = tmp;} while(false);
		do {int tmp = index[i]; index[i]=index[j]; index[j] = tmp;} while(false);
		do {double tmp = QD[i]; QD[i]=QD[j]; QD[j] = tmp;} while(false);
	}

	float[] get_Q(int i, int len)
	{
		float[][] data = new float[1][];
		int j, real_i = index[i];
		// Always fetch the full length-l kernel row for the real sample.
		if(cache.get_data(real_i,data,l) < l)
		{
			for(j=0;j<l;j++)
				data[0][j] = (float)kernel_function(real_i,j);
		}

		// reorder and copy
		float buf[] = buffer[next_buffer];
		next_buffer = 1 - next_buffer;
		byte si = sign[i];
		for(j=0;j<len;j++)
			buf[j] = (float) si * sign[j] * data[0][index[j]];
		return buf;
	}

	double[] get_QD()
	{
		return QD;
	}
}
+
+
+public class SupportVectorMachine {
+	//
+	// construct and solve various formulations
+	//
	// Upstream LIBSVM version this port is based on.
	public static final int LIBSVM_VERSION=320; 
	// Shared RNG for cross-validation shuffles (unseeded -> non-reproducible runs).
	public static final Random rand = new Random();

	// Default print hook: writes progress output to stdout.
	private static svm_print_interface svm_print_stdout = new svm_print_interface()
	{
		public void print(String s)
		{
			System.out.print(s);
			System.out.flush();
		}
	};

	// Currently active print hook; all solver progress goes through info().
	private static svm_print_interface svm_print_string = svm_print_stdout;

	// Emit a progress/diagnostic message through the configured print hook.
	static void info(String s) 
	{
		svm_print_string.print(s);
	}
+
	// C-SVC: minimize 0.5 a^T Q a - e^T a subject to 0<=a_i<=C, y^T a = 0.
	// On return, alpha[i] holds y_i * a_i (signed coefficients).
	private static void solve_c_svc(svm_problem prob, svm_parameter param,
					double[] alpha, Solver.SolutionInfo si,
					double Cp, double Cn)
	{
		int l = prob.l;
		double[] minus_ones = new double[l];
		byte[] y = new byte[l];

		int i;

		for(i=0;i<l;i++)
		{
			alpha[i] = 0;
			minus_ones[i] = -1;	// linear term of the C-SVC dual
			if(prob.y[i] > 0) y[i] = +1; else y[i] = -1;
		}

		Solver s = new Solver();
		s.Solve(l, new SVC_Q(prob,param,y), minus_ones, y,
			alpha, Cp, Cn, param.eps, si, param.shrinking);

		double sum_alpha=0;
		for(i=0;i<l;i++)
			sum_alpha += alpha[i];

		if (Cp==Cn)
			SupportVectorMachine.info("nu = "+sum_alpha/(Cp*prob.l)+"\n");

		// Fold the label sign into alpha for the decision function.
		for(i=0;i<l;i++)
			alpha[i] *= y[i];
	}
+
	// nu-SVC: solved via Solver_NU with C=1, then rescaled by 1/r so the
	// solution matches the C-SVC parameterization (C = 1/r).
	private static void solve_nu_svc(svm_problem prob, svm_parameter param,
					double[] alpha, Solver.SolutionInfo si)
	{
		int i;
		int l = prob.l;
		double nu = param.nu;

		byte[] y = new byte[l];

		for(i=0;i<l;i++)
			if(prob.y[i]>0)
				y[i] = +1;
			else
				y[i] = -1;

		// Initial feasible point: distribute nu*l/2 mass over each class.
		double sum_pos = nu*l/2;
		double sum_neg = nu*l/2;

		for(i=0;i<l;i++)
			if(y[i] == +1)
			{
				alpha[i] = Math.min(1.0,sum_pos);
				sum_pos -= alpha[i];
			}
			else
			{
				alpha[i] = Math.min(1.0,sum_neg);
				sum_neg -= alpha[i];
			}

		double[] zeros = new double[l];

		for(i=0;i<l;i++)
			zeros[i] = 0;

		Solver_NU s = new Solver_NU();
		s.Solve(l, new SVC_Q(prob,param,y), zeros, y,
			alpha, 1.0, 1.0, param.eps, si, param.shrinking);
		double r = si.r;

		SupportVectorMachine.info("C = "+1/r+"\n");

		// Rescale solution, bias and objective into C-SVC form.
		for(i=0;i<l;i++)
			alpha[i] *= y[i]/r;

		si.rho /= r;
		si.obj /= (r*r);
		si.upper_bound_p = 1/r;
		si.upper_bound_n = 1/r;
	}
+
	// One-class SVM: nu*l alphas start at the upper bound 1, one fractional
	// alpha carries the remainder, the rest start at 0.
	private static void solve_one_class(svm_problem prob, svm_parameter param,
					double[] alpha, Solver.SolutionInfo si)
	{
		int l = prob.l;
		double[] zeros = new double[l];
		byte[] ones = new byte[l];
		int i;

		int n = (int)(param.nu*prob.l);	// # of alpha's at upper bound

		for(i=0;i<n;i++)
			alpha[i] = 1;
		if(n<prob.l)
			alpha[n] = param.nu * prob.l - n;	// fractional remainder
		for(i=n+1;i<l;i++)
			alpha[i] = 0;

		for(i=0;i<l;i++)
		{
			zeros[i] = 0;
			ones[i] = 1;	// all "labels" are +1 in the one-class dual
		}

		Solver s = new Solver();
		s.Solve(l, new ONE_CLASS_Q(prob,param), zeros, ones,
			alpha, 1.0, 1.0, param.eps, si, param.shrinking);
	}
+
	// epsilon-SVR: doubles the problem to 2l variables (alpha+ in the first
	// half with label +1, alpha- in the second half with label -1) and
	// returns alpha[i] = alpha+_i - alpha-_i.
	private static void solve_epsilon_svr(svm_problem prob, svm_parameter param,
					double[] alpha, Solver.SolutionInfo si)
	{
		int l = prob.l;
		double[] alpha2 = new double[2*l];
		double[] linear_term = new double[2*l];
		byte[] y = new byte[2*l];
		int i;

		for(i=0;i<l;i++)
		{
			alpha2[i] = 0;
			linear_term[i] = param.p - prob.y[i];	// epsilon - y_i
			y[i] = 1;

			alpha2[i+l] = 0;
			linear_term[i+l] = param.p + prob.y[i];	// epsilon + y_i
			y[i+l] = -1;
		}

		Solver s = new Solver();
		s.Solve(2*l, new SVR_Q(prob,param), linear_term, y,
			alpha2, param.C, param.C, param.eps, si, param.shrinking);

		double sum_alpha = 0;
		for(i=0;i<l;i++)
		{
			alpha[i] = alpha2[i] - alpha2[i+l];
			sum_alpha += Math.abs(alpha[i]);
		}
		SupportVectorMachine.info("nu = "+sum_alpha/(param.C*l)+"\n");
	}
+
	// nu-SVR: like epsilon-SVR but with a C*nu*l/2 mass constraint instead
	// of a fixed epsilon; the effective epsilon is reported as -si.r.
	private static void solve_nu_svr(svm_problem prob, svm_parameter param,
					double[] alpha, Solver.SolutionInfo si)
	{
		int l = prob.l;
		double C = param.C;
		double[] alpha2 = new double[2*l];
		double[] linear_term = new double[2*l];
		byte[] y = new byte[2*l];
		int i;

		// Initial feasible point: spread C*nu*l/2 over the paired alphas.
		double sum = C * param.nu * l / 2;
		for(i=0;i<l;i++)
		{
			alpha2[i] = alpha2[i+l] = Math.min(sum,C);
			sum -= alpha2[i];
			
			linear_term[i] = - prob.y[i];
			y[i] = 1;

			linear_term[i+l] = prob.y[i];
			y[i+l] = -1;
		}

		Solver_NU s = new Solver_NU();
		s.Solve(2*l, new SVR_Q(prob,param), linear_term, y,
			alpha2, C, C, param.eps, si, param.shrinking);

		SupportVectorMachine.info("epsilon = "+(-si.r)+"\n");
		
		for(i=0;i<l;i++)
			alpha[i] = alpha2[i] - alpha2[i+l];
	}
+
	//
	// decision_function
	//
	// Result of training one binary sub-problem: the signed support-vector
	// coefficients and the bias term rho.
	static class decision_function
	{
		double[] alpha;
		double rho;	
	}
+
	// Trains a single binary (or one-class/regression) sub-problem by
	// dispatching on svm_type, then reports SV counts and packages the
	// solution as a decision_function.
	static decision_function svm_train_one(
		svm_problem prob, svm_parameter param,
		double Cp, double Cn)
	{
		double[] alpha = new double[prob.l];
		Solver.SolutionInfo si = new Solver.SolutionInfo();
		switch(param.svm_type)
		{
			case svm_parameter.C_SVC:
				solve_c_svc(prob,param,alpha,si,Cp,Cn);
				break;
			case svm_parameter.NU_SVC:
				solve_nu_svc(prob,param,alpha,si);
				break;
			case svm_parameter.ONE_CLASS:
				solve_one_class(prob,param,alpha,si);
				break;
			case svm_parameter.EPSILON_SVR:
				solve_epsilon_svr(prob,param,alpha,si);
				break;
			case svm_parameter.NU_SVR:
				solve_nu_svr(prob,param,alpha,si);
				break;
			default:
				break;
		}

		SupportVectorMachine.info("obj = "+si.obj+", rho = "+si.rho+"\n");

		// output SVs

		int nSV = 0;	// support vectors (alpha != 0)
		int nBSV = 0;	// bounded support vectors (alpha at its upper bound)
		for(int i=0;i<prob.l;i++)
		{
			if(Math.abs(alpha[i]) > 0)
			{
				++nSV;
				if(prob.y[i] > 0)
				{
					if(Math.abs(alpha[i]) >= si.upper_bound_p)
					++nBSV;
				}
				else
				{
					if(Math.abs(alpha[i]) >= si.upper_bound_n)
						++nBSV;
				}
			}
		}

		SupportVectorMachine.info("nSV = "+nSV+", nBSV = "+nBSV+"\n");

		decision_function f = new decision_function();
		f.alpha = alpha;
		f.rho = si.rho;
		return f;
	}
+
	// Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
	// Fits P(y=1|f) = 1/(1+exp(A*f+B)) to (dec_values, labels) by Newton's
	// method with backtracking line search; writes A to probAB[0], B to probAB[1].
	private static void sigmoid_train(int l, double[] dec_values, double[] labels, 
				  double[] probAB)
	{
		double A, B;
		double prior1=0, prior0 = 0;
		int i;

		for (i=0;i<l;i++)
			if (labels[i] > 0) prior1+=1;
			else prior0+=1;
	
		int max_iter=100;	// Maximal number of iterations
		double min_step=1e-10;	// Minimal step taken in line search
		double sigma=1e-12;	// For numerically strict PD of Hessian
		double eps=1e-5;
		// Soft targets (instead of hard 0/1) per Platt's recommendation.
		double hiTarget=(prior1+1.0)/(prior1+2.0);
		double loTarget=1/(prior0+2.0);
		double[] t= new double[l];
		double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
		double newA,newB,newf,d1,d2;
		int iter; 
	
		// Initial Point and Initial Fun Value
		A=0.0; B=Math.log((prior0+1.0)/(prior1+1.0));
		double fval = 0.0;

		for (i=0;i<l;i++)
		{
			if (labels[i]>0) t[i]=hiTarget;
			else t[i]=loTarget;
			fApB = dec_values[i]*A+B;
			// Two branches of the same log-likelihood term, chosen for
			// numerical stability of exp().
			if (fApB>=0)
				fval += t[i]*fApB + Math.log(1+Math.exp(-fApB));
			else
				fval += (t[i] - 1)*fApB +Math.log(1+Math.exp(fApB));
		}
		for (iter=0;iter<max_iter;iter++)
		{
			// Update Gradient and Hessian (use H' = H + sigma I)
			h11=sigma; // numerically ensures strict PD
			h22=sigma;
			h21=0.0;g1=0.0;g2=0.0;
			for (i=0;i<l;i++)
			{
				fApB = dec_values[i]*A+B;
				if (fApB >= 0)
				{
					p=Math.exp(-fApB)/(1.0+Math.exp(-fApB));
					q=1.0/(1.0+Math.exp(-fApB));
				}
				else
				{
					p=1.0/(1.0+Math.exp(fApB));
					q=Math.exp(fApB)/(1.0+Math.exp(fApB));
				}
				d2=p*q;
				h11+=dec_values[i]*dec_values[i]*d2;
				h22+=d2;
				h21+=dec_values[i]*d2;
				d1=t[i]-p;
				g1+=dec_values[i]*d1;
				g2+=d1;
			}

			// Stopping Criteria
			if (Math.abs(g1)<eps && Math.abs(g2)<eps)
				break;
			
			// Finding Newton direction: -inv(H') * g
			det=h11*h22-h21*h21;
			dA=-(h22*g1 - h21 * g2) / det;
			dB=-(-h21*g1+ h11 * g2) / det;
			gd=g1*dA+g2*dB;


			stepsize = 1;		// Line Search
			while (stepsize >= min_step)
			{
				newA = A + stepsize * dA;
				newB = B + stepsize * dB;

				// New function value
				newf = 0.0;
				for (i=0;i<l;i++)
				{
					fApB = dec_values[i]*newA+newB;
					if (fApB >= 0)
						newf += t[i]*fApB + Math.log(1+Math.exp(-fApB));
					else
						newf += (t[i] - 1)*fApB +Math.log(1+Math.exp(fApB));
				}
				// Check sufficient decrease (Armijo condition)
				if (newf<fval+0.0001*stepsize*gd)
				{
					A=newA;B=newB;fval=newf;
					break;
				}
				else
					stepsize = stepsize / 2.0;
			}
			
			if (stepsize < min_step)
			{
				SupportVectorMachine.info("Line search fails in two-class probability estimates\n");
				break;
			}
		}
		
		if (iter>=max_iter)
			SupportVectorMachine.info("Reaching maximal iterations in two-class probability estimates\n");
		probAB[0]=A;probAB[1]=B;
	}
+
+	private static double sigmoid_predict(double decision_value, double A, double B)
+	{
+		double fApB = decision_value*A+B;
+		if (fApB >= 0)
+			return Math.exp(-fApB)/(1.0+Math.exp(-fApB));
+		else
+			return 1.0/(1+Math.exp(fApB)) ;
+	}
+
	// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
	// Combines pairwise probabilities r[i][j] into class probabilities p[]
	// by fixed-point iteration until the stationarity error drops below eps.
	private static void multiclass_probability(int k, double[][] r, double[] p)
	{
		int t,j;
		int iter = 0, max_iter=Math.max(100,k);
		double[][] Q=new double[k][k];
		double[] Qp=new double[k];
		double pQp, eps=0.005/k;
	
		for (t=0;t<k;t++)
		{
			p[t]=1.0/k;  // Valid if k = 1
			Q[t][t]=0;
			for (j=0;j<t;j++)
			{
				Q[t][t]+=r[j][t]*r[j][t];
				Q[t][j]=Q[j][t];	// Q is symmetric; reuse the upper part
			}
			for (j=t+1;j<k;j++)
			{
				Q[t][t]+=r[j][t]*r[j][t];
				Q[t][j]=-r[j][t]*r[t][j];
			}
		}
		for (iter=0;iter<max_iter;iter++)
		{
			// stopping condition, recalculate QP,pQP for numerical accuracy
			pQp=0;
			for (t=0;t<k;t++)
			{
				Qp[t]=0;
				for (j=0;j<k;j++)
					Qp[t]+=Q[t][j]*p[j];
				pQp+=p[t]*Qp[t];
			}
			double max_error=0;
			for (t=0;t<k;t++)
			{
				double error=Math.abs(Qp[t]-pQp);
				if (error>max_error)
					max_error=error;
			}
			if (max_error<eps) break;
		
			// One sweep of coordinate updates, renormalizing p each step.
			for (t=0;t<k;t++)
			{
				double diff=(-Qp[t]+pQp)/Q[t][t];
				p[t]+=diff;
				pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
				for (j=0;j<k;j++)
				{
					Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
					p[j]/=(1+diff);
				}
			}
		}
		if (iter>=max_iter)
			SupportVectorMachine.info("Exceeds max_iter in multiclass_prob\n");
	}
+
	// Cross-validation decision values for probability estimates
	// Runs 5-fold CV with the +1/-1 class weights Cp/Cn, collects decision
	// values for the held-out folds, then fits the Platt sigmoid (probAB).
	private static void svm_binary_svc_probability(svm_problem prob, svm_parameter param, double Cp, double Cn, double[] probAB)
	{
		int i;
		int nr_fold = 5;
		int[] perm = new int[prob.l];
		double[] dec_values = new double[prob.l];

		// naive shuffle (Fisher-Yates using the shared RNG)
		for(i=0;i<prob.l;i++) perm[i]=i;
		for(i=0;i<prob.l;i++)
		{
			int j = i+rand.nextInt(prob.l-i);
			do {int tmp = perm[i]; perm[i]=perm[j]; perm[j] = tmp;} while(false);
		}
		for(i=0;i<nr_fold;i++)
		{
			int begin = i*prob.l/nr_fold;
			int end = (i+1)*prob.l/nr_fold;
			int j,k;
			svm_problem subprob = new svm_problem();

			// Training sub-problem = everything outside [begin,end).
			subprob.l = prob.l-(end-begin);
			subprob.x = new SupportVectorMachineNode[subprob.l][];
			subprob.y = new double[subprob.l];
			
			k=0;
			for(j=0;j<begin;j++)
			{
				subprob.x[k] = prob.x[perm[j]];
				subprob.y[k] = prob.y[perm[j]];
				++k;
			}
			for(j=end;j<prob.l;j++)
			{
				subprob.x[k] = prob.x[perm[j]];
				subprob.y[k] = prob.y[perm[j]];
				++k;
			}
			int p_count=0,n_count=0;
			for(j=0;j<k;j++)
				if(subprob.y[j]>0)
					p_count++;
				else
					n_count++;
			
			// Degenerate folds (one class missing) get constant decision values.
			if(p_count==0 && n_count==0)
				for(j=begin;j<end;j++)
					dec_values[perm[j]] = 0;
			else if(p_count > 0 && n_count == 0)
				for(j=begin;j<end;j++)
					dec_values[perm[j]] = 1;
			else if(p_count == 0 && n_count > 0)
				for(j=begin;j<end;j++)
					dec_values[perm[j]] = -1;
			else
			{
				// Train with fixed C=1 and class weights Cp/Cn, no nested
				// probability estimation.
				svm_parameter subparam = param.makeCopy();
				subparam.probability=0;
				subparam.C=1.0;
				subparam.nr_weight=2;
				subparam.weight_label = new int[2];
				subparam.weight = new double[2];
				subparam.weight_label[0]=+1;
				subparam.weight_label[1]=-1;
				subparam.weight[0]=Cp;
				subparam.weight[1]=Cn;
				svm_model submodel = svm_train(subprob,subparam);
				for(j=begin;j<end;j++)
				{
					double[] dec_value=new double[1];
					svm_predict_values(submodel,prob.x[perm[j]],dec_value);
					dec_values[perm[j]]=dec_value[0];
					// ensure +1 -1 order; reason not using CV subroutine
					dec_values[perm[j]] *= submodel.label[0];
				}		
			}
		}		
		sigmoid_train(prob.l,dec_values,prob.y,probAB);
	}
+
	// Return parameter of a Laplace distribution 
	// Estimates sigma of the residual distribution via 5-fold CV: computes
	// the mean absolute residual after discarding outliers beyond 5*std.
	private static double svm_svr_probability(svm_problem prob, svm_parameter param)
	{
		int i;
		int nr_fold = 5;
		double[] ymv = new double[prob.l];
		double mae = 0;

		svm_parameter newparam = param.makeCopy();
		newparam.probability = 0;	// avoid recursive probability estimation
		svm_cross_validation(prob,newparam,nr_fold,ymv);
		for(i=0;i<prob.l;i++)
		{
			ymv[i]=prob.y[i]-ymv[i];	// residual: target - CV prediction
			mae += Math.abs(ymv[i]);
		}		
		mae /= prob.l;
		// For a Laplace distribution, std = sqrt(2)*mae.
		double std=Math.sqrt(2*mae*mae);
		int count=0;
		mae=0;
		// Recompute MAE excluding residuals beyond 5 standard deviations.
		for(i=0;i<prob.l;i++)
			if (Math.abs(ymv[i]) > 5*std) 
				count=count+1;
			else 
				mae+=Math.abs(ymv[i]);
		mae /= (prob.l-count);
		SupportVectorMachine.info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+mae+"\n");
		return mae;
	}
+
+	// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
+	// perm, length l, must be allocated before calling this subroutine
+	// Groups the training data by class label.  Results are returned through the
+	// single-element "ret" holder arrays; after the call, perm lists the original
+	// indices reordered so that each class occupies a contiguous range
+	// [start[c], start[c]+count[c]).
+	private static void svm_group_classes(svm_problem prob, int[] nr_class_ret, int[][] label_ret, int[][] start_ret, int[][] count_ret, int[] perm)
+	{
+		int l = prob.l;
+		int max_nr_class = 16;
+		int nr_class = 0;
+		int[] label = new int[max_nr_class];
+		int[] count = new int[max_nr_class];
+		int[] data_label = new int[l];
+		int i;
+
+		// First pass: discover distinct labels (order of first occurrence) and
+		// count the instances of each class; data_label[i] is i's class index.
+		for(i=0;i<l;i++)
+		{
+			int this_label = (int)(prob.y[i]);
+			int j;
+			for(j=0;j<nr_class;j++)
+			{
+				if(this_label == label[j])
+				{
+					++count[j];
+					break;
+				}
+			}
+			data_label[i] = j;
+			if(j == nr_class)
+			{
+				// New label seen: grow the label/count tables when full.
+				if(nr_class == max_nr_class)
+				{
+					max_nr_class *= 2;
+					int[] new_data = new int[max_nr_class];
+					System.arraycopy(label,0,new_data,0,label.length);
+					label = new_data;
+					new_data = new int[max_nr_class];
+					System.arraycopy(count,0,new_data,0,count.length);
+					count = new_data;					
+				}
+				label[nr_class] = this_label;
+				count[nr_class] = 1;
+				++nr_class;
+			}
+		}
+
+		//
+		// Labels are ordered by their first occurrence in the train set.
+		// However, for two-class sets with -1/+1 labels and -1 appears first, 
+		// we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
+		//
+		if (nr_class == 2 && label[0] == -1 && label[1] == +1)
+		{
+			do {int tmp = label[0]; label[0]=label[1]; label[1] = tmp;} while(false);
+			do {int tmp = count[0]; count[0]=count[1]; count[1] = tmp;} while(false);
+			for(i=0;i<l;i++)
+			{
+				if(data_label[i] == 0)
+					data_label[i] = 1;
+				else
+					data_label[i] = 0;
+			}
+		}
+
+		// Bucket sort: place each index into its class's range, advancing start[]
+		// as a cursor, then rebuild start[] afterwards.
+		int[] start = new int[nr_class];
+		start[0] = 0;
+		for(i=1;i<nr_class;i++)
+			start[i] = start[i-1]+count[i-1];
+		for(i=0;i<l;i++)
+		{
+			perm[start[data_label[i]]] = i;
+			++start[data_label[i]];
+		}
+		start[0] = 0;
+		for(i=1;i<nr_class;i++)
+			start[i] = start[i-1]+count[i-1];
+
+		nr_class_ret[0] = nr_class;
+		label_ret[0] = label;
+		start_ret[0] = start;
+		count_ret[0] = count;
+	}
+
+	//
+	// Interface functions
+	//
+	// Trains a model on prob with the settings in param.  For ONE_CLASS /
+	// EPSILON_SVR / NU_SVR a single decision function is fitted; for
+	// classification one binary classifier is trained per class pair
+	// (one-against-one), optionally with pairwise probability estimates.
+	public static svm_model svm_train(svm_problem prob, svm_parameter param)
+	{
+		svm_model model = new svm_model();
+		model.param = param;
+
+		if(param.svm_type == svm_parameter.ONE_CLASS ||
+		   param.svm_type == svm_parameter.EPSILON_SVR ||
+		   param.svm_type == svm_parameter.NU_SVR)
+		{
+			// regression or one-class-svm
+			model.nr_class = 2;
+			model.label = null;
+			model.nSV = null;
+			model.probA = null; model.probB = null;
+			model.sv_coef = new double[1][];
+
+			if(param.probability == 1 &&
+			   (param.svm_type == svm_parameter.EPSILON_SVR ||
+			    param.svm_type == svm_parameter.NU_SVR))
+			{
+				model.probA = new double[1];
+				model.probA[0] = svm_svr_probability(prob,param);
+			}
+
+			decision_function f = svm_train_one(prob,param,0,0);
+			model.rho = new double[1];
+			model.rho[0] = f.rho;
+
+			// Keep only the support vectors (points with non-zero alpha).
+			int nSV = 0;
+			int i;
+			for(i=0;i<prob.l;i++)
+				if(Math.abs(f.alpha[i]) > 0) ++nSV;
+			model.l = nSV;
+			model.SV = new SupportVectorMachineNode[nSV][];
+			model.sv_coef[0] = new double[nSV];
+			model.sv_indices = new int[nSV];
+			int j = 0;
+			for(i=0;i<prob.l;i++)
+				if(Math.abs(f.alpha[i]) > 0)
+				{
+					model.SV[j] = prob.x[i];
+					model.sv_coef[0][j] = f.alpha[i];
+					model.sv_indices[j] = i+1;	// 1-based index into the train set
+					++j;
+				}
+		}
+		else
+		{
+			// classification
+			int l = prob.l;
+			int[] tmp_nr_class = new int[1];
+			int[][] tmp_label = new int[1][];
+			int[][] tmp_start = new int[1][];
+			int[][] tmp_count = new int[1][];			
+			int[] perm = new int[l];
+
+			// group train data of the same class
+			svm_group_classes(prob,tmp_nr_class,tmp_label,tmp_start,tmp_count,perm);
+			int nr_class = tmp_nr_class[0];			
+			int[] label = tmp_label[0];
+			int[] start = tmp_start[0];
+			int[] count = tmp_count[0];
+ 			
+			if(nr_class == 1) 
+				SupportVectorMachine.info("WARNING: training data in only one class. See README for details.\n");
+			
+			// x holds the training points reordered so each class is contiguous.
+			SupportVectorMachineNode[][] x = new SupportVectorMachineNode[l][];
+			int i;
+			for(i=0;i<l;i++)
+				x[i] = prob.x[perm[i]];
+
+			// calculate weighted C
+
+			double[] weighted_C = new double[nr_class];
+			for(i=0;i<nr_class;i++)
+				weighted_C[i] = param.C;
+			for(i=0;i<param.nr_weight;i++)
+			{
+				int j;
+				for(j=0;j<nr_class;j++)
+					if(param.weight_label[i] == label[j])
+						break;
+				if(j == nr_class)
+					System.err.print("WARNING: class label "+param.weight_label[i]+" specified in weight is not found\n");
+				else
+					weighted_C[j] *= param.weight[i];
+			}
+
+			// train k*(k-1)/2 models
+
+			// nonzero[i] marks points (in the reordered layout) that end up as a
+			// support vector in at least one pairwise classifier.
+			boolean[] nonzero = new boolean[l];
+			for(i=0;i<l;i++)
+				nonzero[i] = false;
+			decision_function[] f = new decision_function[nr_class*(nr_class-1)/2];
+
+			double[] probA=null,probB=null;
+			if (param.probability == 1)
+			{
+				probA=new double[nr_class*(nr_class-1)/2];
+				probB=new double[nr_class*(nr_class-1)/2];
+			}
+
+			int p = 0;
+			for(i=0;i<nr_class;i++)
+				for(int j=i+1;j<nr_class;j++)
+				{
+					// Build the two-class subproblem (class i -> +1, class j -> -1).
+					svm_problem sub_prob = new svm_problem();
+					int si = start[i], sj = start[j];
+					int ci = count[i], cj = count[j];
+					sub_prob.l = ci+cj;
+					sub_prob.x = new SupportVectorMachineNode[sub_prob.l][];
+					sub_prob.y = new double[sub_prob.l];
+					int k;
+					for(k=0;k<ci;k++)
+					{
+						sub_prob.x[k] = x[si+k];
+						sub_prob.y[k] = +1;
+					}
+					for(k=0;k<cj;k++)
+					{
+						sub_prob.x[ci+k] = x[sj+k];
+						sub_prob.y[ci+k] = -1;
+					}
+
+					if(param.probability == 1)
+					{
+						double[] probAB=new double[2];
+						svm_binary_svc_probability(sub_prob,param,weighted_C[i],weighted_C[j],probAB);
+						probA[p]=probAB[0];
+						probB[p]=probAB[1];
+					}
+
+					f[p] = svm_train_one(sub_prob,param,weighted_C[i],weighted_C[j]);
+					for(k=0;k<ci;k++)
+						if(!nonzero[si+k] && Math.abs(f[p].alpha[k]) > 0)
+							nonzero[si+k] = true;
+					for(k=0;k<cj;k++)
+						if(!nonzero[sj+k] && Math.abs(f[p].alpha[ci+k]) > 0)
+							nonzero[sj+k] = true;
+					++p;
+				}
+
+			// build output
+
+			model.nr_class = nr_class;
+
+			model.label = new int[nr_class];
+			for(i=0;i<nr_class;i++)
+				model.label[i] = label[i];
+
+			model.rho = new double[nr_class*(nr_class-1)/2];
+			for(i=0;i<nr_class*(nr_class-1)/2;i++)
+				model.rho[i] = f[i].rho;
+
+			if(param.probability == 1)
+			{
+				model.probA = new double[nr_class*(nr_class-1)/2];
+				model.probB = new double[nr_class*(nr_class-1)/2];
+				for(i=0;i<nr_class*(nr_class-1)/2;i++)
+				{
+					model.probA[i] = probA[i];
+					model.probB[i] = probB[i];
+				}
+			}
+			else
+			{
+				model.probA=null;
+				model.probB=null;
+			}
+
+			// Count support vectors per class and in total.
+			int nnz = 0;
+			int[] nz_count = new int[nr_class];
+			model.nSV = new int[nr_class];
+			for(i=0;i<nr_class;i++)
+			{
+				int nSV = 0;
+				for(int j=0;j<count[i];j++)
+					if(nonzero[start[i]+j])
+					{
+						++nSV;
+						++nnz;
+					}
+				model.nSV[i] = nSV;
+				nz_count[i] = nSV;
+			}
+
+			SupportVectorMachine.info("Total nSV = "+nnz+"\n");
+
+			model.l = nnz;
+			model.SV = new SupportVectorMachineNode[nnz][];
+			model.sv_indices = new int[nnz];
+			p = 0;
+			for(i=0;i<l;i++)
+				if(nonzero[i])
+				{
+					model.SV[p] = x[i];
+					model.sv_indices[p++] = perm[i] + 1;
+				}
+
+			int[] nz_start = new int[nr_class];
+			nz_start[0] = 0;
+			for(i=1;i<nr_class;i++)
+				nz_start[i] = nz_start[i-1]+nz_count[i-1];
+
+			model.sv_coef = new double[nr_class-1][];
+			for(i=0;i<nr_class-1;i++)
+				model.sv_coef[i] = new double[nnz];
+
+			p = 0;
+			for(i=0;i<nr_class;i++)
+				for(int j=i+1;j<nr_class;j++)
+				{
+					// classifier (i,j): coefficients with
+					// i are in sv_coef[j-1][nz_start[i]...],
+					// j are in sv_coef[i][nz_start[j]...]
+
+					int si = start[i];
+					int sj = start[j];
+					int ci = count[i];
+					int cj = count[j];
+
+					int q = nz_start[i];
+					int k;
+					for(k=0;k<ci;k++)
+						if(nonzero[si+k])
+							model.sv_coef[j-1][q++] = f[p].alpha[k];
+					q = nz_start[j];
+					for(k=0;k<cj;k++)
+						if(nonzero[sj+k])
+							model.sv_coef[i][q++] = f[p].alpha[ci+k];
+					++p;
+				}
+		}
+		return model;
+	}
+	
+	// Stratified cross validation
+	// Runs nr_fold-fold cross validation and stores the CV prediction for each
+	// training point in target[].  For C_SVC/NU_SVC (with nr_fold < l) the folds
+	// are stratified so every fold keeps roughly the class proportions of the
+	// full set; otherwise a plain random split is used.
+	public static void svm_cross_validation(svm_problem prob, svm_parameter param, int nr_fold, double[] target)
+	{
+		int i;
+		int[] fold_start = new int[nr_fold+1];
+		int l = prob.l;
+		int[] perm = new int[l];
+		
+		// stratified cv may not give leave-one-out rate
+		// Each class to l folds -> some folds may have zero elements
+		if((param.svm_type == svm_parameter.C_SVC ||
+		    param.svm_type == svm_parameter.NU_SVC) && nr_fold < l)
+		{
+			int[] tmp_nr_class = new int[1];
+			int[][] tmp_label = new int[1][];
+			int[][] tmp_start = new int[1][];
+			int[][] tmp_count = new int[1][];
+
+			svm_group_classes(prob,tmp_nr_class,tmp_label,tmp_start,tmp_count,perm);
+
+			int nr_class = tmp_nr_class[0];
+			int[] start = tmp_start[0];
+			int[] count = tmp_count[0];		
+
+			// naive shuffle and then data grouped by fold using the array perm
+			int[] fold_count = new int[nr_fold];
+			int c;
+			int[] index = new int[l];
+			for(i=0;i<l;i++)
+				index[i]=perm[i];
+			// Fisher-Yates shuffle within each class's contiguous range.
+			for (c=0; c<nr_class; c++)
+				for(i=0;i<count[c];i++)
+				{
+					int j = i+rand.nextInt(count[c]-i);
+					do {int tmp = index[start[c]+j]; index[start[c]+j]=index[start[c]+i]; index[start[c]+i] = tmp;} while(false);
+				}
+			// Each fold takes a proportional slice of every class.
+			for(i=0;i<nr_fold;i++)
+			{
+				fold_count[i] = 0;
+				for (c=0; c<nr_class;c++)
+					fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
+			}
+			fold_start[0]=0;
+			for (i=1;i<=nr_fold;i++)
+				fold_start[i] = fold_start[i-1]+fold_count[i-1];
+			for (c=0; c<nr_class;c++)
+				for(i=0;i<nr_fold;i++)
+				{
+					int begin = start[c]+i*count[c]/nr_fold;
+					int end = start[c]+(i+1)*count[c]/nr_fold;
+					for(int j=begin;j<end;j++)
+					{
+						perm[fold_start[i]] = index[j];
+						fold_start[i]++;
+					}
+				}
+			// fold_start was advanced as a cursor above; rebuild it.
+			fold_start[0]=0;
+			for (i=1;i<=nr_fold;i++)
+				fold_start[i] = fold_start[i-1]+fold_count[i-1];
+		}
+		else
+		{
+			// Non-stratified case: a single random shuffle of all indices.
+			for(i=0;i<l;i++) perm[i]=i;
+			for(i=0;i<l;i++)
+			{
+				int j = i+rand.nextInt(l-i);
+				do {int tmp = perm[i]; perm[i]=perm[j]; perm[j] = tmp;} while(false);
+			}
+			for(i=0;i<=nr_fold;i++)
+				fold_start[i]=i*l/nr_fold;
+		}
+
+		// For every fold: train on the complement, predict the fold itself.
+		for(i=0;i<nr_fold;i++)
+		{
+			int begin = fold_start[i];
+			int end = fold_start[i+1];
+			int j,k;
+			svm_problem subprob = new svm_problem();
+
+			subprob.l = l-(end-begin);
+			subprob.x = new SupportVectorMachineNode[subprob.l][];
+			subprob.y = new double[subprob.l];
+
+			k=0;
+			for(j=0;j<begin;j++)
+			{
+				subprob.x[k] = prob.x[perm[j]];
+				subprob.y[k] = prob.y[perm[j]];
+				++k;
+			}
+			for(j=end;j<l;j++)
+			{
+				subprob.x[k] = prob.x[perm[j]];
+				subprob.y[k] = prob.y[perm[j]];
+				++k;
+			}
+			svm_model submodel = svm_train(subprob,param);
+			if(param.probability==1 &&
+			   (param.svm_type == svm_parameter.C_SVC ||
+			    param.svm_type == svm_parameter.NU_SVC))
+			{
+				double[] prob_estimates= new double[svm_get_nr_class(submodel)];
+				for(j=begin;j<end;j++)
+					target[perm[j]] = svm_predict_probability(submodel,prob.x[perm[j]],prob_estimates);
+			}
+			else
+				for(j=begin;j<end;j++)
+					target[perm[j]] = svm_predict(submodel,prob.x[perm[j]]);
+		}
+	}
+
+	// Exposes the SVM type constant (C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR,
+	// NU_SVR) stored in the model's training parameters.
+	public static int svm_get_svm_type(svm_model model)
+	{
+		final svm_parameter trainedWith = model.param;
+		return trainedWith.svm_type;
+	}
+
+	// Number of classes seen at training time (2 for regression / one-class).
+	public static int svm_get_nr_class(svm_model model)
+	{
+		final int classCount = model.nr_class;
+		return classCount;
+	}
+
+	// Copies the class labels into the caller-supplied buffer; no-op when the
+	// model carries no label table (regression / one-class models).
+	public static void svm_get_labels(svm_model model, int[] label)
+	{
+		final int[] src = model.label;
+		if (src == null)
+			return;
+		System.arraycopy(src, 0, label, 0, model.nr_class);
+	}
+
+	// Copies the 1-based support-vector indices into the caller's buffer;
+	// no-op when the model carries no index table.
+	public static void svm_get_sv_indices(svm_model model, int[] indices)
+	{
+		final int[] src = model.sv_indices;
+		if (src == null)
+			return;
+		System.arraycopy(src, 0, indices, 0, model.l);
+	}
+
+	// Total number of support vectors stored in the model.
+	public static int svm_get_nr_sv(svm_model model)
+	{
+		final int totalSupportVectors = model.l;
+		return totalSupportVectors;
+	}
+
+	public static double svm_get_svr_probability(svm_model model)
+	{
+		if ((model.param.svm_type == svm_parameter.EPSILON_SVR || model.param.svm_type == svm_parameter.NU_SVR) &&
+		    model.probA!=null)
+		return model.probA[0];
+		else
+		{
+			System.err.print("Model doesn't contain information for SVR probability inference\n");
+			return 0;
+		}
+	}
+
+	// Fills dec_values with the decision value(s) for instance x and returns the
+	// prediction.  Regression / one-class: one decision value, returned as-is.
+	// Classification: one value per class pair (k*(k-1)/2), and the returned
+	// label is chosen by pairwise majority vote.
+	public static double svm_predict_values(svm_model model, SupportVectorMachineNode[] x, double[] dec_values)
+	{
+		int i;
+		if(model.param.svm_type == svm_parameter.ONE_CLASS ||
+		   model.param.svm_type == svm_parameter.EPSILON_SVR ||
+		   model.param.svm_type == svm_parameter.NU_SVR)
+		{
+			// Single decision function: sum_i coef_i * K(x, SV_i) - rho.
+			double[] sv_coef = model.sv_coef[0];
+			double sum = 0;
+			for(i=0;i<model.l;i++)
+				sum += sv_coef[i] * Kernel.k_function(x,model.SV[i],model.param);
+			sum -= model.rho[0];
+			dec_values[0] = sum;
+
+			///QQQ dwp
+			// NOTE(review): this port deliberately deviates from the commented-out
+			// stock behavior below — for ONE_CLASS it returns the raw decision
+			// value instead of its sign (+1/-1).  Confirm callers expect that.
+//			if(model.param.svm_type == svm_parameter.ONE_CLASS)
+//				return (sum>0)?1:-1;
+//			else
+//				return sum;
+			return sum;
+		}
+		else
+		{
+			int nr_class = model.nr_class;
+			int l = model.l;
+		
+			// Kernel values against every support vector, computed once and
+			// shared by all pairwise classifiers.
+			double[] kvalue = new double[l];
+			for(i=0;i<l;i++)
+				kvalue[i] = Kernel.k_function(x,model.SV[i],model.param);
+
+			// start[c]: offset of class c's support vectors in SV/kvalue.
+			int[] start = new int[nr_class];
+			start[0] = 0;
+			for(i=1;i<nr_class;i++)
+				start[i] = start[i-1]+model.nSV[i-1];
+
+			int[] vote = new int[nr_class];
+			for(i=0;i<nr_class;i++)
+				vote[i] = 0;
+
+			// One-against-one: evaluate classifier (i,j) and vote for the winner.
+			int p=0;
+			for(i=0;i<nr_class;i++)
+				for(int j=i+1;j<nr_class;j++)
+				{
+					double sum = 0;
+					int si = start[i];
+					int sj = start[j];
+					int ci = model.nSV[i];
+					int cj = model.nSV[j];
+				
+					int k;
+					double[] coef1 = model.sv_coef[j-1];
+					double[] coef2 = model.sv_coef[i];
+					for(k=0;k<ci;k++)
+						sum += coef1[si+k] * kvalue[si+k];
+					for(k=0;k<cj;k++)
+						sum += coef2[sj+k] * kvalue[sj+k];
+					sum -= model.rho[p];
+					dec_values[p] = sum;					
+
+					if(dec_values[p] > 0)
+						++vote[i];
+					else
+						++vote[j];
+					p++;
+				}
+
+			// Predicted label is the class with the most pairwise wins
+			// (ties resolved in favor of the lower class index).
+			int vote_max_idx = 0;
+			for(i=1;i<nr_class;i++)
+				if(vote[i] > vote[vote_max_idx])
+					vote_max_idx = i;
+
+			return model.label[vote_max_idx];
+		}
+	}
+
+	// Predicts the label (classification) or target value (regression /
+	// one-class) for a single instance; the decision values are discarded.
+	public static double svm_predict(svm_model model, SupportVectorMachineNode[] x)
+	{
+		final int type = model.param.svm_type;
+		final double[] dec_values;
+		if (type == svm_parameter.ONE_CLASS
+				|| type == svm_parameter.EPSILON_SVR
+				|| type == svm_parameter.NU_SVR)
+		{
+			dec_values = new double[1];
+		}
+		else
+		{
+			final int k = model.nr_class;
+			dec_values = new double[k * (k - 1) / 2];
+		}
+		return svm_predict_values(model, x, dec_values);
+	}
+
+	// Predicts a label and fills prob_estimates with per-class probabilities,
+	// combining the pairwise sigmoid estimates (probA/probB) via
+	// multiclass_probability.  Falls back to plain svm_predict when the model
+	// has no probability information.
+	public static double svm_predict_probability(svm_model model, SupportVectorMachineNode[] x, double[] prob_estimates)
+	{
+		if ((model.param.svm_type == svm_parameter.C_SVC || model.param.svm_type == svm_parameter.NU_SVC) &&
+		    model.probA!=null && model.probB!=null)
+		{
+			int i;
+			int nr_class = model.nr_class;
+			double[] dec_values = new double[nr_class*(nr_class-1)/2];
+			svm_predict_values(model, x, dec_values);
+
+			// Clamp pairwise probabilities away from 0/1 for numeric stability.
+			double min_prob=1e-7;
+			double[][] pairwise_prob=new double[nr_class][nr_class];
+			
+			int k=0;
+			for(i=0;i<nr_class;i++)
+				for(int j=i+1;j<nr_class;j++)
+				{
+					pairwise_prob[i][j]=Math.min(Math.max(sigmoid_predict(dec_values[k],model.probA[k],model.probB[k]),min_prob),1-min_prob);
+					pairwise_prob[j][i]=1-pairwise_prob[i][j];
+					k++;
+				}
+			multiclass_probability(nr_class,pairwise_prob,prob_estimates);
+
+			// Predicted label = class with the highest combined probability.
+			int prob_max_idx = 0;
+			for(i=1;i<nr_class;i++)
+				if(prob_estimates[i] > prob_estimates[prob_max_idx])
+					prob_max_idx = i;
+			return model.label[prob_max_idx];
+		}
+		else 
+			return svm_predict(model, x);
+	}
+
+	// Textual names used in the model-file format; indices line up with the
+	// svm_parameter svm_type constants (C_SVC..NU_SVR).
+	static final String svm_type_table[] =
+	{
+		"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",
+	};
+
+	// Kernel names for the model-file format; indices line up with the
+	// svm_parameter kernel_type constants (LINEAR..PRECOMPUTED).
+	static final String kernel_type_table[]=
+	{
+		"linear","polynomial","rbf","sigmoid","precomputed"
+	};
+
+	/**
+	 * Writes {@code model} to {@code model_file_name} in the plain-text libsvm
+	 * model format: parameter header lines, then an "SV" section with one
+	 * support vector per line.
+	 *
+	 * Fix: the stream is now closed in a finally block, so the file handle is
+	 * released even when a write fails part-way through (previously
+	 * {@code fp.close()} was skipped on any IOException).
+	 *
+	 * @throws IOException if the file cannot be created or written
+	 */
+	public static void svm_save_model(String model_file_name, svm_model model) throws IOException
+	{
+		DataOutputStream fp = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(model_file_name)));
+		try
+		{
+			svm_parameter param = model.param;
+
+			fp.writeBytes("svm_type "+svm_type_table[param.svm_type]+"\n");
+			fp.writeBytes("kernel_type "+kernel_type_table[param.kernel_type]+"\n");
+
+			// Kernel parameters are only written when the kernel uses them.
+			if(param.kernel_type == svm_parameter.POLY)
+				fp.writeBytes("degree "+param.degree+"\n");
+
+			if(param.kernel_type == svm_parameter.POLY ||
+			   param.kernel_type == svm_parameter.RBF ||
+			   param.kernel_type == svm_parameter.SIGMOID)
+				fp.writeBytes("gamma "+param.gamma+"\n");
+
+			if(param.kernel_type == svm_parameter.POLY ||
+			   param.kernel_type == svm_parameter.SIGMOID)
+				fp.writeBytes("coef0 "+param.coef0+"\n");
+
+			int nr_class = model.nr_class;
+			int l = model.l;
+			fp.writeBytes("nr_class "+nr_class+"\n");
+			fp.writeBytes("total_sv "+l+"\n");
+
+			{
+				fp.writeBytes("rho");
+				for(int i=0;i<nr_class*(nr_class-1)/2;i++)
+					fp.writeBytes(" "+model.rho[i]);
+				fp.writeBytes("\n");
+			}
+
+			if(model.label != null)
+			{
+				fp.writeBytes("label");
+				for(int i=0;i<nr_class;i++)
+					fp.writeBytes(" "+model.label[i]);
+				fp.writeBytes("\n");
+			}
+
+			if(model.probA != null) // regression has probA only
+			{
+				fp.writeBytes("probA");
+				for(int i=0;i<nr_class*(nr_class-1)/2;i++)
+					fp.writeBytes(" "+model.probA[i]);
+				fp.writeBytes("\n");
+			}
+			if(model.probB != null) 
+			{
+				fp.writeBytes("probB");
+				for(int i=0;i<nr_class*(nr_class-1)/2;i++)
+					fp.writeBytes(" "+model.probB[i]);
+				fp.writeBytes("\n");
+			}
+
+			if(model.nSV != null)
+			{
+				fp.writeBytes("nr_sv");
+				for(int i=0;i<nr_class;i++)
+					fp.writeBytes(" "+model.nSV[i]);
+				fp.writeBytes("\n");
+			}
+
+			// SV section: one line per support vector — its k-1 coefficients
+			// followed by the sparse index:value pairs.
+			fp.writeBytes("SV\n");
+			double[][] sv_coef = model.sv_coef;
+			SupportVectorMachineNode[][] SV = model.SV;
+
+			for(int i=0;i<l;i++)
+			{
+				for(int j=0;j<nr_class-1;j++)
+					fp.writeBytes(sv_coef[j][i]+" ");
+
+				SupportVectorMachineNode[] p = SV[i];
+				if(param.kernel_type == svm_parameter.PRECOMPUTED)
+					fp.writeBytes("0:"+(int)(p[0].value));
+				else	
+					for(int j=0;j<p.length;j++)
+						fp.writeBytes(p[j].index+":"+p[j].value+" ");
+				fp.writeBytes("\n");
+			}
+		}
+		finally
+		{
+			fp.close();
+		}
+	}
+
+	// C-style atof helper: parses s as a double (used by the model reader).
+	// Throws NumberFormatException on malformed input.
+	private static double atof(String s)
+	{
+		return Double.valueOf(s).doubleValue();
+	}
+
+	// C-style atoi helper: parses s as an int (used by the model reader).
+	// Throws NumberFormatException on malformed input.
+	private static int atoi(String s)
+	{
+		return Integer.parseInt(s);
+	}
+
+	/**
+	 * Parses the header section of a model file (everything up to and
+	 * including the "SV" line) into {@code model}.  Returns false on
+	 * malformed or truncated input.
+	 *
+	 * Fix: a null {@code readLine()} (EOF before the "SV" marker was seen)
+	 * previously hit {@code continue} and looped forever; it now reports the
+	 * truncation and returns false.
+	 */
+	private static boolean read_model_header(BufferedReader fp, svm_model model)
+	{
+		svm_parameter param = new svm_parameter();
+		model.param = param;
+		try
+		{
+			while(true)
+			{
+				String cmd = fp.readLine();
+				if(cmd == null)
+				{
+					// Truncated file: EOF reached without the "SV" section.
+					System.err.print("unexpected end of model file\n");
+					return false;
+				}
+
+				String arg = cmd.substring(cmd.indexOf(' ')+1);
+
+				if(cmd.startsWith("svm_type"))
+				{
+					int i;
+					for(i=0;i<svm_type_table.length;i++)
+					{
+						if(arg.indexOf(svm_type_table[i])!=-1)
+						{
+							param.svm_type=i;
+							break;
+						}
+					}
+					if(i == svm_type_table.length)
+					{
+						System.err.print("unknown svm type.\n");
+						return false;
+					}
+				}
+				else if(cmd.startsWith("kernel_type"))
+				{
+					int i;
+					for(i=0;i<kernel_type_table.length;i++)
+					{
+						if(arg.indexOf(kernel_type_table[i])!=-1)
+						{
+							param.kernel_type=i;
+							break;
+						}
+					}
+					if(i == kernel_type_table.length)
+					{
+						System.err.print("unknown kernel function.\n");
+						return false;
+					}
+				}
+				else if(cmd.startsWith("degree"))
+					param.degree = atoi(arg);
+				else if(cmd.startsWith("gamma"))
+					param.gamma = atof(arg);
+				else if(cmd.startsWith("coef0"))
+					param.coef0 = atof(arg);
+				else if(cmd.startsWith("nr_class"))
+					model.nr_class = atoi(arg);
+				else if(cmd.startsWith("total_sv"))
+					model.l = atoi(arg);
+				else if(cmd.startsWith("rho"))
+				{
+					int n = model.nr_class * (model.nr_class-1)/2;
+					model.rho = new double[n];
+					StringTokenizer st = new StringTokenizer(arg);
+					for(int i=0;i<n;i++)
+						model.rho[i] = atof(st.nextToken());
+				}
+				else if(cmd.startsWith("label"))
+				{
+					int n = model.nr_class;
+					model.label = new int[n];
+					StringTokenizer st = new StringTokenizer(arg);
+					for(int i=0;i<n;i++)
+						model.label[i] = atoi(st.nextToken());					
+				}
+				else if(cmd.startsWith("probA"))
+				{
+					int n = model.nr_class*(model.nr_class-1)/2;
+					model.probA = new double[n];
+					StringTokenizer st = new StringTokenizer(arg);
+					for(int i=0;i<n;i++)
+						model.probA[i] = atof(st.nextToken());					
+				}
+				else if(cmd.startsWith("probB"))
+				{
+					int n = model.nr_class*(model.nr_class-1)/2;
+					model.probB = new double[n];
+					StringTokenizer st = new StringTokenizer(arg);
+					for(int i=0;i<n;i++)
+						model.probB[i] = atof(st.nextToken());					
+				}
+				else if(cmd.startsWith("nr_sv"))
+				{
+					int n = model.nr_class;
+					model.nSV = new int[n];
+					StringTokenizer st = new StringTokenizer(arg);
+					for(int i=0;i<n;i++)
+						model.nSV[i] = atoi(st.nextToken());
+				}
+				else if(cmd.startsWith("SV"))
+				{
+					break;
+				}
+				else
+				{
+					System.err.print("unknown text in model file: ["+cmd+"]\n");
+					return false;
+				}
+			}
+		}
+		// NOTE(review): RuntimeExceptions (e.g. NumberFormatException from
+		// atoi/atof) are deliberately rethrown rather than mapped to false —
+		// preserved from the original; confirm callers rely on this.
+		catch(RuntimeException e) {
+			throw e;
+		} catch(Exception e) {
+			return false;
+		}
+		return true;
+	}
+
+	// Opens the model file as UTF-8 text and delegates to the reader overload,
+	// which takes ownership of (and closes) the stream.
+	public static svm_model svm_load_model(String model_file_name) throws IOException
+	{
+		FileInputStream rawBytes = new FileInputStream(model_file_name);
+		InputStreamReader utf8Chars = new InputStreamReader(rawBytes, "UTF-8");
+		return svm_load_model(new BufferedReader(utf8Chars));
+	}
+
+	/**
+	 * Reads a complete model (header plus support vectors) from {@code fp}.
+	 * Returns null when the header is malformed.
+	 *
+	 * Fixes: the reader is now closed in a finally block (previously it
+	 * leaked when parsing threw), and a truncated SV section stops reading
+	 * immediately instead of uselessly polling readLine() for every
+	 * remaining row (the resulting model state is the same: missing rows
+	 * stay null, as before).
+	 */
+	public static svm_model svm_load_model(BufferedReader fp) throws IOException
+	{
+		try
+		{
+			// read parameters
+
+			svm_model model = new svm_model();
+			model.rho = null;
+			model.probA = null;
+			model.probB = null;
+			model.label = null;
+			model.nSV = null;
+
+			if (read_model_header(fp, model) == false)
+			{
+				System.err.print("ERROR: failed to read model\n");
+				return null;
+			}
+
+			// read sv_coef and SV
+
+			int m = model.nr_class - 1;
+			int l = model.l;
+			model.sv_coef = new double[m][l];
+			model.SV = new SupportVectorMachineNode[l][];
+
+			for(int i=0;i<l;i++)
+			{
+				String line = fp.readLine();
+				if(line == null)
+					break;	// truncated SV section; remaining rows stay null
+
+				StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:");
+
+				// First m tokens: the support vector's coefficients.
+				for (int k = 0; k < m; k++)
+					model.sv_coef[k][i] = atof(st.nextToken());
+				// Remaining tokens come in index:value pairs.
+				int n = st.countTokens() / 2;
+				model.SV[i] = new SupportVectorMachineNode[n];
+				for (int j = 0; j < n; j++) {
+					model.SV[i][j] = new SupportVectorMachineNode();
+					model.SV[i][j].index = atoi(st.nextToken());
+					model.SV[i][j].value = atof(st.nextToken());
+				}
+			}
+
+			return model;
+		}
+		finally
+		{
+			fp.close();
+		}
+	}
+
+	// Validates param against prob.  Returns null when everything is
+	// consistent, otherwise a short English message describing the first
+	// violated constraint (the messages are part of the public contract).
+	public static String svm_check_parameter(svm_problem prob, svm_parameter param)
+	{
+		// svm_type
+
+		int svm_type = param.svm_type;
+		if(svm_type != svm_parameter.C_SVC &&
+		   svm_type != svm_parameter.NU_SVC &&
+		   svm_type != svm_parameter.ONE_CLASS &&
+		   svm_type != svm_parameter.EPSILON_SVR &&
+		   svm_type != svm_parameter.NU_SVR)
+		return "unknown svm type";
+
+		// kernel_type, degree
+	
+		int kernel_type = param.kernel_type;
+		if(kernel_type != svm_parameter.LINEAR &&
+		   kernel_type != svm_parameter.POLY &&
+		   kernel_type != svm_parameter.RBF &&
+		   kernel_type != svm_parameter.SIGMOID &&
+		   kernel_type != svm_parameter.PRECOMPUTED)
+			return "unknown kernel type";
+
+		if(param.gamma < 0)
+			return "gamma < 0";
+
+		if(param.degree < 0)
+			return "degree of polynomial kernel < 0";
+
+		// cache_size,eps,C,nu,p,shrinking
+
+		if(param.cache_size <= 0)
+			return "cache_size <= 0";
+
+		if(param.eps <= 0)
+			return "eps <= 0";
+
+		if(svm_type == svm_parameter.C_SVC ||
+		   svm_type == svm_parameter.EPSILON_SVR ||
+		   svm_type == svm_parameter.NU_SVR)
+			if(param.C <= 0)
+				return "C <= 0";
+
+		if(svm_type == svm_parameter.NU_SVC ||
+		   svm_type == svm_parameter.ONE_CLASS ||
+		   svm_type == svm_parameter.NU_SVR)
+			if(param.nu <= 0 || param.nu > 1)
+				return "nu <= 0 or nu > 1";
+
+		if(svm_type == svm_parameter.EPSILON_SVR)
+			if(param.p < 0)
+				return "p < 0";
+
+		if(param.shrinking != 0 &&
+		   param.shrinking != 1)
+			return "shrinking != 0 and shrinking != 1";
+
+		if(param.probability != 0 &&
+		   param.probability != 1)
+			return "probability != 0 and probability != 1";
+
+		if(param.probability == 1 &&
+		   svm_type == svm_parameter.ONE_CLASS)
+			return "one-class SVM probability output not supported yet";
+		
+		// check whether nu-svc is feasible
+	
+		if(svm_type == svm_parameter.NU_SVC)
+		{
+			// Count instances per class (same discovery logic as
+			// svm_group_classes, including the doubling of the tables).
+			int l = prob.l;
+			int max_nr_class = 16;
+			int nr_class = 0;
+			int[] label = new int[max_nr_class];
+			int[] count = new int[max_nr_class];
+
+			int i;
+			for(i=0;i<l;i++)
+			{
+				int this_label = (int)prob.y[i];
+				int j;
+				for(j=0;j<nr_class;j++)
+					if(this_label == label[j])
+					{
+						++count[j];
+						break;
+					}
+
+				if(j == nr_class)
+				{
+					if(nr_class == max_nr_class)
+					{
+						max_nr_class *= 2;
+						int[] new_data = new int[max_nr_class];
+						System.arraycopy(label,0,new_data,0,label.length);
+						label = new_data;
+						
+						new_data = new int[max_nr_class];
+						System.arraycopy(count,0,new_data,0,count.length);
+						count = new_data;
+					}
+					label[nr_class] = this_label;
+					count[nr_class] = 1;
+					++nr_class;
+				}
+			}
+
+			// nu must satisfy nu*(n1+n2)/2 <= min(n1,n2) for every class pair.
+			for(i=0;i<nr_class;i++)
+			{
+				int n1 = count[i];
+				for(int j=i+1;j<nr_class;j++)
+				{
+					int n2 = count[j];
+					if(param.nu*(n1+n2)/2 > Math.min(n1,n2))
+						return "specified nu is infeasible";
+				}
+			}
+		}
+
+		return null;
+	}
+
+	public static int svm_check_probability_model(svm_model model)
+	{
+		if (((model.param.svm_type == svm_parameter.C_SVC || model.param.svm_type == svm_parameter.NU_SVC) &&
+		model.probA!=null && model.probB!=null) ||
+		((model.param.svm_type == svm_parameter.EPSILON_SVR || model.param.svm_type == svm_parameter.NU_SVR) &&
+		 model.probA!=null))
+			return 1;
+		else
+			return 0;
+	}
+
+	public static void svm_set_print_string_function(svm_print_interface print_func)
+	{
+		if (print_func == null)
+			svm_print_string = svm_print_stdout;
+		else 
+			svm_print_string = print_func;
+	}
+}

+ 17 - 0
gtbook/src/main/java/opennlp/tools/svm/libsvm/SupportVectorMachineNode.java

@@ -0,0 +1,17 @@
+package opennlp.tools.svm.libsvm;
+// One sparse feature of a training/prediction instance: a feature index
+// paired with its value (instances are arrays of these nodes).
+public class SupportVectorMachineNode implements java.io.Serializable
+{
+	// feature index
+	public int index;
+	// feature value
+	public double value;
+
+	// Copies both fields from rhs into this node.
+	public void copy(SupportVectorMachineNode rhs){
+		index = rhs.index;
+		value = rhs.value;
+	}
+
+	// Returns an independent copy of this node.
+	public SupportVectorMachineNode makeCopy(){
+		SupportVectorMachineNode clone = new SupportVectorMachineNode();
+		clone.copy(this);
+		return clone;
+	}
+}

+ 57 - 0
gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_model.java

@@ -0,0 +1,57 @@
+//
+// svm_model
+//
+package opennlp.tools.svm.libsvm;
+public class svm_model implements Cloneable, java.io.Serializable {
+	public svm_parameter param;    // parameter
+	public int nr_class;        // number of classes, = 2 in regression/one class svm
+	public int l;            // total #SV
+	public SupportVectorMachineNode[][] SV;    // SVs (SV[l])
+	public double[][] sv_coef;    // coefficients for SVs in decision functions (sv_coef[k-1][l])
+	public double[] rho;        // constants in decision functions (rho[k*(k-1)/2])
+	public double[] probA;         // pairwise probability information
+	public double[] probB;
+	public int[] sv_indices;       // sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to indicate SVs in the train set
+
+	// for classification only
+
+	public int[] label;        // label of each class (label[k])
+	public int[] nSV;        // number of SVs for each class (nSV[k])
+	// nSV[0] + nSV[1] + ... + nSV[k-1] = l
+
+	/** Returns a deep copy of this model (param is shared — see copy()). */
+	public svm_model makeCopy() {
+		svm_model clone = new svm_model();
+		clone.copy(this);
+
+		return clone;
+	}
+
+	/**
+	 * Deep-copies rhs into this model.  SV and sv_coef are now null-guarded,
+	 * consistent with the handling of rho/probA/probB/label/nSV below; the
+	 * original threw NullPointerException when copying a model that had not
+	 * been fully populated yet.
+	 */
+	public void copy(svm_model rhs){
+		param = rhs.param;	// NOTE: shared reference, preserved from the original
+		nr_class = rhs.nr_class;
+		l = rhs.l;            // total #SV
+
+		if (rhs.SV == null) {
+			SV = null;
+		} else {
+			SV = new SupportVectorMachineNode[rhs.SV.length][];    // SVs (SV[l])
+			for(int i=0; i < rhs.SV.length; ++i){
+				SV[i] = new SupportVectorMachineNode[rhs.SV[i].length];
+				for(int j=0; j < rhs.SV[i].length; ++j){
+					SV[i][j] = rhs.SV[i][j].makeCopy();
+				}
+			}
+		}
+
+		if (rhs.sv_coef == null) {
+			sv_coef = null;
+		} else {
+			sv_coef = new double[rhs.sv_coef.length][];    // coefficients for SVs in decision functions (sv_coef[k-1][l])
+			for(int i=0; i < rhs.sv_coef.length; ++i){
+				sv_coef[i] = rhs.sv_coef[i].clone();
+			}
+		}
+
+		rho = rhs.rho == null ? null : rhs.rho.clone();        // constants in decision functions (rho[k*(k-1)/2])
+		probA = rhs.probA == null ? null : rhs.probA.clone();         // pairwise probability information
+		probB = rhs.probB == null ? null : rhs.probB.clone();
+		sv_indices = rhs.sv_indices == null ? null : rhs.sv_indices.clone();       // 1-based indices of the SVs in the train set
+
+		// for classification only
+
+		label = rhs.label == null ? null : rhs.label.clone();        // label of each class (label[k])
+		nSV = rhs.nSV == null ? null : rhs.nSV.clone();        // number of SVs for each class (nSV[k])
+	}
+}

+ 63 - 0
gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_parameter.java

@@ -0,0 +1,63 @@
+package opennlp.tools.svm.libsvm;
+public class svm_parameter implements Cloneable,java.io.Serializable
+{
+	/* svm_type */
+	public static final int C_SVC = 0;
+	public static final int NU_SVC = 1;
+	public static final int ONE_CLASS = 2;
+	public static final int EPSILON_SVR = 3;
+	public static final int NU_SVR = 4;
+
+	/* kernel_type */
+	public static final int LINEAR = 0;
+	public static final int POLY = 1;
+	public static final int RBF = 2;
+	public static final int SIGMOID = 3;
+	public static final int PRECOMPUTED = 4;
+
+	public int svm_type;
+	public int kernel_type;
+	public int degree;	// for poly
+	public double gamma;	// for poly/rbf/sigmoid
+	public double coef0;	// for poly/sigmoid
+
+	// these are for train only
+	public double cache_size; // in MB
+	public double eps;	// stopping criterion
+	public double C;	// for C_SVC, EPSILON_SVR and NU_SVR
+	public int nr_weight;		// for C_SVC
+	public int[] weight_label;	// for C_SVC
+	public double[] weight;		// for C_SVC
+	public double nu;	// for NU_SVC, ONE_CLASS, and NU_SVR
+	public double p;	// for EPSILON_SVR
+	public int shrinking;	// use the shrinking heuristics
+	public int probability; // do probability estimates
+
+	/**
+	 * Copies all settings from rhs.  The weight arrays are now null-guarded:
+	 * weight_label/weight have no initializer and default to null (with
+	 * nr_weight == 0), and the original code threw NullPointerException from
+	 * makeCopy() in that case.
+	 */
+	public void copy(svm_parameter rhs){
+		svm_type = rhs.svm_type;
+		kernel_type = rhs.kernel_type;
+		degree = rhs.degree;	// for poly
+		gamma = rhs.gamma;	// for poly/rbf/sigmoid
+		coef0 = rhs.coef0;	// for poly/sigmoid
+
+		// these are for train only
+		cache_size = rhs.cache_size; // in MB
+		eps = rhs.eps;	// stopping criterion
+		C = rhs.C;	// for C_SVC, EPSILON_SVR and NU_SVR
+		nr_weight = rhs.nr_weight;		// for C_SVC
+		weight_label = rhs.weight_label == null ? null : rhs.weight_label.clone();	// for C_SVC
+		weight = rhs.weight == null ? null : rhs.weight.clone();		// for C_SVC
+		nu = rhs.nu;	// for NU_SVC, ONE_CLASS, and NU_SVR
+		p = rhs.p;	// for EPSILON_SVR
+		shrinking = rhs.shrinking;	// use the shrinking heuristics
+		probability = rhs.probability; // do probability estimates
+	}
+
+	/** Returns an independent copy of this parameter set. */
+	public svm_parameter makeCopy()
+	{
+		svm_parameter clone = new svm_parameter();
+		clone.copy(this);
+		return clone;
+	}
+
+}

+ 197 - 0
gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_predict.java

@@ -0,0 +1,197 @@
+package opennlp.tools.svm.libsvm;
+
+import java.io.*;
+import java.util.StringTokenizer;
+
+
+class svm_predict {
+	private static svm_print_interface svm_print_null = new svm_print_interface()
+	{
+		public void print(String s) {}
+	};
+
+	private static svm_print_interface svm_print_stdout = new svm_print_interface()
+	{
+		public void print(String s)
+		{
+			System.out.print(s);
+		}
+	};
+
+	private static svm_print_interface svm_print_string = svm_print_stdout;
+
+	static void info(String s) 
+	{
+		svm_print_string.print(s);
+	}
+
+	private static double atof(String s)
+	{
+		return Double.valueOf(s).doubleValue();
+	}
+
+	private static int atoi(String s)
+	{
+		return Integer.parseInt(s);
+	}
+
+	private static void predict(BufferedReader input, DataOutputStream output, svm_model model, int predict_probability) throws IOException
+	{
+		int correct = 0;
+		int total = 0;
+		double error = 0;
+		double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
+
+		int svm_type= SupportVectorMachine.svm_get_svm_type(model);
+		int nr_class= SupportVectorMachine.svm_get_nr_class(model);
+		double[] prob_estimates=null;
+
+		if(predict_probability == 1)
+		{
+			if(svm_type == svm_parameter.EPSILON_SVR ||
+			   svm_type == svm_parameter.NU_SVR)
+			{
+				svm_predict.info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma="+ SupportVectorMachine
+						.svm_get_svr_probability(model)+"\n");
+			}
+			else
+			{
+				int[] labels=new int[nr_class];
+				SupportVectorMachine.svm_get_labels(model,labels);
+				prob_estimates = new double[nr_class];
+				output.writeBytes("labels");
+				for(int j=0;j<nr_class;j++)
+					output.writeBytes(" "+labels[j]);
+				output.writeBytes("\n");
+			}
+		}
+		while(true)
+		{
+			String line = input.readLine();
+			if(line == null) break;
+
+			StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
+
+			double target = atof(st.nextToken());
+			int m = st.countTokens()/2;
+			SupportVectorMachineNode[] x = new SupportVectorMachineNode[m];
+			for(int j=0;j<m;j++)
+			{
+				x[j] = new SupportVectorMachineNode();
+				x[j].index = atoi(st.nextToken());
+				x[j].value = atof(st.nextToken());
+			}
+
+			double v;
+			if (predict_probability==1 && (svm_type==svm_parameter.C_SVC || svm_type==svm_parameter.NU_SVC))
+			{
+				v = SupportVectorMachine.svm_predict_probability(model,x,prob_estimates);
+				output.writeBytes(v+" ");
+				for(int j=0;j<nr_class;j++)
+					output.writeBytes(prob_estimates[j]+" ");
+				output.writeBytes("\n");
+			}
+			else
+			{
+				v = SupportVectorMachine.svm_predict(model,x);
+				output.writeBytes(v+"\n");
+			}
+
+			if(v == target)
+				++correct;
+			error += (v-target)*(v-target);
+			sumv += v;
+			sumy += target;
+			sumvv += v*v;
+			sumyy += target*target;
+			sumvy += v*target;
+			++total;
+		}
+		if(svm_type == svm_parameter.EPSILON_SVR ||
+		   svm_type == svm_parameter.NU_SVR)
+		{
+			svm_predict.info("Mean squared error = "+error/total+" (regression)\n");
+			svm_predict.info("Squared correlation coefficient = "+
+				 ((total*sumvy-sumv*sumy)*(total*sumvy-sumv*sumy))/
+				 ((total*sumvv-sumv*sumv)*(total*sumyy-sumy*sumy))+
+				 " (regression)\n");
+		}
+		else
+			svm_predict.info("Accuracy = "+(double)correct/total*100+
+				 "% ("+correct+"/"+total+") (classification)\n");
+	}
+
+	private static void exit_with_help()
+	{
+		System.err.print("usage: svm_predict [options] test_file model_file output_file\n"
+		+"options:\n"
+		+"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); one-class SVM not supported yet\n"
+		+"-q : quiet mode (no outputs)\n");
+		System.exit(1);
+	}
+
+	public static void main(String argv[]) throws IOException
+	{
+		int i, predict_probability=0;
+        	svm_print_string = svm_print_stdout;
+
+		// parse options
+		for(i=0;i<argv.length;i++)
+		{
+			if(argv[i].charAt(0) != '-') break;
+			++i;
+			switch(argv[i-1].charAt(1))
+			{
+				case 'b':
+					predict_probability = atoi(argv[i]);
+					break;
+				case 'q':
+					svm_print_string = svm_print_null;
+					i--;
+					break;
+				default:
+					System.err.print("Unknown option: " + argv[i-1] + "\n");
+					exit_with_help();
+			}
+		}
+		if(i>=argv.length-2)
+			exit_with_help();
+		try 
+		{
+			BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(argv[i]), "UTF-8"));
+			DataOutputStream output = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(argv[i+2])));
+			svm_model model = SupportVectorMachine.svm_load_model(argv[i+1]);
+			if (model == null)
+			{
+				System.err.print("can't open model file "+argv[i+1]+"\n");
+				System.exit(1);
+			}
+			if(predict_probability == 1)
+			{
+				if(SupportVectorMachine.svm_check_probability_model(model)==0)
+				{
+					System.err.print("Model does not support probabiliy estimates\n");
+					System.exit(1);
+				}
+			}
+			else
+			{
+				if(SupportVectorMachine.svm_check_probability_model(model)!=0)
+				{
+					svm_predict.info("Model supports probability estimates, but disabled in prediction.\n");
+				}
+			}
+			predict(input,output,model,predict_probability);
+			input.close();
+			output.close();
+		} 
+		catch(FileNotFoundException e) 
+		{
+			exit_with_help();
+		}
+		catch(ArrayIndexOutOfBoundsException e) 
+		{
+			exit_with_help();
+		}
+	}
+}

+ 5 - 0
gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_print_interface.java

@@ -0,0 +1,5 @@
+package opennlp.tools.svm.libsvm;
+/**
+ * Output sink for libsvm's informational messages.  Implementations are
+ * installed via {@code SupportVectorMachine.svm_set_print_string_function};
+ * callers in this commit pass {@code null} to select the default stdout printer.
+ */
+public interface svm_print_interface
+{
+	void print(String s);
+}

+ 7 - 0
gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_problem.java

@@ -0,0 +1,7 @@
+package opennlp.tools.svm.libsvm;
+/**
+ * A training set in libsvm form: {@code l} records, target value {@code y[i]}
+ * and sparse feature vector {@code x[i]}.
+ */
+public class svm_problem implements java.io.Serializable
+{
+	public int l;		// number of training records
+	public double[] y;	// target values, length l
+	public SupportVectorMachineNode[][] x;	// sparse feature vectors, length l
+}

+ 324 - 0
gtbook/src/main/java/opennlp/tools/svm/libsvm/svm_train.java

@@ -0,0 +1,324 @@
+package opennlp.tools.svm.libsvm;
+
+/**
+ * Created by xschen on 16/8/15.
+ */
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+import java.util.Vector;
+
+
+class svm_train {
+    private svm_parameter param;		// set by parse_command_line
+    private svm_problem prob;		// set by read_problem
+    private svm_model model;
+    private String input_file_name;		// set by parse_command_line
+    private String model_file_name;		// set by parse_command_line
+    private String error_msg;
+    private int cross_validation;
+    private int nr_fold;
+
+    private static svm_print_interface svm_print_null = new svm_print_interface()
+    {
+        public void print(String s) {}
+    };
+
+    private static void exit_with_help()
+    {
+        System.out.print(
+                "Usage: svm_train [options] training_set_file [model_file]\n"
+                        +"options:\n"
+                        +"-s svm_type : set type of SVM (default 0)\n"
+                        +"	0 -- C-SVC		(multi-class classification)\n"
+                        +"	1 -- nu-SVC		(multi-class classification)\n"
+                        +"	2 -- one-class SVM\n"
+                        +"	3 -- epsilon-SVR	(regression)\n"
+                        +"	4 -- nu-SVR		(regression)\n"
+                        +"-t kernel_type : set type of kernel function (default 2)\n"
+                        +"	0 -- linear: u'*v\n"
+                        +"	1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
+                        +"	2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
+                        +"	3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
+                        +"	4 -- precomputed kernel (kernel values in training_set_file)\n"
+                        +"-d degree : set degree in kernel function (default 3)\n"
+                        +"-g gamma : set gamma in kernel function (default 1/num_features)\n"
+                        +"-r coef0 : set coef0 in kernel function (default 0)\n"
+                        +"-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n"
+                        +"-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n"
+                        +"-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
+                        +"-m cachesize : set cache memory size in MB (default 100)\n"
+                        +"-e epsilon : set tolerance of termination criterion (default 0.001)\n"
+                        +"-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n"
+                        +"-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n"
+                        +"-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n"
+                        +"-v n : n-fold cross validation mode\n"
+                        +"-q : quiet mode (no outputs)\n"
+        );
+        System.exit(1);
+    }
+
+    private void do_cross_validation()
+    {
+        int i;
+        int total_correct = 0;
+        double total_error = 0;
+        double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
+        double[] target = new double[prob.l];
+
+        SupportVectorMachine.svm_cross_validation(prob,param,nr_fold,target);
+        if(param.svm_type == svm_parameter.EPSILON_SVR ||
+                param.svm_type == svm_parameter.NU_SVR)
+        {
+            for(i=0;i<prob.l;i++)
+            {
+                double y = prob.y[i];
+                double v = target[i];
+                total_error += (v-y)*(v-y);
+                sumv += v;
+                sumy += y;
+                sumvv += v*v;
+                sumyy += y*y;
+                sumvy += v*y;
+            }
+            System.out.print("Cross Validation Mean squared error = "+total_error/prob.l+"\n");
+            System.out.print("Cross Validation Squared correlation coefficient = "+
+                            ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
+                                    ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))+"\n"
+            );
+        }
+        else
+        {
+            for(i=0;i<prob.l;i++)
+                if(target[i] == prob.y[i])
+                    ++total_correct;
+            System.out.print("Cross Validation Accuracy = "+100.0*total_correct/prob.l+"%\n");
+        }
+    }
+
+    private void run(String argv[]) throws IOException
+    {
+        parse_command_line(argv);
+        read_problem();
+        error_msg = SupportVectorMachine.svm_check_parameter(prob,param);
+
+        if(error_msg != null)
+        {
+            System.err.print("ERROR: "+error_msg+"\n");
+            throw new RuntimeException("ERROR: "+error_msg);
+        }
+
+        if(cross_validation != 0)
+        {
+            do_cross_validation();
+        }
+        else
+        {
+            model = SupportVectorMachine.svm_train(prob,param);
+            SupportVectorMachine.svm_save_model(model_file_name,model);
+        }
+    }
+
+    private static double atof(String s)
+    {
+        double d = Double.valueOf(s).doubleValue();
+        if (Double.isNaN(d) || Double.isInfinite(d))
+        {
+            System.err.print("NaN or Infinity in input\n");
+            System.exit(1);
+        }
+        return(d);
+    }
+
+    private static int atoi(String s)
+    {
+        return Integer.parseInt(s);
+    }
+
+    private void parse_command_line(String argv[])
+    {
+        int i;
+        svm_print_interface print_func = null;	// default printing to stdout
+
+        param = new svm_parameter();
+        // default values
+        param.svm_type = svm_parameter.C_SVC;
+        param.kernel_type = svm_parameter.RBF;
+        param.degree = 3;
+        param.gamma = 0;	// 1/num_features
+        param.coef0 = 0;
+        param.nu = 0.5;
+        param.cache_size = 100;
+        param.C = 1;
+        param.eps = 1e-3;
+        param.p = 0.1;
+        param.shrinking = 1;
+        param.probability = 0;
+        param.nr_weight = 0;
+        param.weight_label = new int[0];
+        param.weight = new double[0];
+        cross_validation = 0;
+
+        // parse options
+        for(i=0;i<argv.length;i++)
+        {
+            if(argv[i].charAt(0) != '-') break;
+            if(++i>=argv.length)
+                exit_with_help();
+            switch(argv[i-1].charAt(1))
+            {
+                case 's':
+                    param.svm_type = atoi(argv[i]);
+                    break;
+                case 't':
+                    param.kernel_type = atoi(argv[i]);
+                    break;
+                case 'd':
+                    param.degree = atoi(argv[i]);
+                    break;
+                case 'g':
+                    param.gamma = atof(argv[i]);
+                    break;
+                case 'r':
+                    param.coef0 = atof(argv[i]);
+                    break;
+                case 'n':
+                    param.nu = atof(argv[i]);
+                    break;
+                case 'm':
+                    param.cache_size = atof(argv[i]);
+                    break;
+                case 'c':
+                    param.C = atof(argv[i]);
+                    break;
+                case 'e':
+                    param.eps = atof(argv[i]);
+                    break;
+                case 'p':
+                    param.p = atof(argv[i]);
+                    break;
+                case 'h':
+                    param.shrinking = atoi(argv[i]);
+                    break;
+                case 'b':
+                    param.probability = atoi(argv[i]);
+                    break;
+                case 'q':
+                    print_func = svm_print_null;
+                    i--;
+                    break;
+                case 'v':
+                    cross_validation = 1;
+                    nr_fold = atoi(argv[i]);
+                    if(nr_fold < 2)
+                    {
+                        System.err.print("n-fold cross validation: n must >= 2\n");
+                        exit_with_help();
+                    }
+                    break;
+                case 'w':
+                    ++param.nr_weight;
+                {
+                    int[] old = param.weight_label;
+                    param.weight_label = new int[param.nr_weight];
+                    System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1);
+                }
+
+                {
+                    double[] old = param.weight;
+                    param.weight = new double[param.nr_weight];
+                    System.arraycopy(old,0,param.weight,0,param.nr_weight-1);
+                }
+
+                param.weight_label[param.nr_weight-1] = atoi(argv[i-1].substring(2));
+                param.weight[param.nr_weight-1] = atof(argv[i]);
+                break;
+                default:
+                    System.err.print("Unknown option: " + argv[i-1] + "\n");
+                    exit_with_help();
+            }
+        }
+
+        SupportVectorMachine.svm_set_print_string_function(print_func);
+
+        // determine filenames
+
+        if(i>=argv.length)
+            exit_with_help();
+
+        input_file_name = argv[i];
+
+        if(i<argv.length-1)
+            model_file_name = argv[i+1];
+        else
+        {
+            int p = argv[i].lastIndexOf('/');
+            ++p;	// whew...
+            model_file_name = argv[i].substring(p)+".model";
+        }
+    }
+
+    // read in a problem (in svmlight format)
+
+    private void read_problem() throws IOException
+    {
+        BufferedReader fp = new BufferedReader(new InputStreamReader(new FileInputStream(input_file_name), "UTF-8"));
+        Vector<Double> vy = new Vector<Double>();
+        Vector<SupportVectorMachineNode[]> vx = new Vector<SupportVectorMachineNode[]>();
+        int max_index = 0;
+
+        while(true)
+        {
+            String line = fp.readLine();
+            if(line == null) break;
+
+            StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
+
+            vy.addElement(atof(st.nextToken()));
+            int m = st.countTokens()/2;
+            SupportVectorMachineNode[] x = new SupportVectorMachineNode[m];
+            for(int j=0;j<m;j++)
+            {
+                x[j] = new SupportVectorMachineNode();
+                x[j].index = atoi(st.nextToken());
+                x[j].value = atof(st.nextToken());
+            }
+            if(m>0) max_index = Math.max(max_index, x[m-1].index);
+            vx.addElement(x);
+        }
+
+        fp.close();
+
+        prob = new svm_problem();
+        prob.l = vy.size();
+        prob.x = new SupportVectorMachineNode[prob.l][];
+        for(int i=0;i<prob.l;i++)
+            prob.x[i] = vx.elementAt(i);
+        prob.y = new double[prob.l];
+        for(int i=0;i<prob.l;i++)
+            prob.y[i] = vy.elementAt(i);
+
+        if(param.gamma == 0 && max_index > 0)
+            param.gamma = 1.0/max_index;
+
+        if(param.kernel_type == svm_parameter.PRECOMPUTED)
+            for(int i=0;i<prob.l;i++)
+            {
+                if (prob.x[i][0].index != 0)
+                {
+                    System.err.print("Wrong kernel matrix: first column must be 0:sample_serial_number\n");
+                    throw new RuntimeException("Wrong kernel matrix: first column must be 0:sample_serial_number");
+                }
+                if ((int)prob.x[i][0].value <= 0 || (int)prob.x[i][0].value > max_index)
+                {
+                    System.err.print("Wrong input format: sample_serial_number out of range\n");
+                    throw new RuntimeException("Wrong input format: sample_serial_number out of range");
+                }
+            }
+
+        fp.close();
+    }
+}

+ 14 - 0
gtbook/src/main/java/opennlp/tools/svm/svmext/Learner.java

@@ -0,0 +1,14 @@
+package opennlp.tools.svm.svmext;
+
+
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+
+
+/**
+ * Created by xschen on 6/5/2017.
+ */
+/** A trainable model over DataFrame rows. */
+public interface Learner {
+   /** Returns the model output for a single feature row. */
+   double transform(DataRow row);
+   /** Trains the model on the given data frame. */
+   void fit(DataFrame dataFrame);
+}

+ 221 - 0
gtbook/src/main/java/opennlp/tools/svm/svmext/classifiers/BinarySVC.java

@@ -0,0 +1,221 @@
+package opennlp.tools.svm.svmext.classifiers;
+
+
+//import com.github.svm.libsvm.*;
+//import com.github.svm.svmext.Learner;
+//import com.github.svm.data.frame.DataFrame;
+//import com.github.svm.data.frame.DataRow;
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+import opennlp.tools.svm.libsvm.*;
+import opennlp.tools.svm.svmext.Learner;
+
+import java.util.Vector;
+
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_predict;
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_set_print_string_function;
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_train;
+
+
+/**
+ * Created by xschen on 6/5/2017.
+ */
+public class BinarySVC implements Learner {
+
+   private static svm_print_interface svm_print_null = new svm_print_interface()
+   {
+      public void print(String s) {}
+   };
+   private svm_parameter parameters;
+   private int cross_validation;
+   private svm_model model;
+   private boolean quiet;
+
+   public void copy(BinarySVC that){
+      parameters = that.parameters == null ? null : that.parameters.makeCopy();
+      cross_validation = that.cross_validation;
+      model = that.model == null ? null : that.model.makeCopy();
+      quiet = that.quiet;
+   }
+
+   public BinarySVC makeCopy(){
+      BinarySVC clone = new BinarySVC();
+      clone.copy(this);
+
+      return clone;
+   }
+
+   public BinarySVC(){
+      init();
+   }
+
+   public static svm_print_interface getSvm_print_null() {
+      return svm_print_null;
+   }
+
+   public static void setSvm_print_null(svm_print_interface svm_print_null) {
+      BinarySVC.svm_print_null = svm_print_null;
+   }
+
+   public int getCross_validation() {
+      return cross_validation;
+   }
+
+   public void setCross_validation(int cross_validation) {
+      this.cross_validation = cross_validation;
+   }
+
+   public svm_model getModel() {
+      return model;
+   }
+
+   public void setModel(svm_model model) {
+      this.model = model;
+   }
+
+   public boolean isQuiet() {
+      return quiet;
+   }
+
+   public void setQuiet(boolean quiet) {
+      this.quiet = quiet;
+   }
+
+   public SVMType getSVMType(){
+      if(parameters.svm_type == svm_parameter.C_SVC){
+         return SVMType.C;
+      }else{
+         return SVMType.nu;
+      }
+   }
+
+   public void setSVMType(SVMType type){
+      switch (type){
+
+         case C:
+            parameters.svm_type = svm_parameter.C_SVC;
+            break;
+         case nu:
+            parameters.svm_type = svm_parameter.NU_SVC;
+            break;
+      }
+   }
+
+   private void init(){
+      svm_print_interface print_func = null;	// default printing to stdout
+
+      parameters = new svm_parameter();
+      // default values
+      parameters.svm_type = svm_parameter.C_SVC;
+      parameters.kernel_type = svm_parameter.RBF;
+      parameters.degree = 3;
+      parameters.gamma = 0;	// 1/num_features
+      parameters.coef0 = 0;
+      parameters.nu = 0.5;
+      parameters.cache_size = 100;
+      parameters.C = 1;
+      parameters.eps = 1e-3;
+      parameters.p = 0.1;
+      parameters.shrinking = 1;
+      parameters.probability = 0;
+      parameters.nr_weight = 0;
+      parameters.weight_label = new int[0];
+      parameters.weight = new double[0];
+      cross_validation = 0;
+
+      svm_set_print_string_function(null);
+      quiet = false;
+   }
+
+   public svm_parameter getParameters(){
+      return parameters;
+   }
+
+   public void setParameters(svm_parameter parameters) {
+      this.parameters = parameters;
+   }
+
+   private void info(String info){
+
+   }
+
+   @Override
+   public double transform(DataRow row) {
+      double[] x0 = row.toArray();
+      int n = x0.length;
+
+      SupportVectorMachineNode[] x = new SupportVectorMachineNode[n];
+      for(int j=0; j < n; j++)
+      {
+         x[j] = new SupportVectorMachineNode();
+         x[j].index = j+1;
+         x[j].value = x0[j];
+      }
+
+      return svm_predict(model, x);
+   }
+
+   public boolean isInClass(DataRow row) {
+      double p = transform(row);
+      return p > 0;
+   }
+
+   @Override
+   public void fit(DataFrame dataFrame) {
+
+      if(this.quiet){
+         svm_set_print_string_function(svm_print_null);
+      }else{
+         svm_set_print_string_function(null);
+      }
+
+      Vector<Double> vy = new Vector<Double>();
+      Vector<SupportVectorMachineNode[]> vx = new Vector<SupportVectorMachineNode[]>();
+      int max_index = 0;
+
+      int m = dataFrame.rowCount();
+
+
+      for(int i=0; i < m; ++i)
+      {
+         DataRow row = dataFrame.row(i);
+
+         double[] x0 = row.toArray();
+         int n = x0.length;
+
+         vy.add(row.target() > 0.5 ? 1.0 : -1.0);
+
+         SupportVectorMachineNode[] x = new SupportVectorMachineNode[n];
+         for(int j=0; j < n; j++)
+         {
+            x[j] = new SupportVectorMachineNode();
+            x[j].index = j+1;
+            x[j].value = x0[j];
+         }
+
+         if(n>0) max_index = Math.max(max_index, x[n-1].index);
+
+         vx.addElement(x);
+      }
+
+      svm_problem prob = new svm_problem();
+      prob.l = m;
+      prob.x = new SupportVectorMachineNode[m][];
+      for(int i=0;i<m;i++)
+         prob.x[i] = vx.elementAt(i);
+      prob.y = new double[m];
+      for(int i=0;i<m;i++)
+         prob.y[i] = vy.elementAt(i);
+
+      if(parameters.gamma == 0 && max_index > 0)
+         parameters.gamma = 1.0/max_index;
+
+
+      model = svm_train(prob, parameters);
+   }
+
+   public enum SVMType{
+      C,
+      nu
+   }
+}

+ 236 - 0
gtbook/src/main/java/opennlp/tools/svm/svmext/classifiers/OneVsOneSVC.java

@@ -0,0 +1,236 @@
+package opennlp.tools.svm.svmext.classifiers;
+
+//import com.github.svm.svmext.Learner;
+//import com.github.svm.data.frame.BasicDataFrame;
+//import com.github.svm.data.frame.DataFrame;
+//import com.github.svm.data.frame.DataRow;
+//import com.github.svm.svmext.regression.SVR;
+//import com.github.svm.data.utils.TupleTwo;
+import opennlp.tools.svm.data.frame.BasicDataFrame;
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+import opennlp.tools.svm.data.utils.TupleTwo;
+import opennlp.tools.svm.svmext.Learner;
+import opennlp.tools.svm.svmext.regression.SVR;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+
+/**
+ * Created by xschen on 8/20/2015 0020.
+ */
+/**
+ * One-vs-one multi-class classifier: trains one pair of SVR models per pair
+ * of class labels and classifies by majority vote over the pairs.
+ */
+public class OneVsOneSVC implements Learner {
+   protected List<TupleTwo<SVR, SVR>> classifiers;	// one (SVR, SVR) pair per label pair
+   private double alpha = 0.1;		// fraction of batches merged into each training slice
+   private boolean shuffleData = false;
+   private List<String> classLabels = new ArrayList<>();
+
+   // Target column name used for the per-classifier binary sub-problems.
+   private static final String BINARY_LABEL = "success";
+
+   public OneVsOneSVC(List<String> classLabels){
+      this.classLabels.addAll(classLabels);
+      classifiers = new ArrayList<>();
+   }
+
+   public OneVsOneSVC(){
+      super();
+      classifiers = new ArrayList<>();
+   }
+
+   public boolean isShuffleData() {
+      return shuffleData;
+   }
+
+   public void setShuffleData(boolean shuffleData) {
+      this.shuffleData = shuffleData;
+   }
+
+   public double getAlpha() {
+      return alpha;
+   }
+
+   public void setAlpha(double alpha) {
+      this.alpha = alpha;
+   }
+
+   /** Builds one classifier pair per unordered pair of class labels. */
+   protected void createClassifiers(DataFrame dataFrame){
+      classifiers = new ArrayList<>();
+
+      if(classLabels.isEmpty()){
+         // discover the labels from the training data when none were supplied
+         classLabels.addAll(dataFrame.stream().map(DataRow::categoricalTarget).distinct().collect(Collectors.toList()));
+      }
+      for(int i=0; i < classLabels.size()-1; ++i){
+         for(int j=i+1; j < classLabels.size(); ++j) {
+            SVR svr1 = createClassifier(classLabels.get(i));
+            SVR svr2 = createClassifier(classLabels.get(j));
+            classifiers.add(new TupleTwo<>(svr1, svr2));
+         }
+      }
+   }
+
+
+
+   /** Creates one SVR scorer named after the class label it recognises. */
+   protected SVR createClassifier(String classLabel) {
+      SVR svr = new SVR();
+      svr.setName(classLabel);
+      return svr;
+   }
+
+   protected double getClassifierScore(DataRow tuple, SVR classifier) {
+      return classifier.transform(tuple);
+   }
+
+   /** Deals the rows round-robin into n mini data frames. */
+   protected List<DataFrame> split(DataFrame dataFrame, int n){
+      List<DataFrame> miniFrames = new ArrayList<>();
+
+      for(int i=0; i < n; ++i){
+         miniFrames.add(new BasicDataFrame());
+      }
+
+      int index = 0;
+      for(DataRow tuple : dataFrame) {
+         int batchIndex = index % n;
+         miniFrames.get(batchIndex).addRow(tuple);
+         index++;
+      }
+
+      return miniFrames;
+   }
+
+   /** Builds, for each batch i, the union (copies) of batches i..i+k-1 (wrapping). */
+   protected List<DataFrame> remerge(List<DataFrame> batches, int k){
+      List<DataFrame> newBatches = new ArrayList<>();
+
+
+      for(int i=0; i < batches.size(); ++i){
+
+         DataFrame newBatch = new BasicDataFrame();
+
+         for(int j=0; j < k; ++j){
+            int d = (i + j) % batches.size();
+            DataFrame batch = batches.get(d);
+            for(DataRow tuple : batch){
+               newBatch.addRow(tuple.makeCopy());
+            }
+         }
+
+         newBatches.add(newBatch);
+      }
+      return newBatches;
+   }
+
+
+   /** Returns the predicted label's index within classLabels (-1 for "NA"). */
+   @Override public double transform(DataRow row) {
+      String label = classify(row);
+      return classLabels.indexOf(label);
+   }
+
+
+   /** Trains every classifier pair on its (re-merged) slice of the data. */
+   @Override
+   public void fit(DataFrame dataFrame) {
+
+      createClassifiers(dataFrame);
+
+      if(shuffleData) {
+         dataFrame.shuffle();
+      }
+
+      List<DataFrame> batches = split(dataFrame, classifiers.size());
+
+      // BUGFIX: was (int)alpha * batches.size(), which truncates alpha (e.g. 0.1)
+      // to 0 before multiplying, so k was always 1 regardless of alpha.
+      int k = Math.max(1, (int)(alpha * batches.size()));
+      batches = remerge(batches, k);
+
+
+      for(int i=0; i < classifiers.size(); ++i){
+         TupleTwo<SVR, SVR> pair = classifiers.get(i);
+         SVR classifier1 = pair._1();
+         SVR classifier2 = pair._2();
+
+         classifier1.fit(createBinaryBatch(batches.get(i), classifier1.getName()));
+         classifier2.fit(createBinaryBatch(batches.get(i), classifier2.getName()));
+      }
+
+   }
+
+   /** Copies the frame with a 1.0/0.0 target: 1.0 where the row's label matches classLabel. */
+   private DataFrame createBinaryBatch(DataFrame dataFrame, String classLabel){
+      DataFrame binaryBatch = new BasicDataFrame();
+      for(DataRow row  : dataFrame){
+         String label = row.categoricalTarget();
+         DataRow rowWithBinaryTargetOutput = row.makeCopy();
+         rowWithBinaryTargetOutput.setTargetCell(BINARY_LABEL, label.equals(classLabel) ? 1.0 : 0.0);
+         binaryBatch.addRow(rowWithBinaryTargetOutput);
+      }
+      return binaryBatch;
+   }
+
+
+   /** Returns the label with the most pairwise wins, or "NA" when nothing scores. */
+   public String classify(DataRow row) {
+
+      row = row.makeCopy();
+      if(row.getTargetColumnNames().isEmpty()) {
+        row.setTargetColumnNames(Collections.singletonList(BINARY_LABEL));
+      }
+
+      Map<String, Integer> scores = score(row);
+
+      String predicatedClassLabel = null;
+      int maxScore = 0;
+      for(Map.Entry<String, Integer> entry : scores.entrySet()){
+         String label = entry.getKey();
+         int score = entry.getValue();
+         if(score > maxScore){
+            maxScore= score;
+            predicatedClassLabel = label;
+         }
+      }
+
+      if(predicatedClassLabel == null) {
+         predicatedClassLabel = "NA";
+      }
+
+      return predicatedClassLabel;
+   }
+
+
+   /** Discards all trained classifiers and known labels. */
+   public void reset() {
+      classifiers.clear();
+      classLabels.clear();
+   }
+
+
+   public List<String> getClassLabels() {
+      return classLabels;
+   }
+
+
+   /** Runs every pairwise duel and counts one vote for each duel's winner (ties abstain). */
+   public Map<String, Integer> score(DataRow row) {
+
+      Map<String, Integer> scores = new HashMap<>();
+
+      for(int i=0; i < classifiers.size(); ++i){
+         TupleTwo<SVR, SVR> pair = classifiers.get(i);
+         SVR classifier1 = pair._1();
+         SVR classifier2 = pair._2();
+
+         double score1 = getClassifierScore(row, classifier1);
+         double score2 = getClassifierScore(row, classifier2);
+
+         if(score1 == score2) continue;
+
+         String winningLabel;
+         if(score1 > score2) {
+            winningLabel = classifier1.getName();
+         }
+         else {
+            winningLabel = classifier2.getName();
+         }
+         scores.merge(winningLabel, 1, Integer::sum);
+      }
+
+      return scores;
+   }
+}

+ 174 - 0
gtbook/src/main/java/opennlp/tools/svm/svmext/oneclass/OneClassSVM.java

@@ -0,0 +1,174 @@
+package opennlp.tools.svm.svmext.oneclass;
+
+
+//import com.github.svm.svmext.Learner;
+//import com.github.svm.data.frame.DataFrame;
+//import com.github.svm.data.frame.DataRow;
+//import com.github.svm.libsvm.*;
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+import opennlp.tools.svm.libsvm.*;
+import opennlp.tools.svm.svmext.Learner;
+
+import java.util.Vector;
+import java.util.function.Supplier;
+
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_predict;
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_set_print_string_function;
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_train;
+
+
+/**
+ * Created by xschen on 2/5/2017.
+ */
+public class OneClassSVM implements Learner {
+
+   private static svm_print_interface svm_print_null = new svm_print_interface()
+   {
+      public void print(String s) {}
+   };
+   private svm_parameter param;
+   private int cross_validation;
+   private svm_model model;
+   private boolean quiet;
+   public Supplier<Double> thresholdSupplier;
+
+   public void copy(OneClassSVM that){
+      param = that.param == null ? null : that.param.makeCopy();
+      cross_validation = that.cross_validation;
+      quiet = that.quiet;
+      model = that.model == null ? null : that.model.makeCopy();
+      if(model != null) model.param = param;
+   }
+
+   private double threshold(){
+      if(thresholdSupplier == null){
+         return 0;
+      }else{
+         return thresholdSupplier.get();
+      }
+   }
+
+   public OneClassSVM makeCopy(){
+      OneClassSVM clone = new OneClassSVM();
+      clone.copy(this);
+      return clone;
+   }
+
+   public OneClassSVM(){
+      svm_print_interface print_func = null;	// default printing to stdout
+
+      param = new svm_parameter();
+      // default values
+      param.svm_type = svm_parameter.ONE_CLASS;
+      param.kernel_type = svm_parameter.RBF;
+      param.degree = 3;
+      param.gamma = 0;	// 1/num_features
+      param.coef0 = 0;
+      param.nu = 0.5;
+      param.cache_size = 100;
+      param.C = 1;
+      param.eps = 1e-3;
+      param.p = 0.1;
+      param.shrinking = 1;
+      param.probability = 0;
+      param.nr_weight = 0;
+      param.weight_label = new int[0];
+      param.weight = new double[0];
+      cross_validation = 0;
+
+      svm_set_print_string_function(svm_print_null);
+      quiet = true;
+   }
+
+   public void set_nu(double nu) {
+      param.nu = nu;
+   }
+
+   public void set_gamma(double gamma){
+      param.gamma = gamma;
+   }
+
+   public boolean isQuiet() {
+      return quiet;
+   }
+
+   public void setQuiet(boolean quiet) {
+      this.quiet = quiet;
+   }
+
+   public svm_parameter getParameters(){
+      return param;
+   }
+
+   @Override
+   public double transform(DataRow row) {
+      double[] x0 = row.toArray();
+      int n = x0.length;
+
+      SupportVectorMachineNode[] x = new SupportVectorMachineNode[n];
+      for(int j=0; j < n; j++)
+      {
+         x[j] = new SupportVectorMachineNode();
+         x[j].index = j+1;
+         x[j].value = x0[j];
+      }
+
+      double v = svm_predict(model,x);
+      return v;
+   }
+
+   public boolean isAnomaly(DataRow tuple) {
+      double p = transform(tuple);
+      return p < threshold();
+   }
+
+   @Override
+   public void fit(DataFrame dataFrame) {
+
+      if(this.quiet){
+         svm_set_print_string_function(svm_print_null);
+      }else{
+         svm_set_print_string_function(null);
+      }
+
+      Vector<SupportVectorMachineNode[]> vx = new Vector<>();
+      int max_index = 0;
+
+      int m = dataFrame.rowCount();
+      for(int i=0; i < m; ++i)
+      {
+         DataRow tuple = dataFrame.row(i);
+
+         double[] x0 = tuple.toArray();
+         int n = x0.length;
+
+         SupportVectorMachineNode[] x = new SupportVectorMachineNode[n];
+         for(int j=0; j < n; j++)
+         {
+            x[j] = new SupportVectorMachineNode();
+            x[j].index = j+1;
+            x[j].value = x0[j];
+         }
+
+         if(n>0) max_index = Math.max(max_index, x[n-1].index);
+
+         vx.addElement(x);
+      }
+
+      svm_problem prob = new svm_problem();
+      prob.l = m;
+      prob.x = new SupportVectorMachineNode[m][];
+      for(int i=0;i<prob.l;i++)
+         prob.x[i] = vx.elementAt(i);
+      prob.y = new double[m];
+      for(int i=0;i<prob.l;i++)
+         prob.y[i] = 0;
+
+      if(param.gamma == 0 && max_index > 0)
+         param.gamma = 1.0/max_index;
+
+
+      model = svm_train(prob, param);
+   }
+}

+ 205 - 0
gtbook/src/main/java/opennlp/tools/svm/svmext/regression/SVR.java

@@ -0,0 +1,205 @@
+package opennlp.tools.svm.svmext.regression;
+
+
+//import com.github.svm.libsvm.*;
+//import com.github.svm.svmext.Learner;
+//import com.github.svm.data.frame.DataFrame;
+//import com.github.svm.data.frame.DataRow;
+//import com.github.svm.data.utils.Scaler;
+import opennlp.tools.svm.data.frame.DataFrame;
+import opennlp.tools.svm.data.frame.DataRow;
+import opennlp.tools.svm.data.utils.Scaler;
+import opennlp.tools.svm.libsvm.*;
+import opennlp.tools.svm.svmext.Learner;
+
+import java.util.Vector;
+
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_predict;
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_set_print_string_function;
+import static opennlp.tools.svm.libsvm.SupportVectorMachine.svm_train;
+
+
+/**
+ * Created by xschen on 5/5/2017.
+ */
+public class SVR implements Learner {
+   private static svm_print_interface svm_print_null = new svm_print_interface()
+   {
+      public void print(String s) {}
+   };
+   private svm_parameter param;
+   private int cross_validation;
+   private svm_model model;
+   private boolean quiet;
+
+   private final Scaler scaler = new Scaler();
+   private String name;
+
+
+   public void copy(SVR that){
+
+      param = that.param == null ? null : that.param.makeCopy();
+      cross_validation = that.cross_validation;
+      model = that.model == null ? null : that.model.makeCopy();
+      if(model != null) model.param = param;
+      quiet = that.quiet;
+      scaler.copy(that.scaler);
+   }
+
+   public SVR makeCopy(){
+      SVR clone = new SVR();
+      clone.copy(this);
+
+      return clone;
+   }
+
+   public SVR(){
+      svm_print_interface print_func = null;	// default printing to stdout
+
+      param = new svm_parameter();
+      // default values
+      param.svm_type = svm_parameter.NU_SVR;
+      param.kernel_type = svm_parameter.RBF;
+      param.degree = 3;
+      param.gamma = 0;	// 1/num_features
+      param.coef0 = 0;
+      param.nu = 0.5;
+      param.cache_size = 100;
+      param.C = 1;
+      param.eps = 1e-3;
+      param.p = 0.1;
+      param.shrinking = 1;
+      param.probability = 0;
+      param.nr_weight = 0;
+      param.weight_label = new int[0];
+      param.weight = new double[0];
+      cross_validation = 0;
+
+      svm_set_print_string_function(svm_print_null);
+      this.quiet = true;
+   }
+
+   public SVMType getSVMType(){
+      if(param.svm_type == svm_parameter.EPSILON_SVR){
+         return SVMType.epsilon;
+      }else{
+         return SVMType.nu;
+      }
+   }
+
+   public void setSVMType(SVMType type){
+      switch (type){
+         case nu:
+            param.svm_type = svm_parameter.NU_SVR;
+            break;
+         case epsilon:
+            param.svm_type = svm_parameter.EPSILON_SVR;
+      }
+   }
+
+   public boolean isQuiet() {
+      return quiet;
+   }
+
+   public void setQuiet(boolean quiet) {
+      this.quiet = quiet;
+   }
+
+   public svm_parameter getParameters(){
+      return param;
+   }
+
+   @Override
+   public double transform(DataRow row) {
+
+      row = scaler.transform(row);
+
+      double[] x0 = row.toArray();
+      int n = x0.length;
+
+      SupportVectorMachineNode[] x = new SupportVectorMachineNode[n];
+      for(int j=0; j < n; j++)
+      {
+         x[j] = new SupportVectorMachineNode();
+         x[j].index = j+1;
+         x[j].value = x0[j];
+      }
+
+      double v = svm_predict(model, x);
+      return scaler.inverseTransform(row.targetColumnName(), v);
+   }
+
+   @Override
+   public void fit(DataFrame frame) {
+
+      if(this.quiet){
+         svm_set_print_string_function(svm_print_null);
+      }else{
+         svm_set_print_string_function(null);
+      }
+
+
+
+      Vector<Double> vy = new Vector<Double>();
+      Vector<SupportVectorMachineNode[]> vx = new Vector<>();
+      int max_index = 0;
+
+      scaler.fit(frame);
+
+      int m = frame.rowCount();
+      for(int i=0; i < m; ++i)
+      {
+         DataRow row = frame.row(i);
+
+         row = scaler.transform(row);
+
+         double[] x0 = row.toArray();
+         int n = x0.length;
+
+
+         vy.add(row.target());
+         SupportVectorMachineNode[] x = new SupportVectorMachineNode[n];
+         for(int j=0; j < n; j++)
+         {
+            x[j] = new SupportVectorMachineNode();
+            x[j].index = j+1;
+            x[j].value = x0[j];
+         }
+
+         if(n>0) max_index = Math.max(max_index, x[n-1].index);
+
+         vx.addElement(x);
+      }
+
+      svm_problem prob = new svm_problem();
+      prob.l = m;
+      prob.x = new SupportVectorMachineNode[prob.l][];
+      for(int i=0;i<prob.l;i++)
+         prob.x[i] = vx.elementAt(i);
+      prob.y = new double[prob.l];
+      for(int i=0;i<prob.l;i++)
+         prob.y[i] = vy.elementAt(i);
+
+      if(param.gamma == 0 && max_index > 0)
+         param.gamma = 1.0/max_index;
+
+
+      model = svm_train(prob, param);
+   }
+
+
+   public void setName(String name) {
+      this.name = name;
+   }
+
+
+   public String getName() {
+      return name;
+   }
+
+
+   public enum SVMType{
+      nu,
+      epsilon
+   }
+}

+ 98 - 0
gtbook/src/main/java/org/cnnlp/data/Main.java

@@ -0,0 +1,98 @@
+package org.cnnlp.data;
+
/**
 * Command-line entry point for the markdown tools. Parses simple
 * {@code -opt value} style arguments and dispatches to {@link #command}.
 */
public class Main {

    /**
     * Returns the index of {@code opt} in {@code args} (case-insensitive),
     * or -1 when the option is absent.
     */
    public static int argPos(String opt, String[] args) {
        for (int i = 0; i < args.length; i++) {
            if (opt.equalsIgnoreCase(args[i])) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Dispatches the requested action. Currently a stub: "help" and "tojson"
     * are recognized but perform no work yet.
     *
     * @return 0 always
     */
    public static int command(String action, String source, String sourceType, String target) {
        if (action.equalsIgnoreCase("help")) {
            // no-op: usage text is printed by main() when no arguments are given
        } else if (action.equalsIgnoreCase("tojson")) {
            // SimpleMdSplitter splitter = new SimpleMdSplitter();
            // FaqMdSplitter splitter = new FaqMdSplitter();
        }
        return 0;
    }

    /**
     * Returns the value following {@code opt}, or {@code null} when the option
     * is absent or has no value. BUGFIX: the original indexed {@code args[idx + 1]}
     * unguarded, throwing ArrayIndexOutOfBoundsException when an option was the
     * last argument (e.g. {@code mdtool -tojson -source}).
     */
    private static String argValue(String opt, String[] args) {
        int idx = argPos(opt, args);
        return (idx >= 0 && idx + 1 < args.length) ? args[idx + 1] : null;
    }

    /** Prints the usage banner shown when no arguments are supplied. */
    private static void printUsage() {
        System.out.println();

        System.out.println("Markdowns tools 2.0.1");
        System.out.println("Copyright (c) 2020-2025 Guangzhou Jitian IT Co.,Ltd.");
        System.out.println("Options:");
        System.out.println("Parameters for tools:");

        System.out.println("\t-tojson ");
        System.out.println("\t\tconvert markdownfile to json, default");

        System.out.println("\t-source <file>");
        System.out.println("\t\tsource file or path ");

        System.out.println("\t-sourcetype <s>");
        System.out.println("\t\tmarkdown type of file , simple or faq, default=simple ");

        System.out.println("\t-target <file>");
        System.out.println("\t\ttarget file or path ");

        System.out.println();
        System.out.println("Examples:");
        System.out.println("mdtool -tojson -source a.md  -sourcetype simple -target a.json");
        System.out.println();
    }

    public static void main(String[] args) {
        if (args.length < 1) {
            printUsage();
        } else {
            // "-tojson" is both the only action and the default, so the action
            // resolves to "tojson" whether or not the flag is present
            // (matching the original defaulting logic).
            String action = "tojson";

            String mdFile = argValue("-source", args);
            String mdType = argValue("-sourcetype", args);
            String target = argValue("-target", args);

            if (mdFile != null) {
                command(action, mdFile, mdType, target);
            }
        }
    }
}

+ 44 - 0
gtbook/src/main/java/org/cnnlp/data/ReadMe.java

@@ -0,0 +1,44 @@
+package org.cnnlp.data;
+
+
+////////
+/*
+ * org.commonmark.renderer.text.CoreTextContentNodeRenderer 的改造
+ *     private void writeEndOfLineIfNeeded(Node node, Character c) {
+        if (context.stripNewlines()) {
+            if (c != null) {
+                textContent.write(c);
+            }
+            
+            ///QQQ 2020.2.5 注释掉,英文文档词之间需要空格,而中文不需要
+//            if (node.getNext() != null) {
+//                textContent.whitespace();
+//            }
+        } else {
+            if (node.getNext() != null) {
+                textContent.line();
+            }
+        }
+    }    
+ */
+
+/*
+ * JODConverter, the Java OpenDocument Converter, converts documents between different office formats. It leverages Apache OpenOffice or LibreOffice, which provide arguably the best free import/export filters for OpenDocument and Microsoft Office formats available today.
+
+JODConverter automates all conversions supported by OpenOffice/LibreOffice. Supported conversions include:
+
+| Document Type | Input Format                    | Output Format                                 |
+| ------------- | ------------------------------- | --------------------------------------------- |
+| Text          | DOC, DOCX, ODT, OTT, RTF, TEXT  | DOC, DOCX, HTML, ODT, OTT, PDF, PNG, RTF, TXT |
+| Spreadsheet   | CSV, ODS, OTS, TSV, XLS, XLSX   | CSV, HTML, ODS, OTS, PDF, PNG, TSV, XLS, XLSX |
+| Presentation  | ODP, OTP, PPT, PPTX             | HTML, ODP, OTP, PDF, PNG, PPT, PPTX, SWF      |
+| Drawing       | ODG, OTG                        | ODG, OTG, PDF, PNG, SWF                       |
+| Other         | HTML                            | DOC, DOCX, HTML, ODT, OTT, PDF, PNG, RTF, TXT |
+
+ */
+
+
/**
 * Intentionally empty placeholder: this compilation unit exists only to host
 * the developer notes in the comment blocks above (the commonmark
 * CoreTextContentNodeRenderer tweak and the JODConverter format matrix).
 */
public class ReadMe {

	
}

+ 1627 - 0
gtbook/src/main/java/org/cnnlp/data/book/GTBook.java

@@ -0,0 +1,1627 @@
+package org.cnnlp.data.book;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.Externalizable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInput;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutput;
+import java.io.ObjectOutputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.util.*;
+import java.util.Map.Entry;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import org.cnnlp.data.md.GTDoc;
+import org.cnnlp.data.util.ISenSim;
+import org.cnnlp.data.util.ObjectDoublePair;
+import org.cnnlp.data.util.SenSimFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.ConcurrentHashMap;
+
+import com.github.cliftonlabs.json_simple.JsonArray;
+import com.github.cliftonlabs.json_simple.JsonObject;
+
+import gnu.trove.TIntArrayList;
+
+public class GTBook implements IBook, Externalizable {
+    private static final long serialVersionUID = 1L;
+    private static Logger log = LoggerFactory.getLogger(GTBook.class);
+    public static final String MAGIC = "GTBOOK";
+
+    // 总字数
+    public static final String TOTAL = "total";
+    public static final String PREVIOUS_TOTAL = "previous_total";
+    public static final String CURRENT_TOTAL = "current_total";
+
+    public static final String SOURCE_TYPE = "source_type";
+    public static final String SOURCE_PRODUCER = "source_producer";
+
+    public static final String SOURCE_HTML_HEAD = "html_head";
+    public static final String SOURCE_HTML_CSS = "html_css";
+    public static final String SOURCE_HTML_TEMPLATE = "html_template";
+
+    String version = "1.0";
+    String title = "";
+    String author;
+
+    String cid = null;// content 的 url id
+
+    String id = "000";
+
+    /*
+     * Date dNow = new Date( );
+      SimpleDateFormat ft = new SimpleDateFormat ("yyyy-MM-dd hh:mm:ss");
+      System.out.println("当前时间为: " + ft.format(dNow));
+     */
+    Date createDate;
+
+    GTNode root;
+
+    HashMap<String, IElement> meta;
+
+    transient ConcurrentHashMap<String, int[]> words;
+    //transient List<INode> nodes;
+
    /** Creates a book with a freshly generated random UUID as its id. */
    public GTBook() {
        this(UUID.randomUUID().toString());
    }


    /**
     * Creates a book with the given id: derives the content url id
     * ("/book/&lt;id&gt;/content"), builds an empty root node, stamps the
     * creation date and initializes the per-paragraph word-count cache.
     */
    public GTBook(String id) {
        this.id = id;
        cid = "/book/" + id + "/content";
        root = GTNode.buildRoot("");
        createDate = new Date();
        words = new ConcurrentHashMap<>();
    }
+
    public String getId() {
        return id;
    }

    /** Sets the id and re-derives the content url id (cid) to match. */
    public void setId(String id) {
        this.id = id;
        cid = "/book/" + id + "/content";
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /** "uuid" here is the content url id (cid), e.g. "/book/&lt;id&gt;/content". */
    public String getUuid() {
        return cid;
    }

    public void setUuid(String uuid) {
        this.cid = uuid;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getVersion() {
        return version;
    }

    public void setVersion(String version) {
        this.version = version;
    }

    public Date getCreateDate() {
        return createDate;
    }

    public void setCreateDate(Date createDate) {
        this.createDate = createDate;
    }


    public GTNode getRoot() {
        return root;
    }

    public void setRoot(GTNode root) {
        this.root = root;
    }
+
    /**
     * Appends a value under a meta field: if the field already holds an element
     * the value is added to it, otherwise a new element is created.
     */
    public void addMeta(String field, String value) {
        if (meta == null) {
            meta = new HashMap<>();
        }
        IElement ie = meta.get(field);
        if (ie != null) {
            // assumes stored elements are MDElement — TODO confirm other IElement impls never land here
            MDElement mde = (MDElement) ie;
            mde.add(value, value);
        } else {
            ie = new MDElement(value, value);
            meta.put(field, ie);
        }
    }

    /** Replaces (rather than appends to) the value of a meta field. */
    public void putMeta(String field, String value) {
        if (meta == null) {
            meta = new HashMap<>();
        }
        MDElement ie = new MDElement(value, value);
        meta.put(field, ie);
    }


    /** Stores a pre-built element under a meta field, replacing any existing value. */
    public void addMeta(String field, IElement value) {
        if (meta == null) {
            meta = new HashMap<>();
        }
        meta.put(field, value);
    }

    /** Returns the first text value of a meta field, or null when absent or empty. */
    public String getMeta(String field) {
        List<String> ms = getMetaList(field);
        if (ms != null && ms.size() > 0) {
            return ms.get(0);
        }
        return null;
    }

    /** Returns all text values of a meta field, or null when the field is absent. */
    public List<String> getMetaList(String field) {
        if (meta != null) {
            IElement ie = meta.get(field);
            if (ie != null) {
                return ie.getText();
            }
        }
        return null;
    }

    /** Returns the raw element of a meta field, or null when absent. */
    public IElement getMetaElement(String field) {
        if (meta != null) {
            IElement ie = meta.get(field);
            return ie;
        }
        return null;
    }

    /** Debug helper: prints the whole node tree to stdout. */
    public void outInfo() {
        if (root != null) {
            System.out.println(root.toString());
        }
    }
+
    /**
     * Resolves a node from a dash-separated index path such as "1-2-3".
     * Each segment counts NON-LEAF children only (see getNode(INode, int)).
     * Returns null when any segment cannot be resolved.
     */
    public INode getNode(String path) {
        String[] ss = path.split("-");
        INode nd = root;
        for (int i = 0; i < ss.length; i++) {
            if (nd != null) {
                nd = getNode(nd, Integer.valueOf(ss[i]));
            } else {
                break;
            }
        }
        return nd;
    }

    /** Same as getNode(String) but with a pre-parsed index path. */
    public INode getNode(TIntArrayList path) {
        INode nd = root;
        for (int i = 0; i < path.size(); i++) {
            if (nd != null) {
                nd = getNode(nd, path.getQuick(i));
            } else {
                break;
            }
        }
        return nd;
    }
+
    // Resolve by ABSOLUTE child indexes (leaves included), unlike getNode(...)
    // which counts non-leaf children only.
    public INode getAbsoluteNode(TIntArrayList path) {
        INode nd = root;
        for (int i = 0; i < path.size(); i++) {
            if (nd != null) {
                nd = getAbsoluteNode(nd, path.getQuick(i));
            } else {
                break;
            }
        }
        return nd;
    }

    /** Returns the index-th child of 'node' counting every child, or null when out of range. */
    public INode getAbsoluteNode(INode node, int index) {
        if (node != null) {
            List<INode> ls = node.getChildren();
            if (ls != null && index < ls.size()) {
                return ls.get(index);
            }
        }
        return null;
    }
+
+
+    public TIntArrayList toIntPath(String path) {
+        TIntArrayList ti = new TIntArrayList();
+        if (path != null && path.length() > 0) {
+            String[] ss = path.split("-");
+            for (int i = 0; i < ss.length; i++) {
+                ti.add(Integer.valueOf(ss[i]));
+            }
+        }
+        return ti;
+    }
+
    // Added 2025.2.19
    /** Finds a node anywhere in the tree by its numeric id (depth-first search). */
    public INode getNodeById(int id) {
        INode nd1 = getNodeById(root, id);
        return nd1;
    }

    /** Returns the parent of the node with the given id; id 0 is the root and has no parent. */
    public INode getFatherNodeById(int id) {
        if (id == 0) return null;
        INode nd1 = getFatherNodeById(root, id);
        return nd1;
    }

    /** Returns the largest node id in the book (assumes ids increase in document order). */
    public int getMaxId(){
        return getMaxId(root);
    }
+
+    private int getMaxId(INode nd) {
+        List<INode> ls = nd.getChildren();
+        if (ls != null) {
+            INode nd1 = ls.get(ls.size()-1);
+            return getMaxId(nd1);
+        }
+        return nd.getId();
+    }
+
    /**
     * Returns the index-th NON-LEAF child of 'node'; leaves are skipped while
     * counting. The 'index < ls.size()' precheck compares against the TOTAL
     * child count, so it is only a cheap over-approximate guard — the loop
     * below does the real non-leaf counting.
     */
    public INode getNode(INode node, int index) {
        if (node != null) {
            List<INode> ls = node.getChildren();
            if (ls != null && index < ls.size()) {
                int ii = -1;
                for (int i = 0; i < ls.size(); i++) {
                    if (!ls.get(i).isLeaf()) {
                        ii++;
                    }
                    if (ii == index) {
                        return ls.get(i);
                    }
                }
            }
        }
        return null;
    }
+
+
    // Path format: "1-2-3" (dash-separated non-leaf child indexes)
    /** Returns the concatenated plain text at 'path' (whole book when path is null). */
    public String getText(String path) {
        INode nd = null;
        if (path == null) {
            nd = root;
        } else {
            nd = getNode(path);
        }
        if (nd != null) {
            return GTBookUtil.listToString(nd.getText(), "");
        }
        return null;
    }

    /** Returns the concatenated HTML at 'path' (whole book when path is null). */
    public String getHtml(String path) {
        INode nd = null;
        if (path == null) {
            nd = root;
        } else {
            nd = getNode(path);
        }
        if (nd != null) {
            ///QQQ joins with "" — confirm no separator is wanted between fragments
            return GTBookUtil.listToString(nd.getHtml(), "");
        }
        return null;
    }

    /** Returns the HTML lines (unjoined) at 'path' (whole book when path is null). */
    public List<String> getMd(String path) {
        INode nd = null;
        if (path == null) {
            nd = root;
        } else {
            nd = getNode(path);
        }
        if (nd != null) {
            ///QQQ despite the name this returns getHtml() lines — confirm intended
            List<String> ls = nd.getHtml();
            //return GTBookUtil.listToString(nd.getHtml(),"\n");
            return ls;
        }
        return null;
    }
+
+    // 这里的输出和 上面的List<String> getMd(String path)不一样
+    public Map<String, Object> getMds(String path, String pgid) {
+        if (path == null || path.length() == 0) {
+            if (pgid != null) {
+                if (pgid.startsWith(GTDoc.PREFIX_PARAGRAPHID)) {
+                    String pgid1 = pgid.substring(GTDoc.PREFIX_PARAGRAPHID.length());
+                    int pid1 = Integer.valueOf(pgid1);
+                    INode nd = getNodeById(root, pid1);
+
+                    if (nd != null) {
+                        List<List<String>> ls = nd.getHtmls();
+
+                        int[] count = words.get(pgid);
+                        if (count == null) {
+                            count = new int[2];
+
+                            if (nd != null) {
+                                getProgress(root, nd, count);
+                                count[1] = nd.getCharSize();
+                            }
+                            words.put(pgid, count);
+                        }
+
+                        Map m = new HashMap<String, Object>();
+                        m.put(PREVIOUS_TOTAL, count[0]);
+                        m.put(CURRENT_TOTAL, count[1]);
+
+                        int[] total = getCachedTotal();
+                        m.put(TOTAL, total[1]);
+
+                        m.put("content_ls", ls);
+                        return m;
+
+                    }
+                }
+            }
+
+            List<List<String>> ls = root.getHtmls();
+            Map m = new HashMap<String, Object>();
+            int[] total = getCachedTotal();
+
+            m.put(PREVIOUS_TOTAL, 0);
+            m.put(CURRENT_TOTAL, total[1]);
+            m.put(TOTAL, total[1]);
+
+            m.put("content_ls", ls);
+            return m;
+        } else {
+            Map<String, Object> m = getProgressToMap(path);
+            List<List<String>> ls = getMds(path);
+            m.put("content_ls", ls);
+            return m;
+        }
+    }
+
+
+    //	public List<List<String>> getMds(String path,String pgid){
+    //		if (path == null || path.length()==0) {
+    //			if (pgid!= null) {
+    //				if (pgid.startsWith(GTDoc.PREFIX_PARAGRAPHID)) {
+    //					pgid = pgid.substring(GTDoc.PREFIX_PARAGRAPHID.length());
+    //					int pid1 = Integer.valueOf(pgid);
+    //					INode nd = getNodeById(pid1);
+    //					if (nd != null){
+    //						List<List<String>> ls = nd.getHtmls();
+    //						return ls;
+    //					}
+    //				}
+    //			}
+    //			List<List<String>> ls = root.getHtmls();
+    //			return ls;
+    //		}else {
+    //			return getMds(path);
+    //		}
+    //	}
+
+
    /**
     * Depth-first search for the PARENT of the node whose id equals 'id',
     * starting below 'nd'. Returns null when the id is not found in the subtree
     * (or when it is 'nd' itself, which has no parent in this subtree).
     */
    private INode getFatherNodeById(INode nd, int id) {
//        if (nd == null) return null;
//        if (nd.getId() == id) {
//            return nd;
//        }

        List<INode> ls = nd.getChildren();
        if (ls != null) {
            for (int i = 0; i < ls.size(); i++) {
                INode nd1 = ls.get(i);
                if (nd1.getId() == id) {
                    return nd; // a direct child matched, so 'nd' is the parent
                }
                //INode nd2 = getNodeById(nd1, id);
                INode nd2 = getFatherNodeById(nd1, id);
                if (nd2 != null) {
                    return nd2;
                }
            }
        }
        return null;
    }


    /** Depth-first search for the node whose id equals 'id' in the subtree at 'nd'. */
    private INode getNodeById(INode nd, int id) {
        if (nd == null) return null;
        if (nd.getId() == id) {
            return nd;
        }

        List<INode> ls = nd.getChildren();
        if (ls != null) {
            for (int i = 0; i < ls.size(); i++) {
                INode nd1 = ls.get(i);
                INode nd2 = getNodeById(nd1, id);
                if (nd2 != null) {
                    return nd2;
                }
            }
        }
        return null;
    }
+
+
+
+
    /**
     * Returns the per-section HTML lists for the node at 'path'.
     * NOTE(review): delegates to getNodeByString, which is defined elsewhere in
     * this class — presumably the path resolver equivalent to getNode(String);
     * confirm before relying on the exact path semantics.
     */
    public List<List<String>> getMds(String path) {
        //		INode nd = null;
        //		if (path == null){
        //			nd = root;
        //		}else{
        //			nd = getNode(path);
        //		}

        INode nd = getNodeByString(path);

        if (nd != null) {
            ///QQQ despite the name this returns getHtmls() section lists — confirm intended
            List<List<String>> ls = nd.getHtmls();
            //return GTBookUtil.listToString(nd.getHtml(),"\n");
            return ls;
        }
        return null;
    }
+
+
+    public void loadObj(File file) {
+        try {
+            ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(new FileInputStream(file)));
+            readExternal(in);
+            in.close();
+        } catch (Exception e) {
+            log.error(e.getMessage());
+        }
+    }
+
+
    /**
     * Deserializes the book. The field order MUST mirror writeExternal:
     * MAGIC, version, title, cid, id, createDate, meta (size then entries), root.
     * NOTE(review): the transient 'words' cache is not reinitialized here; this
     * is reached via loadObj() on an already-constructed instance (whose ctor
     * set it) — confirm before deserializing into instances created any other way.
     */
    @Override
    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {

        String m1 = in.readUTF(); // magic header ("GTBOOK"); currently read but not validated
        version = in.readUTF();
        Object obj = in.readObject();
        if (obj != null) {
            title = (String) obj;
        }

        obj = in.readObject();
        if (obj != null) {
            cid = (String) obj;
        }

        obj = in.readObject();
        if (obj != null) {
            id = (String) obj;
        }

        obj = in.readObject();
        if (obj != null) {
            createDate = (Date) obj;
        }

        // meta map: -1 means "was null on write", so it stays null here
        int len = in.readInt();
        if (len >= 0) {
            HashMap<String, IElement> temp = new HashMap<>();
            for (int i = 0; i < len; i++) {
                String key = in.readUTF();
                IElement value = (IElement) in.readObject();
                temp.put(key, value);
            }
            this.meta = temp;
        }

        // the node tree handles its own (de)serialization
        GTNode temp = new GTNode();
        temp.readExternal(in);
        this.root = temp;
    }
+
+    public void saveObj(File file) {
+        try {
+            ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(file)));
+            writeExternal(out);
+            out.close();
+        } catch (Exception e) {
+            log.error(e.getMessage());
+        }
+    }
+
    /**
     * Serializes the book in a fixed order that readExternal must mirror:
     * MAGIC, version, title, cid, id, createDate, meta size (-1 for null)
     * followed by the meta entries, and finally the root subtree.
     */
    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        out.writeUTF(MAGIC);
        out.writeUTF(version);
        out.writeObject(title);
        out.writeObject(cid);
        out.writeObject(id);
        out.writeObject(createDate);
        if (meta == null) {
            out.writeInt(-1); // sentinel: readExternal leaves meta null
        } else {
            out.writeInt(meta.size());
            Set<Entry<String, IElement>> entries = meta.entrySet();
            for (Entry<String, IElement> en : entries) {
                out.writeUTF(en.getKey());
                out.writeObject(en.getValue());
            }
        }
        root.writeExternal(out);
    }
+
    /**
     * Walks the index path starting from 'now' and collects every node visited
     * along the way (the starting node itself is NOT included). Entries can be
     * null when a segment fails to resolve — callers should check.
     */
    public List<INode> getAncestor(INode now, TIntArrayList path) {
        List<INode> ls = new ArrayList<>();
        //ls.add(now);
        INode nd = now;
        for (int i = 0; i < path.size(); i++) {
            nd = getNode(nd, path.getQuick(i));
            ls.add(nd);
        }
        return ls;
    }
+
    /**
     * Exports every leaf paragraph of the book as a GTDoc record and writes
     * all records to {@code fn}, one record per line, UTF-8 encoded.
     * {@code params} may supply a {@code GTDoc.TAGS} value that is copied
     * onto every record.  If the tree has no leaves at all, the root's
     * direct children are exported instead.
     */
    public void toGTDoc(File fn, Map<String, String> params) throws IOException {

        String tags = null;
        if (params != null) {
            tags = params.get(GTDoc.TAGS);
        }
        TIntArrayList path = new TIntArrayList();

        // fathers[level] = last node seen at that depth; gives ancestors of a leaf
        INode[] fathers = new INode[256];

        List<GTDoc> docs = new ArrayList<>();

        // counts how many leaves were visited (array so the anonymous class can mutate it)
        final int[] counter = new int[1];
        counter[0] = 0;

        final String tags1 = tags;
        IVisitor visitor = new IVisitor() {
            @Override
            public void visit(INode now, TIntArrayList path) {
                if (path.size() >= 1) {
                    fathers[path.size() - 1] = now;
                } else {
                    fathers[0] = now;
                }
                if (now.isLeaf()) {

                    //					List<INode> fathers = getAncestor(root, path);
                    //					if (fathers.size() >=2){
                    //					    int fid = fathers.size()-2;
                    //					    INode father = fathers.get(fid);
                    //					    String title = father.getLabel();
                    //
                    //					    if (path.getQuick(path.size()-1)== 0){
                    //					    	int i = path.size()-3;
                    //					    	while(i>=0){
                    //					    		if (path.getQuick(i+1)== 0){
                    //					    		title = fathers.get(i).getLabel()+"/"+title;
                    //					    		}else{
                    //					    			break;
                    //					    		}
                    //					    		i--;
                    //					    	}
                    //					    }
                    //					  System.out.println(path.toString()+ title);
                    //					}

                    counter[0]++;
                    int titleI = path.size() - 2;
                    String text = GTBookUtil.listToString(now.getText(), "\n");
                    GTDoc gdoc = new GTDoc();
                    gdoc.addField(GTDoc.PARAGRAPHID, GTDoc.PREFIX_PARAGRAPHID + now.getId());
                    gdoc.addField(GTDoc.CONTENT, text);

                    // title = immediate heading; chapter/section/part = higher ancestors
                    if (titleI >= 0) {
                        gdoc.addField(GTDoc.TITLE, fathers[titleI].getLabel());
                    }

                    if (titleI >= 1) {
                        gdoc.addField(GTDoc.CHAPTER, fathers[0].getLabel());
                    }
                    if (titleI >= 2) {
                        gdoc.addField(GTDoc.SECTION, fathers[1].getLabel());
                    }
                    if (titleI >= 3) {
                        gdoc.addField(GTDoc.PART, fathers[2].getLabel());
                    }

                    // NOTE(review): this mutates the traversal's own path list —
                    // appears to rely on the visitor contract; confirm traverseDescendants
                    // restores/rebuilds the path after each visit.
                    path.remove(path.size() - 1);
                    TIntArrayList path2 = getRelativePath(path);
                    String url = getPath(path2);

                    gdoc.addField(GTDoc.PATH, url);
                    gdoc.addField(GTDoc.BOOKID, id);

                    if (tags1 != null) {
                        gdoc.addField(GTDoc.TAGS, tags1);
                    }
                    docs.add(gdoc);

                }
            }
        };
        root.traverseDescendants(path, visitor);

        if (counter[0] == 0) {
            // no leaf paragraphs were visited — index the root's direct children instead
            List<INode> ls = root.getChildren();
            TIntArrayList path2 = new TIntArrayList();
            for (int i = 0; i < ls.size(); i++) {
                INode now = ls.get(i);
                String text = GTBookUtil.listToString(now.getText(), "\n");
                GTDoc gdoc = new GTDoc();
                gdoc.addField(GTDoc.PARAGRAPHID, GTDoc.PREFIX_PARAGRAPHID + now.getId());
                gdoc.addField(GTDoc.CONTENT, text);
                gdoc.addField(GTDoc.TITLE, now.getLabel());

                // NOTE(review): path2 is created empty; setQuick(0, i) writes past its
                // logical size without growing it — verify getPath(path2) sees the index.
                path2.setQuick(0, i);
                String url = getPath(path2);

                gdoc.addField(GTDoc.PATH, url);
                gdoc.addField(GTDoc.BOOKID, id);

                if (tags1 != null) {
                    gdoc.addField(GTDoc.TAGS, tags1);
                }
                docs.add(gdoc);
            }
        }

        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));

        try {
            // one GTDoc per line: all its string parts, then a newline
            for (GTDoc gdoc : docs) {
                List<String> ls = gdoc.toListString();
                for (String s : ls) {
                    out.write(s);
                }
                out.write("\n");
            }
        } finally {
            if (out != null) {
                out.close();
            }
        }

    }
+
+    public List<String> toStringList() {
+        TIntArrayList path = new TIntArrayList();
+        List<String> ls = new ArrayList<>();
+
+        IVisitor visitor = new IVisitor() {
+            @Override
+            public void visit(INode now, TIntArrayList path) {
+                if (now.isLeaf()) {
+                    String text = GTBookUtil.listToString(now.getText(), "\n");
+                    ls.add(text);
+                } else {
+                    String text = now.getLabel();
+                    ls.add(text);
+                }
+            }
+        };
+        root.traverseDescendants(path, visitor);
+        return ls;
+    }
+
+    public String toTxt() {
+        StringBuilder sb = new StringBuilder();
+        List<String> ls = toStringList();
+        for (String s : ls) {
+            s = s.replace("<br>", "\n");
+            sb.append(s).append("\n");
+        }
+        return sb.toString();
+    }
+
+    public void toTxt(File fn) throws IOException {
+
+        List<String> ls = toStringList();
+
+        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));
+        try {
+            for (String s : ls) {
+                s = s.replace("<br>", "\n");
+                out.write(s);
+                out.write("\n");
+            }
+        } finally {
+            if (out != null) {
+                out.close();
+            }
+        }
+    }
+
+
+    private String stripLast(String s) {
+        if (s == null || s.length() < 1) return s;
+        if (s.endsWith("\n")) {
+            s = s.substring(0, s.length() - 1);
+        }
+        s = s.replace("<br>", "  ");
+        return s;
+    }
+
+    private String getPathUuid(TIntArrayList path) {
+        StringBuilder sb = new StringBuilder();
+        sb.append(cid).append("/");
+        sb.append(path.getQuick(0));
+        for (int i = 1; i < path.size(); i++) {
+            sb.append("-").append(path.getQuick(i));
+        }
+        return sb.toString();
+    }
+
+    public String getPath(TIntArrayList path) {
+        StringBuilder sb = new StringBuilder();
+        //sb.append(cid).append("/");
+        if (path.size() > 0) {
+            sb.append(path.getQuick(0));
+            for (int i = 1; i < path.size(); i++) {
+                sb.append("-").append(path.getQuick(i));
+            }
+            return sb.toString();
+        } else {
+            // 没有父节点,例如 "前言",有时不带#号
+            return "";
+        }
+    }
+
+    private String getPath(String prefix, TIntArrayList path) {
+        StringBuilder sb = new StringBuilder();
+        if (prefix.length() > 0) {
+            sb.append(prefix);
+            for (int i = 0; i < path.size(); i++) {
+                sb.append("-").append(path.getQuick(i));
+            }
+            return sb.toString();
+        } else {
+            return getPath(path);
+        }
+    }
+
+    // 去除叶子节点的路径
+    public TIntArrayList getRelativePath(TIntArrayList path) {
+        INode nd = root;
+        return getRelativePath(nd, path);
+    }
+
    /**
     * Rewrites {@code path} (child indexes starting at {@code nd}) so that,
     * at each level, leaf siblings preceding the chosen child are not counted.
     * The input list is not modified; an adjusted clone is returned.
     */
    private TIntArrayList getRelativePath(INode nd, TIntArrayList path) {
        TIntArrayList path2 = (TIntArrayList) path.clone();
        for (int i = 0; i < path.size(); i++) {

            //nd = getNode(nd,Integer.valueOf(ss[i]));
            List<INode> ls = nd.getChildren();
            int ii = 0;
            // count the leaf siblings that sit before the selected child
            for (int j = 0; j < path.getQuick(i); j++) {
                if (ls.get(j).isLeaf()) {
                    ii++;
                }
            }
            if (ii > 0) {
                path2.set(i, path2.getQuick(i) - ii);
            }
            // descend along the ORIGINAL (unadjusted) index
            nd = ls.get(path.getQuick(i));
        }
        return path2;
    }
+
+
+    public String getTocPreview(String path) {
+        INode nd = null;
+        if (path == null || path.length() == 0) {
+            nd = root;
+            path = "";
+        } else {
+            nd = getNode(path);
+        }
+        if (nd != null && nd instanceof GTNode) {
+            return getPreview((GTNode) nd, path);
+        }
+        return null;
+    }
+
    /**
     * Serializes {@code node}'s direct non-leaf children as a JSON array:
     * one object per child with id, title, path ({@code prefix-idx}) and
     * url ({@code cid/path}).  Serialization errors are logged; the
     * (possibly partial) buffer is returned regardless.
     */
    public String getPreview(GTNode node, String prefix) {

        List<INode> sons = node.getChildren();
        JsonArray firstNodes = new JsonArray();

        int idx = -1;
        for (int i = 0; i < sons.size(); i++) {
            INode now = sons.get(i);
            if (!now.isLeaf()) {
                JsonObject obj = new JsonObject();
                // idx counts only non-leaf children, so it can lag behind i
                idx++;
                obj.put("id", idx);
                obj.put("title", stripLast(now.getLabel()));

                String path1 = prefix + "-" + idx;//getRelativePath(path);
                obj.put("url", cid + "/" + path1);
                obj.put("path", path1);
                firstNodes.add(obj);
            }
        }
        //JsonObject ret = new JsonObject();
        //ret.put("preview", firstNodes);
        //ret.put("tille", node.getLabel());

        StringWriter outJson = new StringWriter();
        try {
            firstNodes.toJson(outJson);
        } catch (IOException e) {
            log.error(e.getMessage());
        }

        //System.out.println(outJson.toString());
        return outJson.toString();

    }
+
    /** Table of contents down to {@code depth} levels, with no title filter. */
    public String getToc(String path, int depth) {
        return getToc(path, depth, null);
    }
+
+    public String getToc(String path, int depth, String filter) {
+        INode nd = null;
+        if (path == null || path.length() == 0) {
+            nd = root;
+            path = "";
+        } else {
+            nd = getNode(path);
+        }
+        if (nd != null && nd instanceof GTNode) {
+            return toStructureTree((GTNode) nd, path, depth, filter);
+        }
+        return null;
+    }
+
    /**
     * Builds the JSON structure tree (the markdown heading outline) under
     * {@code node}, down to {@code depth} levels.  Each entry carries
     * id/title/path plus nested "children" arrays, and after traversal a
     * second pass adds "previous"/"next" navigation paths.  When
     * {@code filter} is non-null, only branches whose title contains it are
     * kept, with the match wrapped in {@code <em>} tags.  Returns the
     * serialized JSON array.
     */
    public String toStructureTree(GTNode node, String prefix, int depth, final String filter) {
        // build the markdown document-structure tree
        //List<INode> ls = root.traverseDescendants(path);
        //System.out.println("tree size="+ls.size());

        //List<INode> ancestor = new ArrayList<INode>();

        // fathers[level] = most recent JSON object created at that depth;
        // used to attach each new object to its parent while traversing
        JsonObject[] fathers = new JsonObject[1024];
        JsonArray firstNodes = new JsonArray();
        final int[] idx = new int[1];
        //fathers[0] = firstNodes;

        //TIntArrayList prefixPath = toIntPath(prefix);

        TIntArrayList path = new TIntArrayList();
        //final List<TIntArrayList> lls = new ArrayList<TIntArrayList>();

        IVisitor visitor = new IVisitor() {
            @Override
            public void visit(INode now, TIntArrayList path1) {
                //				if ("{1, 2}".equals(path1.toString())){
                //					System.out.println("====");
                //				}
                //				System.out.println(path1.toString());

                if (path1.size() > depth) return;
                if (!now.isLeaf() && path1.size() > 0) {
                    JsonObject obj = new JsonObject();
                    idx[0]++;
                    String title = now.getLabel();
                    title = title.trim();
                    // skip headings that are blank after trimming
                    if (title.length() <= 0) return;
                    title = stripLast(title);

                    obj.put("id", idx[0]);
                    obj.put("title", title);

                    // path2 is the leaf-free index path; kept temporarily under
                    // "path2" for the previous/next pass and removed at the end
                    TIntArrayList path2 = getRelativePath(node, path1);
                    //lls.add(path2);
                    obj.put("path2", path2);

                    String p1 = getPath(prefix, path2);
                    //obj.put("url",cid+"/"+p1);
                    obj.put("path", p1);
                    //obj.put("bookid",id);

                    int fid = path1.size() - 2;
                    if (fid >= 0) {
                        fathers[path1.size() - 1] = obj;
                    } else {
                        fathers[0] = obj;
                    }

                    if (filter == null) {
                        // unfiltered: attach to parent's "children", or to the top level
                        if (fid >= 0) {
                            JsonObject fo = fathers[fid];
                            if (fo != null) {
                                Object ch1 = fo.get("children");

                                JsonArray children = null;
                                if (ch1 == null) {
                                    children = new JsonArray();
                                    children.add(obj);
                                    fo.put("children", children);
                                } else {
                                    children = (JsonArray) ch1;
                                    children.add(obj);
                                }
                            }
                        } else {
                            firstNodes.add(obj);
                        }
                    } else {
                        // filtered: only matching titles are kept, and their whole
                        // ancestor chain is re-attached up to the top level
                        if (title.contains(filter)) {
                            int i2 = title.indexOf(filter);

                            String title2 = title.substring(0, i2) + "<em>" + filter + "</em>" + title.substring(i2 + filter.length());
                            obj.put("title", title2);
                            if (fid >= 0) {
                                int np = fid;
                                JsonObject now1 = obj;
                                JsonArray children = null;
                                while (np >= 0) {
                                    JsonObject fo = fathers[np];
                                    if (fo != null) {
                                        Object ch1 = fo.get("children");
                                        if (ch1 == null) {
                                            children = new JsonArray();
                                            children.add(now1);
                                            fo.put("children", children);
                                        } else {
                                            children = (JsonArray) ch1;
                                            if (!children.contains(now1)) {
                                                children.add(now1);
                                            }
                                        }

                                        now1 = fo;
                                        np--;
                                    } else {
                                        break;
                                    }
                                }

                                if (now1 != null) {
                                    if (!firstNodes.contains(now1)) {
                                        firstNodes.add(now1);
                                    }
                                }
                            } else {
                                firstNodes.add(obj);
                            }
                        }
                    }
                }
            }
        };


        node.traverseDescendants(path, visitor);

        //		JsonObject prop = new JsonObject();
        //		prop.put("children", "children");
        //		prop.put("title", "title");

        //		JsonObject ret = new JsonObject();
        //		ret.put("data", firstNodes);
        //		ret.put("defaultProps", prop);

        // with a search condition
        //		if (filter != null){
        //			JsonObject[] objs = firstNodes.toArray(new JsonObject[0]);
        //
        //		}

        // flatten the tree into document order for the previous/next pass
        List<Object> ls = new ArrayList<Object>();
        transeJson(firstNodes, ls);

        //String p1 = getPath(prefix,path2);
        //System.out.println(ls.size());
        //String lastPath = "";
        int len1 = ls.size();
        for (int i = 0; i < len1; i++) {
            //System.out.println(ls.get(i));
            JsonObject jo = (JsonObject) ls.get(i);

            //Object obj1 = jo.remove("path2");
            Object obj1 = jo.get("path2");
            TIntArrayList path1 = (TIntArrayList) obj1;
            TIntArrayList path2 = (TIntArrayList) path1.clone();

            //System.out.println("=="+path2.toString());
            int i1 = path2.size() - 1;
            int i2 = i;

            //if (i>=140) {
            //   System.out.println(path2);
            //}
            // lpath: the path of the previous sibling at the same level, if any
            TIntArrayList lpath = null;
            if (path2.getQuick(i1) > 0) {
                // not the first child: the previous sibling is index-1
                path2.setQuick(i1, path2.getQuick(i1) - 1);
                lpath = path2;

                //JsonObject jo22 = (JsonObject)ls.get(i-1);
                //Object obj22 = jo22.get("path2");
                //if (obj22 != null) {
                //	TIntArrayList npath2 = (TIntArrayList)obj22;
                //	lpath = npath2;
                //}
            } else {
                //				int nowLevel = i1;
                //				if (i1>=1) {
                //					i1--;
                //					while(i1>=0) {
                //						if (path2.getQuick(i1)>0) {
                //							path2.setQuick(i1, path2.getQuick(i1)-1);
                //							lpath = new TIntArrayList();
                //							for (int j = 0; j < i1+1; j++) {
                //								lpath.add(path2.getQuick(j));
                //							}
                //							break;
                //						}
                //						i1--;
                //					}
                //					if (i1 <0) {
                //						// null
                //					}
                //				}else {
                //					// null
                //				}

                // first child: search backwards for the nearest entry at the
                // same or a shallower level
                int j2 = i - 2;

                while (j2 >= 0) {
                    JsonObject jo22 = (JsonObject) ls.get(j2);
                    Object obj22 = jo22.get("path2");
                    if (obj22 != null) {
                        TIntArrayList npath2 = (TIntArrayList) obj22;
                        if (npath2.size() <= path2.size()) {
                            lpath = npath2;
                            break;
                        }
                    }
                    j2--;
                }
            }


            // next: scan forward for the following entry at the same level,
            // skipping over deeper (descendant) entries
            TIntArrayList npath = null;
            if (i2 + 1 < len1) {
                i2++;
                JsonObject jo2 = (JsonObject) ls.get(i2);
                Object obj2 = jo2.get("path2");
                if (obj2 != null) {
                    npath = (TIntArrayList) obj2;
                    if (npath.size() == path2.size()) {

                    } else {

                        if (npath.size() >= path2.size()) {

                            i2++;
                            TIntArrayList npath1 = null;
                            while (i2 < len1) {
                                JsonObject jo22 = (JsonObject) ls.get(i2);
                                Object obj22 = jo22.get("path2");
                                if (obj22 != null) {
                                    TIntArrayList npath2 = (TIntArrayList) obj22;
                                    if (npath2.size() <= path2.size()) {
                                        npath1 = npath2;
                                        break;
                                    }
                                }
                                i2++;
                            }
                            if (npath1 != null) {
                                npath = npath1;
                            }
                        }

                        if (npath.size() < path2.size()) {
                            i2++;
                            TIntArrayList npath1 = null;
                            while (i2 < len1) {
                                JsonObject jo22 = (JsonObject) ls.get(i2);
                                Object obj22 = jo22.get("path2");
                                if (obj22 != null) {
                                    TIntArrayList npath2 = (TIntArrayList) obj22;
                                    if (npath2.size() == path2.size()) {
                                        npath1 = npath2;
                                        break;
                                    } else if (npath2.size() > npath.size()) {
                                        npath1 = npath2;
                                    } else {
                                        break;
                                    }
                                }
                                i2++;
                            }

                            //if (npath1.size() == path2.size()) {
                            //	npath = npath1;
                            //}
                            if (npath1 != null) {
                                npath = npath1;
                            }
                        }
                    }
                }

                // a deeper path would be a descendant, not a "next" sibling
                if (npath != null && npath.size() > path2.size()) {
                    npath = null;
                }
            } else {
                //null
            }

            if (lpath != null) {
                String p1 = getPath(prefix, lpath);
                jo.put("previous", p1);
            } else {
                jo.put("previous", "");
            }

            if (npath != null) {
                String p1 = getPath(prefix, npath);
                jo.put("next", p1);
            } else {
                jo.put("next", "");
            }
        }

        // strip the temporary "path2" entries before serializing
        for (int i = 0; i < len1; i++) {
            //System.out.println(ls.get(i));
            JsonObject jo = (JsonObject) ls.get(i);
            jo.remove("path2");
        }

        StringWriter outJson = new StringWriter();
        try {
            firstNodes.toJson(outJson);
        } catch (IOException e) {
            log.error(e.getMessage());
        }

        //System.out.println(outJson.toString());
        return outJson.toString();
    }
+
+
+    private void transeJson(Object obj, List<Object> ls) {
+        if (obj == null) {
+            return;
+        }
+        if (obj instanceof JsonArray) {
+            JsonArray ja = (JsonArray) obj;
+            Iterator<Object> it = ja.iterator();
+            while (it.hasNext()) {
+                transeJson(it.next(), ls);
+            }
+        } else if (obj instanceof JsonObject) {
+            ls.add(obj);
+            JsonObject jo = (JsonObject) obj;
+            Object ja = jo.get("children");
+            transeJson(ja, ls);
+        }
+    }
+
+    //System.out.println("ls_ize="+ls.size());
+
+    //	JsonObject[] jos = new JsonObject[depthes.length];
+    //	JsonArray firstNodes = new JsonArray();
+    //	for (int i = 0; i < depthes.length; i++) {
+    //		Node nd = ls.get(i);
+    //		if (depthes[i] >=0){
+    //			JsonObject obj = new JsonObject();
+    //			obj.put("id", i);
+    //			obj.put("title", stripLast(titles[i]));
+    //
+    //			int ii1 = i;
+    //			while(ii1 >=0 && urls[ii1]== null){
+    //				ii1--;
+    //			}
+    //			obj.put("url", urls[ii1]);
+    //
+    //			jos[i] = obj;
+    //			if (fathers[i]>=0){
+    //				JsonObject fo = jos[fathers[i]];
+    //				Object ch1 = fo.get("children");
+    //				JsonArray children = null;
+    //				if (ch1 == null){
+    //					children = new JsonArray();
+    //					children.add(obj);
+    //					fo.put("children", children);
+    //				}else{
+    //					children = (JsonArray)ch1;
+    //					children.add(obj);
+    //				}
+    //			}
+    //
+    //			if (depthes[i] == dt.getMinLevel()){
+    //				firstNodes.add(obj);
+    //			}
+    //		}
+    //	}
+    //
+    //	JsonObject prop = new JsonObject();
+    //	prop.put("children", "children");
+    //	prop.put("title", "title");
+    //
+    //	JsonObject ret = new JsonObject();
+    //	ret.put("data", firstNodes);
+    //	ret.put("defaultProps", prop);
+    //
+    //	 StringWriter outJson = new StringWriter();
+    //	 try {
+    //		 ret.toJson(outJson);
+    //	} catch (IOException e) {
+    //		// TODO Auto-generated catch block
+    //		e.printStackTrace();
+    //	}
+
+
+    public void saveZipObj(File fileName) {
+        try {
+            OutputStream os = new FileOutputStream(fileName);
+            GZIPOutputStream out = new GZIPOutputStream(os);
+            ObjectOutputStream oos = new ObjectOutputStream(out);
+
+            writeExternal(oos);
+            oos.flush();
+            out.close();
+        } catch (IOException e) {
+            log.error(e.getMessage());
+            //e.printStackTrace();
+        }
+    }
+
+    public void loadZipObj(File fileName) {
+        try {
+            InputStream is = new FileInputStream(fileName);
+            GZIPInputStream zip = new GZIPInputStream(is);
+            ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(zip));
+            readExternal(in);
+            in.close();
+        } catch (Exception e) {
+            log.error(e.getMessage());
+        }
+    }
+
+
+    public void saveZipObj(OutputStream os) {
+        try {
+            GZIPOutputStream out = new GZIPOutputStream(os);
+            ObjectOutputStream oos = new ObjectOutputStream(out);
+            writeExternal(oos);
+            oos.flush();
+            out.close();
+        } catch (IOException e) {
+            //throw new IllegalArgumentException("Failed to serialize object of type: " , ex);
+            log.error(e.getMessage());
+            //e.printStackTrace();
+        }
+    }
+
+    public void loadZipObj(InputStream is) {
+        try {
+            GZIPInputStream zip = new GZIPInputStream(is);
+            ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(zip));
+            readExternal(in);
+            in.close();
+        } catch (Exception e) {
+            log.error(e.getMessage());
+        }
+    }
+
+    public InputStream getInputStream() throws IOException {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        GZIPOutputStream zipOut = new GZIPOutputStream(baos);
+        ObjectOutputStream oos = new ObjectOutputStream(zipOut);
+        writeExternal(oos);
+        zipOut.close();
+        byte[] bs = baos.toByteArray();
+        ByteArrayInputStream is = new ByteArrayInputStream(bs);
+        oos.close();
+        baos.close();
+        return is;
+    }
+
+    public String toMd() {
+        List<List<String>> lls = null;
+        if (root != null) {
+            lls = root.getHtmls();
+        }
+        if (lls == null) {
+            log.info("Book " + title + " has no content .");
+            return null;
+        }
+
+        String s = GTBookUtil.listListToString(lls, "\n");
+        return s;
+    }
+
+    public void toMd(File fn) throws IOException {
+        //		TIntArrayList path = new TIntArrayList();
+        //		List<String> ls = new ArrayList<>();
+        //
+        //		IVisitor visitor = new IVisitor() {
+        //			@Override
+        //			public void visit(INode now, TIntArrayList path) {
+        //				if (now.isLeaf()){
+        //					String text = GTBookUtil.listToString(now.getText(), "\n");
+        //                    ls.add(text);
+        //				}else{
+        //					String text = now.getLabel();
+        //					ls.add(text);
+        //				}
+        //			}
+        //		};
+        //		root.traverseDescendants(path, visitor);
+
+        String s = toMd();
+
+        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));
+        try {
+            //			for(String s : ls){
+            //				s= s.replace("<br>", "\n");
+            //				out.write(s);
+            //				out.write("\n");
+            //			}
+            out.write(s);
+        } finally {
+            if (out != null) {
+                out.close();
+            }
+        }
+    }
+
    /** Total character count of the whole book (delegates to the root node). */
    public int getCharSize() {
        return root.getCharSize();
    }
+
+    private INode getNodeByString(String path) {
+        INode nd = null;
+        if (path == null || path.length() <= 0) {
+            nd = root;
+        } else {
+            nd = getNode(path);
+        }
+        return nd;
+    }
+
+    public int getCharSize(String path) {
+        INode nd = getNodeByString(path);
+        if (nd != null) {
+            return nd.getCharSize();
+        }
+        return 0;
+    }
+
    /**
     * Depth-first walk from {@code nd} that accumulates into {@code count[0]}
     * the character count of every leaf preceding {@code to} in document
     * order (identity comparison).
     *
     * @return true once {@code to} has been reached, which stops the walk
     */
    private boolean getProgress(INode nd, INode to, int[] count) {
        if (to == nd) {
            //isRet[0] = -1;
            return true;
        } else {
            if (nd.isLeaf()) {
                // leaves hold their rendered HTML lines; sum their lengths
                List<String> ls = ((IElement) nd.getValue()).getHtml();
                if (ls != null) {
                    for (String s : ls) {
                        count[0] = count[0] + s.length();
                    }
                }
            } else {
                boolean b = false;
                if (nd.getChildren() != null) {
                    for (int i = 0; i < nd.getChildren().size(); i++) {
                        INode nd1 = nd.getChildren().get(i);
                        // only GTNode children participate in the walk
                        if (nd1 instanceof GTNode) {
                            //count = count+((GTNode)nd1).getCharSize();
                            b = getProgress(nd1, to, count);
                            if (b) {
                                break;
                            }
                        }
                    }
                    return b;
                }
            }
        }
        return false;
    }
+
+
+    private int[] getCachedTotal() {
+        int[] total = words.get("_total_");
+        if (total == null) {
+            int t1 = getCharSize();
+            total = new int[2];
+            total[0] = 0;
+            total[1] = t1;
+            words.put("_total_", total);
+        }
+        return total;
+    }
+
    /**
     * Reading progress for the node at {@code path}: [0] = characters that
     * precede the node in document order, [1] = the node's own size.
     * Results are memoized in {@code words}, keyed by the path string.
     */
    public int[] getProgress(String path) {
        //TIntArrayList prefixPath = toIntPath(path);
        int[] count = words.get(path);
        if (count == null) {
            final INode to = getNodeByString(path);
            count = new int[2];

            if (to != null) {
                getProgress(root, to, count);
                count[1] = to.getCharSize();
            }
            // even the not-found case is cached (both entries stay 0)
            words.put(path, count);
        }

        return count;
    }
+
+    public Map<String, Object> getProgressToMap(String path) {
+        if (path != null) {
+            int[] counts = getProgress(path);
+            if (counts != null) {
+                Map m = new HashMap<String, Object>();
+                m.put(PREVIOUS_TOTAL, counts[0]);
+                m.put(CURRENT_TOTAL, counts[1]);
+
+                int[] total = getCachedTotal();
+                m.put(TOTAL, total[1]);
+                return m;
+            }
+        }
+        return null;
+    }
+
+    private void addHitResult(INode now, double sim1, List<ObjectDoublePair<List<List<String>>>> rets) {
+        if (now != null) {
+            List<List<String>> md1 = now.getHtmls();
+            ObjectDoublePair<List<List<String>>> o1 = new ObjectDoublePair<>(md1, sim1);
+            rets.add(o1);
+        }
+    }
+
    /** As {@link #getMdsByTitle(String, ISenSim, double)} with the default simple similarity. */
    public List<ObjectDoublePair<List<List<String>>>> getMdsByTitle(String titlePath) {
        return getMdsByTitle(titlePath, SenSimFactory.getSimpleSim());
    }
+
    /** As {@link #getMdsByTitle(String, ISenSim, double)} with similarity threshold 0.8. */
    public List<ObjectDoublePair<List<List<String>>>> getMdsByTitle(String titlePath, ISenSim ssim) {
        return getMdsByTitle(titlePath, ssim, 0.8);
    }
+
    /**
     * Finds the chapters/sections whose heading path matches
     * {@code titlePath} and returns their rendered content paired with the
     * similarity score, best match first.  {@code ssim} scores heading
     * similarity; a hit needs at least {@code alpha} overall (and, for
     * multi-segment paths, every segment above 0.5).
     * Returns null for a null/empty title path.
     */
    public List<ObjectDoublePair<List<List<String>>>> getMdsByTitle(String titlePath, ISenSim ssim, double alpha) {
        if (titlePath == null || titlePath.length() <= 0) return null;

        List<String> tp = GTBookUtil.getPathSegments(titlePath);
        if (tp.size() <= 0) return null;

        List<ObjectDoublePair<List<List<String>>>> rets = new ArrayList<>();

        TIntArrayList path = new TIntArrayList();
        // ls mirrors the current heading ancestry while traversing:
        // ls.get(i) is the heading at depth i+1 on the path to "now"
        List<String> ls = new ArrayList<>();
        IVisitor visitor = new IVisitor() {
            @Override
            public void visit(INode now, TIntArrayList path) {
                if (now.isLeaf()) {
                    //					String text = GTBookUtil.listToString(now.getText(), "\n");
                    //					ls.add(text);
                } else {
                    String text = now.getLabel();
                    if (path.size() > 0) {
                        // keep ls the same length as path
                        if (ls.size() < path.size()) {
                            ls.add(text);
                        } else if (ls.size() == path.size()) {
                            ls.set(ls.size() - 1, text);
                        } else {
                            // NOTE(review): the loop bound is re-evaluated as
                            // elements are removed, so this trims about half the
                            // excess per call — confirm the ancestry list can
                            // never grow stale by more than one level.
                            for (int i = 0; i < ls.size() - path.size(); i++) {
                                ls.remove(path.size());
                            }
                            ls.set(path.size() - 1, text);
                        }

                        //System.out.println(path.toString()+" -> "+ls);
                        if (tp.size() == 1) {
                            // single-segment query: match this heading alone,
                            // or this heading concatenated onto its parent
                            double sim1 = ssim.getSimilarity(tp.get(0), text);
                            if (sim1 >= alpha) {
                                addHitResult(now, sim1, rets);
                            } else {
                                if (ls.size() > 1) {
                                    String last = ls.get(ls.size() - 2);
                                    if (last.length() + 1 < tp.get(0).length()) {
                                        last = last + text;
                                        sim1 = ssim.getSimilarity(tp.get(0), last);

                                        if (sim1 >= alpha) {
                                            addHitResult(now, sim1, rets);
                                        }
                                    }
                                }
                            }
                        } else {
                            // multi-segment query: align the query's segments with
                            // the deepest headings of the current ancestry
                            if (path.size() >= tp.size()) {
                                int len = tp.size();
                                int start = path.size() - len;
                                double sim = 0;
                                boolean ok = true;
                                for (int i = 0; i < len; i++) {
                                    double d1 = ssim.getSimilarity(tp.get(i), ls.get(start + i));
                                    if (d1 <= 0.5) {
                                        ok = false;
                                        break;
                                    } else {
                                        sim = sim + d1;
                                    }
                                }
                                // average the per-segment similarities
                                sim = sim / len;
                                if (ok && sim >= alpha) {
                                    addHitResult(now, sim, rets);
                                }
                            }
                        }
                    }
                }
            }
        };


        root.traverseDescendants(path, visitor);
        if (rets.size() > 1) {
            Collections.sort(rets, ObjectDoublePair.getValueComparator());
        }

        return rets;
    }
+
+}

+ 19 - 0
gtbook/src/main/java/org/cnnlp/data/book/GTBookConstants.java

@@ -0,0 +1,19 @@
package org.cnnlp.data.book;

/**
 * Shared constants for the GTBook tree model: node kinds (ROOT/NODE/LEAF)
 * and Markdown block-type codes used by the {@code type} field of tree nodes.
 */
public class GTBookConstants {

    /** Metadata key under which document comments are stored. */
    public static final String KEY_MD_COMMENTS = "_comments";

    // Node kinds (structural role of a GTNode).
    public final static int LEAF = 2;
    public final static int NODE = 1;
    public final static int ROOT = 0;

    // Markdown block types (content kind carried by a node).
    public final static int MD_PARAGRAPH = 1;
    public final static int MD_HEADING = 2;
    public final static int MD_TABLE = 3;
    public final static int MD_IMAGE = 4;
    public final static int MD_FRONTMATTER = 9;
    public final static int MD_COMMENTS = 10;
    // A fragment that has no heading of its own.
    public final static int MD_NO_HEADING = 12;
    public final static int MD_HTMLBLOCK = 20;
    public final static int HTML_PAGE = 200;

    /** Constants holder; never instantiated. */
    private GTBookConstants() {
    }
}

+ 262 - 0
gtbook/src/main/java/org/cnnlp/data/book/GTBookHelper.java

@@ -0,0 +1,262 @@
+package org.cnnlp.data.book;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.cnnlp.data.md.GTDoc;
+
+import gnu.trove.TIntArrayList;
+
+public class GTBookHelper {
+
+    // 和GTBook.java 中的代码是一样的
+    public static void toGTDoc(GTBook book, File fn, Map<String, String> params) throws IOException {
+        List<GTDoc> docs = toGTDoc(book, params);
+        BufferedWriter out = null;
+        try {
+            out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));
+            for (GTDoc gdoc : docs) {
+                List<String> ls = gdoc.toListString();
+                for (String s : ls) {
+                    out.write(s);
+                }
+                out.write("\n");
+            }
+        } finally {
+            if (out != null) {
+                out.close();
+            }
+        }
+    }
+
+    public static List<GTDoc> toGTDoc(GTBook book, Map<String, String> params) throws IOException {
+
+        String tags = null;
+        if (params != null) {
+            tags = params.get(GTDoc.TAGS);
+        }
+        TIntArrayList path = new TIntArrayList();
+
+        INode[] fathers = new INode[256];
+
+        List<GTDoc> docs = new ArrayList<>();
+
+        final int[] counter = new int[1];
+        counter[0] = 0;
+
+        GTNode root = book.getRoot();
+        final String id = book.getId();
+        final String tags1 = tags;
+        IVisitor visitor = new IVisitor() {
+            @Override
+            public void visit(INode now, TIntArrayList path) {
+                if (path.size() >= 1) {
+                    fathers[path.size() - 1] = now;
+                } else {
+                    fathers[0] = now;
+                }
+                if (now.isLeaf()) {
+
+                    counter[0]++;
+                    int titleI = path.size() - 2;
+                    String text = GTBookUtil.listToString(now.getText(), "\n");
+                    GTDoc gdoc = new GTDoc();
+                    gdoc.addField(GTDoc.PARAGRAPHID, GTDoc.PREFIX_PARAGRAPHID + now.getId());
+                    gdoc.addField(GTDoc.CONTENT, text);
+
+                    if (titleI >= 0) {
+                        gdoc.addField(GTDoc.TITLE, fathers[titleI].getLabel());
+                    }
+
+                    if (titleI >= 1) {
+                        gdoc.addField(GTDoc.CHAPTER, fathers[0].getLabel());
+                    }
+                    if (titleI >= 2) {
+                        gdoc.addField(GTDoc.SECTION, fathers[1].getLabel());
+                    }
+                    if (titleI >= 3) {
+                        gdoc.addField(GTDoc.PART, fathers[2].getLabel());
+                    }
+
+                    path.remove(path.size() - 1);
+                    TIntArrayList path2 = book.getRelativePath(path);
+                    String url = book.getPath(path2);
+
+                    gdoc.addField(GTDoc.PATH, url);
+                    gdoc.addField(GTDoc.BOOKID, id);
+
+                    if (tags1 != null) {
+                        gdoc.addField(GTDoc.TAGS, tags1);
+                    }
+                    docs.add(gdoc);
+
+                }
+            }
+        };
+
+        root.traverseDescendants(path, visitor);
+
+        if (counter[0] == 0) {
+            //System.out.println("没有叶子");
+            List<INode> ls = root.getChildren();
+            TIntArrayList path2 = new TIntArrayList();
+            for (int i = 0; i < ls.size(); i++) {
+                INode now = ls.get(i);
+                String text = GTBookUtil.listToString(now.getText(), "\n");
+                GTDoc gdoc = new GTDoc();
+                gdoc.addField(GTDoc.PARAGRAPHID, GTDoc.PREFIX_PARAGRAPHID + now.getId());
+                gdoc.addField(GTDoc.CONTENT, text);
+                gdoc.addField(GTDoc.TITLE, now.getLabel());
+
+                path2.setQuick(0, i);
+                String url = book.getPath(path2);
+
+                gdoc.addField(GTDoc.PATH, url);
+                gdoc.addField(GTDoc.BOOKID, id);
+
+                if (tags1 != null) {
+                    gdoc.addField(GTDoc.TAGS, tags1);
+                }
+                docs.add(gdoc);
+            }
+        }
+        return docs;
+    }
+
+    public static List<GTDoc> split(GTBook book, Map<String, String> params, int maxSegmentSizeInChars, int maxOverlapSizeInChars, int minSegmentSizeInChars) throws IOException {
+        List<GTDoc> docs = toGTDoc(book, params);
+        return split(docs, maxSegmentSizeInChars, maxOverlapSizeInChars, minSegmentSizeInChars);
+    }
+
+    private static void processDocsBuff(DocsBuffer buffer, List<GTDoc> docs) {
+        GTDoc doc = buffer.toGTDoc();
+        if (doc != null) {
+            docs.add(doc);
+            buffer.clear();
+        }
+    }
+
+    public static List<GTDoc> split(List<GTDoc> docs, int maxSegmentSizeInChars, int maxOverlapSizeInChars, int minSegmentSizeInChars) {
+        List<GTDoc> docs2 = new ArrayList<>();
+        // 用于记录 <minSegmentSizeInChars 的doc,将用于合并
+        DocsBuffer buff = new DocsBuffer();
+
+        for (int i = 0; i < docs.size(); i++) {
+
+            GTDoc doc1 = docs.get(i);
+            int len = doc1.lengthOfDoc();
+            String content = doc1.getContent();
+
+            if (content == null) continue;
+
+            if (len > maxSegmentSizeInChars) {
+                processDocsBuff(buff, docs2);
+                // 做句子切分
+                int metaLen = len - content.length();
+                String[] ss = GTBookUtil.split(content, maxSegmentSizeInChars - metaLen, maxSegmentSizeInChars / 4, maxOverlapSizeInChars);
+
+                for (int j = 0; j < ss.length; j++) {
+                    GTDoc doc2 = doc1.deepCopy();
+                    doc2.addField(GTDoc.CONTENT, ss[j]);
+                    doc2.addField(GTDoc.CHUNKID, j);
+                    docs2.add(doc2);
+                }
+//            } else if (len < minSegmentSizeInChars) {
+//                // 考虑合并
+//                boolean b = buff.addOrNot(doc1, maxSegmentSizeInChars);
+//                if (!b){
+//                    processDocsBuff(buff, docs2);
+//                    buff.addOrNot(doc1, maxSegmentSizeInChars);
+//                }
+            } else {
+//                processDocsBuff(buff, docs2);
+//                docs2.add(doc1);
+                // 考虑合并
+                boolean b = buff.addOrNot(doc1, maxSegmentSizeInChars);
+                if (!b) {
+                    processDocsBuff(buff, docs2);
+                    buff.addOrNot(doc1, maxSegmentSizeInChars);
+                }
+            }
+        }
+        if (buff.getLength() > 0) {
+            processDocsBuff(buff, docs2);
+        }
+        return docs2;
+    }
+
+    public static void toMd(File gtbFile, File mdFile) throws IOException {
+        GTBook book = new GTBook();
+        book.loadZipObj(gtbFile);
+        book.toMd(mdFile);
+    }
+
+    private static class DocsBuffer {
+        List<GTDoc> docs = new ArrayList<>();
+
+        int length = 0;
+
+        public DocsBuffer() {
+        }
+
+        public void add(GTDoc doc) {
+            docs.add(doc);
+            if (length > 0) {
+                length = length + doc.lengthOfContent();
+            } else {
+                length = doc.lengthOfDoc();
+            }
+        }
+
+        public boolean addOrNot(GTDoc doc, int maxLength) {
+            if (length > maxLength) {
+                return false;
+            }
+            boolean ok = false;
+            if (length == 0) {
+                ok = true;
+                add(doc);
+            } else {
+                String path = docs.get(0).getPath();
+                String path1 = doc.getPath();
+                //if (path.equals(path1)) {
+                if (path1.startsWith(path)) {
+                    if (length + doc.lengthOfContent() <= maxLength) {
+                        ok = true;
+                        add(doc);
+                    }
+                }
+            }
+            return ok;
+        }
+
+        public int getLength() {
+            return length;
+        }
+
+        public void clear() {
+            docs.clear();
+            length = 0;
+        }
+
+        public GTDoc toGTDoc() {
+            if (docs.size() == 0) return null;
+            if (docs.size() == 1) return docs.get(0);
+            StringBuilder sb = new StringBuilder();
+            for (int i = 0; i < docs.size(); i++) {
+                GTDoc doc = docs.get(i);
+                String content = doc.getContent();
+                sb.append(content).append("\n");
+            }
+            GTDoc doc1 = docs.get(0).deepCopy();
+            doc1.setContent(sb.toString());
+            return doc1;
+        }
+    }
+}

+ 191 - 0
gtbook/src/main/java/org/cnnlp/data/book/GTBookUtil.java

@@ -0,0 +1,191 @@
package org.cnnlp.data.book;

import org.cnnlp.data.util.SenUtil;

import static java.util.Collections.emptyList;

import java.util.ArrayList;
import java.util.List;

/**
 * Static string-joining, path and text-splitting helpers shared by the
 * GTBook classes.
 */
public class GTBookUtil {

	/**
	 * Joins {@code ls} with {@code splitter} appended after every element
	 * (including the last) — except the one-element case, which returns the
	 * element as-is with no trailing splitter. Returns null for a null list
	 * and "" for an empty one.
	 */
	public static String listToString(List<String> ls,String splitter){
		if (ls == null) return null;
		if (ls.size() == 0) return "";
		if (ls.size() == 1) return ls.get(0);
		StringBuilder sb = new StringBuilder();
		//sb.append(ls.get(0));
		for (int i = 0; i < ls.size(); i++) {
			sb.append(ls.get(i)).append(splitter);
		}
		
		return sb.toString();
	}
	

	/**
	 * Joins a list of line groups: each inner list is concatenated with no
	 * separator, then {@code splitter} is appended after each group
	 * (including the last).
	 */
	public static String listListToString(List<List<String>> ls,String splitter){
		if (ls == null) return null;
		if (ls.size() == 0) return "";
		if (ls.size() == 1) return listToString(ls.get(0),"")+splitter;
		StringBuilder sb = new StringBuilder();
		//sb.append(listToString(ls.get(0),""));
		for (int i = 0; i < ls.size(); i++) {
			sb.append(listToString(ls.get(i),"")).append(splitter);
		}
		
		//System.out.println(sb.toString());
		return sb.toString();
	}

	
    // Index of the last '/', or failing that the last '\', in path; -1 if neither occurs.
    private static int lastSeparator(String path) {
    	int i1 = path.lastIndexOf("/");
    	if (i1 < 0) {
    		i1 = path.lastIndexOf("\\");
    	}
    	return i1;
    }
    
    /**
     * Returns the last two segments of {@code path} joined with '/'
     * (e.g. "a/b/c" -> "b/c"). Falls back to returning {@code path}
     * unchanged when there are fewer than two usable separators or the
     * path ends in a separator.
     */
    public static String getLastPath(String path) {
    	//File p = new File(path);
    	int i1 = lastSeparator(path);
    	String fn = null;
    	String p1 = null;
    	if (i1>0 && i1+1< path.length()) {
    		fn = path.substring(i1+1);
    		p1 = path.substring(0, i1);
    	}else {
    		return path;
    	}
    	
    	if (p1 != null) {
    		i1 =  lastSeparator(p1);
    		if (i1 >=0 && i1+1<p1.length()) {
    			p1 = p1.substring(i1+1);
    			return p1+"/"+fn;
    		}else {
    			return path;
    		}
    	}
    	return fn;
    }
	
	  /**
	   * Split a string based on a separator, but don't split if it's inside
	   * a string.  Assume '\' escapes the next char both inside and
	   * outside strings.
	   */
	  public static void splitSmart(String s, char separator, List<String> lst) {
	    int pos = 0, start = 0, end = s.length();
	    char inString = 0;     // the quote char we are currently inside, or 0
	    char ch = 0;
	    while (pos < end) {
	      char prevChar = ch;
	      ch = s.charAt(pos++);
	      if (ch == '\\') {    // skip escaped chars
	        pos++;
	      } else if (inString != 0 && ch == inString) {
	        inString = 0;
	      } else if (ch == '\'' || ch == '"') {
	        // If char is directly preceeded by a number or letter
	        // then don't treat it as the start of a string.
	        // Examples: 50" TV, or can't
	        if (!Character.isLetterOrDigit(prevChar)) {
	          inString = ch;
	        }
	      } else if (ch == separator && inString == 0) {
	        lst.add(s.substring(start, pos - 1));
	        start = pos;
	      }
	    }
	    if (start < end) {
	      lst.add(s.substring(start, end));
	    }

	    /***
	     if (SolrCore.log.isLoggable(Level.FINEST)) {
	     SolrCore.log.trace("splitCommand={}", lst);
	     }
	     ***/

	  }
    
    
    // /a/b/c will be returned as ["a","b","c"]
    // (empty segments produced by leading/duplicate slashes are silently dropped
    // by the overridden add below)
    public static List<String> getPathSegments(String path) {
      if (path == null || path.isEmpty()) return emptyList();
      List<String> parts = new ArrayList<String>() {
        @Override
        public boolean add(String s) {
          if (s == null || s.isEmpty()) return false;
          return super.add(s);
        }
      };
      splitSmart(path, '/', parts);
      return parts;
    }

	// range: search backwards from lastPos+maxLen for a sentence split point,
	// stopping at (lastPos+maxLen)-range.
	// NOTE(review): when text.length() < maxLen, num is 0 and an empty array
	// is returned — confirm callers handle short inputs separately.
	public static String[] split(String text,int maxLen,int range){
		List<String> ls = new ArrayList<>();
		int num = text.length()/maxLen;
		int lastPos = 0;
		int i=0;

		// Allow up to twice the naive chunk count, since split points land
		// before the maxLen boundary.
		num = num*2;
		while(i<num){
			//int nowpos = SenUtil.getSplitPos(text, maxLen*(i+1), 256);
			int nowpos = SenUtil.getSplitPos(text, lastPos+maxLen, range);

			if (nowpos >lastPos){
				String t1 = text.substring(lastPos,nowpos);
				lastPos = nowpos;
				//ds = te.getOccurence(t1, ds,occurence);
				ls.add(t1);
				if (lastPos >= text.length()){
					break;
				}
			}
			i++;
		}
		return ls.toArray(new String[0]);
	}

	// Same as split(text, maxLen, range) but each chunk after the first
	// starts up to maxOverlap characters before the previous split point,
	// producing overlapping chunks (overlap start is sentence-aligned via
	// SenUtil.getSplitPosFromLeft).
	public static String[] split(String text,int maxLen,int range,int maxOverlap){
		List<String> ls = new ArrayList<>();
		int num = text.length()/maxLen;
		int lastPos = 0;
		int i=0;

		num = num*2;
		while(i<num){
			//int nowpos = SenUtil.getSplitPos(text, maxLen*(i+1), 256);
			int nowpos = SenUtil.getSplitPos(text, lastPos+maxLen, range);

			if (nowpos >lastPos){
				String t1 = text.substring(lastPos,nowpos);
				lastPos = nowpos;
				//ds = te.getOccurence(t1, ds,occurence);
				ls.add(t1);
				if (lastPos >= text.length()){
					break;
				}

				// Rewind the next chunk's start into the previous chunk to
				// create the overlap.
				int overlap1 = SenUtil.getSplitPosFromLeft(text, lastPos, maxOverlap);
				if (overlap1 < lastPos){
					lastPos = overlap1;
				}
			}

			i++;
		}
		return ls.toArray(new String[0]);
	}

    
//    public static void main(String[] args) {
//		String p = "./image/media/image1.jpeg";
//		p = "\\media\\image1.jpeg";
//		System.out.println(getLastPath(p));
//	}
}

+ 480 - 0
gtbook/src/main/java/org/cnnlp/data/book/GTNode.java

@@ -0,0 +1,480 @@
package org.cnnlp.data.book;

import java.io.BufferedOutputStream;
import java.io.Externalizable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.List;

import gnu.trove.TIntArrayList;

/**
 * A node of the GTBook content tree: the ROOT, an internal NODE (heading
 * level) or a LEAF carrying an {@link IElement} value — see
 * {@link GTBookConstants}. Uses a custom {@link Externalizable} encoding;
 * the field order in {@link #writeExternal} and {@link #readExternal}
 * must be kept in sync.
 */
public class GTNode implements INode,Externalizable{
	
	private static final long serialVersionUID = 1L;

	// Unique id of the node within the book.
	protected int id;

	// Heading label; may be null for plain content leaves.
	protected String label;

	// Markdown block type of the content: paragraph/table/html/image
	// (MD_* constants in GTBookConstants).
	protected int type;

	// Structural kind: ROOT/NODE/LEAF (GTBookConstants).
	protected int nodeType;
	
	// Depth of the node in the tree.
	protected int depth;
	// Content element attached to this node; may be null.
	protected IElement value;

	// Child nodes; null (not an empty list) when the node has no children.
	protected List<INode> children;

	
//	private transient INode father;
//	
//	public INode getFather() {
//		return father;
//	}
//
//	public void setFather(INode father) {
//		this.father = father;
//	}

	public GTNode() {
	}

	public GTNode(int nodeType) {
		this.nodeType = nodeType;
	}

	public int getId() {
		return id;
	}

	public void setId(int id) {
		this.id = id;
	}
	
	public String getLabel() {
		return label;
	}


	public void setLabel(String label) {
		this.label = label;
	}

	// Markdown block type of the content: paragraph/table/html/image.
	public int getType() {
		return type;
	}


	public void setType(int type) {
		this.type = type;
	}


	public int getDepth() {
		return depth;
	}


	public void setDepth(int depth) {
		this.depth = depth;
	}


	public void setValue(IElement value) {
		this.value = value;
	}


	public void setChildren(List<INode> children) {
		this.children = children;
	}
	
	// Lazily creates the children list on first child.
	@Override
	public void addChild(INode node) {
		if (children == null){
			children = new ArrayList<INode>();
		}
		children.add(node);
	}

	@Override
	public Object getValue() {
		return value;
	}


	@Override
	public List<INode> getChildren() {
		return children;
	}


	@Override
	public boolean isLeaf() {
		if (nodeType == GTBookConstants.LEAF){
			return true;
		}
		return false;
	}

	// Number of direct children (0 when the children list is null).
	public int size(){
		if (children == null){
			return 0;
		}else{
			return children.size();
		}
	}
	
	
	
	// Debug preview: leaves show up to 12 characters of their HTML, other
	// nodes show their label.
	@Override
	public String toString() {
//		return "GTNode [label=" + label + ", type=" + type + ", depth=" + depth + ", value=" + value + ", children="
//				+ children + "]";
		if (nodeType == GTBookConstants.LEAF){
			if (value != null && value instanceof MDElement){
				MDElement e = (MDElement)value;
				String out = GTBookUtil.listToString(e.getHtml(), " ");
				out = out.replaceAll("\r\n","\n");
				if (out.length()>=16){
					out = out.substring(0, 12)+"...";
				}
				return out;
			}else{
				return "";
			}
		}else{
			String out = label;
			return out;
		}
		
	}

	// Like toString() but returns "" for non-leaf nodes and skips the
	// newline normalization.
	public String toSimpleString() {
		if (nodeType == GTBookConstants.LEAF){
			if (value != null && value instanceof MDElement){
				MDElement e = (MDElement)value;
				String out = GTBookUtil.listToString(e.getHtml(), " ");
				if (out.length()>=16){
					out = out.substring(0, 12)+"...";
				}
				return out;
			}
		}else{
			
		}
		return "";
	}


	// Reads fields in the exact order written by writeExternal:
	// id, type, label, nodeType, depth, value, child count, children.
	// NOTE: children are restored only when the stored count is > 0, so an
	// empty children list round-trips to null (writeExternal stores -1 for
	// null and size for non-null).
	@Override
	public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {		

		id = in.readInt();
		type = in.readInt();
		Object obj = in.readObject();
		if (obj != null){
			label = (String)obj;
		}
		nodeType = in.readInt();
		depth = in.readInt();

		obj = in.readObject();
		if (obj != null){
			value = (IElement)obj;
		}

		int len = in.readInt();
		if (len >0){
			List<INode> ls = new ArrayList<>();
			for (int i = 0; i < len; i++) {
				INode in1 = (INode)in.readObject();
				ls.add(in1);
			}
			children = ls;
		}
	}

	// Best-effort save of this subtree to a file.
	// NOTE(review): any exception is silently swallowed, so callers get no
	// failure signal; also, if writeExternal throws, the stream is not
	// closed (close() is inside the try, not a finally) — confirm this
	// best-effort behavior is intended.
	public void saveObj(File file){
		try {
			ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(file)));
			writeExternal(out);
			out.close();
		} catch (Exception e) {
			//e.printStackTrace();
			//log.error(e.getMessage());
		}
	}
	
	// Writes fields in the order expected by readExternal; -1 is the
	// sentinel for a null children list.
	@Override
	public void writeExternal(ObjectOutput out) throws IOException {
//		protected String label;
//
//		protected int type;
//		protected int depth;
//		protected IElement value;
//
//		protected List<INode> children;
		out.writeInt(id);
		out.writeInt(type);		
		out.writeObject(label);
		out.writeInt(nodeType);
		out.writeInt(depth);
		out.writeObject(value);
		if (children == null){
			out.writeInt(-1);
		}else{
			out.writeInt(children.size());
			for(INode nd : children){
				out.writeObject(nd);
			}
		}
	}

	// Plain-text lines of this node and all descendants: the node's own
	// value text (or its label when it has no value), followed by each
	// child's text in order.
	@Override
	public List<String> getText() {
		List<String> sb = new ArrayList<>();
		if (value != null){
			sb.addAll(value.getText());
		}else{
			if (label != null){
				sb.add(label);
			}				
		}
		
		if (!isLeaf() &&  (children != null)){
			for(INode nd : children){
				sb.addAll(nd.getText());
			}
		}
		return sb;

		
		
//		StringBuilder sb = new StringBuilder();
//		if (label != null){
//			sb.append(label).append("\n");
//		}
//		if (isLeaf()){
//			sb.append(value.getText()).append("\n");
//		}else{
//			//List<INode> children
//			if (children != null){
//				for(INode nd : children){
//					sb.append(nd.getText());
//				}
//			}
//		}
//		return sb.toString();
	}

//	private void append(List<String>  src,List<String>  src2,String keyEnter) {
//		if (src2 == null) return;
//		for (int i = 0; i < src2.size(); i++) {
//			src.add(src2.get(i)+keyEnter);
//		}
//	}
	
	// Markdown/HTML lines of this node and all descendants.
	// NOTE(review): when value != null and nodeType == NODE, two "\n"
	// separators are appended (the 2025-02-18 addition plus the original
	// unconditional one) — confirm the double blank line is intentional.
	@Override
	public List<String> getHtml() {
		List<String> sb = new ArrayList<>();
		if (value != null){
			sb.addAll(value.getHtml());
			// added 2025-02-18
			if (nodeType == GTBookConstants.NODE){
				sb.add("\n");
			}
			//append(sb,value.getHtml(),"\n");
			sb.add("\n");
		}

		if (!isLeaf() && (children != null)){
				for(INode nd : children){
					sb.addAll(nd.getHtml());
					//append(sb,nd.getHtml(),"\n");
				}
		}
		//sb.add("\n");
		return sb;
	}

	// Pre-order list of this node and all descendants. The path argument is
	// cloned for each child, so callers' lists are never mutated.
	public List<INode> traverseDescendants(TIntArrayList path){
		//System.out.println(path.toString());
		List<INode> nds = new ArrayList<>();
		//Deque<INode> stack = new LinkedList<>();
		nds.add(this);
		//System.out.println(label);
		if (isLeaf()){
			//return null;

		}else{
			if (children != null) {
			for (int i = 0; i < children.size(); i++) {
				INode nd1 = children.get(i);
				//nds.add(nd1);
				if (nd1 instanceof GTNode){
					TIntArrayList path2 = (TIntArrayList)path.clone();
					path2.add(i);
					List<INode> nd1s = ((GTNode)nd1).traverseDescendants(path2);
					nds.addAll(nd1s);
				}
			}
			}
		}
		return nds;
	}
	
	// Pre-order visit of this node and all descendants. Each child receives
	// its own cloned child-index path, so a visitor may mutate the path it
	// is handed without affecting siblings.
	public void traverseDescendants(TIntArrayList path,IVisitor visitor){
		//List<INode> nds = new ArrayList<>();
		//nds.add(this);
		//System.out.println(label);
		visitor.visit(this,path);
		
		//System.out.println(label);
		if (!isLeaf() && children!= null){
			for (int i = 0; i < children.size(); i++) {
				INode nd1 = children.get(i);
				if (nd1 instanceof GTNode){
					TIntArrayList path2 = (TIntArrayList)path.clone();
					path2.add(i);
					((GTNode)nd1).traverseDescendants(path2,visitor);
				}
			}
		}

	}

	// Added 2020-03-07: per-node HTML line groups used for highlight
	// ("mark red") rendering. Each node contributes an anchor line
	// (<a id="pg_{id}">) so the renderer can jump to / mark the paragraph.
	@Override
	public List<List<String>> getHtmls() {
		List<List<String>> lls = new ArrayList<List<String>>();

		//"<a id=\"first_1\"></a>"
		
		List<String> as = new ArrayList<String>();
		//2020.9.22
		//as.add("<a id=\""+"pg_"+getId()+"\"></a>");
		as.add("<a id=\""+"pg_"+getId()+"\"></a>\n");
		
		lls.add(as);
		if (value != null){
//			System.out.println("v="+value.getHtml());
//			List<String> ls1 = value.getHtml();
//			if (ls1 != null && ls1.size()>0) {
//				String s1 = ls1.get(0);
//				if (s1.startsWith("<") || s1.startsWith("|") ) {
//				}else {
//					lls.add(as);
//				}
//				lls.add(value.getHtml());
//			}
			lls.add(value.getHtml());
			//sb.addAll(value.getHtml());
			//append(sb,value.getHtml(),"\n");
		}
		if (!isLeaf() && (children != null)){
			for(INode nd : children){
				
				List<List<String>> lls1 = nd.getHtmls();
				//System.out.println("=="+lls1.get(0));
				
				lls.addAll(lls1);
				//sb.addAll(nd.getHtml());
				//append(sb,nd.getHtml(),"\n");
				
			}
		}
		return lls;
	}

	
	
	// ---- factory helpers (note: "lable" parameter name is a historic typo) ----

	public static GTNode buildRoot(String lable){
		GTNode root = new GTNode(GTBookConstants.ROOT);
		root.setLabel(lable);
		return root;
	}

	public static GTNode buildLeaf(IElement object){
		GTNode leaf = new GTNode(GTBookConstants.LEAF);
		leaf.setValue(object);
		return leaf;
	}

	public static GTNode buildNode(IElement object){
		GTNode leaf = new GTNode(GTBookConstants.NODE);
		leaf.setValue(object);
		return leaf;
	}

	public static GTNode buildRoot(String lable,int id){
		GTNode root = new GTNode(GTBookConstants.ROOT);
		root.setId(id);
		root.setLabel(lable);
		return root;
	}

	public static GTNode buildLeaf(IElement object,int id){
		GTNode leaf = new GTNode(GTBookConstants.LEAF);
		leaf.setId(id);
		leaf.setValue(object);
		return leaf;
	}

	public static GTNode buildNode(IElement object,int id){
		GTNode node = new GTNode(GTBookConstants.NODE);
		node.setId(id);
		node.setValue(object);
		return node;
	}
	
	public static GTNode buildRoot(int id){
		GTNode root = new GTNode(GTBookConstants.ROOT);
		root.setId(id);
		return root;
	}

	public static GTNode buildLeaf(int id){
		GTNode leaf = new GTNode(GTBookConstants.LEAF);
		leaf.setId(id);
		return leaf;
	}

	public static GTNode buildNode(int id){
		GTNode node = new GTNode(GTBookConstants.NODE);
		node.setId(id);
		return node;
	}

	// Character count of the original md content: leaves sum the lengths of
	// their HTML lines; inner nodes sum over their GTNode children.
	public int getCharSize() {
		int count = 0;
		if (isLeaf()){
			List<String> ls = value.getHtml();
			if (ls != null) {
				for(String s: ls) {
					count = count+s.length();
				}
			}
		}else{
			if (children != null) {
				for (int i = 0; i < children.size(); i++) {
					INode nd1 = children.get(i);
					if (nd1 instanceof GTNode){
						count = count+((GTNode)nd1).getCharSize();
					}
				}
			}
		}
		return count;
	}
}

+ 5 - 0
gtbook/src/main/java/org/cnnlp/data/book/IBook.java

@@ -0,0 +1,5 @@
package org.cnnlp.data.book;

/**
 * Marker interface for book implementations; declares no methods yet.
 */
public interface IBook {

}

+ 12 - 0
gtbook/src/main/java/org/cnnlp/data/book/IElement.java

@@ -0,0 +1,12 @@
package org.cnnlp.data.book;

import java.io.Serializable;
import java.util.List;

/**
 * A piece of book content that can render itself both as plain-text lines
 * and as markdown/HTML source lines. Serializable so elements can be
 * persisted inside a saved book tree.
 */
public interface IElement extends Serializable{

	/** Plain-text lines of this element. */
	List<String> getText();
	
	/** Markdown/HTML source lines of this element. */
	List<String> getHtml();
	
}

+ 28 - 0
gtbook/src/main/java/org/cnnlp/data/book/INode.java

@@ -0,0 +1,28 @@
package org.cnnlp.data.book;

import java.util.List;

//import com.googlecode.concurrenttrees.radix.node.Node;

/**
 * A node in the GTBook content tree: the root, an internal heading node,
 * or a leaf holding a content element (see {@link GTBookConstants}).
 */
public interface INode extends IElement{

	/** Unique id of this node within the book. */
	int getId();
	
	// Markdown block type of the content: paragraph/table/html/image
	// (MD_* constants in GTBookConstants).
	int getType();
	
	/** Child nodes; may be null when the node has none. */
	List<INode> getChildren();
	
	void addChild(INode node);
	
	/** Content element attached to this node; may be null. */
	Object getValue();
	
	/** True when this node's kind is LEAF. */
	boolean isLeaf();

	/** Heading label; may be null for plain content leaves. */
	String getLabel();
	
    /** Per-node HTML line groups, each prefixed with an anchor (used for highlighting). */
    List<List<String>> getHtmls();

    // Character count of the node's (and its descendants') original md content.
	int getCharSize() ;
}

+ 9 - 0
gtbook/src/main/java/org/cnnlp/data/book/IVisitor.java

@@ -0,0 +1,9 @@
+package org.cnnlp.data.book;
+
+import gnu.trove.TIntArrayList;
+
+public interface IVisitor {
+	
+	public void visit(INode now,TIntArrayList path);
+	
+}

+ 121 - 0
gtbook/src/main/java/org/cnnlp/data/book/MDElement.java

@@ -0,0 +1,121 @@
package org.cnnlp.data.book;

import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.List;

/**
 * Leaf content of a book node: parallel lists of plain-text lines and their
 * markdown/HTML source lines. Uses a custom {@link Externalizable}
 * encoding; readExternal/writeExternal field order must stay in sync.
 */
public class MDElement implements IElement,Externalizable{
	private static final long serialVersionUID = 1L;
	
	// Parallel lists: text.get(i) is the plain-text form of html.get(i).
	List<String> text;
	List<String> html;
	
	public MDElement() {
		text = new ArrayList<>();
		html = new ArrayList<>();
	}
	
	// Single-line element.
	public MDElement(String txt,String htm) {
		this.text = new ArrayList<String>();
		text.add(txt);
		this.html =  new ArrayList<String>();
		html.add(htm);
		
	}
	
	// Adopts (does not copy) the given lists.
	public MDElement(List<String> txts,List<String> htms) {
		this.text = txts;
		this.html = htms;
	}
	
	public int size(){
		return text.size();
	}

	// Appends one text/html line pair.
	public void add(String txt,String htm) {
		text.add(txt);
		html.add(htm);
	}
	
	public List<String> getText() {
		return text;
	}

	public void setText(List<String>  text) {
		this.text = text;
	}

	public List<String>  getHtml() {
		return html;
	}

	public void setHtml(List<String>  html) {
		this.html = html;
	}


	// Debug preview: html lines joined with <br>, truncated to 12 chars.
	@Override
	public String toString() {
		//return "MDElement [htm=" + htm + "]";
		String out = GTBookUtil.listToString(html, "<br>");
		out = out.replaceAll("\r\n","\n");
		if (out.length()>=16){
			out = out.substring(0, 12)+"...";
		}
		return out;
	}

	// Reads text then html, each encoded as (count, UTF strings); a count
	// of -1 restores a null list. Mirrors writeExternal exactly.
	@Override
	public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
		List<String> temp = null;
		int len = in.readInt();
		if (len >=0){
			temp = new ArrayList<>();
			for (int i = 0; i < len; i++) {
				String s1 = in.readUTF();
				temp.add(s1);
			}
		}
		text = temp;
		
		List<String> temp2 = null;
		len = in.readInt();
		if (len >=0){
			temp2 = new ArrayList<>();
			for (int i = 0; i < len; i++) {
				String s1 = in.readUTF();
				temp2.add(s1);
			}
		}
		html = temp2;
		
	}

	// Writes text then html as (count, UTF strings); -1 marks a null list.
	// NOTE(review): writeUTF throws UTFDataFormatException for strings whose
	// modified-UTF-8 encoding exceeds 65535 bytes — confirm individual lines
	// always stay below that limit (the commented-out length prints suggest
	// this was investigated before).
	@Override
	public void writeExternal(ObjectOutput out) throws IOException {
		if (text == null){
			out.writeInt(-1);
		}else{
			out.writeInt(text.size());
			
			for (int i = 0; i < text.size(); i++) {
				//System.out.println("text len="+text.get(i).length());
				out.writeUTF(text.get(i));
			}
		}

		if (html == null){
			out.writeInt(-1);
		}else{
			out.writeInt(html.size());
			for (int i = 0; i < html.size(); i++) {
				//System.out.println("html len="+html.get(i).length());
				out.writeUTF(html.get(i));
			}
		}

	}

}

+ 115 - 0
gtbook/src/main/java/org/cnnlp/data/book/PrettyPrinter.java

@@ -0,0 +1,115 @@
+/**
+ * Copyright 2012-2013 Niall Gallagher
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.cnnlp.data.book;
+
+//import com.googlecode.concurrenttrees.radix.node.Node;
+//import com.googlecode.concurrenttrees.radix.node.util.PrettyPrintable;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Utility methods to generate semi-graphical string representations of trees.
+ *
+ * @author Niall Gallagher
+ */
+public class PrettyPrinter {
+
+    /**
+     * Private constructor, not used.
+     */
+    PrettyPrinter() {
+    }
+
+    /**
+     * Generates a semi-graphical string representation of a given tree.
+     * <p/>
+     * Example output:<br/>
+     * <pre>
+     * ○
+     * └── ○ B (1)
+     *     └── ○ A (2)
+     *         └── ○ N (3)
+     *             ├── ○ AN (5)
+     *             │   └── ○ A (6)
+     *             └── ○ DANA (4)
+     * </pre>
+     *
+     * @param tree The tree for which the semi-graphical representation should be generated
+     * @return A semi-graphical string representation of the tree
+     */
+    public static String prettyPrint(INode tree) {
+        StringBuilder sb = new StringBuilder();
+        prettyPrint(tree, sb, "", true, true);
+        return sb.toString();
+    }
+
+    /**
+     * Generates a semi-graphical string representation of a given tree, writing it to a given {@link Appendable}.
+     * <p/>
+     * Example output:<br/>
+     * <pre>
+     * ○
+     * └── ○ B (1)
+     *     └── ○ A (2)
+     *         └── ○ N (3)
+     *             ├── ○ AN (5)
+     *             │   └── ○ A (6)
+     *             └── ○ DANA (4)
+     * </pre>
+     *
+     *  tree The tree for which the semi-graphical representation should be generated
+     *  appendable The object to which the tree should be written
+     */
+//    public static void prettyPrint(PrettyPrintable tree, Appendable appendable) {
+//        prettyPrint(tree.getNode(), appendable, "", true, true);
+//    }
+
+    static void prettyPrint(INode node, Appendable sb, String prefix, boolean isTail, boolean isRoot) {
+    	try {
+    		StringBuilder label = new StringBuilder();
+    		if (isRoot) {
+    			label.append("○");
+    			if (node.getChildren().size() > 0) {
+    				label.append(" ");
+    			}
+    		}
+    		//            if (node.getChildren() != null){
+    		//            label.append(node.getChildren());
+    		//            }
+    		if (node.getLabel() != null){
+    			label.append(node.getLabel());
+    		}
+    		if (node.getValue() != null) {
+    			label.append(" (").append(node.getValue()).append(")");
+    		}
+    		sb.append(prefix).append(isTail ? isRoot ? "" : "└── ○ " : "├── ○ ").append(label).append("\n");
+
+    		List<INode> children = node.getChildren();
+    		if (children != null){
+    			for (int i = 0; i < children.size() - 1; i++) {
+    				prettyPrint(children.get(i), sb, prefix + (isTail ? isRoot ? "" : "    " : "│   "), false, false);
+    			}
+    			if (!children.isEmpty()) {
+    				prettyPrint(children.get(children.size() - 1), sb, prefix + (isTail ? isRoot ? "" : "    " : "│   "), true, false);
+    			}
+    		}
+    	}catch (IOException ioException) {
+    		// Rethrow the checked exception as a runtime exception...
+    		throw new IllegalStateException(ioException);
+    	}
+    }
+}

+ 11 - 0
gtbook/src/main/java/org/cnnlp/data/document/GDocConstants.java

@@ -0,0 +1,11 @@
+package org.cnnlp.data.document;
+
public class GDocConstants {

    // Keys assumed to be used in document metadata maps — confirm against callers.
    public static final String CURRENT_SECTION_TOC ="currentSectionToc";
    public static final String RAW_CONTENT = "rawContent";
    public static final String FILE_NAME = "fileName";
    public static final String FROM_IDX = "fromIdx";
    public static final String TO_IDX = "toIdx";
    public static final String FIRST_HEADING = "heading";
}

+ 332 - 0
gtbook/src/main/java/org/cnnlp/data/document/GDocument.java

@@ -0,0 +1,332 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.cnnlp.data.document;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+//import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import org.cnnlp.data.util.id.IdGenerator;
+import org.cnnlp.data.util.id.RandomIdGenerator;
+
+//import org.springframework.ai.document.ContentFormatter;
+//import org.springframework.ai.document.DefaultContentFormatter;
+//import org.springframework.ai.document.MetadataMode;
+//import org.springframework.ai.document.id.IdGenerator;
+//import org.springframework.ai.document.id.RandomIdGenerator;
+//import org.springframework.ai.model.Media;
+//import org.springframework.lang.Nullable;
+//import org.springframework.util.Assert;
+//import org.springframework.util.StringUtils;
+
+/**
+ * A document is a container for the content and metadata of a document. It also contains
+ * the document's unique ID.
+ *
+ * A Document can hold either text content or media content, but not both.
+ *
+ * It is intended to be used to take data from external sources as part of spring-ai's ETL
+ * pipeline.
+ *
+ * <p>
+ * Example of creating a text document: <pre>{@code
+ * // Using constructor
+ * Document textDoc = new Document("Sample text content", Map.of("source", "user-input"));
+ *
+ * // Using builder
+ * Document textDoc = Document.builder()
+ *     .text("Sample text content")
+ *     .metadata("source", "user-input")
+ *     .build();
+ * }</pre>
+ *
+ * <p>
+ * Example of creating a media document: <pre>{@code
+ * // Using constructor
+ * Media imageContent = new Media(MediaType.IMAGE_PNG, new byte[] {...});
+ * Document mediaDoc = new Document(imageContent, Map.of("filename", "sample.png"));
+ *
+ * // Using builder
+ * Document mediaDoc = Document.builder()
+ *     .media(new Media(MediaType.IMAGE_PNG, new byte[] {...}))
+ *     .metadata("filename", "sample.png")
+ *     .build();
+ * }</pre>
+ *
+ * <p>
+ * Example of checking content type and accessing content: <pre>{@code
+ * if (document.isText()) {
+ *     String textContent = document.getText();
+ *     // Process text content
+ * } else {
+ *     Media mediaContent = document.getMedia();
+ *     // Process media content
+ * }
+ * }</pre>
+ */
+//@JsonIgnoreProperties({ "contentFormatter", "embedding" })
+public class GDocument {
+
+//	public static final ContentFormatter DEFAULT_CONTENT_FORMATTER = DefaultContentFormatter.defaultConfig();
+
+	/**
+	 * Unique ID
+	 */
+	private final String id;
+
+	/**
+	 * Document string content.
+	 */
+	private final String text;
+
+	/**
+	 * Document media content
+	 */
+//	private final Media media;
+
+	/**
+	 * Metadata for the document. It should not be nested and values should be restricted
+	 * to string, int, float, boolean for simple use with Vector Dbs.
+	 */
+	private final Map<String, Object> metadata;
+
+	/**
+	 * A numeric score associated with this document that can represent various types of
+	 * relevance measures.
+	 * <p>
+	 * Common uses include:
+	 * <ul>
+	 * <li>Measure of similarity between the embedding value of the document's text/media
+	 * and a query vector, where higher scores indicate greater similarity (opposite of
+	 * distance measure)
+	 * <li>Text relevancy rankings from retrieval systems
+	 * <li>Custom relevancy metrics from RAG patterns
+	 * </ul>
+	 * <p>
+	 * Higher values typically indicate greater relevance or similarity.
+	 */
+	//@Nullable
+	private final Double score;
+
+	/**
+	 * Mutable, ephemeral, content to text formatter. Defaults to Document text.
+	 */
+	//@JsonIgnore
+	//private ContentFormatter contentFormatter = DEFAULT_CONTENT_FORMATTER;
+
+	@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
+	public GDocument(@JsonProperty("content") String content) {
+		this(content, new HashMap<>());
+	}
+
+	public GDocument(String text, Map<String, Object> metadata) {
+		this(new RandomIdGenerator().generateId(), text, metadata, null);
+	}
+
+	public GDocument(String id, String text, Map<String, Object> metadata) {
+		this(id, text, metadata, null);
+	}
+
+//	public Document(Media media, Map<String, Object> metadata) {
+//		this(new RandomIdGenerator().generateId(), null, media, metadata, null);
+//	}
+//
+//	public Document(String id, Media media, Map<String, Object> metadata) {
+//		this(id, null, media, metadata, null);
+//	}
+
+	private GDocument(String id, String text, Map<String, Object> metadata, Double score) {
+//		Assert.hasText(id, "id cannot be null or empty");
+//		Assert.notNull(metadata, "metadata cannot be null");
+//		Assert.noNullElements(metadata.keySet(), "metadata cannot have null keys");
+//		Assert.noNullElements(metadata.values(), "metadata cannot have null values");
+//		Assert.isTrue(text != null ^ media != null, "exactly one of text or media must be specified");
+
+		this.id = id;
+		this.text = text;
+//		this.media = media;
+		this.metadata = new HashMap<>(metadata);
+		this.score = score;
+	}
+
+	public static Builder builder() {
+		return new Builder();
+	}
+
+	/**
+	 * Returns the unique identifier for this document.
+	 * <p>
+	 * This ID is either explicitly provided during document creation or generated using
+	 * the configured {@link IdGenerator} (defaults to {@link RandomIdGenerator}).
+	 * @return the unique identifier of this document
+	 * @see RandomIdGenerator
+	 */
+	public String getId() {
+		return this.id;
+	}
+
+	/**
+	 * Returns the document's text content, if any.
+	 * @return the text content
+	 */
+	//@Nullable
+	public String getText() {
+		return this.text;
+	}
+
+	/**
+	 * Determines whether this document contains text or media content.
+	 * @return true if this document contains text content (accessible via
+	 * {@link #getText()}), false if it contains media content (accessible via
+	 */
+//	public boolean isText() {
+//		return this.text != null;
+//	}
+
+
+	/**
+	 * Returns the metadata associated with this document.
+	 * <p>
+	 * The metadata values are restricted to simple types (string, int, float, boolean)
+	 * for compatibility with Vector Databases.
+	 * @return the metadata map
+	 */
+	public Map<String, Object> getMetadata() {
+		return this.metadata;
+	}
+
+
+	public Double getScore() {
+		return this.score;
+	}
+
+
+	public Builder mutate() {
+		return new Builder().id(this.id).text(this.text).metadata(this.metadata).score(this.score);
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		if (o == null || this.getClass() != o.getClass()) {
+			return false;
+		}
+		GDocument document = (GDocument) o;
+		return Objects.equals(this.id, document.id) && Objects.equals(this.text, document.text)
+				&& Objects.equals(this.metadata, document.metadata)
+				&& Objects.equals(this.score, document.score);
+	}
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(this.id, this.text, this.metadata, this.score);
+	}
+
+	@Override
+	public String toString() {
+		return "Document{" + "id='" + this.id + '\'' + ", text='" + this.text + '\''
+				+ ", metadata=" + this.metadata + ", score=" + this.score + '}';
+	}
+
+	public static class Builder {
+
+		private String id;
+
+		private String text;
+
+
+		private Map<String, Object> metadata = new HashMap<>();
+
+		private Double score;
+
+		private IdGenerator idGenerator = new RandomIdGenerator();
+
+		public Builder idGenerator(IdGenerator idGenerator) {
+			//Assert.notNull(idGenerator, "idGenerator cannot be null");
+			this.idGenerator = idGenerator;
+			return this;
+		}
+
+		public Builder id(String id) {
+			//Assert.hasText(id, "id cannot be null or empty");
+			this.id = id;
+			return this;
+		}
+
+		/**
+		 * Sets the text content of the document.
+		 * <p>
+		 * Either text or media content must be set before building the document, but not
+		 * both.
+		 * @param text the text content
+		 * @return the builder instance
+		 */
+		public Builder text(String text) {
+			this.text = text;
+			return this;
+		}
+
+
+		public Builder metadata(Map<String, Object> metadata) {
+			//Assert.notNull(metadata, "metadata cannot be null");
+			this.metadata = metadata;
+			return this;
+		}
+
+		public Builder metadata(String key, Object value) {
+			//Assert.notNull(key, "metadata key cannot be null");
+			//Assert.notNull(value, "metadata value cannot be null");
+			this.metadata.put(key, value);
+			return this;
+		}
+
+		/**
+		 * Sets a score value for this document.
+		 * <p>
+		 * Common uses include:
+		 * <ul>
+		 * <li>Measure of similarity between the embedding value of the document's
+		 * text/media and a query vector, where higher scores indicate greater similarity
+		 * (opposite of distance measure)
+		 * <li>Text relevancy rankings from retrieval systems
+		 * <li>Custom relevancy metrics from RAG patterns
+		 * </ul>
+		 * <p>
+		 * Higher values typically indicate greater relevance or similarity.
+		 * @param score the document score, may be null
+		 * @return the builder instance
+		 */
+		public Builder score(Double score) {
+			this.score = score;
+			return this;
+		}
+
+		public static boolean hasText(String str) {
+			return str != null && !str.isBlank();
+		}
+
+		public GDocument build() {
+			if (!hasText(this.id)) {
+				this.id = this.idGenerator.generateId(this.text, this.metadata);
+			}
+			return new GDocument(this.id, this.text, this.metadata, this.score);
+		}
+	}
+
+}

+ 479 - 0
gtbook/src/main/java/org/cnnlp/data/html/HtmlRender.java

@@ -0,0 +1,479 @@
+package org.cnnlp.data.html;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.cnnlp.data.book.*;
+import org.cnnlp.data.md.DocRender;
+import org.cnnlp.data.md.GTDoc;
+import org.cnnlp.data.md.GTDocs;
+import org.cnnlp.data.md.MDRegxUtil;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import gnu.trove.TIntArrayList;
+
+
+//d:/data/pdf2htmlex/pdf2htmlEX.exe  --embed-css 0 --embed-font 0 --embed-javascript  0 --embed-image 0 --embed-outline 0 --embed-external-font 0 --split-pages 1 
+
+public class HtmlRender {
+
+	private static Logger log = LoggerFactory.getLogger(DocRender.class);
+	
+	public final static String URL_PREFIX = "tourl";
+	//<!DOCTYPE html>
+	//<!-- Created by pdf2htmlEX (https://github.com/coolwanglu/pdf2htmlex) -->
+	//	<html xmlns="http://www.w3.org/1999/xhtml">
+	String htmlStart = "<!DOCTYPE html>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n";
+	//String htmlStart = "<!DOCTYPE html>\n<html><head>\n<meta charset=\"UTF-8\">\n" ;
+	String htmlHead = "</head>\n<body>\n";
+	String htmlEnd = "</body>\n</html>";
+	
+	String pageStart = "<div id=\"page-container\">";
+	String pageEnd = "</div>";
+	
+	String bodyFirst = "<div id=\"sidebar\">\r\n" + 
+			"<div id=\"outline\">\r\n" + 
+			"</div>\r\n" + 
+			"</div>";
+	
+	
+	String tagPageContainer = "page-container";
+	
+	protected File source;
+	protected Map<String,String> gparams;
+		
+	protected GTBook gbook;
+	protected GTDocs gdocs;
+	
+	
+	public HtmlRender() {
+		init();
+	}
+	
+	public void init(){
+		
+	}
+	
+	
+	private List<String> getHeadInfo(Document doc, String tag){
+		List<String> metas = new ArrayList<>();
+		Elements bmetas = doc.head().select(tag);
+		if (bmetas != null)
+		for(Element e1: bmetas) {
+			//System.out.println(e1.outerHtml());
+			metas.add(e1.outerHtml());
+		}
+		return metas;
+	}
+	
+	private void appendHeadHtml(StringBuilder head,List<String> metas) {
+		//StringBuilder sb = new StringBuilder();
+		//sb.append("<head>");
+		//sb.append("</head>");
+		if (metas != null) {
+		   for(String s : metas) {
+			   head.append(s).append("\n");
+		   }
+		}
+	}
+	
+	public String getHtml(String htmlHead,String page,boolean wrapHtml){
+		StringBuilder sb = new StringBuilder();
+		//htmlr.render(nd,sb);
+		//System.out.println(sb.toString());
+		if (wrapHtml){
+			String html = htmlStart +
+					//headPlugIn +  // add your stylesheets, scripts styles etc.
+					// uncomment line below for adding style for custom embedded fonts
+					// nonLatinFonts +
+					//"<head>\n" +
+					htmlHead+"\n"+
+					//"</head>\n" +
+					"<body>\n" +
+					bodyFirst +"\n"+
+                    "<div id=\"page-container\">"+
+					page + "\n" +
+                    "</div>\n"+
+					htmlEnd;
+
+			//System.out.println(html);
+			return html;
+		}else{
+			return sb.toString();
+		}
+	}
+	
+	
+	//<div id="page-container">
+	public void parse(String fn,String charset) throws IOException {
+		Document doc = Jsoup.parse(new File(fn), charset);
+		doc.outputSettings().prettyPrint(false);
+		
+		List<String> metas = getHeadInfo(doc, "meta");
+		List<String> scripts = getHeadInfo(doc, "script");
+		List<String> links = getHeadInfo(doc, "link");
+		
+//		System.out.println(metas);
+//		System.out.println(links);
+//		System.out.println(scripts);
+	
+		Element shead = doc.head();
+		String head = shead.outerHtml();
+		
+	
+		
+		Element container = doc.getElementById("page-container");
+		
+		Elements lasts = container.previousElementSiblings();
+		
+		Elements nexts = container.nextElementSiblings();
+		
+		System.out.println(lasts.outerHtml());
+
+		System.out.println("=========");
+		System.out.println(nexts.toString());
+		
+		Elements es = container.children();
+		
+//		Elements es2 = es.clone();
+//		
+//		Element e1 = es.get(0);
+//		String h1 = e1.outerHtml();
+//		String h2 = e1.text();
+//		String h3 = e1.toString();
+//		String h4 = e1.ownText();		
+//		container.html(h1);
+
+		
+		int len = es.size();
+		//toString/outerHtml 这两个方法用来取得一个元素中的html内容
+		for (int i = 0; i < len; i++) {
+			Element e1 = es.get(i);
+			String h1 = e1.outerHtml();
+			String t1 = e1.text();
+			
+//			Elements bs2 = e1.previousElementSiblings();
+//			
+//			Elements bs3 = e1.nextElementSiblings();
+			
+			String html = getHtml(head, h1, true);
+			//System.out.println(html);
+			//System.out.println("=========");
+			List<String> ls1 = MDRegxUtil.splitByBrackets(h1);
+			//System.out.println(ls1);
+			for(String s:ls1) {
+				System.out.println(s);
+			}
+			break;
+		}
+
+		
+		
+//		for (int i = len-1; i >0; i--) {
+//			es.remove(i);			
+//		}
+//		System.out.println(es.outerHtml());
+		
+//		for (int i = 1; i < len; i++) {
+//			Element e1 = es.get(i);
+//			es.remove(i);
+//			System.out.println("=============");
+//			System.out.println(e1.toString());
+//			
+//			System.out.println("---------");
+//			System.out.println(e1.outerHtml());
+//			
+//		}
+		
+		//System.out.println(doc.outerHtml());
+		
+	}
+	
+	
+	   private static void print(String msg, Object... args) {
+	        System.out.println(String.format(msg, args));
+	    }
+
+	    private static String trim(String s, int width) {
+	        if (s.length() > width)
+	            return s.substring(0, width-1) + ".";
+	        else
+	            return s;
+	    }
+	    
+	public void renderToBook(Map<String,String> params) throws IOException{
+		
+        String imagePrefix = params.get(URL_PREFIX);
+        if (imagePrefix == null) imagePrefix = "";
+        
+		Document doc = Jsoup.parse(source, "utf-8");
+		doc.outputSettings().prettyPrint(false);
+		
+		String fn = source.getAbsolutePath();
+		//String cssFile = imagePrefix+FilenameUtils.getBaseName(fn)+".css";
+		String cssFile = imagePrefix+"p2h.css";
+		
+		//System.out.println("baseName="+FilenameUtils.getBaseName(fn));
+		//System.out.println("name="+FilenameUtils.getName(fn));
+		
+		Element container = doc.getElementById(tagPageContainer);
+		if (container == null) return;
+		Elements es = container.children();
+		if (es == null) return;
+		
+		int len = es.size();
+		
+		String head = null;
+		Element th = doc.head();
+		if (th != null) {			
+			head = th.outerHtml();
+		}
+		th.select("script").remove();
+		
+		GTNode root = GTNode.buildRoot("");
+		GTNode[] bns = new GTNode[len];
+		
+		GTBook book = new GTBook();
+		book.setRoot(root);
+		
+        if (params != null){
+        	String title = params.get(GTDoc.TITLE);
+        	book.setTitle(title);
+        	
+        	String id = params.get("id");
+        	book.setId(id);
+        	
+        	String tags = params.get(GTDoc.TAGS);
+        	if (tags != null) {
+        		book.putMeta(GTDoc.TAGS, tags);	        		
+        	}
+        	
+        	String extInfo = params.get(GTBook.SOURCE_TYPE);
+        	if (extInfo != null) {
+        		book.putMeta(GTBook.SOURCE_TYPE, extInfo);        		
+        	}
+        	
+        	extInfo = params.get(GTBook.SOURCE_PRODUCER);
+        	if (extInfo != null) {
+        		book.putMeta(GTBook.SOURCE_PRODUCER, extInfo);        		
+        	}
+        	
+        	if (head != null) {
+        		book.putMeta(GTBook.SOURCE_HTML_HEAD, head);
+        	}
+        	
+        	if (cssFile != null) {
+        		book.putMeta(GTBook.SOURCE_HTML_CSS, cssFile);
+        	}
+        }
+		
+    	this.gbook = book;
+    	this.gdocs =  new GTDocs();
+    	
+    	int baseNo = 1;
+    	int depth = 1;
+    	for (int i = 0; i < len; i++) {
+			Element e1 = es.get(i);
+			//String h1 = e1.outerHtml();
+			String t1 = e1.text();
+			
+			Elements media = e1.select("img[src]");
+			for(Element src : media) {
+//				print(" * %s: <%s> %sx%s (%s)",
+//                        src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"),
+//                        trim(src.attr("alt"), 20));
+				//System.out.println(src.toString());
+				String iurl = src.attr("src");
+				if (iurl != null) {
+					int i1 = iurl.indexOf('/');
+					if (i1 <0) {
+						src.attr("src", imagePrefix+iurl);
+					}
+					//System.out.println(src.attr("src"));
+				}
+			}
+			
+			
+//			Elements bs2 = e1.previousElementSiblings();
+//			Elements bs3 = e1.nextElementSiblings();
+			//String html = getHtml(head, h1, true);
+			//System.out.println(html);
+			String h1 = e1.outerHtml();
+			//System.out.println(h1);
+			//System.out.println("=========");
+			List<String> hs1 = MDRegxUtil.splitByBrackets(h1);
+			List<String> ts1 = new ArrayList<>();
+			
+			ts1.add(t1);
+			
+			//System.out.println(ls1);
+			//for(String s:hs1) {
+			//	System.out.println(s);
+			//}
+			
+			GTNode gn = GTNode.buildNode(null,i+baseNo);
+			gn.setLabel("第 "+Integer.toString(i+1)+" 页");
+			gn.setDepth(depth);
+			root.addChild(gn);
+			
+			baseNo++;
+			GTNode leaf = GTNode.buildLeaf(i+baseNo);
+			leaf.setType(GTBookConstants.HTML_PAGE);
+			leaf.setDepth(depth+1);
+			gn.addChild(leaf);
+			MDElement v = new MDElement(ts1,hs1);
+		    leaf.setValue(v);
+			
+			String s2 = StrUtil.getMeaningful(t1);
+			//System.out.println("b="+b);
+
+			if (s2.length()>1) {
+				GTDoc gdoc = new GTDoc();
+				gdoc.addField(GTDoc.PARAGRAPHID, GTDoc.PREFIX_PARAGRAPHID+gn.getId());
+				gdoc.addField(GTDoc.CONTENT, s2);
+				gdoc.addField(GTDoc.TITLE, gn.getLabel());
+				gdoc.addField(GTDoc.PATH, Integer.toString(i));
+				gdoc.addField(GTDoc.BOOKID, gbook.getId());
+
+				String tags = null;
+				if (gparams != null) {
+					tags = gparams.get(GTDoc.TAGS);
+					gdoc.addField(GTDoc.TAGS, tags);
+				}
+
+				//System.out.println(gdoc.toListString());
+				//System.out.println("=========");
+				gdocs.add(gdoc);
+			}
+		}
+		
+    	Element body = doc.body();
+    	String bh = "<body><div id=\"page-container\">"+
+				"${_page_1_}" + "\n" +
+                "</div></body>";
+    	body.html(bh);
+    	
+    	//System.out.println(doc.toString());
+    	gbook.addMeta(GTBook.SOURCE_HTML_TEMPLATE, doc.toString());
+	}
+	
+	
+	public void parse(File htmlFile,Map<String,String> params) throws IOException {
+		this.source = htmlFile;
+		this.gparams = params;
+		
+	    //Node doc = MDHelper.toMDNode(mdFile);
+        //DocTree dt = MDHelper.getDocumentTree(doc);
+        //this.tree = dt;
+        
+        renderToBook(params);
+        
+        //toGTDoc(params);
+        
+	}
+	
+	public GTBook getGBook() {
+		return gbook;
+	}
+	
+	public GTDocs getGDocs() {
+		return gdocs;
+	}
+	
+	private boolean isPageNo(String text) {
+		if (text == null || text.length()<5 || text.length()>=10) {
+			return false;
+		}
+		
+		if (text.charAt(0) == '第' && text.charAt(text.length()-1)=='页') {
+			return true;
+		}
+		return false;
+	}
+	
+	public void toTxt(File fn) throws IOException{
+		TIntArrayList path = new TIntArrayList();
+		List<String> ls = new ArrayList<>();
+
+		GTNode root = gbook.getRoot();
+		
+		IVisitor visitor = new IVisitor() {
+			@Override
+			public void visit(INode now, TIntArrayList path) {
+				if (now.isLeaf()){
+					String text = GTBookUtil.listToString(now.getText(), "\n");					
+					ls.add(text);
+				}else{
+					String text = now.getLabel();
+					if (!isPageNo(text)) {						
+						ls.add(text);
+					}
+				}
+			}
+		};
+		root.traverseDescendants(path, visitor);
+
+		BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));
+		try {
+			for(String s : ls){
+				s= s.replace("<br>", "\n");
+				out.write(s);
+				out.write("\n");
+			}
+		}finally {
+			if (out != null){
+				out.close();
+			}
+		}
+	}
+
+	
+	public void parse(File mdFile,Map<String,String> params,File txtFile,File bookFile,File gdocFile) throws IOException{
+		
+		log.info("start parse markdown file : "+mdFile.getAbsolutePath());		
+		parse(mdFile,params);
+       
+		log.info("start toTxt : "+txtFile.getAbsolutePath());
+		toTxt(txtFile);
+
+		log.info("start toGTBook : "+bookFile.getAbsolutePath());
+        gbook.saveZipObj(bookFile);
+        
+        log.info("start toGTDoc : "+gdocFile.getAbsolutePath());
+        gdocs.save(gdocFile);
+        
+	}
+	
+//	public static void main(String[] args) throws IOException {
+//		String fn = "D:\\data\\pdf2htmlex\\test3\\HMT-1.html";
+//
+//		Map<String,String> params = new HashMap<String, String>();
+//		
+//		params.put(GTBook.SOURCE_TYPE, "pdf");
+//		params.put(GTBook.SOURCE_PRODUCER, "pdf2htmlEX");
+//		params.put("id", "test1");
+//		params.put(GTDoc.TAGS, "IT");
+//		params.put(URL_PREFIX, "./123456789/image/");
+//		HtmlRender hr = new HtmlRender();
+//		//hr.parse(fn,"utf-8");
+//		//hr.parse(new File(fn), params);
+//		
+//		String txtFile = fn+".1.txt";
+//		String bookFile = fn+".1.gtb";
+//		String gdocFile = fn+".1.gdoc";
+//		
+//		hr.parse(new File(fn), params, new File(txtFile), new File(bookFile), new File(gdocFile));
+//		
+//	}
+}

+ 135 - 0
gtbook/src/main/java/org/cnnlp/data/html/StrUtil.java

@@ -0,0 +1,135 @@
+package org.cnnlp.data.html;
+
+import java.util.Arrays;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class StrUtil {
+
+	/**
+	 * 判断是否是中日韩文字
+	 * @param c     要判断的字符
+	 * @return      true或false
+	 */
+	private static boolean isChinese(char c) {
+	    Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
+	    if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
+	            || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
+	            || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
+	            || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
+	            || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
+	            || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
+	        return true;
+	    }
+	    return false;
+	}
+
+
+	/**
+	 * 判断是否是数字或者是英文字母
+	 * @param c
+	 * @return
+	 */
+	public static boolean isCharOrDigital(char c){
+	    if((c >='0' && c<='9')||(c >='a' && c<='z' ||  c >='A' && c<='Z')){
+	        return true;
+	    }
+	    return false;
+	}
+	
+	
+	public static boolean isMessyCode(String strName) {
+	    //去除字符串中的空格 制表符 换行 回车
+	    Pattern p = Pattern.compile("\\s*|\t*|\r*|\n*");
+	    Matcher m = p.matcher(strName);
+	    String after = m.replaceAll("");
+	    //去除字符串中的标点符号
+	    String temp = after.replaceAll("\\p{P}", "");
+	    //处理之后转换成字符数组
+	    char[] ch = temp.trim().toCharArray();
+	    for (int i = 0; i < ch.length; i++) {
+	        char c = ch[i];
+	        //判断是否是数字或者英文字符
+	        if (!isCharOrDigital(c)) {
+	            //判断是否是中日韩文
+	            if (!isChinese(c)) {
+	                //如果不是数字或者英文字符也不是中日韩文则表示是乱码返回true
+	                return true;
+	            }
+	        }
+	    }
+	    //表示不是乱码 返回false
+	    return false;
+	}
+
+	public static boolean isMeaningful(String s) {
+		if (s == null || s.length()<=0) return false;
+		char[] ch = s.toCharArray();
+		boolean ok = false;
+	    for (int i = 0; i < ch.length; i++) {
+	        char c = ch[i];
+	        //判断是否是数字或者英文字符
+	        if (isChinese(c) || isCharOrDigital(c)) {
+	        	ok = true;
+	        	break;
+	        }
+	    }
+	    return ok;
+	}
+
+	public static boolean isMeaningful(String s,double alpha) {
+		if (s == null || s.length()<=0) return false;
+		char[] ch = s.toCharArray();
+		int counter = 0;;
+	    for (int i = 0; i < ch.length; i++) {
+	        char c = ch[i];
+	        //判断是否是数字或者英文字符
+	        if (isChinese(c) || isCharOrDigital(c)) {
+	        	counter++;
+	        }
+	    }
+	    double d = counter;
+	    d = d/ch.length;
+	    if (d >= alpha) {
+	    	return true;
+	    }
+	    return false;
+	}
+
+	public static String getMeaningful(String s) {
+		if (s == null || s.length()<=0) return "";
+		char[] ch = s.toCharArray();
+		char[] ch2 = new char[ch.length];
+		int counter = 0;
+		int lastI = -1;
+	    for (int i = 0; i < ch.length; i++) {
+	        char c = ch[i];
+	        //判断是否是数字或者英文字符
+	        if (isChinese(c) || isCharOrDigital(c)) {
+	        	if (i == lastI+1) {
+	        		ch2[counter++] = c;
+	        		lastI = i;
+	        	}else {
+	        		ch2[counter++] = ' ';
+	        		ch2[counter++] = c;
+	        		lastI = i;
+	        	}
+	        }
+	    }
+	    
+	    if (counter < ch2.length) {
+	    	ch2 = Arrays.copyOf(ch2, counter);
+	    }
+	    String s2 = new String(ch2);
+	    return s2;
+	}
+	
+//	public static void main(String[] args) {
+//		String s = "󱖟󰊥 󲴏󱎵󰨆󲩔 󲕽󲚲󱷒a";
+//		//s="自由贸易试验区世纪大道";
+//		//System.out.println(isMeaningful(s));
+//		s="中国(上海)自由贸易试验区世纪大道 100 号上海环球金融中心 75 楼 75T30 室 中国(上海)自由贸易试验区商城路 618 号 北京市朝阳区安立路 66 号 4 号楼 北京市朝阳区建国门外大街 1 号 国贸大厦 2 座 27 层及 28 层 上海市黄浦区广东路 689 号 󰾪󰄁󱙫󰑝󰡒󱷒󱚙󲕽󰀐󱜂󰒰󲤖󰐺󲲴󳠉󱎵󲩵󳗿󳎚󱜂󰒰󰐺 󱕅󱰀󲅖󳗿󳎚󳠉󱰚󱝤󲴱󳗿󳎚󳠉󱡺󰾪󰷪󲩵󱴶󳒓󲲴󱎵󳗿󳎚󲩵 󱴶󲜔󱜂󰒰󱎵󲩵󳗿󳎚󰑝󳑣󱎵󳗿󳎚󱩑󲩵 Semiconductor Manufacturing International Corporation 中芯国际集成电路制造有限公司 Cricket Square, Hutchins Drive, P.O. Box 2681, Grand Cayman, KY1-1111 Cayman Islands 首次公开发行人民币普通股(A 股)股票 并在科创板上市招股说明书 (注册稿) 󰌿󰑝󱎵󲕽󱉤󲤨󳐱󱰀󰪨󲣲󰍄󲣲󱐂󲕽󱐩󱜼󰑝󰀌󱷒󲤥󰌿 󰐺󰂟󲕽󱷒󱚙󱎵󰨆󰉹󳖵󳑣󱉙󲩵󱴶󰢔󱎵󰀌󱷒󲤥󰌿󲩵 󱎵󰂟 联席保荐机构(主承销商) 联席主承销商 北京市西城区阜成门外大街 29 号 ";
+//		s="ab c";
+//		System.out.println(getMeaningful(s));
+//	}
+}

+ 1687 - 0
gtbook/src/main/java/org/cnnlp/data/md/DocRender.java

@@ -0,0 +1,1687 @@
+package org.cnnlp.data.md;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.cnnlp.data.book.*;
+import org.cnnlp.data.util.FileUtil;
+//import org.commonmark.node.Document;
+//import org.commonmark.node.Node;
+//import org.commonmark.renderer.html.HtmlRenderer;
+//import org.commonmark.renderer.text.TextContentRenderer;
+//import org.cnnlp.data.test.TitleExtract.HeadingExtension;
+import org.jsoup.Jsoup;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.vladsch.flexmark.ast.HtmlBlock;
+import com.vladsch.flexmark.ast.Paragraph;
+import com.vladsch.flexmark.ext.tables.TableBlock;
+import com.vladsch.flexmark.parser.Parser;
+import com.vladsch.flexmark.profile.pegdown.Extensions;
+import com.vladsch.flexmark.profile.pegdown.PegdownOptionsAdapter;
+import com.vladsch.flexmark.util.ast.Node;
+import com.vladsch.flexmark.util.ast.TextCollectingVisitor;
+//import com.vladsch.flexmark.util.ast.TextCollectingVisitor2;
+import com.vladsch.flexmark.util.data.DataHolder;
+import com.vladsch.flexmark.util.data.MutableDataSet;
+import com.vladsch.flexmark.util.sequence.BasedSequence;
+//import com.vladsch.flexmark.util.sequence.SegmentedSequence;
+
+import gnu.trove.TIntArrayList;
+
+public class DocRender {
+
+	private static Logger log = LoggerFactory.getLogger(DocRender.class);
+	
    // Pegdown-compatible parser options with all flexmark extensions enabled.
    final private static DataHolder OPTIONS = PegdownOptionsAdapter.flexmarkOptions(
            Extensions.ALL
    );
    static final MutableDataSet FORMAT_OPTIONS = new MutableDataSet();
    static {
        // copy extensions from Pegdown compatible to Formatting
        FORMAT_OPTIONS.set(Parser.EXTENSIONS, Parser.EXTENSIONS.get(OPTIONS));
    }
    
    static protected final Parser PARSER = Parser.builder(OPTIONS).build();
   
    // Upper bound for strings later handed to writeUTF(String); longer strings
    // are split (see splitLongString) to avoid the exception writeUTF throws
    // on oversized payloads.
    public static final int MAX_STRING_LEN = 20000;
    
    // Label used for paragraphs that have no parent heading.
    // NOTE(review): "DEFALT" is a typo for "DEFAULT", but the constant is
    // referenced elsewhere, so the name is kept as-is.
    public static final String DEFALT_LABEL = "...";
    
    HtmlToPlainText formatter = null;  // lazily created in processHtmlBlock()
    
	int htmlLevel = 3;       // heading depth at which HTML chapter files are cut
	int indexDocLevel = 6;   // heading depth at which index docs are cut
	
	//TextContentRenderer textr;
	TextCollectingVisitor textr;  // plain-text extractor, set up in init()
	//HtmlRenderer htmlr;
	
	// Output locations; fileDir is the base, the others are relative sub-dirs.
	String fileDir = "C:/data/pdf/test/";
	String htmlDir = "/mdhtml/";
	String indexDir = "/gdoc/";
	
	String htmlBaseName = "yls";
	String indexBaseName = "yls";
	
	String htmlExtName = ".html";
	String indexExtName = ".gdoc";
	
	// Extra markup injected into the <head> of generated HTML pages.
	String headPlugIn = "<link rel=\"stylesheet\" type=\"text/css\" href=\"./css/document.css\" />\n" +
	"<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n"+
	"<meta name=\"viewport\" content=\"width=device-width,initial-scale=1.0\">\n";
	
	// HTML page skeleton fragments used when wrapping rendered chapters.
	String htmlStart = "<!DOCTYPE html>\n<html><head>\n<meta charset=\"UTF-8\">\n" ;
	String htmlHead = "</head>\n<body>\n";
	String htmlEnd = "</body>\n</html>";
	
	Writer out = null;  // lazily opened index writer, see writeOut()
	
	protected File source;
	protected Map<String,String> gparams;
	
	protected DocTree tree;
	
	protected GTBook gbook;
	protected GTDocs gdocs;
+	
	/** Creates a renderer and initialises the markdown text collector. */
	public DocRender() {
		init();
	}
+	
+	
	/**
	 * Initialises the flexmark helpers used for rendering. Only the plain-text
	 * collecting visitor is set up; the HTML renderer code paths are disabled.
	 */
	public void init(){
	    //HtmlRenderer renderer = HtmlRenderer.builder().escapeHtml(false).softbreak("").build();
	    //HtmlRenderer renderer = HtmlRenderer.builder(OPTIONS).build();
	    //this.htmlr = renderer;
	    
	    // stripNewlines(true) would strip newlines (old TextContentRenderer option)
	    //TextContentRenderer text = TextContentRenderer.builder().stripNewlines(true).build();
	    TextCollectingVisitor textCollectingVisitor = new TextCollectingVisitor();
	    this.textr = textCollectingVisitor;
	}
+	
+	
	/** @return the extra markup injected into the generated HTML head section */
	public String getHeadPlugIn() {
		return headPlugIn;
	}
+
	/** Overrides the markup injected into the generated HTML head section. */
	public void setHeadPlugIn(String headPlugIn) {
		this.headPlugIn = headPlugIn;
	}
+	
+	//<meta http-equiv="X-UA-Compatible" content="IE=edge">
+	//  <meta name="viewport" content="width=device-width,initial-scale=1.0">
+//	public String getHtml(Node nd,boolean wrapHtml){
+//		StringBuilder sb = new StringBuilder();
+//		htmlr.render(nd,sb);
+//		//System.out.println(sb.toString());
+//		if (wrapHtml){
+//			String html = htmlStart +
+//					headPlugIn +  // add your stylesheets, scripts styles etc.
+//					// uncomment line below for adding style for custom embedded fonts
+//					// nonLatinFonts +
+//					htmlHead + sb.toString() + "\n" +
+//					htmlEnd;
+//
+//			//System.out.println(html);
+//			return html;
+//		}else{
+//			return sb.toString();
+//		}
+//	}
+	private void visit(Node nd,int depth) {
+		//BasedSequence bs1 = nd.baseSubSequence(nd.getStartOffset(), nd.getEndOffset());
+		//System.out.println(depth+"->"+bs1.toString());
+		if (nd.hasChildren()) {
+			for (Node child = nd.getFirstChild(); child != null; child = child.getNext()) {
+				//children.add(child);
+				visit(child,depth+1);
+			}  
+		}
+
+	}
+	
+	public String getMDTxt(Node nd){
+		//StringBuilder sb = new StringBuilder();
+//		htmlr.render(nd,sb);
+		//System.out.println(sb.toString());
+//		if (wrapHtml){
+//			String html = htmlStart +
+//					headPlugIn +  // add your stylesheets, scripts styles etc.
+//					// uncomment line below for adding style for custom embedded fonts
+//					// nonLatinFonts +
+//					htmlHead + sb.toString() + "\n" +
+//					htmlEnd;
+//
+//			//System.out.println(html);
+//			return html;
+//		}else{
+//			return sb.toString();
+//		}
+		//visit(nd,0);
+		BasedSequence bs1 = nd.baseSubSequence(nd.getStartOffset(), nd.getEndOffset());	
+		return bs1.toString();
+	}
+
+	
+	//private String wrapHTMLTag(String tagStart,String tagEnd,String content)	
+	//}
+
+	
+	
+	
+	public String getText(Node nd){
+		//StringBuilder sb = new StringBuilder();
+		//textr.render(nd,sb);
+		//return sb.toString();
+		return textr.collectAndGetText(nd);
+	}
+	
+//	private TreeBlock makeTreeBlock(int index,){
+//		
+//	}
+	
+	private String getUrl(int index){
+		String fn = htmlDir + htmlBaseName+String.valueOf(index)+".html";
+		return fn;
+	}
+	
+//	private List<String> getHeadlines(int i,String[] titles,int[] fathers){
+//		
+//	}
+	
+	private void writeOut(GTDoc doc) throws IOException{
+		if (out == null){
+			String fn = fileDir+indexDir + indexBaseName+"1.gdoc";
+			BufferedWriter out1 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));
+			this.out = out1;
+		}
+		
+		if (out != null){
+			List<String> ls = doc.toListString();
+			for(String s :ls){
+				out.write(s);
+			}
+		}
+	}
+	
+
+	/*
+	public void renderToHtml(DocTree dt) throws IOException{
+		//init();
+		
+		List<Node> ls = dt.getSource();
+		int[] depthes = dt.getDepthes();
+		int[] fathers = dt.getFathers();
+		
+		String[] titles = new String[ls.size()];
+		String[] urls = new String[ls.size()];
+		
+		int chapters = 1;
+		int beginIndex =0;
+		
+		// 生成 html 文件
+		for (int i = 0; i < depthes.length; i++) {
+			
+			if (depthes[i] == htmlLevel){
+				if (i <= beginIndex) continue;
+				
+				Document tb = new Document();
+				int startLevel = depthes[beginIndex];
+				tb.appendChild(ls.get(beginIndex));				
+				for (int j = beginIndex+1; j < i; j++) {
+					if (depthes[j]>0 && depthes[j]<= startLevel) break;
+					tb.appendChild(ls.get(j));
+				}
+				urls[beginIndex] = getUrl(chapters);
+				String html = getHtml(tb,true);
+				FileUtil.saveStringToFile(html, fileDir+urls[beginIndex]);
+				chapters++;
+				
+				beginIndex = i;
+//				Document tb = new Document();
+//				tb.appendChild(ls.get(i));
+//				int j = i+1;
+//				for (; j < fathers.length; j++) {
+//					if (depthes[j]<0 || depthes[j] > htmlLevel){
+//						tb.appendChild(ls.get(j));
+//					}else{
+//						break;
+//					}
+//					//System.out.println("i="+i+"  j="+j);
+//				}
+//				
+//				urls[i] = getUrl(chapters);
+//				String html = getHtml(tb);
+//				FileUtil.saveStringToFile(html, fileDir+urls[i]);
+//				i = j-1;
+//				chapters++;
+				//System.out.println("=================");
+			}else{
+				
+			}
+		}
+		
+		if (beginIndex < depthes.length){
+			Document tb = new Document();
+			for (int j = beginIndex; j < depthes.length; j++) {
+				tb.appendChild(ls.get(j));
+				
+			}
+			urls[beginIndex] = getUrl(chapters);
+			String html = getHtml(tb,true);
+			FileUtil.saveStringToFile(html, fileDir+urls[beginIndex]);
+		}
+	}
+	
+	public void renderToJson(DocTree dt) throws IOException{
+		//init();
+		
+		List<Node> ls = dt.getSource();
+		int[] depthes = dt.getDepthes();
+		int[] fathers = dt.getFathers();
+		
+		String[] titles = new String[ls.size()];
+		String[] urls = new String[ls.size()];
+		
+		int chapters = 1;
+		int beginIndex =0;
+		// 生成 markdown的文档结构
+		
+		JsonObject[] jos = new JsonObject[depthes.length];
+		JsonArray firstNodes = new JsonArray(); 
+		for (int i = 0; i < depthes.length; i++) {	
+			Node nd = ls.get(i);
+			if (depthes[i] >=0){
+				JsonObject obj = new JsonObject();
+				obj.put("id", i);
+				obj.put("title", stripLast(titles[i]));
+				
+				int ii1 = i;
+				while(ii1 >=0 && urls[ii1]== null){
+					ii1--;
+				}
+				obj.put("url", urls[ii1]);
+				
+				jos[i] = obj;
+				if (fathers[i]>=0){
+					JsonObject fo = jos[fathers[i]];
+					Object ch1 = fo.get("children");
+					JsonArray children = null;
+					if (ch1 == null){
+						children = new JsonArray();
+						children.add(obj);
+						fo.put("children", children);
+					}else{						
+						children = (JsonArray)ch1;
+						children.add(obj);
+					}
+				}
+				
+				if (depthes[i] == dt.getMinLevel()){
+					firstNodes.add(obj);
+				}
+			}
+		}
+		
+		JsonObject prop = new JsonObject();
+		prop.put("children", "children");
+		prop.put("title", "title");
+		
+		JsonObject ret = new JsonObject();
+		ret.put("data", firstNodes);
+		ret.put("defaultProps", prop);
+		
+		 StringWriter outJson = new StringWriter();
+		 try {
+			 ret.toJson(outJson);
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		 System.out.println(outJson.toString());
+		 String fn = fileDir+indexDir +htmlBaseName+".json";
+		 FileUtil.saveStringToFile(outJson.toString(), fn);
+		 
+		
+		if (out != null){
+			out.close();
+		}
+	}
+	
+	
+	
+	public void renderToGTDoc(DocTree dt) throws IOException{
+		//init();
+		
+		List<Node> ls = dt.getSource();
+		int[] depthes = dt.getDepthes();
+		int[] fathers = dt.getFathers();
+		
+		String[] titles = new String[ls.size()];
+		String[] urls = new String[ls.size()];
+		
+		int chapters = 1;
+		int beginIndex =0;
+		
+		// 生成 markdown文件
+		int sections = 1;
+		for (int i = 0; i < depthes.length; i++) {
+			
+			Node nd = ls.get(i);
+			Document tb = null;
+			int nextI = i;
+			int titleI = i;
+			
+			if (depthes[i] >=0){
+				titles[i] = getText(nd);
+				
+				if (depthes[i] == indexDocLevel){
+					tb = new Document();
+					//tb.appendChild(ls.get(i));
+					String title = titles[i];
+					int j = i+1;
+					for (; j < fathers.length; j++) {
+						if (depthes[j]<0 || depthes[j] > indexDocLevel){
+							tb.appendChild(ls.get(j));
+						}else{
+							break;
+						}
+					}
+					titleI = i;
+//					String text = getText(tb);
+//
+//					System.out.println("title2="+titles[i]);
+//					System.out.println("text2="+text);
+//				
+//					System.out.println("=============");
+//					sections++;
+					nextI = j-1;
+				}
+				
+			}else{
+				tb = new Document();
+				tb.appendChild(nd);
+				int j = i+1;
+				titleI = i-1;
+				for (; j < fathers.length; j++) {
+					if (depthes[j]<0 ){
+						tb.appendChild(ls.get(j));
+					}else{
+						break;
+					}
+				}
+				nextI = j-1;
+			}
+			
+			
+			if (tb != null){
+				
+				String text = getText(tb);
+
+				//System.out.println("title="+titles[titleI]);
+				//System.out.println("text="+text);
+			    //System.out.println("i="+i+"   titleI="+titleI);
+				int[] fathers1 = dt.getFather(titleI);
+				//System.out.println(Arrays.toString(fathers1));
+				//String[] hs = getHeadlines(titleI);
+				//System.out.println("=============");
+				GTDoc gdoc = new GTDoc();
+				gdoc.addField(GTDoc.TITLE, titles[titleI]);
+				gdoc.addField(GTDoc.CONTENT, text);
+				if (fathers1.length>=1){
+					gdoc.addField(GTDoc.CHAPTER, titles[fathers1[fathers1.length-1]]);
+				}
+				if (fathers1.length>=2){
+					gdoc.addField(GTDoc.SECTION, titles[fathers1[fathers1.length-2]]);
+				}
+				if (fathers1.length>=3){
+					gdoc.addField(GTDoc.PART, titles[fathers1[fathers1.length-3]]);
+				}
+				
+				int ii1 = titleI;
+				while(ii1 >=0 && urls[ii1]== null){
+					ii1--;
+				}
+				gdoc.addField(GTDoc.PATH, urls[ii1]);
+				
+				writeOut(gdoc);
+				sections++;
+				i = nextI;
+			}
+		}
+		
+	
+	}
+	*/
+	
+	
+	protected String stripLast(String s){
+		if (s == null || s.length()<1) return s;
+		if (s.endsWith("\n")){
+			s = s.substring(0,s.length()-1);
+		}
+		
+		s = s.replace("<br>", "  ");
+		return s;
+	}
+	
+	protected boolean processImageNote(List<String> hs2){
+		//[!, [C:\\Users\\kevin-pc\\Desktop\\2020\\index.png], (/images/1237763635937497090/resource/image9.png), ]
+
+		//System.out.println(txt);
+		if (hs2.get(0).equals("!") && hs2.get(1).length()>4) {
+			String s1 = hs2.get(1);
+			if (s1 != null && s1.startsWith("[") && s1.endsWith("]")) {
+				if (s1.lastIndexOf('.') >=1 || s1.startsWith("[http")) {
+					hs2.set(1, "[]");
+					return true;
+				}
+			}
+		}
+		return false;
+	}
+	
+	protected boolean processHeadingNote(List<String> hs1){
+		//(1)朱红毛斑蛾 {#朱红毛斑蛾 .标题3}
+		//System.out.println(txt);
+	    Iterator<String> itr = hs1.iterator(); 
+	    boolean isRemoved = false;
+        while (itr.hasNext()) { 
+            String x = itr.next(); 
+    		if (x.startsWith("{#")) {
+                itr.remove();
+                isRemoved = true;
+    		}
+        } 
+  
+		return isRemoved;
+	}
+	
	/**
	 * Converts an HTML fragment to plain text via jsoup and HtmlToPlainText.
	 * NOTE(review): the formatter is lazily created without synchronization —
	 * fine for single-threaded rendering, not thread-safe; confirm usage.
	 */
	protected String processHtmlBlock(String s){
		if (formatter == null) {			
			formatter = new HtmlToPlainText();
		}
	    String converted = formatter.getPlainText(Jsoup.parse(s));
	    return converted;
	}
+	
+	
+//	private MDElement processNode1(Node nd1) {
+//
+//		List<String> ts2 = new ArrayList<String>();
+//		List<String> hs2 = new ArrayList<String>();
+//		//基本上就 这 三种类型
+//		//Paragraph{} /HtmlBlock{} /OrderedList{}
+//		if (nd1 instanceof HtmlBlock) {
+//			HtmlBlock p1 = (HtmlBlock)nd1;
+//			
+//			List<BasedSequence> ls1 = p1.getContentLines();
+//			//System.out.println(p1.getContentLines());
+//			//System.out.println("==");
+//			for (int i = 0; i < ls1.size(); i++) {
+//				hs2.add(ls1.get(i).toString());
+//			}
+//			String txt = processHtmlBlock(getHtml(p1,false));
+//        	ts2.add(txt);
+//        	
+//		}else if (nd1 instanceof Paragraph) {
+//			Paragraph p1 = (Paragraph)nd1;
+//			List<BasedSequence> ls1 = p1.getContentLines();
+//			//System.out.println(ls1.size());
+//			if (ls1.size() > 0) {
+//                if (ls1.get(0).startsWith("|")) {
+//                	for (int i = 0; i < ls1.size(); i++) {
+//						hs2.add(ls1.get(i).toString());
+//					}
+//                	
+//                	String txt = getText(nd1);
+//                	ts2.add(txt);
+//                }
+//			}
+//		}
+//
+//		if (ts2.size() <=0) {
+//
+//			String htm = getHtml(nd1,false);
+//			hs2 = MDRegxUtil.splitByBrackets(htm);
+//			//[!, [C:\\Users\\kevin-pc\\Desktop\\2020\\index.png], (/images/1237763635937497090/resource/image9.png), ]
+//			//System.out.println(htm);
+//			//System.out.println("=====");
+//			boolean isRepl = false;
+//			if (hs2.size()>=2) {
+//				isRepl = processImageNote(hs2);
+//			}
+//
+//			//ts2 = new ArrayList<String>();
+//			String txt = null; 
+//			if (isRepl) {
+//				String mdTxt1 = GTBookUtil.listToString(hs2, "");
+//				Node nnd1 = PARSER.parse(mdTxt1);        
+//				txt = getText(nnd1);
+//			}else {
+//				txt = getText(nd1);
+//			}
+//			ts2.add(txt);
+//
+//		}
+//		//MDElement v = new MDElement(txt,html);
+//		MDElement v = new MDElement(ts2,hs2);
+//				
+//		return v;
+//	}
+	
+	
	/**
	 * Classifies a leaf markdown node, extracts its plain text (ts2) and raw
	 * markdown/HTML fragments (hs2), and stores them on the given tree node
	 * as an MDElement. Also sets the node type for HTML blocks and tables,
	 * and splits any string longer than MAX_STRING_LEN so that later
	 * writeUTF(String) calls do not fail.
	 *
	 * @param nd1 source markdown node
	 * @param gn  target tree node receiving type and value
	 */
	protected void processNode(Node nd1,GTNode gn) {

		List<String> ts2 = new ArrayList<String>();
		List<String> hs2 = new ArrayList<String>();
		// basically only these node types occur here:
		//Paragraph{} /HtmlBlock{} /OrderedList{} /TableBlock{} /HtmlInline{}/BulletList{}
		if (nd1 instanceof HtmlBlock) {
			HtmlBlock p1 = (HtmlBlock)nd1;
			
			// keep the raw HTML lines, extract plain text via jsoup
			List<BasedSequence> ls1 = p1.getContentLines();
			//System.out.println(p1.getContentLines());
			//System.out.println("==");
			for (int i = 0; i < ls1.size(); i++) {
				hs2.add(ls1.get(i).toString());
			}
			String txt = processHtmlBlock(getMDTxt(p1));
        	ts2.add(txt);
        	gn.setType(GTBookConstants.MD_HTMLBLOCK);
        	
		}else if (nd1 instanceof TableBlock) {
			//Paragraph p1 = (Paragraph)nd1;
			TableBlock p1 = (TableBlock)nd1;
//			List<BasedSequence> ls1 = p1.getContentLines();
//			System.out.println(ls1.size());
//        	for (int i = 0; i < ls1.size(); i++) {
//				hs2.add(ls1.get(i).toString());
//			}
			// keep the table's markdown source whole, plus its plain text
			String md = getMDTxt(p1);
			hs2.add(md);
        	String txt = getText(nd1);
        	ts2.add(txt);
			
			gn.setType(GTBookConstants.MD_TABLE);
			
		}else if (nd1 instanceof Paragraph) {
			Paragraph p1 = (Paragraph)nd1;
			String md = getMDTxt(p1);
			// pandoc underline span, e.g. "[text]{.underline}" — convert it
			// and treat the paragraph as an HTML block if anything changed
			if (md.length()> 12) {
				String md1 = MDRegxUtil.convertUnderline(md);
				if (md1.length() != md.length()) {
					hs2.add(md1);
					String txt = processHtmlBlock(md1);
			     	ts2.add(txt);
					gn.setType(GTBookConstants.MD_HTMLBLOCK);
				}
			}
		}

//	}else if (nd1 instanceof Paragraph) {
//		Paragraph p1 = (Paragraph)nd1;
//		List<BasedSequence> ls1 = p1.getContentLines();
//		//System.out.println(ls1.size());
//		if (ls1.size() > 1) {
//            if (ls1.get(0).startsWith("|") || ls1.get(0).startsWith("+-")) {
//            	for (int i = 0; i < ls1.size(); i++) {
//					hs2.add(ls1.get(i).toString());
//				}
//            	
//            	String txt = getText(nd1);
//            	if (txt != null) {
//            		txt = txt.replaceAll("\\+-", "");
//            		txt = txt.replaceAll("-", "");
//            		txt = txt.replaceAll("\\+=", "");
//            		txt = txt.replaceAll("=", "");
//            		txt = txt.replaceAll(" ", "");
//            		txt = txt.replaceAll("\\|", " ");         
//            	}
//            	ts2.add(txt);
//            	gn.setType(GTNode.TABLE);
//            }else {
//            	gn.setType(GTNode.PARAGRAPH);
//            }
//		}
//	}


		// fallback: no branch above produced text — split on brackets,
		// blank out image references, then extract plain text
		if (ts2.size() <=0) {

			String htm = getMDTxt(nd1);
			hs2 = MDRegxUtil.splitByBrackets(htm);
			//[!, [C:\\Users\\kevin-pc\\Desktop\\2020\\index.png], (/images/1237763635937497090/resource/image9.png), ]
			//System.out.println(htm);
			//System.out.println("=====");
			boolean isRepl = false;
			if (hs2.size()>=2) {
				isRepl = processImageNote(hs2);
			}

			//ts2 = new ArrayList<String>();
			String txt = null; 
			if (isRepl) {
				// image alt was blanked: re-parse the cleaned markdown
				String mdTxt1 = GTBookUtil.listToString(hs2, "");
				Node nnd1 = PARSER.parse(mdTxt1);        
				txt = getText(nnd1);
			}else {
				txt = getText(nd1);
			}
			ts2.add(txt);

		}

		//// 2020.8.26 split over-long strings (writeUTF length limit)
		String txt = ts2.get(0);
		if (txt.length() >MAX_STRING_LEN) {
			List<String> ls = splitLongString(txt);
			ts2 = ls;
		}
		
		boolean isTooLong = false;
		for (String s : hs2) {
			if (s.length() > MAX_STRING_LEN) {
				isTooLong = true;
				break;
			}
		}
		
		if (isTooLong) {
			List<String> ls = new ArrayList<>();
			for (String s : hs2) {
				if (s.length() > MAX_STRING_LEN) {
					List<String> ls1 = splitLongString(s);
					ls.addAll(ls1);
				}else {
					ls.add(s);
				}
			}
			hs2 = ls;
		}
		
		MDElement v = new MDElement(ts2,hs2);
	    gn.setValue(v);

	}
+	
+	private List<String> splitLongString(String s){
+		int maxLineLen = MAX_STRING_LEN;
+		int len = s.length();
+		int its = len/maxLineLen;
+		
+		List<String> ls = new ArrayList<>();
+		int nowOffset = 0;
+		for (int i = 0; i < its; i++) {
+			String s1 = s.substring(nowOffset,nowOffset+maxLineLen);
+			nowOffset=nowOffset+maxLineLen;
+			ls.add(s1);
+		}
+		if (len==nowOffset) {
+			
+		}else {
+			String s1 = s.substring(nowOffset);
+			ls.add(s1);
+		}
+		return ls;
+	}
+	
	/**
	 * Converts a parsed DocTree into a GTBook tree. Each heading becomes an
	 * internal GTNode (linked to its parent via the fathers[] array, or to
	 * the root); the content nodes following a heading become its leaf
	 * children. When the document contains no headings at all, every node is
	 * attached as a leaf under a single synthetic "全书" chapter.
	 *
	 * @param dt document tree produced by the markdown parser
	 * @return book whose root holds the reconstructed heading hierarchy
	 * @throws IOException declared for callers; no I/O is performed here
	 */
	public GTBook renderToBook(DocTree dt) throws IOException{
		//init();
		
		List<Node> ls = dt.getSource();
		int[] depthes = dt.getDepthes();  // heading depth per node, <0 = content
		int[] fathers = dt.getFathers();  // parent heading index per node, <0 = none
		
		String[] titles = new String[ls.size()];
		//String[] urls = new String[ls.size()];
		//
		//int chapters = 1;
		//int beginIndex =0;
		
		GTNode root = GTNode.buildRoot("");
		GTNode[] bns = new GTNode[depthes.length];  // heading index -> built node
		
		GTBook book = new GTBook();
		book.setRoot(root);
		
		//System.out.println("headings="+dt.getHeadings());
		
		if (dt.getHeadings()>0) {
			int baseNo = 1;  // id offset, bumped for each synthetic no-heading node
			for (int i = 0; i < depthes.length; i++) {

				Node nd = ls.get(i);

				if (depthes[i] >=0){
					
					// heading node: strip pandoc "{#anchor .style}" notes
					// from the title before using it as the label
					//System.out.println(i+" "+txt1);
					String htm1 = getMDTxt(nd);
					List<String> hs1 = MDRegxUtil.splitByBrackets(htm1);
				
					boolean isRemoved = false;
					if (hs1.size()>1) {
						isRemoved = processHeadingNote(hs1);
					}
					String txt1 = null;
					if (isRemoved) {
						// re-parse the cleaned heading markdown for its text
						String mdTxt1 = GTBookUtil.listToString(hs1, "");
						Node nnd1 = PARSER.parse(mdTxt1);        
						txt1 = getText(nnd1);
						//System.out.println(txt1);
					}else {
						txt1 = getText(nd);
					}
					
					List<String> ts1 = new ArrayList<String>();
					ts1.add(txt1);
					
					//MDElement v1 = new MDElement(txt1,htm1);
					MDElement v1 = new MDElement(ts1,hs1);
					titles[i] = stripLast(txt1);
					GTNode gn = GTNode.buildNode(null,i+baseNo);
					gn.setLabel(titles[i]);
					gn.setDepth(depthes[i]);
					gn.setValue(v1);
                    gn.setType(GTBookConstants.MD_HEADING);
					bns[i] = gn;

					if (fathers[i]>=0){
						bns[fathers[i]].addChild(gn);
					}else{
						root.addChild(gn);
					}

					// attach all following content nodes (depth < 0) as leaves
					int j = i+1;
					for (; j < fathers.length; j++) {
						if (depthes[j]<0 ){
							// basically just these node types:
							//Paragraph{} /HtmlBlock{} /OrderedList{}
							Node nd1 = ls.get(j);
							// type detection happens inside processNode
							//GTNode.buildLeaf(v,j+1);
							//leaf.setDepth(depthes[i]+1);
							
							//MDElement v = processNode1(nd1);
							GTNode leaf = GTNode.buildLeaf(j+baseNo);
							leaf.setDepth(depthes[i]+1);
							processNode(nd1,leaf);
							
							gn.addChild(leaf);
						}else{
							break;
						}
					}

					i = j-1;
				}else {
					
					// content with no heading above it: create a synthetic
					// parent node labelled DEFALT_LABEL
					// NOTE(review): unlike the commented-out older version,
					// bns[i] is NOT set here — a later fathers[] reference to
					// this index would NPE; confirm fathers[] never points at
					// a non-heading node.
					
					int nowDepth = root.getDepth()+1;
					MDElement v1 = new MDElement("","");
					GTNode gn = GTNode.buildNode(i+1);
					gn.setDepth(nowDepth);
					gn.setLabel(DEFALT_LABEL);
					gn.setType(GTBookConstants.MD_NO_HEADING);
					gn.setValue(v1);
					
					
					if (fathers[i]>=0){
						bns[fathers[i]].addChild(gn);
					}else{
						root.addChild(gn);
					}
					
					baseNo++;
					// j starts at i: the current node itself becomes a leaf too
					int j = i;
					for (; j < fathers.length; j++) {
						if (depthes[j]<0 ){
							// basically just these node types:
							//Paragraph{} /HtmlBlock{} /OrderedList{}
							Node nd1 = ls.get(j);
							// type detection happens inside processNode
							//GTNode.buildLeaf(v,j+1);
							//leaf.setDepth(depthes[i]+1);
							
							//MDElement v = processNode1(nd1);
							GTNode leaf = GTNode.buildLeaf(j+baseNo);
							leaf.setDepth(nowDepth+1);
							processNode(nd1,leaf);
							
							gn.addChild(leaf);
						}else{
							break;
						}
					}

					i = j-1;
				}
			}
		}else {
			// no headings at all: attach every node under one synthetic chapter
			GTNode ch1 = GTNode.buildNode(null,1);
			ch1.setLabel("全书");
			
			root.addChild(ch1);
			for (int i = 0; i < depthes.length; i++) {
				Node nd = ls.get(i);
				//String txt1 = getText(nd);
//				String htm1 = getHtml(nd,false);
//				
//				List<String> hs2 = MDRegxUtil.splitByBrackets(htm1);
//				boolean isRepl = false;
//				if (hs2.size()>=2) {
//					isRepl = processImageNote(hs2);
//				}
//				
//				
//				List<String> ts2 = new ArrayList<String>();
//				String txt1 = null;
//				if (isRepl) {
//					String mdTxt1 = GTBookUtil.listToString(hs2, "");
//					Node document = PARSER.parse(mdTxt1);        
//					txt1 = getText(document);
//				}else {
//					txt1 = getText(nd);
//				}
//				ts2.add(txt1);
				
				//MDElement v = processNode1(nd);
				GTNode gn = GTNode.buildLeaf(i+2);
				processNode(nd,gn);
				Object obj = gn.getValue();
				if (obj != null) {
					// use the first 10 chars of the extracted text as the label
					MDElement e = (MDElement)obj;
					List<String> ts=e.getText();
					String txt1 = null;
					if (ts != null && ts.size()>0) {
						txt1 = ts.get(0);
					}
					titles[i] = stripLast(txt1);
					gn.setLabel(getSub(titles[i]));
					//gn.setDepth(depthes[i]);
					gn.setDepth(1);

				}
				ch1.addChild(gn);

			}
		}
		return book;
	}
+	
+
+	protected String getSub(String text) {
+		if (text == null || text.length()< 10) {
+			return text;
+		}else {
+			return text.substring(0,10);
+		}
+	}
+	
+	
+//	private void renderToBook() throws IOException{
+//		//init();
+//		
+//		DocTree dt = this.tree;
+//		List<Node> ls = dt.getSource();
+//		int[] depthes = dt.getDepthes();
+//		int[] fathers = dt.getFathers();
+//		
+//		String[] titles = new String[ls.size()];
+//		//String[] urls = new String[ls.size()];
+//		//
+//		//int chapters = 1;
+//		//int beginIndex =0;
+//		
+//		GTNode root = GTNode.buildRoot("");
+//		GTNode[] bns = new GTNode[depthes.length];
+//		
+//		GTBook book = new GTBook();
+//		book.setRoot(root);
+//		
+//		//System.out.println("headings="+dt.getHeadings());
+//		
+//		if (dt.getHeadings()>0) {
+//			int baseNo = 1;
+//			for (int i = 0; i < depthes.length; i++) {
+//
+//				Node nd = ls.get(i);
+//
+//				if (depthes[i] >=0){
+//					
+//					//System.out.println(i+" "+txt1);
+//					String htm1 = getMDTxt(nd);
+//					List<String> hs1 = MDRegxUtil.splitByBrackets(htm1);
+//				
+//					boolean isRemoved = false;
+//					if (hs1.size()>1) {
+//						isRemoved = processHeadingNote(hs1);
+//					}
+//					String txt1 = null;
+//					if (isRemoved) {
+//						String mdTxt1 = GTBookUtil.listToString(hs1, "");
+//						Node nnd1 = PARSER.parse(mdTxt1);        
+//						txt1 = getText(nnd1);
+//						//System.out.println(txt1);
+//					}else {
+//						txt1 = getText(nd);
+//					}
+//					
+//					List<String> ts1 = new ArrayList<String>();
+//					ts1.add(txt1);
+//					
+//					//MDElement v1 = new MDElement(txt1,htm1);
+//					MDElement v1 = new MDElement(ts1,hs1);
+//					titles[i] = stripLast(txt1);
+//					GTNode gn = GTNode.buildNode(null,i+baseNo);
+//					gn.setLabel(titles[i]);
+//					gn.setDepth(depthes[i]);
+//					gn.setValue(v1);
+//                    gn.setType(GTNode.MD_HEADING);
+//					bns[i] = gn;
+//
+//					if (fathers[i]>=0){
+//						bns[fathers[i]].addChild(gn);
+//					}else{
+//						root.addChild(gn);
+//					}
+//
+//					int j = i+1;
+//					for (; j < fathers.length; j++) {
+//						if (depthes[j]<0 ){
+//							//基本上就 这 三种类型
+//							//Paragraph{} /HtmlBlock{} /OrderedList{}
+//							Node nd1 = ls.get(j);
+//							// 先进行类型检测
+//							//GTNode.buildLeaf(v,j+1);
+//							//leaf.setDepth(depthes[i]+1);
+//							
+//							//MDElement v = processNode1(nd1);
+//							GTNode leaf = GTNode.buildLeaf(j+baseNo);
+//							leaf.setDepth(depthes[i]+1);
+//							processNode(nd1,leaf);
+//							
+//							gn.addChild(leaf);
+//						}else{
+//							break;
+//						}
+//					}
+//
+//					i = j-1;
+//				}else {
+//					
+//					// 叶子节点,没有标题
+//					
+//					int nowDepth = root.getDepth()+1;
+//					MDElement v1 = new MDElement("","");
+//					GTNode gn = GTNode.buildNode(i+1);
+//					gn.setDepth(nowDepth);
+//					gn.setLabel(DEFALT_LABEL);
+//					gn.setType(GTNode.MD_NO_HEADING);
+//					gn.setValue(v1);
+//					bns[i] = gn;
+//					
+//					if (fathers[i]>=0){
+//						bns[fathers[i]].addChild(gn);
+//					}else{
+//						root.addChild(gn);
+//					}
+//					
+//					baseNo++;
+//					int j = i;
+//					for (; j < fathers.length; j++) {
+//						if (depthes[j]<0 ){
+//							//基本上就 这 三种类型
+//							//Paragraph{} /HtmlBlock{} /OrderedList{}
+//							Node nd1 = ls.get(j);
+//							// 先进行类型检测
+//							//GTNode.buildLeaf(v,j+1);
+//							//leaf.setDepth(depthes[i]+1);
+//							
+//							//MDElement v = processNode1(nd1);
+//							GTNode leaf = GTNode.buildLeaf(j+baseNo);
+//							leaf.setDepth(nowDepth+1);
+//							processNode(nd1,leaf);
+//							
+//							gn.addChild(leaf);
+//						}else{
+//							break;
+//						}
+//					}
+//
+//					i = j-1;
+//				}
+//			}
+//		}else {
+//			//没有 章节
+//			GTNode ch1 = GTNode.buildNode(null,1);
+//			ch1.setLabel("全书");
+//			
+//			root.addChild(ch1);
+//			for (int i = 0; i < depthes.length; i++) {
+//				Node nd = ls.get(i);
+//				//String txt1 = getText(nd);
+////				String htm1 = getHtml(nd,false);
+////				
+////				List<String> hs2 = MDRegxUtil.splitByBrackets(htm1);
+////				boolean isRepl = false;
+////				if (hs2.size()>=2) {
+////					isRepl = processImageNote(hs2);
+////				}
+////				
+////				
+////				List<String> ts2 = new ArrayList<String>();
+////				String txt1 = null;
+////				if (isRepl) {
+////					String mdTxt1 = GTBookUtil.listToString(hs2, "");
+////					Node document = PARSER.parse(mdTxt1);        
+////					txt1 = getText(document);
+////				}else {
+////					txt1 = getText(nd);
+////				}
+////				ts2.add(txt1);
+//				
+//				//MDElement v = processNode1(nd);
+//				GTNode gn = GTNode.buildLeaf(i+2);
+//				processNode(nd,gn);
+//				Object obj = gn.getValue();
+//				if (obj != null) {
+//					MDElement e = (MDElement)obj;
+//					List<String> ts=e.getText();
+//					String txt1 = null;
+//					if (ts != null && ts.size()>0) {
+//						txt1 = ts.get(0);
+//					}
+//					titles[i] = stripLast(txt1);
+//					gn.setLabel(getSub(titles[i]));
+//					//gn.setDepth(depthes[i]);
+//					gn.setDepth(1);
+//
+//				}
+//				ch1.addChild(gn);
+//
+//			}
+//		}
+//		
+//		this.gbook = book;
+//	}
+	
+	
+	//private void toGTDoc(GTBook book,File fn,Map<String,String> params) throws IOException{
+//	private void toGTDoc(Map<String,String> params) throws IOException{
+//		GTBook book = this.gbook;
+//		String tags = null;
+//		if (params != null) {
+//			tags = params.get(GTDoc.TAGS);
+//		}
+//		TIntArrayList path = new TIntArrayList();
+//
+//		INode[] fathers = new INode[256];
+//
+//		List<GTDoc> docs = new ArrayList<>();
+//
+//		final int[] counter = new int[1];
+//		counter[0] = 0;
+//
+//		GTNode root = book.getRoot();
+//		final String id = book.getId();
+//		final String tags1 = tags;
+//		IVisitor visitor = new IVisitor() {
+//			@Override
+//			public void visit(INode now, TIntArrayList path) {
+//				if (path.size()>=1){
+//					fathers[path.size()-1] = now;
+//				}else{
+//					fathers[0] = now;                	
+//				}
+//				if (now.isLeaf()){
+//
+//					counter[0]++;
+//					int titleI = path.size()-2;
+//					String text = GTBookUtil.listToString(now.getText(), "\n");
+//					GTDoc gdoc = new GTDoc();
+//					gdoc.addField(GTDoc.PARAGRAPHID, GTDoc.PREFIX_PARAGRAPHID+now.getId());
+//					gdoc.addField(GTDoc.CONTENT, text);
+//
+//					if (titleI >=0){
+//						gdoc.addField(GTDoc.TITLE, fathers[titleI].getLabel());						
+//					}
+//
+//					if (titleI >=1){
+//						gdoc.addField(GTDoc.CHAPTER, fathers[0].getLabel());						
+//					}
+//					if (titleI >=2){
+//						gdoc.addField(GTDoc.SECTION, fathers[1].getLabel());						
+//					}
+//					if (titleI >=3){
+//						gdoc.addField(GTDoc.PART, fathers[2].getLabel());						
+//					}
+//
+//					path.remove(path.size()-1);
+//					TIntArrayList path2 = book.getRelativePath(path);
+//					String url =book.getPath(path2);
+//
+//					gdoc.addField(GTDoc.PATH, url);
+//					gdoc.addField(GTDoc.BOOKID, id);
+//
+//					if (tags1 != null) {
+//						gdoc.addField(GTDoc.TAGS, tags1);
+//					}
+//					docs.add(gdoc);
+//
+//				}
+//			}
+//		};
+//
+//		root.traverseDescendants(path, visitor);
+//
+//		if (counter[0] == 0) {
+//			//System.out.println("没有叶子");
+//			List<INode> ls = root.getChildren();
+//			TIntArrayList path2 = new TIntArrayList();
+//			for (int i = 0; i < ls.size(); i++) {
+//				INode now = ls.get(i);
+//				String text = GTBookUtil.listToString(now.getText(), "\n");
+//				GTDoc gdoc = new GTDoc();
+//				gdoc.addField(GTDoc.PARAGRAPHID, GTDoc.PREFIX_PARAGRAPHID+now.getId());
+//				gdoc.addField(GTDoc.CONTENT, text);
+//				gdoc.addField(GTDoc.TITLE, now.getLabel());
+//
+//				path2.setQuick(0, i);
+//				String url =book.getPath(path2);
+//
+//				gdoc.addField(GTDoc.PATH, url);
+//				gdoc.addField(GTDoc.BOOKID, id);
+//
+//				if (tags1 != null) {
+//					gdoc.addField(GTDoc.TAGS, tags1);
+//				}
+//				docs.add(gdoc);
+//			}
+//		}
+//
+//		
+//		gdocs = new GTDocs();
+//		gdocs.setDocs(docs);
+//		//System.out.println("ls_ize="+ls.size());
+//
+//		//writeOut(gdoc);
+//		
+////		BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));
+////
+////		try {
+////			for(GTDoc gdoc : docs){
+////				List<String> ls = gdoc.toListString();
+////				for(String s :ls){
+////					out.write(s);
+////				}
+////				out.write("\n");
+////			}
+////		}finally {
+////			if (out != null){
+////				out.close();
+////			}
+////		}
+//
+//	}
+	
+	
+	/**
+	 * Builds one GTDoc index record from a leaf node of the current book.
+	 *
+	 * Fields written: paragraph id (prefixed), the node's joined text as
+	 * content, up to three ancestor heading labels (TITLE/CHAPTER/SECTION/PART
+	 * depending on the depth of {@code path}), the book-relative path, the
+	 * book id, and optional tags taken from {@code gparams}.
+	 *
+	 * @param now  leaf node whose text becomes the document content
+	 * @param path absolute path within {@code gbook} of the heading that owns
+	 *             this leaf — NOTE(review): the TITLE lookup resolves
+	 *             {@code path} itself, so the path appears to address the
+	 *             parent heading rather than the leaf; confirm against the
+	 *             renderToBook call sites
+	 * @param bns  heading nodes indexed by source position (not read here)
+	 * @return the populated GTDoc
+	 */
+	protected GTDoc nodeToGDoc(GTNode now, TIntArrayList path,GTNode[] bns) {
+		
+		String text = GTBookUtil.listToString(now.getText(), "\n");
+		GTDoc gdoc = new GTDoc();
+		gdoc.addField(GTDoc.PARAGRAPHID, GTDoc.PREFIX_PARAGRAPHID+now.getId());
+		gdoc.addField(GTDoc.CONTENT, text);
+
+		//bns[titleI].getLabel()
+//		if (titleI >=0){
+//			gdoc.addField(GTDoc.TITLE, fathers[titleI].getLabel());						
+//		}
+//
+//		if (titleI >=1){
+//			gdoc.addField(GTDoc.CHAPTER, fathers[0].getLabel());						
+//		}
+//		if (titleI >=2){
+//			gdoc.addField(GTDoc.SECTION, fathers[1].getLabel());						
+//		}
+//		if (titleI >=3){
+//			gdoc.addField(GTDoc.PART, fathers[2].getLabel());						
+//		}
+
+		// titleI is the zero-based depth of the addressed heading; each extra
+		// level contributes one more ancestor label below.
+		int titleI = path.size()-1;
+		
+		INode fn = null;
+		if (titleI >=0){
+			// Deepest heading on the path becomes the TITLE.
+			fn = gbook.getAbsoluteNode(path);  //bns[path.getQuick(titleI)]
+			gdoc.addField(GTDoc.TITLE, fn.getLabel());		
+		}
+
+		if (titleI >=1){
+			// Walk down from the root one level at a time for CHAPTER/SECTION/PART;
+			// fn is carried forward between the branches, so order matters.
+			fn = gbook.getAbsoluteNode(gbook.getRoot(), path.getQuick(0));
+			gdoc.addField(GTDoc.CHAPTER, fn.getLabel());						
+		}
+		if (titleI >=2){
+			fn = gbook.getAbsoluteNode(fn, path.getQuick(1));
+			gdoc.addField(GTDoc.SECTION, fn.getLabel());						
+		}
+		if (titleI >=3){
+			fn = gbook.getAbsoluteNode(fn, path.getQuick(2));
+			gdoc.addField(GTDoc.PART, fn.getLabel());						
+		}
+
+		//path.remove(path.size()-1);
+		
+		TIntArrayList path2 = gbook.getRelativePath(path);
+		String url =gbook.getPath(path2);
+
+		gdoc.addField(GTDoc.PATH, url);
+		gdoc.addField(GTDoc.BOOKID, gbook.getId());
+
+		String tags = null;
+		if (gparams != null) {
+			tags = gparams.get(GTDoc.TAGS);
+			if (tags != null && tags.length()>0) {
+				gdoc.addField(GTDoc.TAGS, tags);
+			}
+		}
+		
+		//System.out.println(gdoc.toListString());
+		return gdoc;
+	}
+	
+	
+	/**
+	 * Renders the parsed DocTree ({@code this.tree}) into a GTBook stored in
+	 * {@code this.gbook} and, in the same pass, a GTDocs collection stored in
+	 * {@code this.gdocs} with one GTDoc per leaf paragraph.
+	 *
+	 * Heading source nodes become inner GTNodes; the non-heading nodes that
+	 * follow a heading become its leaf children. Source nodes with no heading
+	 * get a synthetic unlabeled parent. If the document has no headings at
+	 * all, the whole content is wrapped in a single chapter node.
+	 *
+	 * @param params optional book metadata ("title", "id", "tags"); may be null
+	 * @throws IOException declared for rendering hooks; this implementation
+	 *                     does not itself perform file I/O
+	 */
+	public void renderToBook(Map<String,String> params) throws IOException{
+		//init();
+		DocTree dt = this.tree;
+		List<Node> ls = dt.getSource();
+		int[] depthes = dt.getDepthes();
+		int[] fathers = dt.getFathers();
+		
+		String[] titles = new String[ls.size()];
+		
+		GTNode root = GTNode.buildRoot("");
+		// bns[i] = GTNode built for heading source node i (fathers[] indexes into it)
+		GTNode[] bns = new GTNode[depthes.length];
+		
+		GTBook book = new GTBook();
+		book.setRoot(root);
+		
+        if (params != null){
+        	String title = params.get("title");
+        	book.setTitle(title);
+        	
+        	String id = params.get("id");
+        	book.setId(id);
+        	
+        	String tags = params.get("tags");
+        	if (tags != null) {
+        		book.putMeta("tags", tags);	        		
+        	}
+        }
+		
+    	this.gbook = book;
+		//System.out.println("headings="+dt.getHeadings());
+
+		// GTDoc generation below
+    	// pathes[i]: absolute GTBook path of source node i, built incrementally
+    	TIntArrayList[] pathes = new TIntArrayList[fathers.length];
+        
+        this.gdocs =  new GTDocs();
+        
+		if (dt.getHeadings()>0) {
+			int baseNo = 1;
+			for (int i = 0; i < depthes.length; i++) {
+
+				Node nd = ls.get(i);
+
+				if (depthes[i] >=0){
+					// depth >= 0 marks a heading node
+					
+					//System.out.println(i+" "+txt1);
+					String htm1 = getMDTxt(nd);
+					List<String> hs1 = MDRegxUtil.splitByBrackets(htm1);
+				
+					// Strip pandoc heading attributes such as {#anchor .class};
+					// if anything was removed, re-parse to get the clean text.
+					boolean isRemoved = false;
+					if (hs1.size()>1) {
+						isRemoved = processHeadingNote(hs1);
+					}
+					String txt1 = null;
+					if (isRemoved) {
+						String mdTxt1 = GTBookUtil.listToString(hs1, "");
+						Node nnd1 = PARSER.parse(mdTxt1);        
+						txt1 = getText(nnd1);
+						//System.out.println(txt1);
+					}else {
+						txt1 = getText(nd);
+					}
+					
+					List<String> ts1 = new ArrayList<String>();
+					ts1.add(txt1);
+					
+					//MDElement v1 = new MDElement(txt1,htm1);
+					MDElement v1 = new MDElement(ts1,hs1);
+					titles[i] = stripLast(txt1);
+					GTNode gn = GTNode.buildNode(null,i+baseNo);
+					gn.setLabel(titles[i]);
+					gn.setDepth(depthes[i]);
+					gn.setValue(v1);
+                    gn.setType(GTBookConstants.MD_HEADING);
+					bns[i] = gn;
+					// Attach under its father heading (or the root) and record
+					// the absolute path = father's path + child index.
+					TIntArrayList path = null; 
+					if (fathers[i]>=0){
+						
+						bns[fathers[i]].addChild(gn);
+						int i1 =  bns[fathers[i]].size()-1;
+						TIntArrayList fp = pathes[fathers[i]];
+						path = (TIntArrayList)fp.clone();
+						path.add(i1);
+						pathes[i] = path;
+						
+						//path = getPath(fathers,i);
+					}else{
+						root.addChild(gn);
+						int i1 =  root.size()-1;
+						path = new TIntArrayList();
+						path.add(i1);
+						pathes[i] = path;
+					}
+
+					
+					// Consume the run of content nodes following this heading
+					// as its leaves; stop at the next heading.
+					int j = i+1;
+					for (; j < fathers.length; j++) {
+						if (depthes[j]<0 ){
+							// essentially just these types:
+							//Paragraph{} /HtmlBlock{} /OrderedList{}
+							Node nd1 = ls.get(j);
+							// type detection happens inside processNode
+							//GTNode.buildLeaf(v,j+1);
+							//leaf.setDepth(depthes[i]+1);
+							
+							//MDElement v = processNode1(nd1);
+							GTNode leaf = GTNode.buildLeaf(j+baseNo);
+							leaf.setDepth(depthes[i]+1);
+							processNode(nd1,leaf);
+							
+							gn.addChild(leaf);
+							
+							GTDoc gdoc = nodeToGDoc(leaf, path,bns);
+							gdocs.add(gdoc);
+						}else{
+							break;
+						}
+					}
+
+					i = j-1;
+				}else {
+					
+					// Content with no heading above it: synthesize an
+					// unlabeled parent node to hold the following leaves.
+					// NOTE(review): this branch calls buildNode(i+baseNo)
+					// while the heading branch uses buildNode(null,i+baseNo)
+					// — confirm the two overloads are equivalent.
+					
+					int nowDepth = root.getDepth()+1;
+					MDElement v1 = new MDElement("","");
+					GTNode gn = GTNode.buildNode(i+baseNo);
+					gn.setDepth(nowDepth);
+					gn.setLabel(DEFALT_LABEL);
+					gn.setType(GTBookConstants.MD_NO_HEADING);
+					gn.setValue(v1);
+					bns[i] = gn;
+					TIntArrayList path = null; 
+					if (fathers[i]>=0){
+						bns[fathers[i]].addChild(gn);
+						int i1 =  bns[fathers[i]].size()-1;
+						TIntArrayList fp = pathes[fathers[i]];
+						path = (TIntArrayList)fp.clone();
+						path.add(i1);
+						pathes[i] = path;
+					}else{
+						root.addChild(gn);
+						int i1 =  root.size()-1;
+						path = new TIntArrayList();
+						path.add(i1);
+						pathes[i] = path;
+					}
+					
+					// The synthetic node consumes one id slot, so shift ids.
+					baseNo++;
+					// Note: starts at i (the current node is itself content).
+					int j = i;
+					for (; j < fathers.length; j++) {
+						if (depthes[j]<0 ){
+							// essentially just these types:
+							//Paragraph{} /HtmlBlock{} /OrderedList{}
+							Node nd1 = ls.get(j);
+							// type detection happens inside processNode
+							//GTNode.buildLeaf(v,j+1);
+							//leaf.setDepth(depthes[i]+1);
+							
+							//MDElement v = processNode1(nd1);
+							GTNode leaf = GTNode.buildLeaf(j+baseNo);
+							leaf.setDepth(nowDepth+1);
+							processNode(nd1,leaf);
+							
+							gn.addChild(leaf);
+							
+							GTDoc gdoc = nodeToGDoc(leaf, path,bns);
+							gdocs.add(gdoc);
+						}else{
+							break;
+						}
+					}
+
+					i = j-1;
+				}
+			}
+		}else {
+			// No headings at all: wrap the whole document in one chapter.
+			GTNode ch1 = GTNode.buildNode(null,1);
+			ch1.setLabel("全书");
+			
+			root.addChild(ch1);
+			TIntArrayList path = new TIntArrayList();
+			int i1 =  root.size()-1;
+			path.add(i1);
+			
+			for (int i = 0; i < depthes.length; i++) {
+				Node nd = ls.get(i);
+				
+				//MDElement v = processNode1(nd);
+				GTNode gn = GTNode.buildLeaf(i+2);
+				processNode(nd,gn);
+				// Use the leaf's own first text line as its label.
+				Object obj = gn.getValue();
+				if (obj != null) {
+					MDElement e = (MDElement)obj;
+					List<String> ts=e.getText();
+					String txt1 = null;
+					if (ts != null && ts.size()>0) {
+						txt1 = ts.get(0);
+					}
+					titles[i] = stripLast(txt1);
+					gn.setLabel(getSub(titles[i]));
+					//gn.setDepth(depthes[i]);
+					gn.setDepth(1);
+
+				}
+				ch1.addChild(gn);
+				GTDoc gdoc = nodeToGDoc(gn, path,bns);
+				gdocs.add(gdoc);
+			}
+		}
+	
+	}
+	
+	/**
+	 * Full conversion pipeline: parses the markdown file, then writes the
+	 * plain-text dump, the zipped GTBook, the GTDocs index and — optionally —
+	 * a markdown round-trip of the book.
+	 *
+	 * @param mdFile       source markdown file
+	 * @param params       conversion parameters (charset, title, id, tags, ...)
+	 * @param txtFile      output plain-text file
+	 * @param bookFile     output zipped GTBook file
+	 * @param gdocFile     output GTDocs index file
+	 * @param bookToMdFile optional markdown re-export target; skipped if null
+	 * @throws IOException if any read or write step fails
+	 */
+	public void parse(File mdFile,Map<String,String> params,File txtFile,File bookFile,File gdocFile,File bookToMdFile) throws IOException{
+		log.info("start parse markdown file : "+mdFile.getAbsolutePath());		
+		parse(mdFile,params);
+       
+		log.info("start toTxt : "+txtFile.getAbsolutePath());
+		toTxt(txtFile);
+
+		log.info("start toGTBook : "+bookFile.getAbsolutePath());
+        gbook.saveZipObj(bookFile);
+        
+        log.info("start toGTDoc : "+gdocFile.getAbsolutePath());
+        gdocs.save(gdocFile);
+        
+        if (bookToMdFile != null) {
+            log.info("start book to md file : "+bookToMdFile.getAbsolutePath());
+            gbook.toMd(bookToMdFile);
+        }
+	}
+	
+	/**
+	 * Convenience overload of the full pipeline without the markdown
+	 * re-export step (bookToMdFile = null).
+	 */
+	public void parse(File mdFile,Map<String,String> params,File txtFile,File bookFile,File gdocFile) throws IOException{
+		parse(mdFile, params, txtFile, bookFile, gdocFile, null);
+	}
+	
+	/**
+	 * Writes the parsed book as plain text: one entry per heading label and
+	 * per leaf paragraph, in document order. {@code <br>} markers in leaf
+	 * text are expanded into newlines.
+	 *
+	 * @param fn target text file, written as UTF-8
+	 * @throws IOException if the file cannot be written
+	 */
+	public void toTxt(File fn) throws IOException{
+		TIntArrayList path = new TIntArrayList();
+		List<String> ls = new ArrayList<>();
+
+		GTNode root = gbook.getRoot();
+		
+		IVisitor visitor = new IVisitor() {
+			@Override
+			public void visit(INode now, TIntArrayList path) {
+				// Leaves contribute their joined text; inner nodes their label.
+				if (now.isLeaf()){
+					String text = GTBookUtil.listToString(now.getText(), "\n");					
+					ls.add(text);
+				}else{
+					String text = now.getLabel();
+					ls.add(text);
+				}
+			}
+		};
+		root.traverseDescendants(path, visitor);
+
+		// try-with-resources guarantees the writer is closed even on error;
+		// the Charset constant avoids the string lookup of "utf-8" and any
+		// dependence on the platform default encoding.
+		try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
+				new FileOutputStream(fn), java.nio.charset.StandardCharsets.UTF_8))) {
+			for(String s : ls){
+				s= s.replace("<br>", "\n");
+				out.write(s);
+				out.write("\n");
+			}
+		}
+	}
+
+	
+//	public static void toGTBook(File mdFile,String charSet,File gtFile,Map<String,String> params) throws IOException{
+//
+//		Parser parser = Parser.builder().build();
+//		String s = FileUtil.getStringFromFile(mdFile, charSet);
+//
+//		//s = CRLFProcess(s);
+//		// 去除图形的{width="
+//		String content = MDHelper.stripImageBorder(s);
+//
+//		if (params != null) {
+//			String fromUrl = params.get("fromurl");
+//			String toUrl = params.get("tourl");
+//			if (fromUrl != null) {
+//				if (toUrl == null) {
+//					toUrl = "";
+//				}
+//				MDUrlProcessor mdp = new MDUrlProcessor();
+//				content = mdp.replaceUrl(content, fromUrl, toUrl);
+//			}
+//		}
+//
+//		//System.out.println(content);
+//		//String content = s;
+////	       Node doc;
+////	       try (StringReader reader = new StringReader(content)) {
+////                doc = parser.parseReader(reader);
+////           }
+//
+//		Node doc = MDHelper.toMDNode(content);
+//		DocTree dt = MDHelper.getDocumentTree(doc);
+//		//dt.outDebugInfo();
+//
+//		DocRender dr = new DocRender();
+//		dr.init();
+//		GTBook book = dr.renderToBook(dt);
+//		//input1.close();
+//
+//		if (params != null){
+//			String title = params.get("title");
+//			book.setTitle(title);
+//
+//			String id = params.get("id");
+//			book.setId(id);
+//
+//			String tags = params.get("tags");
+//			if (tags != null) {
+//				book.putMeta("tags", tags);
+//			}
+//		}
+//
+////	        String s = PrettyPrinter.prettyPrint(book.getRoot());
+////	        System.out.println(s);
+//
+//		//book.saveObj(gtFile);
+//		book.saveZipObj(gtFile);
+////	        System.out.println("======");
+////	        System.out.println(book.getHtml("2-1"));
+//	}
+	
+	
+	/**
+	 * Parses a markdown file into the internal DocTree and renders it into
+	 * the GTBook/GTDocs pair (see renderToBook).
+	 *
+	 * @param mdFile source markdown file
+	 * @param params optional parameters ("charset", "fromurl", "tourl",
+	 *               "title", "id", "tags"); may be null
+	 * @throws IOException if the source file cannot be read
+	 */
+	public void parse(File mdFile,Map<String,String> params) throws IOException {
+		this.source = mdFile;
+		this.gparams = params;
+		
+		// BUGFIX: the old code called params.get("charset") unconditionally,
+		// throwing NPE for a null params even though the rest of the method
+		// guards with (params != null).
+		String charSet = (params != null) ? params.get("charset") : null;
+		String s = FileUtil.getStringFromFile(mdFile, charSet);
+
+		//s = CRLFProcess(s);
+		// strip pandoc image borders such as {width="...
+		String content = MDHelper.stripImageBorder(s);
+
+		if (params != null) {
+			// Optional URL rewrite, e.g. to relocate image links.
+			String fromUrl = params.get("fromurl");
+			String toUrl = params.get("tourl");
+			if (fromUrl != null) {
+				if (toUrl == null) {
+					toUrl = "";
+				}
+				content = MDUrlProcessor.replaceUrl(content, fromUrl, toUrl);
+			}
+		}
+		
+		
+		Node doc = MDHelper.toMDNode(content);
+        DocTree dt = MDHelper.getDocumentTree(doc);
+        this.tree = dt;
+        
+        renderToBook(params);
+        
+	}
+	
+	/** @return the GTBook produced by the last parse/renderToBook call. */
+	public GTBook getGBook() {
+		return gbook;
+	}
+	
+	/** @return the GTDocs index produced by the last parse/renderToBook call. */
+	public GTDocs getGDocs() {
+		return gdocs;
+	}
+	
+	
+}

+ 1018 - 0
gtbook/src/main/java/org/cnnlp/data/md/DocRender2.java

@@ -0,0 +1,1018 @@
+package org.cnnlp.data.md;
+
+import com.vladsch.flexmark.ast.*;
+import com.vladsch.flexmark.ext.obs.comments.Comments;
+import com.vladsch.flexmark.ext.tables.TableBlock;
+import com.vladsch.flexmark.ext.yaml.front.matter.AbstractYamlFrontMatterVisitor;
+import com.vladsch.flexmark.ext.yaml.front.matter.YamlFrontMatterBlock;
+import com.vladsch.flexmark.util.ast.Node;
+import com.vladsch.flexmark.util.ast.TextCollectingVisitor;
+import com.vladsch.flexmark.util.collection.iteration.ReversiblePeekingIterable;
+import com.vladsch.flexmark.util.sequence.BasedSequence;
+import gnu.trove.TIntArrayList;
+import org.cnnlp.data.book.*;
+import org.cnnlp.data.splitter.SplitUtils;
+import org.cnnlp.data.util.FileUtil;
+import org.cnnlp.data.util.SenUtil;
+import org.jsoup.Jsoup;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+
+
+/*
+public com.vladsch.flexmark.ast.Document parse(String input)
+Parse the specified input text into a tree of nodes.
+Note that this method is thread-safe (a new parser state is used for each invocation).
+ */
+
+// 和DocRender.java相比,增加了Comments和元数据的处理
+// 以及 DEFALT_LABEL = "";
+public class DocRender2 {
+
+    private static Logger log = LoggerFactory.getLogger(DocRender2.class);
+
+//    final private static DataHolder OPTIONS = PegdownOptionsAdapter.flexmarkOptions(
+//            Extensions.ALL
+//    );
+//    static final MutableDataSet FORMAT_OPTIONS = new MutableDataSet();
+//    static {
+//        // copy extensions from Pegdown compatible to Formatting
+//        FORMAT_OPTIONS.set(Parser.EXTENSIONS, Parser.EXTENSIONS.get(OPTIONS));
+//    }
+//
+//    static protected final Parser PARSER = Parser.builder(OPTIONS).build();
+
+    // Chunk size guarding against the length limit of DataOutput.writeUTF(String)
+    public static final int MAX_STRING_LEN = 20000;
+
+    // Label for paragraphs that have no parent heading
+    //public static final String DEFALT_LABEL = "...";
+    public static final String DEFALT_LABEL = "";
+    private String DEFALT_ROOT_LABEL = "";//"全_书"
+    // jsoup-based html-to-text converter, lazily created in processHtmlBlock
+    HtmlToPlainText formatter = null;
+
+    int htmlLevel = 3;
+    int indexDocLevel = 6;
+
+    //TextContentRenderer textr;
+    // flexmark plain-text collector, initialized in init()
+    TextCollectingVisitor textr;
+    //HtmlRenderer htmlr;
+
+    // Output locations/names used by getUrl() and writeOut()
+    String fileDir = "C:/data/pdf/test/";
+    String htmlDir = "/mdhtml/";
+    String indexDir = "/gdoc/";
+
+    String htmlBaseName = "yls";
+    String indexBaseName = "yls";
+
+    String htmlExtName = ".html";
+    String indexExtName = ".gdoc";
+
+    // Extra <head> markup injected into generated html pages
+    String headPlugIn = "<link rel=\"stylesheet\" type=\"text/css\" href=\"./css/document.css\" />\n" +
+            "<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n" +
+            "<meta name=\"viewport\" content=\"width=device-width,initial-scale=1.0\">\n";
+
+    String htmlStart = "<!DOCTYPE html>\n<html><head>\n<meta charset=\"UTF-8\">\n";
+    String htmlHead = "</head>\n<body>\n";
+    String htmlEnd = "</body>\n</html>";
+
+    // Lazily opened by writeOut(); NOTE(review): not closed in visible code
+    Writer out = null;
+
+    protected File source;
+    protected Map<String, Object> gparams;
+
+    // Parsed markdown structure and its rendered outputs
+    protected DocTree tree;
+
+    protected GTBook gbook;
+
+    // Accumulates FAQ/annotation comments found during processNode
+    protected MDElement faqComments;
+    /** Creates a renderer and initializes the text collector via init(). */
+    public DocRender2() {
+        init();
+    }
+
+
+    /**
+     * Initializes the plain-text collector. Called from the constructor;
+     * safe to call again to reset the collector.
+     */
+    public void init() {
+        //HtmlRenderer renderer = HtmlRenderer.builder().escapeHtml(false).softbreak("").build();
+        //HtmlRenderer renderer = HtmlRenderer.builder(OPTIONS).build();
+        //this.htmlr = renderer;
+
+        // stripNewlines(true) would drop line breaks (legacy renderer option)
+        //TextContentRenderer text = TextContentRenderer.builder().stripNewlines(true).build();
+        TextCollectingVisitor textCollectingVisitor = new TextCollectingVisitor();
+        this.textr = textCollectingVisitor;
+    }
+
+
+    /** @return the extra markup injected into generated html heads. */
+    public String getHeadPlugIn() {
+        return headPlugIn;
+    }
+
+    /** @param headPlugIn replacement markup for generated html heads. */
+    public void setHeadPlugIn(String headPlugIn) {
+        this.headPlugIn = headPlugIn;
+    }
+
+    /**
+     * Depth-first walk over a flexmark subtree. The per-node action is
+     * commented out, so this currently only recurses — apparently a
+     * debugging leftover; unused by the public flow visible here.
+     */
+    private void visit(Node nd, int depth) {
+        //BasedSequence bs1 = nd.baseSubSequence(nd.getStartOffset(), nd.getEndOffset());
+        //System.out.println(depth+"->"+bs1.toString());
+        if (nd.hasChildren()) {
+            for (Node child = nd.getFirstChild(); child != null; child = child.getNext()) {
+                //children.add(child);
+                visit(child, depth + 1);
+            }
+        }
+
+    }
+
+    /**
+     * Returns the raw markdown source text a node spans, sliced from the
+     * node's base sequence by its start/end offsets.
+     */
+    public String getMDTxt(Node nd) {
+        BasedSequence bs1 = nd.baseSubSequence(nd.getStartOffset(), nd.getEndOffset());
+        return bs1.toString();
+    }
+
+
+    /**
+     * Collects the plain text of a node subtree and normalizes CRLF line
+     * endings to LF.
+     *
+     * @param nd flexmark AST node
+     * @return collected text with Unix line endings
+     */
+    public String getText(Node nd) {
+        String s = textr.collectAndGetText(nd);
+        // replace(CharSequence, CharSequence) substitutes literally; the old
+        // replaceAll call spun up the regex engine for the same result.
+        s = s.replace("\r\n", "\n");
+        return s;
+    }
+
+    /**
+     * Builds the relative html page path for an index, e.g. "/mdhtml/yls3.html".
+     *
+     * @param index page number appended to the base name
+     * @return html path under {@code htmlDir}
+     */
+    private String getUrl(int index) {
+        StringBuilder pagePath = new StringBuilder(htmlDir);
+        pagePath.append(htmlBaseName).append(index).append(".html");
+        return pagePath.toString();
+    }
+
+
+    /**
+     * Appends a doc's string form to the fixed index file
+     * {@code fileDir + indexDir + indexBaseName + "1.gdoc"}, opening the
+     * writer lazily on first use.
+     *
+     * NOTE(review): {@code out} is a field and is neither flushed nor closed
+     * anywhere visible here — confirm a caller closes it, or data may be
+     * lost. The charset is the literal "utf-8".
+     *
+     * @throws IOException if the file cannot be opened or written
+     */
+    private void writeOut(GTDoc doc) throws IOException {
+        if (out == null) {
+            String fn = fileDir + indexDir + indexBaseName + "1.gdoc";
+            BufferedWriter out1 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));
+            this.out = out1;
+        }
+
+        if (out != null) {
+            List<String> ls = doc.toListString();
+            for (String s : ls) {
+                out.write(s);
+            }
+        }
+    }
+
+    /**
+     * Normalizes a title string: drops one trailing newline (if present) and
+     * converts {@code <br>} markers into two spaces. Null or empty input is
+     * returned unchanged.
+     */
+    protected String stripLast(String s) {
+        if (s == null || s.isEmpty()) {
+            return s;
+        }
+        String trimmed = s.endsWith("\n") ? s.substring(0, s.length() - 1) : s;
+        return trimmed.replace("<br>", "  ");
+    }
+
+    /**
+     * Cleans a pandoc image fragment list of the form
+     * {@code [!, [local\path\to.png], (/images/.../image9.png), ]}: when the
+     * second element looks like a bracketed file path or http url, its
+     * contents are blanked to "[]".
+     *
+     * @param hs2 bracket-split fragments; element 1 may be replaced in place
+     * @return true if a replacement was made
+     */
+    protected boolean processImageNote(List<String> hs2) {
+        //[!, [C:\\Users\\kevin-pc\\Desktop\\2020\\index.png], (/images/1237763635937497090/resource/image9.png), ]
+
+        //System.out.println(txt);
+        if (hs2.get(0).equals("!") && hs2.get(1).length() > 4) {
+            String s1 = hs2.get(1);
+            if (s1 != null && s1.startsWith("[") && s1.endsWith("]")) {
+                // A '.' suggests a filename extension; "[http" an absolute url.
+                if (s1.lastIndexOf('.') >= 1 || s1.startsWith("[http")) {
+                    hs2.set(1, "[]");
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Removes pandoc heading attribute fragments (those starting with "{#",
+     * e.g. "{#anchor .style}") from a bracket-split heading.
+     *
+     * @param hs1 heading fragments; modified in place
+     * @return true if at least one fragment was removed
+     */
+    protected boolean processHeadingNote(List<String> hs1) {
+        // Collection.removeIf performs the old explicit Iterator/remove loop
+        // and directly reports whether any element matched.
+        return hs1.removeIf(x -> x.startsWith("{#"));
+    }
+
+    /**
+     * Converts an html fragment to plain text via jsoup, lazily creating the
+     * shared formatter on first use.
+     *
+     * NOTE(review): the lazy init is not thread-safe; fine if each renderer
+     * instance is used from a single thread — confirm.
+     */
+    protected String processHtmlBlock(String s) {
+        if (formatter == null) {
+            formatter = new HtmlToPlainText();
+        }
+        String converted = formatter.getPlainText(Jsoup.parse(s));
+        return converted;
+    }
+
+    public String getPath(TIntArrayList path) {
+        StringBuilder sb = new StringBuilder();
+        //sb.append(cid).append("/");
+        if (path.size() > 0) {
+            sb.append(path.getQuick(0));
+            for (int i = 1; i < path.size(); i++) {
+                sb.append("-").append(path.getQuick(i));
+            }
+            return sb.toString();
+        } else {
+            // 没有父节点,例如 "前言",有时不带#号
+            return "";
+        }
+    }
+
+    /**
+     * Records annotation comments into the shared {@code faqComments}
+     * element (created lazily), tagging each with the string form of the
+     * node path it was found at.
+     */
+    protected void processComments(List<String> comments,TIntArrayList path){
+        if (faqComments == null){
+            faqComments = new MDElement();
+        }
+        comments.forEach(k->faqComments.add(k,getPath(path)));
+    }
+
+    // path was added mainly so Comments can point back to their location
+    // 2025.2.19: added HtmlCommentBlock handling
+    /**
+     * Converts one content-level flexmark node into the leaf GTNode
+     * {@code gn}: fills its MDElement value with parallel lists of plain
+     * text ({@code ts2}) and markdown/html source ({@code hs2}), and sets a
+     * type constant for html blocks, tables and comments. Strings longer
+     * than MAX_STRING_LEN are chunked so they survive writeUTF.
+     *
+     * @param nd1  source node (Paragraph/HtmlBlock/OrderedList/TableBlock/
+     *             BulletList/Comments/HtmlCommentBlock)
+     * @param gn   target leaf; receives value and (sometimes) type
+     * @param path book path of the enclosing node, forwarded to
+     *             processComments for annotation back-references
+     */
+    protected void processNode(Node nd1, GTNode gn, TIntArrayList path) {
+
+        List<String> ts2 = new ArrayList<String>();
+        List<String> hs2 = new ArrayList<String>();
+        // essentially just these node types:
+        //Paragraph{} /HtmlBlock{} /OrderedList{} /TableBlock{} /HtmlInline{}/BulletList{}
+        if (nd1 instanceof HtmlBlock) {
+            HtmlBlock p1 = (HtmlBlock) nd1;
+
+            List<BasedSequence> ls1 = p1.getContentLines();
+            //System.out.println(p1.getContentLines());
+            //System.out.println("==");
+            for (int i = 0; i < ls1.size(); i++) {
+                hs2.add(ls1.get(i).toString());
+            }
+            String txt = processHtmlBlock(getMDTxt(p1));
+            ts2.add(txt);
+            gn.setType(GTBookConstants.MD_HTMLBLOCK);
+
+        } else if (nd1 instanceof TableBlock) {
+            TableBlock p1 = (TableBlock) nd1;
+
+            String md = getMDTxt(p1);
+            hs2.add(md);
+            String txt = getText(nd1);
+            ts2.add(txt);
+
+            gn.setType(GTBookConstants.MD_TABLE);
+            //} else if (nd1 instanceof YamlFrontMatterBlock) {
+            // annotation comments
+        } else if (nd1 instanceof Comments) {
+            Comments p1 = (Comments) nd1;
+
+            String md = getMDTxt(p1);
+            hs2.add(md);
+            String txt = getText(nd1);
+            ts2.add(txt);
+
+            gn.setType(GTBookConstants.MD_COMMENTS);
+            processComments(List.of(txt),path);
+
+        } else if (nd1 instanceof HtmlCommentBlock) {
+            HtmlCommentBlock p1 = (HtmlCommentBlock) nd1;
+
+            String md = getMDTxt(p1);
+            hs2.add(md);
+
+            String txt = getText(nd1);
+            ts2.add(txt);
+
+            gn.setType(GTBookConstants.MD_COMMENTS);
+            processComments(List.of(txt),path);
+        } else if (nd1 instanceof Paragraph) {
+            Paragraph p1 = (Paragraph) nd1;
+            Node firstChild = p1.getFirstChild();
+            if (firstChild instanceof Comments) {
+                // Paragraph starting with a comment: split comment children
+                // from the visible text and register the comments.
+                List<String> comments = new ArrayList<>();
+                StringBuilder sb = new StringBuilder();
+                ReversiblePeekingIterable<Node> children = p1.getChildren();
+                children.forEach(n -> {
+                    //System.out.println(getMDTxt(n));
+                    //System.out.println();
+                    String t1 = getText(n);
+                    if (n instanceof Comments) {
+                        comments.add(t1);
+                    } else {
+                        sb.append(t1);
+                    }
+                });
+                String htm = getMDTxt(p1);
+                String txt = comments.get(0) + sb.toString() + "\n";
+                //System.out.println("path="+path.toString());
+                //System.out.println("txt="+txt);
+                hs2.add(htm);
+                ts2.add(txt);
+                processComments(comments, path);
+            } else if (firstChild instanceof Image) {
+                // Paragraph starting with an image: keep only meaningful
+                // image captions in the collected text.
+                StringBuilder sb = new StringBuilder();
+                ReversiblePeekingIterable<Node> children = p1.getChildren();
+                children.forEach(n -> {
+                    String t1 = getText(n);
+                    if (n instanceof Image) {
+                        if (SenUtil.isMeaninglessString(t1)){
+                        }else{
+                            sb.append(t1);
+                        }
+                    } else {
+                        sb.append(t1);
+                    }
+                });
+                String htm = getMDTxt(p1);
+                String txt = sb.toString() + "\n";
+                hs2.add(htm);
+                ts2.add(txt);
+
+            } else {
+                String md = getMDTxt(p1);
+                // pandoc underline spans, e.g. [text]{.underline}
+                if (md.length() > 12) {
+                    String md1 = MDRegxUtil.convertUnderline(md);
+                    if (md1.length() != md.length()) {
+                        hs2.add(md1);
+                        String txt = processHtmlBlock(md1);
+                        ts2.add(txt);
+                        gn.setType(GTBookConstants.MD_HTMLBLOCK);
+                    }
+                }
+            }
+        } else if (nd1 instanceof OrderedList) {
+            String htm = getMDTxt(nd1);
+            String txt = getText(nd1);
+            txt = txt.replaceAll("\n\n\n", "\n\n");
+            //System.out.println(txt);
+            //System.out.println(htm);
+            hs2.add(htm);
+            ts2.add(txt);
+        } else if (nd1 instanceof BulletList) {
+            // added 2025.3.7
+            String htm = SplitUtils.getMDTxt(nd1);
+            String txt = getText(nd1);
+            txt = txt.replaceAll("\n\n", "\n");
+            hs2.add(htm);
+            ts2.add(txt);
+        }
+
+        // Fallback: any node type not handled above (or a Paragraph that
+        // produced no output) is captured as raw markdown + plain text.
+        if (ts2.size() <= 0) {
+
+            // 2024.1.7: cleanup of noisy image notes from pandoc-converted md;
+            // temporarily commented out
+//            String htm = getMDTxt(nd1);
+//            hs2 = MDRegxUtil.splitByBrackets(htm);
+//            //[!, [C:\\Users\\kevin-pc\\Desktop\\2020\\index.png], (/images/1237763635937497090/resource/image9.png), ]
+//            boolean isRepl = false;
+//            if (hs2.size() >= 2) {
+//                isRepl = processImageNote(hs2);
+//            }
+//
+//            //ts2 = new ArrayList<String>();
+//            String txt = null;
+//            if (isRepl) {
+//                String mdTxt1 = GTBookUtil.listToString(hs2, "");
+//                Node nnd1 = MDHelper.PARSER.parse(mdTxt1);
+//                txt = getText(nnd1);
+//            } else {
+//                txt = getText(nd1);
+//            }
+//            ts2.add(txt);
+
+            String htm = getMDTxt(nd1);
+            String txt = getText(nd1);
+            hs2.add(htm);
+            ts2.add(txt);
+
+        }
+
+        //// 2020.8.26: split over-long strings (writeUTF length limit)
+        String txt = ts2.get(0);
+        if (txt.length() > MAX_STRING_LEN) {
+            List<String> ls = splitLongString(txt);
+            ts2 = ls;
+        }
+
+        boolean isTooLong = false;
+        for (String s : hs2) {
+            if (s.length() > MAX_STRING_LEN) {
+                isTooLong = true;
+                break;
+            }
+        }
+
+        if (isTooLong) {
+            List<String> ls = new ArrayList<>();
+            for (String s : hs2) {
+                if (s.length() > MAX_STRING_LEN) {
+                    List<String> ls1 = splitLongString(s);
+                    ls.addAll(ls1);
+                } else {
+                    ls.add(s);
+                }
+            }
+            hs2 = ls;
+        }
+
+        MDElement v = new MDElement(ts2, hs2);
+        gn.setValue(v);
+
+    }
+
+    /**
+     * Splits a string into consecutive chunks of at most MAX_STRING_LEN
+     * characters, preserving order. Used to keep serialized strings within
+     * the writeUTF length limit. An empty input yields an empty list.
+     */
+    private List<String> splitLongString(String s) {
+        List<String> parts = new ArrayList<>();
+        int length = s.length();
+        int start = 0;
+        // Emit full-size chunks while one still fits...
+        while (start + MAX_STRING_LEN <= length) {
+            parts.add(s.substring(start, start + MAX_STRING_LEN));
+            start += MAX_STRING_LEN;
+        }
+        // ...then the remainder, if any.
+        if (start < length) {
+            parts.add(s.substring(start));
+        }
+        return parts;
+    }
+
+    /*
+    public GTBook renderToBook(DocTree dt) throws IOException {
+        //init();
+
+        List<Node> ls = dt.getSource();
+        int[] depthes = dt.getDepthes();
+        int[] fathers = dt.getFathers();
+
+        String[] titles = new String[ls.size()];
+
+        GTNode root = GTNode.buildRoot("");
+        GTNode[] bns = new GTNode[depthes.length];
+
+        GTBook book = new GTBook();
+        book.setRoot(root);
+
+        //System.out.println("headings="+dt.getHeadings());
+
+        if (dt.getHeadings() > 0) {
+            int baseNo = 1;
+            for (int i = 0; i < depthes.length; i++) {
+
+                Node nd = ls.get(i);
+
+                if (depthes[i] >= 0) {
+
+                    //System.out.println(i+" "+txt1);
+                    String htm1 = getMDTxt(nd);
+                    List<String> hs1 = MDRegxUtil.splitByBrackets(htm1);
+
+                    boolean isRemoved = false;
+                    if (hs1.size() > 1) {
+                        isRemoved = processHeadingNote(hs1);
+                    }
+                    String txt1 = null;
+                    if (isRemoved) {
+                        String mdTxt1 = GTBookUtil.listToString(hs1, "");
+                        Node nnd1 = MDHelper.PARSER.parse(mdTxt1);
+                        txt1 = getText(nnd1);
+                        //System.out.println(txt1);
+                    } else {
+                        txt1 = getText(nd);
+                    }
+
+                    List<String> ts1 = new ArrayList<String>();
+                    ts1.add(txt1);
+
+                    //MDElement v1 = new MDElement(txt1,htm1);
+                    MDElement v1 = new MDElement(ts1, hs1);
+                    titles[i] = stripLast(txt1);
+                    GTNode gn = GTNode.buildNode(null, i + baseNo);
+                    gn.setLabel(titles[i]);
+                    gn.setDepth(depthes[i]);
+                    gn.setValue(v1);
+                    gn.setType(GTNode.MD_HEADING);
+                    bns[i] = gn;
+
+                    if (fathers[i] >= 0) {
+                        bns[fathers[i]].addChild(gn);
+                    } else {
+                        root.addChild(gn);
+                    }
+
+                    int j = i + 1;
+                    for (; j < fathers.length; j++) {
+                        if (depthes[j] < 0) {
+                            //基本上就 这 三种类型
+                            //Paragraph{} /HtmlBlock{} /OrderedList{}
+                            Node nd1 = ls.get(j);
+                            // 先进行类型检测
+                            //GTNode.buildLeaf(v,j+1);
+                            //leaf.setDepth(depthes[i]+1);
+
+                            //MDElement v = processNode1(nd1);
+                            GTNode leaf = GTNode.buildLeaf(j + baseNo);
+                            leaf.setDepth(depthes[i] + 1);
+                            processNode(nd1, leaf);
+
+                            gn.addChild(leaf);
+                        } else {
+                            break;
+                        }
+                    }
+
+                    i = j - 1;
+                } else {
+
+                    // 叶子节点,没有标题
+
+                    int nowDepth = root.getDepth() + 1;
+                    MDElement v1 = new MDElement("", "");
+                    GTNode gn = GTNode.buildNode(i + 1);
+                    gn.setDepth(nowDepth);
+                    gn.setLabel(DEFALT_LABEL);
+                    gn.setType(GTNode.MD_NO_HEADING);
+                    gn.setValue(v1);
+
+
+                    if (fathers[i] >= 0) {
+                        bns[fathers[i]].addChild(gn);
+                    } else {
+                        root.addChild(gn);
+                    }
+
+                    baseNo++;
+                    int j = i;
+                    for (; j < fathers.length; j++) {
+                        if (depthes[j] < 0) {
+                            //基本上就 这 三种类型
+                            //Paragraph{} /HtmlBlock{} /OrderedList{}
+                            Node nd1 = ls.get(j);
+                            // 先进行类型检测
+                            //GTNode.buildLeaf(v,j+1);
+                            //leaf.setDepth(depthes[i]+1);
+
+                            //MDElement v = processNode1(nd1);
+                            GTNode leaf = GTNode.buildLeaf(j + baseNo);
+                            leaf.setDepth(nowDepth + 1);
+                            processNode(nd1, leaf);
+
+                            gn.addChild(leaf);
+                        } else {
+                            break;
+                        }
+                    }
+
+                    i = j - 1;
+                }
+            }
+        } else {
+            //没有 章节
+            GTNode ch1 = GTNode.buildNode(null, 1);
+            ch1.setLabel(DEFALT_ROOT_LABEL);
+
+            root.addChild(ch1);
+            for (int i = 0; i < depthes.length; i++) {
+                Node nd = ls.get(i);
+
+                GTNode gn = GTNode.buildLeaf(i + 2);
+                processNode(nd, gn);
+                Object obj = gn.getValue();
+                if (obj != null) {
+                    MDElement e = (MDElement) obj;
+                    List<String> ts = e.getText();
+                    String txt1 = null;
+                    if (ts != null && ts.size() > 0) {
+                        txt1 = ts.get(0);
+                    }
+                    titles[i] = stripLast(txt1);
+                    gn.setLabel(getSub(titles[i]));
+                    //gn.setDepth(depthes[i]);
+                    gn.setDepth(1);
+
+                }
+                ch1.addChild(gn);
+
+            }
+        }
+        return book;
+    }
+
+*/
+    protected String getSub(String text) {
+        if (text == null || text.length() < 10) {
+            return text;
+        } else {
+            return text.substring(0, 10);
+        }
+    }
+
+    private void processMetaMap(GTBook book, Map<String, Object> params) {
+        if (params != null && params.size() > 0) {
+            params.forEach((k, v) -> {
+                List<String> ts = new ArrayList<>();
+                List<String> ms = new ArrayList<>();
+                if (v instanceof String) {
+                    String v1 = (String) v;
+                    ts.add(v1);
+                    ms.add(v1);
+                } else if (v instanceof List) {
+                    List l1 = (List) v;
+                    l1.forEach(k1 -> {
+                        String k11 = k1.toString();
+                        ts.add(k11);
+                        ms.add(k11);
+                    });
+                } else {
+                    String v1 = v.toString();
+                    ts.add(v1);
+                    ms.add(v1);
+                }
+                MDElement me = new MDElement(ts, ms);
+                book.addMeta(k, me);
+            });
+        }
+    }
+
+    private void processMetaBlock(GTBook book, YamlFrontMatterBlock metaBlock) {
+        AbstractYamlFrontMatterVisitor visitor = new AbstractYamlFrontMatterVisitor();
+        visitor.visit(metaBlock);
+        Map<String, List<String>> data = visitor.getData();
+        if (data != null && data.size() > 0) {
+            data.forEach((k, v) -> {
+                List<String> ts = new ArrayList<>();
+                List<String> ms = new ArrayList<>();
+                ts.addAll(v);
+                ms.addAll(v);
+                MDElement me = new MDElement(ts, ms);
+                book.addMeta(k, me);
+            });
+        }
+    }
+
+    /**
+     * Renders a parsed {@link DocTree} into a GTBook tree and stores the result
+     * in {@code this.gbook}.
+     *
+     * <p>Layout: an optional YAML front-matter block becomes book metadata; each
+     * heading becomes an inner GTNode and the run of non-heading nodes following
+     * it becomes its leaf children. If the document has no headings at all,
+     * everything is attached under a single default chapter. Alongside the tree,
+     * an index path (TIntArrayList of child positions from the root) is kept per
+     * node and passed to {@code processNode}.
+     *
+     * @param dt     document tree produced by {@link #parseToDocTree}
+     * @param params optional caller-supplied metadata copied into the book
+     * @throws IOException propagated from node processing
+     */
+    public void renderToBook(DocTree dt, Map<String, Object> params) throws IOException {
+        //init();
+        //DocTree dt = this.tree;
+        List<Node> ls = dt.getSource();
+
+        GTBook book = new GTBook();
+
+        // A YAML front-matter block, when present, is always the first node.
+        if (ls.size() > 0 && (ls.get(0) instanceof YamlFrontMatterBlock)) {
+            //YamlFrontMatterBlock metaBlock = (YamlFrontMatterBlock) ls.remove(0);
+            YamlFrontMatterBlock metaBlock = (YamlFrontMatterBlock) ls.get(0);
+            processMetaBlock(book, metaBlock);
+        }
+
+        // Parallel arrays, one entry per top-level markdown node:
+        // depthes[i] = heading level (negative for non-heading nodes),
+        // fathers[i] = index of the governing heading (negative for top level).
+        int[] depthes = dt.getDepthes();
+        int[] fathers = dt.getFathers();
+
+        String[] titles = new String[ls.size()];
+
+        GTNode root = GTNode.buildRoot("");
+        GTNode[] bns = new GTNode[depthes.length];
+
+        book.setRoot(root);
+
+        if (params != null && params.size() > 0) {
+//            String title = params.get("title");
+//            book.setTitle(title);
+//
+//            String id = params.get("id");
+//            book.setId(id);
+//
+//            String tags = params.get("tags");
+//            if (tags != null) {
+//                book.putMeta("tags", tags);
+//            }
+
+//            params.forEach((k,v) -> {
+//                if (v instanceof String){
+//                    book.putMeta(k,(String)v);
+//                } else if (v instanceof List) {
+//                    List l1 =(List)v;
+//                    List<String> l2 = new ArrayList<>();
+//                    l2.addAll(l1);
+//                    MDElement e = new MDElement(l2,l2);
+//                    book.addMeta(k,e);
+//                }
+//            });
+            processMetaMap(book, params);
+        }
+
+        this.gbook = book;
+        //System.out.println("headings="+dt.getHeadings());
+
+        // The following is the GTDoc part:
+        // per-node index paths inside the GTBook tree.
+        TIntArrayList[] pathes = new TIntArrayList[fathers.length];
+
+        //this.gdocs =  new GTDocs();
+
+        if (dt.getHeadings() > 0) {
+            int baseNo = 1;
+            for (int i = 0; i < depthes.length; i++) {
+
+                Node nd = ls.get(i);
+
+                if (depthes[i] >= 0) {
+                    // Heading node: build an inner tree node from the heading text.
+
+                    //System.out.println(i+" "+txt1);
+                    String htm1 = getMDTxt(nd);
+                    List<String> hs1 = MDRegxUtil.splitByBrackets(htm1);
+
+                    // Headings may carry bracketed notes; strip them before
+                    // deriving the plain title text.
+                    boolean isRemoved = false;
+                    if (hs1.size() > 1) {
+                        isRemoved = processHeadingNote(hs1);
+                    }
+                    String txt1 = null;
+                    if (isRemoved) {
+                        // Re-parse the cleaned markdown to get the title text.
+                        String mdTxt1 = GTBookUtil.listToString(hs1, "");
+                        Node nnd1 = MDHelper.PARSER.parse(mdTxt1);
+                        txt1 = getText(nnd1);
+                        //System.out.println(txt1);
+                    } else {
+                        txt1 = getText(nd);
+                    }
+
+                    List<String> ts1 = new ArrayList<String>();
+                    ts1.add(txt1);
+
+                    MDElement v1 = new MDElement(ts1, hs1);
+                    titles[i] = stripLast(txt1);
+                    GTNode gn = GTNode.buildNode(null, i + baseNo);
+                    gn.setLabel(titles[i]);
+                    gn.setDepth(depthes[i]);
+                    gn.setValue(v1);
+                    gn.setType(GTBookConstants.MD_HEADING);
+                    bns[i] = gn;
+                    TIntArrayList path = null;
+                    if (fathers[i] >= 0) {
+                        // Attach under the parent heading and extend its index path.
+
+                        bns[fathers[i]].addChild(gn);
+                        int i1 = bns[fathers[i]].size() - 1;
+                        TIntArrayList fp = pathes[fathers[i]];
+                        path = (TIntArrayList) fp.clone();
+                        path.add(i1);
+                        pathes[i] = path;
+
+                    } else {
+                        // Top-level heading: attach directly under the root.
+                        root.addChild(gn);
+                        int i1 = root.size() - 1;
+                        path = new TIntArrayList();
+                        path.add(i1);
+                        pathes[i] = path;
+                    }
+
+
+                    // Consume the run of non-heading nodes that follows this
+                    // heading; each becomes a leaf child of the heading node.
+                    int j = i + 1;
+                    for (; j < fathers.length; j++) {
+                        if (depthes[j] < 0) {
+                            // basically only these three node types occur here:
+                            // Paragraph{} / HtmlBlock{} / OrderedList{}
+                            Node nd1 = ls.get(j);
+                            // type detection happens inside processNode
+                            //GTNode.buildLeaf(v,j+1);
+                            //leaf.setDepth(depthes[i]+1);
+
+                            GTNode leaf = GTNode.buildLeaf(j + baseNo);
+                            leaf.setDepth(depthes[i] + 1);
+                            gn.addChild(leaf);
+                            TIntArrayList path1 = (TIntArrayList) path.clone();
+                            path1.add(gn.size() - 1);
+                            processNode(nd1, leaf, path1);
+
+                        } else {
+                            break;
+                        }
+                    }
+
+                    i = j - 1; // skip past the consumed leaves
+                } else {
+
+                    // Leaf content with no preceding heading: synthesize a
+                    // default-labelled container node for it.
+
+                    int nowDepth = root.getDepth() + 1;
+                    MDElement v1 = new MDElement("", "");
+                    GTNode gn = GTNode.buildNode(i + baseNo);
+                    gn.setDepth(nowDepth);
+                    gn.setLabel(DEFALT_LABEL);
+                    gn.setType(GTBookConstants.MD_NO_HEADING);
+                    gn.setValue(v1);
+                    bns[i] = gn;
+                    TIntArrayList path = null;
+                    if (fathers[i] >= 0) {
+                        bns[fathers[i]].addChild(gn);
+                        int i1 = bns[fathers[i]].size() - 1;
+                        TIntArrayList fp = pathes[fathers[i]];
+                        path = (TIntArrayList) fp.clone();
+                        path.add(i1);
+                        pathes[i] = path;
+                    } else {
+                        root.addChild(gn);
+                        int i1 = root.size() - 1;
+                        path = new TIntArrayList();
+                        path.add(i1);
+                        pathes[i] = path;
+                    }
+
+                    // The synthesized container shifts all later numbering by one.
+                    baseNo++;
+                    int j = i;
+                    for (; j < fathers.length; j++) {
+                        if (depthes[j] < 0) {
+                            // basically only these three node types occur here:
+                            // Paragraph{} / HtmlBlock{} / OrderedList{}
+                            Node nd1 = ls.get(j);
+                            // type detection happens inside processNode
+                            //GTNode.buildLeaf(v,j+1);
+                            //leaf.setDepth(depthes[i]+1);
+
+                            //MDElement v = processNode1(nd1);
+                            GTNode leaf = GTNode.buildLeaf(j + baseNo);
+                            leaf.setDepth(nowDepth + 1);
+                            gn.addChild(leaf);
+                            TIntArrayList path1 = (TIntArrayList) path.clone();
+                            path1.add(gn.size() - 1);
+                            processNode(nd1, leaf, path1);
+
+
+//							GTDoc gdoc = nodeToGDoc(leaf, path,bns);
+//							gdocs.add(gdoc);
+                        } else {
+                            break;
+                        }
+                    }
+
+                    i = j - 1; // skip past the consumed leaves
+                }
+            }
+        } else {
+            // No headings at all: attach everything under one default chapter.
+            GTNode ch1 = GTNode.buildNode(null, 1);
+            ch1.setLabel(DEFALT_ROOT_LABEL);
+
+            root.addChild(ch1);
+            TIntArrayList path = new TIntArrayList();
+            int i1 = root.size() - 1;
+            path.add(i1);
+
+            for (int i = 0; i < depthes.length; i++) {
+                Node nd = ls.get(i);
+
+                //MDElement v = processNode1(nd);
+                GTNode gn = GTNode.buildLeaf(i + 2);
+                ch1.addChild(gn);
+                TIntArrayList path1 = (TIntArrayList) path.clone();
+                path1.add(ch1.size() - 1);
+                processNode(nd, gn, path1);
+                Object obj = gn.getValue();
+                if (obj != null) {
+                    // Use the leaf's own (truncated) first text line as its label.
+                    MDElement e = (MDElement) obj;
+                    List<String> ts = e.getText();
+                    String txt1 = null;
+                    if (ts != null && ts.size() > 0) {
+                        txt1 = ts.get(0);
+                    }
+                    titles[i] = stripLast(txt1);
+                    gn.setLabel(getSub(titles[i]));
+                    //gn.setDepth(depthes[i]);
+                    gn.setDepth(1);
+
+                }
+
+//				GTDoc gdoc = nodeToGDoc(gn, path,bns);
+//				gdocs.add(gdoc);
+            }
+        }
+        // FAQ comments collected during node processing, if any, become metadata.
+        if (faqComments != null){
+            book.addMeta(GTBookConstants.KEY_MD_COMMENTS,faqComments);
+        }
+
+    }
+
+//	public void parse(File mdFile,Map<String,String> params,File txtFile,File bookFile,File gdocFile,File bookToMdFile) throws IOException{
+//		log.info("start parse markdown file : "+mdFile.getAbsolutePath());
+//		parse(mdFile,params);
+//
+////		log.info("start toTxt : "+txtFile.getAbsolutePath());
+////		toTxt(txtFile);
+//
+//		log.info("start toGTBook : "+bookFile.getAbsolutePath());
+//        gbook.saveZipObj(bookFile);
+//
+////        log.info("start toGTDoc : "+gdocFile.getAbsolutePath());
+////        gdocs.save(gdocFile);
+//
+////        if (bookToMdFile != null) {
+////            log.info("start book to md file : "+bookToMdFile.getAbsolutePath());
+////            gbook.toMd(bookToMdFile);
+////        }
+//	}
+//
+//	public void parse(File mdFile,Map<String,String> params,File txtFile,File bookFile,File gdocFile) throws IOException{
+//		parse(mdFile, params, txtFile, bookFile, gdocFile, null);
+//	}
+
+//	public void toTxt(File fn) throws IOException{
+//		TIntArrayList path = new TIntArrayList();
+//		List<String> ls = new ArrayList<>();
+//
+//		GTNode root = gbook.getRoot();
+//
+//		IVisitor visitor = new IVisitor() {
+//			@Override
+//			public void visit(INode now, TIntArrayList path) {
+//
+//				if (now.isLeaf()){
+//					String text = GTBookUtil.listToString(now.getText(), "\n");
+//					ls.add(text);
+//				}else{
+//					String text = now.getLabel();
+//					ls.add(text);
+//				}
+//			}
+//		};
+//		root.traverseDescendants(path, visitor);
+//
+//		BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fn), "utf-8"));
+//		try {
+//			for(String s : ls){
+//				s= s.replace("<br>", "\n");
+//				out.write(s);
+//				out.write("\n");
+//			}
+//		}finally {
+//			if (out != null){
+//				out.close();
+//			}
+//		}
+//	}
+
+
+    public void parse(File mdFile, Map<String, Object> params) throws IOException {
+        this.source = mdFile;
+        this.gparams = params;
+
+        String charSet = "utf-8";
+        Object co = params.remove("charset");
+        if (co != null) {
+            charSet = (String) co;
+        }
+        String s = FileUtil.getStringFromFile(mdFile, charSet);
+        parse(s, params);
+    }
+
+    public void parse(String text, Map<String, Object> params) throws IOException {
+
+        DocTree dt = parseToDocTree(text, params);
+        renderToBook(dt, params);
+
+    }
+
+    public DocTree parseToDocTree(String text, Map<String, Object> params) throws IOException {
+        //s = CRLFProcess(s);
+        // 去除图形的{width="
+        String content = MDHelper.stripImageBorder(text);
+
+        if (params != null) {
+            Object fo1 = params.get(MdContants.FROM_URL);
+            Object to1 = params.get(MdContants.TO_URL);
+            if (fo1 != null) {
+                String fromUrl = (String) fo1;
+                String toUrl = "";
+
+                if (to1 != null) {
+                    toUrl = (String) to1;
+                }
+                content = MDUrlProcessor.replaceUrl(content, fromUrl, toUrl);
+            }
+
+            Object urlPrefix = params.get(MdContants.URL_PREFIX);
+            if (urlPrefix != null) {
+                content = MDUrlProcessor.appendUrlPrefix(content, (String) urlPrefix);
+            }
+        }
+
+        Node doc = MDHelper.toMDNode(content);
+        DocTree dt = MDHelper.getDocumentTree(doc);
+        this.tree = dt;
+        return dt;
+    }
+
+
+    public GTBook getGBook() {
+        return gbook;
+    }
+
+
+}

+ 312 - 0
gtbook/src/main/java/org/cnnlp/data/md/DocTree.java

@@ -0,0 +1,312 @@
+package org.cnnlp.data.md;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+
+import com.vladsch.flexmark.ast.Heading;
+import com.vladsch.flexmark.util.ast.Node;
+import com.vladsch.flexmark.util.ast.TextCollectingVisitor;
+
+//import org.commonmark.node.Heading;
+//import org.commonmark.node.Node;
+//import org.commonmark.renderer.text.TextContentRenderer;
+
+import gnu.trove.TIntArrayList;
+import org.cnnlp.data.util.ArrayUtil;
+
+/**
+ * Flat index of a markdown document's top-level structure.
+ *
+ * <p>The document is held as a list of top-level flexmark {@code Node}s plus
+ * two parallel arrays: {@code depthes[i]} is the heading level of node i
+ * (negative for non-heading nodes) and {@code fathers[i]} is the index of the
+ * governing heading (negative for top level). {@code minLevel}/{@code maxLevel}
+ * track the heading-level range, {@code headings} the heading count.
+ *
+ * <p>Not thread-safe; {@code insert}/{@code append} mutate both this tree and
+ * the arrays of the tree being merged in.
+ */
+public class DocTree implements Serializable {
+
+    // optional raw document text; only read/written via the accessors below
+    String text;
+
+    // top-level markdown nodes, in document order
+    List<Node> source;
+
+    // depthes[i]: heading level of source node i, or -1 for non-heading nodes
+    int[] depthes;
+
+
+    // fathers[i]: index of the governing heading node, or -1 for top level
+    int[] fathers;
+
+    int minLevel = Integer.MAX_VALUE;
+    int maxLevel = -1;
+    int headings = 0;
+
+    //TextContentRenderer textr = TextContentRenderer.builder().build();
+
+
+    public DocTree() {
+    }
+
+    // (Re)allocates the parallel arrays to match the current source list.
+    private void init() {
+        if (source != null) {
+            depthes = new int[source.size()];
+            fathers = new int[source.size()];
+        } else {
+            depthes = new int[0];
+            fathers = new int[0];
+        }
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    public List<Node> getSource() {
+        return source;
+    }
+
+    // Replaces the node list and resets the parallel arrays to zeroed values.
+    public void setSource(List<Node> source) {
+        this.source = source;
+        init();
+    }
+
+    public int getHeadings() {
+        return headings;
+    }
+
+    public void setHeadings(int headings) {
+        this.headings = headings;
+    }
+
+    public int[] getDepthes() {
+        return depthes;
+    }
+
+    public void setDepthes(int[] depthes) {
+        this.depthes = depthes;
+    }
+
+    public int[] getFathers() {
+        return fathers;
+    }
+
+    public void setFathers(int[] fathers) {
+        this.fathers = fathers;
+    }
+
+    public int getMinLevel() {
+        return minLevel;
+    }
+
+    public void setMinLevel(int minLevel) {
+        this.minLevel = minLevel;
+    }
+
+    public int getMaxLevel() {
+        return maxLevel;
+    }
+
+    public void setMaxLevel(int maxLevel) {
+        this.maxLevel = maxLevel;
+    }
+
+//	public void setDepth(int index,int depth){
+//		for (int i = index-1; i >=0; i--) {
+//			if (depthes[i] < depth){
+//				fathers[index] = i;
+//				break;
+//			}
+//		}
+//		depthes[index] = depth;
+//	}
+
+    // Records depth and father for one node and keeps min/max level in sync.
+    // Negative depths (non-heading nodes) do not affect minLevel.
+    public void setDepthAndFather(int index, int depth, int father) {
+        depthes[index] = depth;
+        fathers[index] = father;
+        if (depth > maxLevel) {
+            maxLevel = depth;
+        }
+        if (depth >= 0 && depth < minLevel) {
+            minLevel = depth;
+        }
+    }
+
+    // Scans backwards from `from` for the nearest heading whose level is
+    // strictly shallower than `depth`, following father links upward.
+    // Returns -1 when no such heading exists (node is top level).
+    private int getFather(int from, int depth) {
+        int father = from - 1;
+        int i = father;
+        while (i >= 0 && depthes[i] < 0) {
+            i--;
+        }
+        father = i;
+        while (father >= 0 && depthes[father] >= depth) {
+            father = fathers[father];
+        }
+        return father;
+    }
+
+    /**
+     * Builds the depth/father index for the given node list.
+     *
+     * <p>Heading nodes take their flexmark level as depth; their father is the
+     * previous heading (deeper level), a shared father (same level), or the
+     * nearest shallower heading (shallower level). Non-heading nodes get depth
+     * -1 and the most recent heading as father.
+     */
+    public void parse(List<Node> source) {
+        setSource(source);
+        int lastIndex = -1;
+        int lastDepth = -1;
+        int heads = 0;
+        for (int i = 0; i < source.size(); i++) {
+            Node nd = source.get(i);
+
+            if (nd instanceof Heading) {
+                Heading h1 = (Heading) nd;
+                //String s1 = textr.render(h1);
+                //String s1 = h1.getText().toString();
+                //System.out.println("i="+i+" -> "+h1.getLevel()+" "+s1);
+                int depth = h1.getLevel();
+                if (depth == lastDepth) {
+                    // sibling heading: shares the previous heading's father
+                    setDepthAndFather(i, depth, fathers[lastIndex]);
+                } else if (depth < lastDepth) {
+                    // shallower heading: search backwards for its real father
+                    setDepthAndFather(i, depth, getFather(i, depth));
+                } else {
+                    // deeper heading: nested under the previous heading
+                    setDepthAndFather(i, depth, lastIndex);
+                }
+                lastIndex = i;
+                lastDepth = depth;
+                heads++;
+            } else {
+                //String s1 = textr.render(nd);
+                //System.out.println("[[\n"+s1+"]]\n");
+                setDepthAndFather(i, -1, lastIndex);
+            }
+        }
+        this.headings = heads;
+    }
+
+    // Effective depth of node `index`: its own heading level, or its governing
+    // heading's level + 1 for non-heading nodes.
+    // NOTE(review): a non-heading node before any heading has father -1 and
+    // would index out of bounds here — presumably callers avoid that case.
+    public int getDepth(int index) {
+        if (depthes[index] >= 0) {
+            return depthes[index];
+        } else {
+            return depthes[fathers[index]] + 1;
+        }
+    }
+
+    // Prints an indented outline of the tree to stdout (debugging aid).
+    public void outDebugInfo() {
+        //TextContentRenderer textr = TextContentRenderer.builder().build();
+
+        TextCollectingVisitor textr = new TextCollectingVisitor();
+
+        for (int i = 0; i < depthes.length; i++) {
+            int depth = getDepth(i);
+            for (int j = 0; j < depth; j++) {
+                System.out.print(">>");
+            }
+            Node nd = source.get(i);
+            String content = null;
+            if (nd instanceof Heading) {
+                Heading h1 = (Heading) nd;
+                //System.out.println(h1.getText().toString());
+                content = textr.collectAndGetText(h1);
+                if (content.length() <= 0) {
+                    content = h1.getText().toString();
+                }
+            } else {
+                // non-heading nodes: only the first line is shown
+                content = textr.collectAndGetText(nd);
+                int i1 = content.indexOf("\n");
+                if (i1 > 0) {
+                    content = content.substring(0, i1);
+                }
+            }
+            System.out.println(content);
+        }
+
+        System.out.println("minLevel=" + minLevel + " maxLevel=" + maxLevel);
+    }
+
+    // Returns the chain of ancestor indices of `from`, nearest first.
+    public int[] getFather(int from) {
+        TIntArrayList ti = new TIntArrayList();
+        int father = fathers[from];
+
+        while (father >= 0) {
+            ti.add(father);
+            father = fathers[father];
+        }
+        return ti.toNativeArray();
+    }
+
+    /**
+     * Splices another DocTree's nodes in directly after node {@code idx},
+     * making node {@code idx} the father of the inserted tree's top level.
+     *
+     * <p>The inserted fathers are rebased by {@code idx + 1} (so a former -1
+     * becomes {@code idx}), inserted heading depths are shifted to sit one
+     * level below {@code depthes[idx]}, and fathers of all entries after the
+     * splice that pointed past {@code idx} are shifted by the inserted count.
+     * Mutates {@code dt}'s arrays in place.
+     *
+     * @return the number of inserted nodes
+     */
+    public int insert(int idx, DocTree dt) {
+        this.headings = this.headings + dt.getHeadings();
+        int nowDepth = depthes[idx];
+        int minLevel1 = dt.getMinLevel();
+        //System.out.println("minLevel1="+minLevel1);
+        List<Node> source2 = dt.getSource();
+        int[] depthes2 = dt.getDepthes();
+        int[] fathers2 = dt.getFathers();
+        for (int i = 0; i < depthes2.length; i++) {
+            fathers2[i] = fathers2[i] + idx + 1;
+
+            int depth = depthes2[i];
+            if (depth >= 0) {
+                depth = depthes2[i] + nowDepth - (minLevel1 - 1);
+                depthes2[i] = depth;
+
+                if (depth > maxLevel) {
+                    maxLevel = depth;
+                }
+                if (depth >= 0 && depth < minLevel) {
+                    minLevel = depth;
+                }
+            }
+        }
+        source.addAll(idx + 1, source2);
+        depthes = ArrayUtil.insertArrayAt(depthes, depthes2, idx + 1);
+        fathers = ArrayUtil.insertArrayAt(fathers, fathers2, idx + 1);
+
+        int insertNum = fathers2.length;
+        for (int i = idx + 1 + insertNum; i < fathers.length; i++) {
+            if (fathers[i] > idx) {
+                fathers[i] = fathers[i] + insertNum;
+            }
+        }
+        return insertNum;
+    }
+
+    /**
+     * Appends another DocTree's nodes at the end of this tree.
+     *
+     * <p>Non-negative fathers are rebased by the current length; heading depths
+     * are shifted so the appended tree's shallowest level becomes 1. A heading-
+     * less tree first has all depths filled with 1. Mutates {@code dt}'s arrays
+     * in place.
+     *
+     * @return the number of appended nodes
+     */
+    public int append(DocTree dt) {
+        int idx = source.size();
+        int nowDepth = 1;
+
+        this.headings = this.headings + dt.getHeadings();
+//        System.out.println("headings1="+headings1);
+//        if (minLevel1 <0){
+//            System.out.println("minLevel1="+minLevel1);
+//        }
+
+        List<Node> source2 = dt.getSource();
+        int[] depthes2 = dt.getDepthes();
+        int[] fathers2 = dt.getFathers();
+
+        int headings1 = dt.getHeadings();
+        if (headings1 <= 0) {
+            Arrays.fill(depthes2, nowDepth);
+            dt.setMinLevel(1);
+        }
+        int minLevel1 = dt.getMinLevel();
+        for (int i = 0; i < depthes2.length; i++) {
+            if (fathers2[i] >= 0) {
+                fathers2[i] = fathers2[i] + idx;
+            }
+            int depth = depthes2[i];
+            if (depth >= 0) {
+                // depth needs no adjustment
+                // NOTE(review): stale comment? — the line below does rebase it
+                depth = depthes2[i] + nowDepth - minLevel1;
+                depthes2[i] = depth;
+
+                if (depth > maxLevel) {
+                    maxLevel = depth;
+                }
+                if (depth >= 0 && depth < minLevel) {
+                    minLevel = depth;
+                }
+            }
+        }
+        source.addAll(idx, source2);
+        depthes = ArrayUtil.insertArrayAt(depthes, depthes2, idx);
+        fathers = ArrayUtil.insertArrayAt(fathers, fathers2, idx);
+
+        int insertNum = fathers2.length;
+//        for (int i = idx + 1 + insertNum; i < fathers.length; i++) {
+//            if (fathers[i] > idx) {
+//                fathers[i] = fathers[i] + insertNum;
+//            }
+//        }
+        return insertNum;
+    }
+
+}

Some files were not shown because too many files changed in this diff