From 337dae970d67c8cfaa6b0285909c1149571c23f5 Mon Sep 17 00:00:00 2001
From: niexiaolong <26566145@qq.com>
Date: Tue, 29 Aug 2017 14:30:59 +0800
Subject: [PATCH] =?UTF-8?q?=E5=85=A8=E5=88=87=E5=88=86=E5=AE=9E=E7=8E=B0?=
 =?UTF-8?q?=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

全切分算法，返回所有分词结果的合集。
例如：牛奶好喝。切分为：“牛”，“牛奶”，“奶”，“好”，“好喝”，“喝”
在索引的时候，以该种全切分算法存储索引；查询的时候，再用最大Ngram算法进行查询分词。两者配合使用，达到更优的效果。
---
 .../word/segmentation/impl/FullSegmentation.java      | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/src/main/java/org/apdplat/word/segmentation/impl/FullSegmentation.java b/src/main/java/org/apdplat/word/segmentation/impl/FullSegmentation.java
index ce8fb143..ca567e5e 100644
--- a/src/main/java/org/apdplat/word/segmentation/impl/FullSegmentation.java
+++ b/src/main/java/org/apdplat/word/segmentation/impl/FullSegmentation.java
@@ -56,10 +56,11 @@ public List<Word> segImpl(String text) {
         }
         //获取全切分结果
         List<Word>[] array = fullSeg(text);
-        //利用ngram计算分值
-        Map<List<Word>, Float> words = ngram(array);
-        //歧义消解（ngram分值优先、词个数少优先）
-        List<Word> result = disambiguity(words);
+        Set<Word> words = new HashSet<Word>();
+        for(List<Word> wordList : array){
+        	words.addAll(wordList);
+        }
+        List<Word> result =  new ArrayList<Word>(words);
         return result;
     }
     private List<Word> disambiguity(Map<List<Word>, Float> words){
@@ -284,4 +285,4 @@ public static void main(String[] args){
         String text = "蝶舞打扮得漂漂亮亮出现在张公公面前";
         System.out.println(m.seg(text));
     }
-}
\ No newline at end of file
+}