/*
 * CommonWordFinder.java
 * Created on Feb 24, 2005
 */
package word;

import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;

/**
 * Finds the most common words in a given text.
 *
 * <p>A word is a maximal run of code points for which
 * {@link Character#isLetter(int)} is true; every other code point acts as a
 * separator.
 */
public final class CommonWordFinder {

    private final WordBagFactory bagFactory;

    /**
     * Constructs a common word finder that counts words with bags produced by
     * the specified factory.
     *
     * @param bagFactory factory asked for a fresh {@code WordBag} on every
     *                   call to {@link #findCommonWords(String, int)}
     */
    public CommonWordFinder(WordBagFactory bagFactory) {
        this.bagFactory = bagFactory;
    }

    /**
     * Splits {@code text} into words, adds each word to a fresh word bag and
     * returns the most common words seen.
     *
     * @param text the text to scan
     * @param num  the maximum number of entries to return; {@code 0} yields an
     *             empty list. A negative value is not rejected: the result is
     *             then never trimmed.
     * @return a list of {@code WordCount} elements ordered from the lowest
     *         count to the highest
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public List/*<WordCount>*/ findCommonWords(String text, int num) {
        CommonList commonList = new CommonList(num);
        WordBag bag = bagFactory.makeWordBag();
        StringBuilder word = new StringBuilder();
        int length = text.length();

        // Walk by code point so letters outside the BMP (surrogate pairs) are
        // recognised as letters instead of splitting the surrounding word.
        for (int i = 0; i < length; ) {
            int codePoint = text.codePointAt(i);
            i += Character.charCount(codePoint);
            if (Character.isLetter(codePoint)) {
                word.appendCodePoint(codePoint);
            } else if (word.length() > 0) {
                commonList.add(bag.add(word.toString()));
                word.setLength(0);
            }
        }

        // Flush the final word when the text ends with a letter; without this
        // the last word of such a text would never be counted.
        if (word.length() > 0) {
            commonList.add(bag.add(word.toString()));
        }
        return commonList.common;
    }

    /**
     * Bounded list of word counts kept in ascending order of count.
     *
     * <p>NOTE(review): a stale entry for a word is only removed when it
     * reports the same count as the incoming one, so this presumably relies
     * on {@code WordBag.add} handing back the same, updated {@code WordCount}
     * instance for a repeated word -- confirm against the WordBag
     * implementation.
     */
    private static class CommonList {

        /** Candidates must have a count strictly greater than this to be inserted. */
        private int threshold = 0;

        /** Maximum number of entries retained. */
        private final int num;

        /** The retained {@code WordCount} entries, lowest count first. */
        private final LinkedList/*<WordCount>*/ common;

        private CommonList(int num) {
            this.num = num;
            this.common = new LinkedList();
        }

        /**
         * Offers a word count to the list, keeping the list sorted by count
         * and at most {@code num} entries long.
         *
         * @param strCount the word count to insert
         */
        private void add(WordCount strCount) {
            // With a limit of zero nothing can be retained; bailing out here
            // also avoids calling getFirst() on an emptied list below.
            if (num == 0) {
                return;
            }
            int count = strCount.count();
            if (count <= threshold) {
                return;
            }

            ListIterator/*<WordCount>*/ i = common.listIterator();
            while (i.hasNext()) {
                WordCount sc = (WordCount) i.next();
                int otherCount = sc.count();
                if (count < otherCount) {
                    // Found the first larger entry: step back so the new
                    // entry is inserted right before it.
                    i.previous();
                    break;
                }
                if (count == otherCount
                        && strCount.word().equalsIgnoreCase(sc.word())) {
                    // Same word with the same count is already present; drop
                    // it so the word is not listed twice.
                    i.remove();
                }
            }

            // Insert at the cursor and, if the list overflowed, evict the
            // least common entry and raise the threshold accordingly.
            i.add(strCount);
            if (common.size() == num + 1) {
                common.removeFirst();
                threshold = ((WordCount) common.getFirst()).count();
            }
        }
    }
}