Darfst du auch andere Datenstrukturen verwenden?
Dann würde ich dir zu einem TreeSet raten. Zusätzlich konstruierst du dir noch einen eigenen Datentyp, der ein Wort und seine Häufigkeit umfasst. Den kannst du dann ins TreeSet packen.
EDIT:
Gerade wiedergefunden, vielleicht kannst du dir da ein paar Ideen abschauen.
[code=Java] /** Demo: counts the words of a sample text case-insensitively and prints one "word -> frequency" line per entry. */
public static void main(String[] args) {
    final String sample = "I have a blue house with a blue window Blue is the colour of all that I wear Blue are the streets and all the trees are too I have a girlfriend and she is so blue";

    // false = not case-sensitive, so "Blue" and "blue" share one counter.
    WordFrequencyAnalysis analysis = new WordFrequencyAnalysis(false);
    analysis.add(sample);

    // Entries are printed in the iteration order of the returned set.
    for (WordFrequency entry : analysis.getWordFrequencies()) {
        String line = entry.getWord() + " -> " + entry.getFrequency();
        System.out.println(line);
    }
}[/code]
[code=Java]/**
 * Counts how often each word occurs in the texts passed to {@link #add(String)}.
 *
 * <p>A word is a token produced by {@link StringTokenizer} (default delimiters) after the
 * punctuation marks {@code , . ! ? - _} have been removed from the text. Unless the analysis
 * is case-sensitive, words are lower-cased both when they are counted and when they are
 * looked up.
 */
public class WordFrequencyAnalysis {

    /** Counters keyed by the (possibly lower-cased) word. */
    private final Map<String, WordFrequency> wordFrequencies;

    /** Whether words differing only in case are counted separately. */
    private final boolean caseSensitive;

    /** Creates a case-insensitive analysis. */
    public WordFrequencyAnalysis() {
        this(false);
    }

    /**
     * Creates an analysis.
     *
     * @param caseSensitive {@code true} to count "Blue" and "blue" separately,
     *                      {@code false} to fold all words to lower case
     */
    public WordFrequencyAnalysis(boolean caseSensitive) {
        this.wordFrequencies = new HashMap<String, WordFrequency>();
        this.caseSensitive = caseSensitive;
    }

    /**
     * Splits the text into words and counts each of them.
     *
     * @param text the text to analyse; must not be {@code null}
     */
    public void add(String text) {
        // remove punctuation marks
        String cleaned = text.replaceAll(",|\\.|!|\\?|-|_", "");
        StringTokenizer tokenizer = new StringTokenizer(cleaned);
        while (tokenizer.hasMoreTokens()) {
            addWord(tokenizer.nextToken());
        }
    }

    /** Counts a single word, creating its counter on first occurrence. */
    private void addWord(String word) {
        String key = normalize(word);
        WordFrequency counter = wordFrequencies.get(key);
        if (counter == null) {
            // A new WordFrequency starts at frequency 1.
            wordFrequencies.put(key, new WordFrequency(key));
        } else {
            counter.incrementFrequency(1);
        }
    }

    /**
     * Returns a snapshot of all counters, sorted by the natural ordering of
     * {@link WordFrequency}.
     *
     * @return a new sorted set; the contained {@link WordFrequency} objects are the live
     *         counters of this analysis, not copies
     */
    public Set<WordFrequency> getWordFrequencies() {
        return new TreeSet<WordFrequency>(wordFrequencies.values());
    }

    /**
     * Returns how often the given word has been counted so far.
     *
     * <p>The lookup applies the same case folding as counting does, so in a case-insensitive
     * analysis {@code getWordFrequency("Blue")} and {@code getWordFrequency("blue")} return
     * the same value.
     *
     * @param word the word to look up; {@code null} yields 0
     * @return the frequency of the word, or 0 if it has not been counted
     */
    public int getWordFrequency(String word) {
        if (word == null) {
            return 0;
        }
        WordFrequency counter = wordFrequencies.get(normalize(word));
        return counter == null ? 0 : counter.getFrequency();
    }

    /**
     * @return {@code true} if words differing only in case are counted separately
     */
    public boolean isCaseSensitive() {
        return caseSensitive;
    }

    /**
     * Maps a word to the key under which it is counted.
     *
     * <p>Lower-casing uses {@code Locale.ROOT} so that keys do not depend on the default
     * locale of the JVM (e.g. "I" must not become a dotless "ı" under a Turkish locale).
     */
    private String normalize(String word) {
        return isCaseSensitive() ? word : word.toLowerCase(java.util.Locale.ROOT);
    }
}[/code]
[code=Java]/**
 * A word together with the number of times it has been counted.
 *
 * <p>The natural ordering sorts by descending frequency; entries with the same frequency are
 * ordered by word ascending (a {@code null} word sorts before any non-null word).
 *
 * <p>The frequency is mutable and takes part in {@link #equals(Object)},
 * {@link #hashCode()} and {@link #compareTo(WordFrequency)}. Do not change it while the
 * object is stored in a hash-based or sorted collection.
 */
public class WordFrequency implements Comparable<WordFrequency> {

    private final String word;
    private int frequency;

    /**
     * Creates a counter for the given word with an initial frequency of 1.
     *
     * @param word the counted word
     */
    public WordFrequency(String word) {
        this.word = word;
        this.frequency = 1;
    }

    /**
     * Increments the frequency by one.
     *
     * <p>Misspelled name kept for backward compatibility; prefer
     * {@link #incrementFrequency()}.
     */
    public void incrementFreqency() {
        incrementFrequency(1);
    }

    /** Increments the frequency by one. */
    public void incrementFrequency() {
        incrementFrequency(1);
    }

    /**
     * Adds the given amount to the frequency.
     *
     * @param i the amount to add
     */
    public void incrementFrequency(int i) {
        frequency += i;
    }

    /** @return the counted word */
    public String getWord() {
        return word;
    }

    /** @return the current frequency */
    public int getFrequency() {
        return frequency;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + frequency;
        result = prime * result + ((word == null) ? 0 : word.hashCode());
        return result;
    }

    /** Two instances are equal when both word and frequency are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        WordFrequency other = (WordFrequency) obj;
        if (frequency != other.frequency) {
            return false;
        }
        return word == null ? other.word == null : word.equals(other.word);
    }

    /**
     * Orders by descending frequency, then by word ascending.
     *
     * <p>Null words are handled here as well, so that the ordering accepts every instance
     * that {@link #equals(Object)} and {@link #hashCode()} accept and returns 0 exactly
     * when {@code equals} returns {@code true}.
     */
    @Override
    public int compareTo(WordFrequency o) {
        if (frequency != o.frequency) {
            // Higher frequency sorts first.
            return frequency < o.frequency ? 1 : -1;
        }
        if (word == null) {
            return o.word == null ? 0 : -1;
        }
        if (o.word == null) {
            return 1;
        }
        return word.compareTo(o.word);
    }

    @Override
    public String toString() {
        return word + " -> " + frequency;
    }
}[/code]