package dasherJava.core.languageModeling;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Holds the list of all characters of an alphabet together with lookup structures for accessing
 * them efficiently.
 *
 * <p>Characters are usually referenced by a symbol index. Because of the way the language model is
 * implemented, two kinds of symbol indices have to be distinguished:
 * <ul>
 *   <li><b>world symbol indices</b>, which number all characters in the order they were added;</li>
 *   <li><b>language model symbol indices</b>, which number only the characters that do not have a
 *       fixed probability (i.e. whose {@code getFixedProbability()} is negative).</li>
 * </ul>
 */
public class LanguageAlphabet {
    /** Maps each character to its world symbol index. */
    private final Map<LanguageCharacter, Integer> characterToWorldSymbolIndex = new HashMap<>();
    /** Maps the Unicode value of each non-fixed-probability character to its language model symbol index. */
    private final Map<Integer, Integer> unicodeToLanguageModelSymbolIndex = new HashMap<>();
    /** Characters with a fixed probability, keyed by world symbol index. */
    private final Map<Integer, LanguageCharacter> fixedProbabilityCharacters = new HashMap<>();
    /** All characters; the list position of a character is its world symbol index. */
    private final List<LanguageCharacter> characters = new ArrayList<>();

    //Note: The order of character groups does not matter, they are automatically sorted when rendering.
    //Groups may overlap, there may be subgroups (i.e. groups fully contained within another group)
    //and there may be characters not belonging to any group.
    //Group specifications use world symbol indices.
    private final List<CharacterGroup> characterGroups = new ArrayList<>();

    /**
     * Returns the total number of characters in this alphabet, including fixed-probability ones.
     *
     * @return the number of characters added so far
     */
    public int getNumOfCharacters() {
        return characters.size();
    }

    /**
     * Returns the character stored at the given world symbol index.
     *
     * @param symbolIndex world symbol index of the character
     * @return the character at that index
     * @throws IndexOutOfBoundsException if the index is outside {@code [0, getNumOfCharacters())}
     */
    public LanguageCharacter getLanguageCharacter(int symbolIndex) {
        return characters.get(symbolIndex);
    }

    /**
     * Looks up the world symbol index of the given character.
     *
     * @param character the character to look up
     * @return the world symbol index of the character
     * @throws IllegalArgumentException if the alphabet does not contain the character
     */
    public int getWorldSymbolIndex(LanguageCharacter character) throws IllegalArgumentException {
        Integer symbolIndex = characterToWorldSymbolIndex.get(character);
        // Map.get returns null for an unknown key; comparing that with "< 0" would auto-unbox
        // null and throw a NullPointerException instead of the documented exception.
        if (symbolIndex == null) {
            throw new IllegalArgumentException("Alphabet doesn't contain the given LanguageCharacter");
        }
        return symbolIndex;
    }

    /**
     * Looks up the language model symbol index of the character with the given Unicode value.
     * Only characters without a fixed probability have such an index.
     *
     * @param unicode Unicode value of the character
     * @return the language model symbol index
     * @throws UnicodeNotFoundException if no non-fixed-probability character with this Unicode
     *         value has been added
     */
    public int getLanguageModelSymbolIndex(int unicode) throws UnicodeNotFoundException {
        Integer symbolIndex = unicodeToLanguageModelSymbolIndex.get(unicode);
        if (symbolIndex == null) {
            throw new UnicodeNotFoundException(unicode);
        }
        return symbolIndex;
    }

    /**
     * Returns the fixed-probability characters keyed by world symbol index.
     *
     * <p>The returned map is the internal, live map of this alphabet (not a copy).
     *
     * @return map from world symbol index to fixed-probability character
     */
    public Map<Integer, LanguageCharacter> getFixedProbabilityCharacters() {
        return fixedProbabilityCharacters;
    }

    /**
     * Appends a character to the alphabet. Its world symbol index is the number of characters
     * present before the call. A character whose {@code getFixedProbability()} is {@code >= 0} is
     * registered as a fixed-probability character; any other character receives the next free
     * language model symbol index.
     *
     * <p>Note: the Unicode duplicate check only consults characters that take part in the language
     * model; fixed-probability characters are not registered in that lookup.
     *
     * @param character the character to add
     * @throws IllegalArgumentException if the character is already contained, or if its Unicode
     *         value is already used by a non-fixed-probability character
     */
    public void addCharacter(LanguageCharacter character) throws IllegalArgumentException {
        if (characterToWorldSymbolIndex.containsKey(character)) {
            throw new IllegalArgumentException("Alphabet already contains the given LanguageCharacter");
        }
        if (unicodeToLanguageModelSymbolIndex.containsKey(character.getUnicode())) {
            throw new IllegalArgumentException("Alphabet already contains Unicode value " + character.getUnicode());
        }
        // The new character is appended, so its world symbol index equals the current list size.
        int worldSymbolIndex = characters.size();
        characterToWorldSymbolIndex.put(character, worldSymbolIndex);
        if (character.getFixedProbability() >= 0.0f) {
            fixedProbabilityCharacters.put(worldSymbolIndex, character);
        } else {
            // Language model indices are assigned consecutively in insertion order.
            unicodeToLanguageModelSymbolIndex.put(character.getUnicode(), unicodeToLanguageModelSymbolIndex.size());
        }
        characters.add(character);
    }

    /**
     * Registers a character group.
     *
     * @param characterGroup the group to add
     */
    public void addCharacterGroup(CharacterGroup characterGroup) {
        characterGroups.add(characterGroup);
    }

    /**
     * Returns the registered character groups in the order they were added.
     *
     * <p>The returned list is the internal, live list of this alphabet (not a copy).
     *
     * @return the list of character groups
     */
    public List<CharacterGroup> getCharacterGroups() {
        return characterGroups;
    }

    /**
     * Thrown when a Unicode value has no language model symbol index in the alphabet.
     */
    public static class UnicodeNotFoundException extends RuntimeException {
        private final int unicode;

        /**
         * @param unicode the Unicode value that could not be found
         */
        public UnicodeNotFoundException(int unicode) {
            // Provide a message so the offending value shows up in stack traces and logs.
            super("Alphabet doesn't contain Unicode value " + unicode);
            this.unicode = unicode;
        }

        /**
         * @return the Unicode value that could not be found
         */
        public int getUnicode() {
            return unicode;
        }
    }
}