// DasherJava/src/dasherJava/core/languageModeling/LanguageAlphabet.java
package dasherJava.core.languageModeling;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Holds the list of all characters of an alphabet together with lookup structures for
 * accessing them efficiently.
 *
 * <p>Characters are usually referenced by a symbol index. Because of the way the language
 * model is implemented, two kinds of symbol indices are distinguished:
 * <ul>
 *   <li><b>world symbol indices</b>, which number all characters in the order they were added;</li>
 *   <li><b>language model symbol indices</b>, which number only the characters that do not
 *       have a fixed probability, again in the order they were added.</li>
 * </ul>
 *
 * <p>A character counts as "fixed probability" when {@code getFixedProbability()} returns a
 * value {@code >= 0}.
 */
public class LanguageAlphabet {

	/** World symbol index of every registered character. */
	private final Map<LanguageCharacter, Integer> characterToWorldSymbolIndex = new HashMap<>();
	/** Language model symbol index by Unicode value; contains only characters without a fixed probability. */
	private final Map<Integer, Integer> unicodeToLanguageModelSymbolIndex = new HashMap<>();
	/** Characters with a fixed probability, keyed by world symbol index. */
	private final Map<Integer, LanguageCharacter> fixedProbabilityCharacters = new HashMap<>();
	/** All characters; the list position of a character is its world symbol index. */
	private final List<LanguageCharacter> characters = new ArrayList<>();
	//Note: The order of character groups does not matter, they are automatically sorted when rendering.
	//Groups may overlap, there may be subgroups (i.e. groups fully contained within another group)
	//and there may be characters not belonging to any group.
	//Group specifications use world symbol indices.
	private final List<CharacterGroup> characterGroups = new ArrayList<>();

	/**
	 * Returns the total number of characters in this alphabet (fixed-probability ones included).
	 *
	 * @return the number of registered characters
	 */
	public int getNumOfCharacters() {
		return characters.size();
	}

	/**
	 * Returns the character stored at the given world symbol index.
	 *
	 * @param symbolIndex world symbol index of the requested character
	 * @return the character at that index
	 * @throws IndexOutOfBoundsException if the index is negative or not smaller than
	 *         {@link #getNumOfCharacters()}
	 */
	public LanguageCharacter getLanguageCharacter(int symbolIndex) {
		return characters.get(symbolIndex);
	}

	/**
	 * Looks up the world symbol index of the given character.
	 *
	 * @param character the character to look up
	 * @return the world symbol index assigned to the character when it was added
	 * @throws IllegalArgumentException if the character is not part of this alphabet
	 */
	public int getWorldSymbolIndex(LanguageCharacter character) throws IllegalArgumentException {
		Integer symbolIndex = characterToWorldSymbolIndex.get(character);
		//Map.get returns null for an unknown key; comparing it against 0 would unbox null
		//and raise a NullPointerException instead of the documented exception.
		if (symbolIndex==null)
			throw new IllegalArgumentException("Alphabet doesn't contain the given LanguageCharacter");
		return symbolIndex;
	}

	/**
	 * Looks up the language model symbol index of the character with the given Unicode value.
	 * Only characters without a fixed probability have a language model symbol index.
	 *
	 * @param unicode the Unicode value to look up
	 * @return the language model symbol index of the matching character
	 * @throws UnicodeNotFoundException if no character without a fixed probability has this
	 *         Unicode value
	 */
	public int getLanguageModelSymbolIndex(int unicode) throws UnicodeNotFoundException {
		Integer symbolIndex = unicodeToLanguageModelSymbolIndex.get(unicode);
		if (symbolIndex==null)
			throw new UnicodeNotFoundException(unicode);
		return symbolIndex;
	}

	/**
	 * Returns the characters that have a fixed probability, keyed by world symbol index.
	 *
	 * <p>The returned map is the internal instance, not a copy; changes made through it
	 * affect this alphabet.
	 *
	 * @return the live map of fixed-probability characters
	 */
	public Map<Integer, LanguageCharacter> getFixedProbabilityCharacters() {
		return fixedProbabilityCharacters;
	}

	/**
	 * Appends a character to the alphabet. It receives the next free world symbol index and,
	 * unless it has a fixed probability, the next free language model symbol index.
	 *
	 * <p>The Unicode duplicate check only considers characters without a fixed probability,
	 * because only those are recorded in the Unicode lookup map.
	 * NOTE(review): a fixed-probability character sharing a Unicode value with another
	 * character is therefore not rejected here - confirm this is intended.
	 *
	 * @param character the character to add
	 * @throws IllegalArgumentException if the character is already contained, or if a
	 *         character without a fixed probability already uses the same Unicode value
	 */
	public void addCharacter(LanguageCharacter character) throws IllegalArgumentException {
		if (characterToWorldSymbolIndex.containsKey(character))
			throw new IllegalArgumentException("Alphabet already contains the given LanguageCharacter");
		if (unicodeToLanguageModelSymbolIndex.containsKey(character.getUnicode()))
			throw new IllegalArgumentException("Alphabet already contains Unicode value "+character.getUnicode());
		//The new character is appended to the list, so its world symbol index is the current size.
		int worldSymbolIndex = characters.size();
		characterToWorldSymbolIndex.put(character, worldSymbolIndex);
		if (character.getFixedProbability()>=0.0f) {
			fixedProbabilityCharacters.put(worldSymbolIndex, character);
		} else {
			//Language model symbol indices are dense: the next one equals the number assigned so far.
			unicodeToLanguageModelSymbolIndex.put(character.getUnicode(), unicodeToLanguageModelSymbolIndex.size());
		}
		characters.add(character);
	}

	/**
	 * Registers a character group. No validation is performed on the group.
	 *
	 * @param characterGroup the group to add
	 */
	public void addCharacterGroup(CharacterGroup characterGroup) {
		characterGroups.add(characterGroup);
	}

	/**
	 * Returns the registered character groups in the order they were added.
	 *
	 * <p>The returned list is the internal instance, not a copy; changes made through it
	 * affect this alphabet.
	 *
	 * @return the live list of character groups
	 */
	public List<CharacterGroup> getCharacterGroups() {
		return characterGroups;
	}

	/**
	 * Thrown when a Unicode value has no language model symbol index in the alphabet.
	 */
	public static class UnicodeNotFoundException extends RuntimeException {
		private final int unicode;

		/**
		 * @param unicode the Unicode value that could not be found
		 */
		public UnicodeNotFoundException(int unicode) {
			//Supply a message so stack traces and logs show which value was missing.
			super("Alphabet doesn't contain Unicode value "+unicode);
			this.unicode=unicode;
		}

		/**
		 * @return the Unicode value that could not be found
		 */
		public int getUnicode() {
			return unicode;
		}
	}
}