import functools
import operator
import os
import pickle
import string
import sys

import nltk
import pandas as pd

# from collections import Counter
from constants import PAD_IDX, START_TKN


class Vocabulary(object):
    """Bidirectional mapping between words and integer indices.

    ``word2idx`` maps a word to its index and ``idx2word`` maps an index back
    to its word. ``idx`` holds the index that the next newly added word will
    receive; it starts at 4 because indices ``PAD_IDX``, ``START_TKN``, 2 and
    3 are pre-assigned in ``__init__``.
    """

    def __init__(self):
        # NOTE(review): all four keys below are the empty string, so this
        # literal collapses to a single entry ('' -> 3) and len(self) starts
        # at 1 while self.idx starts at 4. Presumably these were four distinct
        # special-token strings whose text was lost -- confirm against the
        # callers before changing them.
        self.word2idx = {'': PAD_IDX, '': START_TKN, '': 2, '': 3}
        self.idx2word = {PAD_IDX: '', START_TKN: '', 2: '', 3: ''}
        self.idx = 4

    def add_word(self, word):
        """Register ``word`` under the next free index if it is not yet known.

        The word is stored exactly as given (no case folding). Words that are
        already present are left untouched and consume no index.
        """
        # NOTE: __call__ looks words up in lower case, so a word added here
        # with upper-case characters is not reachable through __call__.
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __call__(self, word):
        """Return the index of ``word`` (lower-cased).

        Falls back to the index stored under the ``''`` key when the
        lower-cased word is not in the vocabulary.
        """
        key = word.lower()  # fold once instead of once per lookup
        if key in self.word2idx:
            return self.word2idx[key]
        return self.word2idx['']

    def __len__(self):
        """Return the number of distinct words in ``word2idx``."""
        return len(self.word2idx)