package edu.ufl.cise.plc; import java.util.HashMap; import java.util.Map; public class Lexer implements ILexer { private enum State { START, IN_IDENT, IN_COMMENT, IN_STRING, HAVE_ZERO, HAVE_DOT, IN_FLOAT, IN_NUM, HAVE_EQ, HAVE_MINUS, HAVE_LT, HAVE_GT, HAVE_EX, HAVE_BSLASH} private final Map<String, IToken.Kind> reserved = new HashMap<String, IToken.Kind>(); private State state; private char[] chars; private int pos = 0; // position in input aka the index of chars[] private int line = 0; // first char of token position private int col = 0; // where the token starts private void initMap() { reserved.put("string", IToken.Kind.TYPE); reserved.put("int", IToken.Kind.TYPE); reserved.put("float", IToken.Kind.TYPE); reserved.put("boolean", IToken.Kind.TYPE); reserved.put("color", IToken.Kind.TYPE); reserved.put("image", IToken.Kind.TYPE); reserved.put("getWidth", IToken.Kind.IMAGE_OP); reserved.put("getHeight", IToken.Kind.IMAGE_OP); reserved.put("getRed", IToken.Kind.COLOR_OP); reserved.put("getGreen", IToken.Kind.COLOR_OP); reserved.put("getBlue", IToken.Kind.COLOR_OP); reserved.put("BLACK", IToken.Kind.COLOR_CONST); reserved.put("BLUE", IToken.Kind.COLOR_CONST); reserved.put("CYAN", IToken.Kind.COLOR_CONST); reserved.put("DARK_GRAY", IToken.Kind.COLOR_CONST); reserved.put("GRAY", IToken.Kind.COLOR_CONST); reserved.put("GREEN", IToken.Kind.COLOR_CONST); reserved.put("LIGHT_GRAY", IToken.Kind.COLOR_CONST); reserved.put("MAGENTA", IToken.Kind.COLOR_CONST); reserved.put("ORANGE", IToken.Kind.COLOR_CONST); reserved.put("PINK", IToken.Kind.COLOR_CONST); reserved.put("RED", IToken.Kind.COLOR_CONST); reserved.put("WHITE", IToken.Kind.COLOR_CONST); reserved.put("YELLOW", IToken.Kind.COLOR_CONST); reserved.put("true", IToken.Kind.BOOLEAN_LIT); reserved.put("false", IToken.Kind.BOOLEAN_LIT); reserved.put("if", IToken.Kind.KW_IF); reserved.put("else", IToken.Kind.KW_ELSE); reserved.put("fi", IToken.Kind.KW_FI); reserved.put("write", IToken.Kind.KW_WRITE); reserved.put("console", IToken.Kind.KW_CONSOLE); reserved.put("void", IToken.Kind.KW_VOID); } public Lexer(String input) { chars = input.toCharArray(); initMap(); } @Override public IToken next() throws LexicalException { int tokenLength = 0; // to keep track where next token starts String ss = ""; state = State.START; if (chars.length == 0) return new Token(IToken.Kind.EOF, 0, 0, ""); while (pos < chars.length) { char ch = chars[pos]; switch (state){ case START -> { if (Character.isJavaIdentifierStart(ch)) { ss = ss.concat(String.valueOf(ch)); pos++; tokenLength++; state = State.IN_IDENT; } else { switch (ch) { case '0' -> { ss = ss.concat(String.valueOf(ch)); pos++; tokenLength++; state = State.HAVE_ZERO; } case '1', '2', '3', '4', '5', '6', '7', '8', '9' -> { ss = ss.concat(String.valueOf(ch)); pos++; tokenLength++; state = State.IN_NUM; } case ' ', '\t' -> { pos++; col++; state = State.START; } case '\r', '\n' -> { pos++; col = 0; line++; state = State.START; } case '"' -> { ss += '"'; pos++; tokenLength++; state = State.IN_STRING; } case '#' -> { state = State.IN_COMMENT; pos++; col++; } case '=' -> { ss = ss.concat("="); state = State.HAVE_EQ; pos++; tokenLength++; } case '>' -> { ss = ss.concat(">"); state = State.HAVE_GT; pos++; tokenLength++; } case '<' -> { ss = ss.concat("<"); state = State.HAVE_LT; pos++; tokenLength++; } case '-' -> { ss = ss.concat("-"); state = State.HAVE_MINUS; pos++; tokenLength++; } case '!' -> { ss = ss.concat("!"); state = State.HAVE_EX; pos++; tokenLength++; } case '|' -> { ss = ss.concat("|"); pos++; Token t = new Token(IToken.Kind.OR, line, col, ss); col += tokenLength + 1; return t; } case '&' -> { ss = ss.concat("&"); pos++; Token t = new Token(IToken.Kind.AND, line, col, ss); col += tokenLength + 1; return t; } case '(' -> { ss = ss.concat("("); pos++; Token t = new Token(IToken.Kind.LPAREN, line, col, ss); col += tokenLength + 1; return t; } case ')' -> { ss = ss.concat(")"); pos++; Token t = new Token(IToken.Kind.RPAREN, line, col, ss); col += tokenLength + 1; return t; } case '[' -> { ss = ss.concat("["); pos++; Token t = new Token(IToken.Kind.LSQUARE, line, col, ss); col += tokenLength + 1; return t; } case ']' -> { ss = ss.concat("]"); pos++; Token t = new Token(IToken.Kind.RSQUARE, line, col, ss); col += tokenLength + 1; return t; } case '+' -> { ss = ss.concat("+"); pos++; Token t = new Token(IToken.Kind.PLUS, line, col, ss); col += tokenLength + 1; return t; } case '*' -> { ss = ss.concat("*"); pos++; Token t = new Token(IToken.Kind.TIMES, line, col, ss); col += tokenLength + 1; return t; } case '/' -> { ss = ss.concat("/"); pos++; Token t = new Token(IToken.Kind.DIV, line, col, ss); col += tokenLength + 1; return t; } case '%' -> { ss = ss.concat("%"); pos++; Token t = new Token(IToken.Kind.MOD, line, col, ss); col += tokenLength + 1; return t; } case ';' -> { ss = ss.concat(";"); pos++; Token t = new Token(IToken.Kind.SEMI, line, col, ss); col += tokenLength + 1; return t; } case ',' -> { ss = ss.concat(","); pos++; Token t = new Token(IToken.Kind.COMMA, line, col, ss); col += tokenLength + 1; return t; } case '^' -> { ss = ss.concat("^"); pos++; Token t = new Token(IToken.Kind.RETURN, line, col, ss); col += tokenLength + 1; return t; } default -> { throw new LexicalException("Invalid char"); } } } } case IN_IDENT -> { if (Character.isJavaIdentifierPart(ch)) { ss = ss.concat(String.valueOf(ch)); pos++; tokenLength++; } else { if (reserved.containsKey(ss)) { // if it's a reserved word Token t = new Token(reserved.get(ss), line, col, ss); col += tokenLength; return t; } else { Token t = new Token(IToken.Kind.IDENT, line, col, ss); col += tokenLength; return t; } } } case IN_COMMENT -> { switch (ch) { case '\r', '\n' -> { pos++; line++; col = 0; tokenLength = 0; state = State.START; } default -> { pos++; col++; } } } case IN_STRING -> { switch (ch) { case '\\' -> { pos++; tokenLength++; state = State.HAVE_BSLASH; } case '"' -> { // end of string_lit ss += '\"'; pos++; Token t = new Token(IToken.Kind.STRING_LIT, line, col, ss); col += tokenLength + 1; return t; } default -> { if (pos == chars.length - 1 && ch != '"') throw new LexicalException("Invalid string"); else { pos++; tokenLength++; ss += ch; } } } } // still in string_lit, check what's after '\' // if valid escape sequence, go back to IN_STRING case HAVE_BSLASH -> { switch (ch) { case 'b' -> { ss += '\b'; pos++; tokenLength++; state = State.IN_STRING; } case 't' -> { ss += '\t'; pos++; tokenLength++; state = State.IN_STRING; } case 'n' -> { ss += '\n'; pos++; tokenLength++; state = State.IN_STRING; } case 'f' -> { ss += '\f'; pos++; tokenLength++; state = State.IN_STRING; } case 'r' -> { ss += '\r'; pos++; tokenLength++; state = State.IN_STRING; } case '"' -> { ss += '"'; pos++; tokenLength++; state = State.IN_STRING; } case '\'' -> { ss += '\''; pos++; tokenLength++; state = State.IN_STRING; } case '\\' -> { ss += '\\'; pos++; tokenLength++; state = State.IN_STRING; } default -> throw new LexicalException("Unresolved escape sequence"); } } case IN_NUM -> { if (Character.isDigit(ch)) { ss = ss.concat(String.valueOf(ch)); pos++; tokenLength++; } else if (ch == '.') { ss = ss.concat(String.valueOf(ch)); pos++; tokenLength++; state = State.HAVE_DOT; } else { try { Integer.parseInt(ss); } catch (NumberFormatException e) { throw new LexicalException(e); } Token t = new Token(IToken.Kind.INT_LIT, line, col, ss); col += tokenLength; return t; } } case IN_FLOAT -> { if (Character.isDigit(ch)) { ss = ss.concat(String.valueOf(ch)); pos++; tokenLength++; } else { try { Float.parseFloat(ss); } catch (NumberFormatException e) { throw new LexicalException(e); } Token t = new Token(IToken.Kind.FLOAT_LIT, line, col, ss); col += tokenLength; return t; } } case HAVE_DOT -> { if (Character.isDigit(ch)) { ss = ss.concat(String.valueOf(ch)); pos++; tokenLength++; state = State.IN_FLOAT; } else { throw new LexicalException("Invalid token"); } } case HAVE_EQ -> { switch (ch) { case '=' -> { ss = ss.concat("="); pos++; Token t = new Token(IToken.Kind.EQUALS, line, col, ss); col += tokenLength + 1; return t; } default -> { Token t = new Token(IToken.Kind.ASSIGN, line, col, ss); col += tokenLength; return t; } } } case HAVE_GT -> { switch (ch) { case '>' -> { ss = ss.concat(">"); pos++; Token t = new Token(IToken.Kind.RANGLE, line, col, ss); col += tokenLength + 1; return t; } case '=' -> { ss = ss.concat("="); pos++; Token t = new Token(IToken.Kind.GE, line, col, ss); col += tokenLength + 1; return t; } default -> { Token t = new Token(IToken.Kind.GT, line, col, ss); col += tokenLength; return t; } } } case HAVE_LT -> { switch (ch) { case '<' -> { ss = ss.concat("<"); pos++; Token t = new Token(IToken.Kind.LANGLE, line, col, ss); col += tokenLength + 1; return t; } case '=' -> { ss = ss.concat("="); pos++; Token t = new Token(IToken.Kind.LE, line, col, ss); col += tokenLength + 1; return t; } case '-' -> { ss = ss.concat("-"); pos++; Token t = new Token(IToken.Kind.LARROW, line, col, ss); col += tokenLength + 1; return t; } default -> { Token t = new Token(IToken.Kind.LT, line, col, ss); col += tokenLength; return t; } } } case HAVE_MINUS -> { switch (ch) { case '>' -> { ss = ss.concat(">"); pos++; Token t = new Token(IToken.Kind.RARROW, line, col, ss); col += tokenLength + 1; return t; } default -> { Token t = new Token(IToken.Kind.MINUS, line, col, ss); col += tokenLength; return t; } } } case HAVE_EX -> { switch (ch) { case '=' -> { ss = ss.concat("="); pos++; Token t = new Token(IToken.Kind.NOT_EQUALS, line, col, ss); col += tokenLength + 1; return t; } default -> { Token t = new Token(IToken.Kind.BANG, line, col, ss); col += tokenLength; return t; } } } case HAVE_ZERO -> { if (ch == '.') { ss = ss.concat("."); pos++; tokenLength++; state = State.HAVE_DOT; } else { Token t = new Token(IToken.Kind.INT_LIT, line, col, ss); col += tokenLength; return t; } } default -> { throw new LexicalException("Invalid char"); } } } return new Token(IToken.Kind.EOF, line, col, ""); } @Override public IToken peek() throws LexicalException { int pos_ = pos; int col_ = col; int line_ = line; IToken a = next(); pos = pos_; col = col_; line = line_; return a; } }