// Programming-Language-Design / src / parser / Cmm.g4
grammar Cmm;

@header {
    import ast.*;
    import ast.definition.*;
    import ast.definition.classes.*;
    import ast.expression.*;
    import ast.expression.classes.*;
    import ast.statement.*;
    import ast.statement.classes.*;
    import ast.type.*;
    import ast.type.classes.*;
}

/* SYNTAX RULES */

// -- PROGRAM
// Entry rule: every global definition, then the mandatory main function, then end of input.
// The Program node takes its position from the first definition in the list
// (which is main itself when nothing precedes it).
program returns [Program ast]:
    p1=program_definitions m1=main_function
    {
        List<Definition> definitions = $p1.ast;
        definitions.add($m1.ast);
        Definition first = definitions.get(0);
        $ast = new Program(first.getLine(), first.getColumn(), definitions);
    }
    EOF
    ;

// Global definitions that precede main, collected in source order.
// Variable and function definitions may appear in any order and may be interleaved.
// A single loop over both kinds avoids the ambiguity of two separate var_definition loops,
// which both match when no function is defined.
// Each var_definition may declare several variables, so its whole list is appended.
program_definitions returns [List<Definition> ast = new ArrayList<Definition>()]:
    ( v1=var_definition  { $ast.addAll($v1.ast); }
    | f1=func_definition { $ast.add($f1.ast); }
    )*
    ;

// The program entry point: void main() followed by a function body.
// Both the FuncDefinition and its FunctionType are positioned at the void return type;
// main never has parameters, so the parameter list is a fresh empty list.
main_function returns [FuncDefinition ast]:
    t1=void_type i1='main' '(' ')' b1=function_block
    {
        int line = $t1.ast.getLine();
        int column = $t1.ast.getColumn();
        FunctionType signature = new FunctionType(line, column, $t1.ast, new ArrayList<VarDefinition>());
        $ast = new FuncDefinition(line, column, $i1.text, signature, $b1.ast);
    }
    ;

// -- EXPRESSION
// Builds the Expression AST node for any expression.
// This is a left-recursive rule: ANTLR 4 resolves operator precedence from the order of the
// alternatives, so an alternative listed earlier binds tighter than one listed later.
// Do not reorder the alternatives without meaning to change precedence.
// Position conventions used below:
//   - indexing, field access and binary nodes take line/column from their left operand;
//   - unary minus and negation take line/column from their operand (not from the operator token);
//   - a cast takes line/column from the target type;
//   - literals and variables take them from their token; getCharPositionInLine() is 0-based,
//     hence the +1 to obtain a 1-based column.
expression returns [Expression ast]:
    // Parenthesised expression: no node of its own, the inner AST is passed through.
    '(' expression ')' { $ast = $expression.ast;}
    // Postfix operators: array indexing and field access.
    | e1=expression '[' e2=expression ']' { $ast = new ArrayIndexing($e1.ast.getLine(), $e1.ast.getColumn(), $e1.ast, $e2.ast);}
    | e1=expression '.' ID  { $ast = new FieldAccess($e1.ast.getLine(),$e1.ast.getColumn(),$e1.ast, $ID.text);}
    // A function call used as a value.
    | f1=function_invocation { $ast = $f1.ast;}
    // Prefix operators: unary minus, logical negation, cast to a primitive type.
    | '-' e1=expression { $ast = new UnaryMinus($e1.ast.getLine(), $e1.ast.getColumn(),$e1.ast);}
    | '!' e1=expression { $ast = new Negation($e1.ast.getLine(), $e1.ast.getColumn(),$e1.ast);}
    | '(' dt=primitive_type ')' e1=expression { $ast = new Cast($dt.ast.getLine(),$dt.ast.getColumn(),$dt.ast,$e1.ast);}
    // Binary operators, from tighter to looser: multiplicative, additive, comparison, logical.
    // The operator lexeme is passed to the node as text.
    | e1=expression op=('*'|'/'|'%') e2=expression  { $ast = new Arithmetic($e1.ast.getLine(), $e1.ast.getColumn(),$e1.ast,$op.text,$e2.ast);}
    | e1=expression op=('+'|'-') e2=expression      { $ast = new Arithmetic($e1.ast.getLine(), $e1.ast.getColumn(),$e1.ast,$op.text,$e2.ast);}
    // NOTE(review): all six comparison operators share one precedence level, and so do '&&' and '||'
    // (C gives '&&' higher precedence than '||') - confirm this matches the language specification.
    | e1=expression op=('>'|'>='|'<'|'<='|'!='|'==') e2=expression { $ast = new Comparison($e1.ast.getLine(), $e1.ast.getColumn(),$e1.ast,$op.text,$e2.ast);}
    | e1=expression op=('&&'|'||') e2=expression { $ast = new Logical($e1.ast.getLine(), $e1.ast.getColumn(),$e1.ast,$op.text,$e2.ast);}
    /* Alternative way to obtain the position: $e1.start is the first Token matched by the sub-rule, so
       new Logical($e1.start.getLine(), $e1.start.getCharPositionInLine()+1, $e1.ast, $op.text, $e2.ast)
       reads line/column from that token instead of from the AST node. */
    // Literals: the lexeme is converted to its value by LexerHelper.
    | i1 = INT_CONSTANT { $ast = new IntLiteral($i1.getLine(), $i1.getCharPositionInLine()+1, LexerHelper.lexemeToInt($i1.text)); }
    | r1 = REAL_CONSTANT { $ast = new DoubleLiteral($r1.getLine(), $r1.getCharPositionInLine()+1, LexerHelper.lexemeToReal($r1.text)); }
    | c1 = CHAR_CONSTANT { $ast = new CharLiteral($c1.getLine(), $c1.getCharPositionInLine()+1, LexerHelper.lexemeToChar($c1.text)); }
    // A plain identifier used as a variable.
    | ID { $ast = new Variable($ID.getLine(), $ID.getCharPositionInLine()+1, $ID.text); }
    ;


// -- FUNCTION INVOCATION (STMT & EXPR)
// A call ID(arguments), usable both as an expression and as a statement.
// The invocation and the Variable node that names the callee share the position of the
// identifier token (column converted from 0-based to 1-based).
function_invocation returns [FunctionInvocation ast]:
    i2=ID '(' arguments ')'
    {
        int line = $i2.getLine();
        int column = $i2.getCharPositionInLine() + 1;
        $ast = new FunctionInvocation(line, column, new Variable(line, column, $i2.text), $arguments.ast);
    }
    ;
// A possibly empty, comma-separated list of expressions, returned in source order.
arguments returns [List<Expression> ast = new ArrayList<Expression>()]:
    (
        e1=expression { $ast.add($e1.ast); }
        ( ',' e2=expression { $ast.add($e2.ast); } )*
    )?
    ;

// -- STATEMENT
// Parses one statement and returns the AST statement(s) it produces.
// A list is returned because 'read' and 'write' expand to one node per operand;
// every other alternative adds exactly one node.
// 'read' and 'write' require at least one operand, so "read;" and "write;" are syntax errors
// instead of being accepted and silently producing no statement.
// operands collects the read/write expressions until the closing ';' has been matched,
// because those nodes are positioned at that ';' token.
statement returns [List<Statement> ast = new ArrayList<Statement>()]
    locals [List<Expression> operands = new ArrayList<Expression>()]:
    'read' e1=expression { $operands.add($e1.ast); } (',' e2=expression { $operands.add($e2.ast); })* semicolon=';'
        { for (Expression operand : $operands) { $ast.add(new ReadStatement($semicolon.getLine(), $semicolon.getCharPositionInLine()+1, operand)); } }
    | 'write' e1=expression { $operands.add($e1.ast); } (',' e2=expression { $operands.add($e2.ast); })* semicolon=';'
        { for (Expression operand : $operands) { $ast.add(new WriteStatement($semicolon.getLine(), $semicolon.getCharPositionInLine()+1, operand)); } }
    // Assignment and return are positioned at their (first) expression.
    | e1=expression '=' e2=expression ';' { $ast.add(new Assignment($e1.ast.getLine(),$e1.ast.getColumn(),$e1.ast, $e2.ast));}
    | 'return' e1=expression ';' { $ast.add(new Return($e1.ast.getLine(),$e1.ast.getColumn(),$e1.ast));}
    // Conditionals and loops are positioned at their condition; an 'if' without 'else'
    // gets an empty else-body.
    | 'if' '(' e1=expression ')' b1=block                { $ast.add(new IfElse($e1.ast.getLine(),$e1.ast.getColumn(),$e1.ast, $b1.ast, new ArrayList<Statement>()));}
    | 'if' '(' e1=expression ')' b1=block 'else' b2=block   { $ast.add(new IfElse($e1.ast.getLine(),$e1.ast.getColumn(),$e1.ast, $b1.ast, $b2.ast));}
    | 'while' '(' e1=expression ')' b1=block { $ast.add(new WhileLoop($e1.ast.getLine(),$e1.ast.getColumn(),$e1.ast, $b1.ast));}
    // A function call used as a statement.
    | f1=function_invocation ';' {$ast.add($f1.ast);}
    ;

// Body of an if / else / while: either one statement without braces, or a braced
// sequence of zero or more statements. All produced statements are flattened into one list.
block returns [List<Statement> ast = new ArrayList<Statement>()]:
      s1=statement { $ast.addAll($s1.ast); }
    | '{'
          ( s2=statement { $ast.addAll($s2.ast); } )*
      '}'
    ;

//-- VAR DEFINITION
// One declaration "type id1, id2, ... ;".
// Produces one VarDefinition per identifier; all of them share the same Type node
// and are positioned at that type.
var_definition returns [List<VarDefinition> ast = new ArrayList<VarDefinition>()]:
    t1=type
    i1=ID
        { $ast.add(new VarDefinition($t1.ast.getLine(), $t1.ast.getColumn(), $i1.text, $t1.ast)); }
    (
        ',' i2=ID
        { $ast.add(new VarDefinition($t1.ast.getLine(), $t1.ast.getColumn(), $i2.text, $t1.ast)); }
    )*
    ';'
    ;
//-- FUNCTION DEFINITION
// A function definition: return type, name, parameter list and body.
// Both the FuncDefinition and its FunctionType are positioned at the return type.
func_definition returns [FuncDefinition ast]:
    t1=return_type i1=ID '(' f1=function_param ')' b1=function_block
    {
        int line = $t1.ast.getLine();
        int column = $t1.ast.getColumn();
        FunctionType signature = new FunctionType(line, column, $t1.ast, $f1.ast);
        $ast = new FuncDefinition(line, column, $i1.text, signature, $b1.ast);
    }
    ;

// The return type of a function: a primitive type or void.
// The node built by the matched sub-rule is passed through unchanged.
return_type returns [Type ast]:
      p=primitive_type { $ast = $p.ast; }
    | v=void_type      { $ast = $v.ast; }
    ;

// A function body: local variable definitions first, then statements, inside braces.
// Definitions and statements end up in a single list, in source order.
function_block returns [List<Statement> ast = new ArrayList<Statement>()]:
    '{'
        ( v1=var_definition { $ast.addAll($v1.ast); } )*
        ( s1=statement      { $ast.addAll($s1.ast); } )*
    '}'
    ;

// A possibly empty, comma-separated parameter list. Every parameter has its own
// primitive type and becomes a VarDefinition positioned at that type.
function_param returns [List<VarDefinition> ast = new ArrayList<VarDefinition>()]:
    (
        t1=primitive_type i1=ID
            { $ast.add(new VarDefinition($t1.ast.getLine(), $t1.ast.getColumn(), $i1.text, $t1.ast)); }
        (
            ',' t2=primitive_type i2=ID
            { $ast.add(new VarDefinition($t2.ast.getLine(), $t2.ast.getColumn(), $i2.text, $t2.ast)); }
        )*
    )?
    ;


//-- TYPE
// The built-in types int, char and double.
// Each node is positioned at its keyword token (column converted to 1-based).
primitive_type returns [Type ast]:
      i='int'
        { $ast = new IntType($i.getLine(), $i.getCharPositionInLine() + 1); }
    | c='char'
        { $ast = new CharType($c.getLine(), $c.getCharPositionInLine() + 1); }
    | d='double'
        { $ast = new DoubleType($d.getLine(), $d.getCharPositionInLine() + 1); }
    ;
// Parses a type: a primitive type, an array of another type (left-recursive, so dimensions
// can be chained as in int[2][3]), or a struct type with its record fields.
// Array nodes are positioned at their element type and then passed through
// ArrayType.organizeArraySizes.
// A struct is positioned at its first field. An empty struct ("struct { }") has no first
// field, so it falls back to the position of the 'struct' keyword; previously this case
// failed with an IndexOutOfBoundsException on get(0).
// checkDuplicatedFields() is invoked on every struct node right after it is built.
type returns [Type ast]:
    pt=primitive_type {$ast = $pt.ast;}
    | t=type '['i=INT_CONSTANT']' { $ast = new ArrayType($t.ast.getLine(), $t.ast.getColumn(), $t.ast, LexerHelper.lexemeToInt($i.text));
        $ast = ((ArrayType)$ast).organizeArraySizes($ast,$t.ast);}
    | s='struct' '{' rf=record_fields '}'
        {
            $ast = $rf.ast.isEmpty()
                ? new RecordType($s.getLine(), $s.getCharPositionInLine()+1, $rf.ast)
                : new RecordType($rf.ast.get(0).getLine(), $rf.ast.get(0).getColumn(), $rf.ast);
            ((RecordType)$ast).checkDuplicatedFields();
        }
    ;

//-- TYPE: VOID TYPE
// The void type, positioned at its keyword token (column converted to 1-based).
void_type returns [Type ast]:
    v='void'
        { $ast = new VoidType($v.getLine(), $v.getCharPositionInLine() + 1); }
    ;

//-- TYPE: RECORD FIELD TYPE
// The field declarations inside a struct: zero or more "type id1, id2, ... ;" groups.
// Produces one RecordField per identifier, positioned at the type of its declaration.
// The ( ... )* loop already matches an empty field list, so no separate empty alternative
// is declared; having both made the rule ambiguous on empty input.
record_fields returns [List<RecordField> ast = new ArrayList<RecordField>()]:
    ( t=type i1=ID { $ast.add(new RecordField($t.ast.getLine(),$t.ast.getColumn(), $t.ast, $i1.text));}
        (',' i2=ID {$ast.add( new RecordField($t.ast.getLine(),$t.ast.getColumn(), $t.ast, $i2.text));})* ';'
    )*
    ;


/* LEXICAL RULES */

// Fragments are building blocks for token rules; they never produce tokens themselves.

// A single decimal digit.
fragment
DIGIT: '0'..'9' ;

// A single ASCII letter, lower or upper case.
fragment
LETTER: 'a'..'z' | 'A'..'Z' ;

// Exponent part of a real constant: e or E, an optional sign, one or more digits.
fragment
MANTISSA_EXP: ('e' | 'E') ('+' | '-')? DIGIT+ ;

// What may appear between the quotes of a character constant:
// any single character, a backslash followed by an INT_CONSTANT, or the escapes \n and \t.
fragment
CHAR_CONTENT
    : .
    | '\\' INT_CONSTANT
    | '\\n'
    | '\\t'
    ;

// Integer constant: either a non-zero digit followed by any digits, or a single 0
// (a multi-digit number never starts with 0).
INT_CONSTANT: [1-9] DIGIT*
            | '0'
            ;

// Identifier: a letter or underscore followed by letters, underscores or digits.
ID: (LETTER|'_')(LETTER|'_'|DIGIT)* // equivalent to [a-zA-Z_][a-zA-Z_0-9]*
    ;

// Real constant, in one of three forms:
//   digits '.' optional digits, optional exponent   (e.g. 12.  12.3  12.3e-4)
//   optional digits '.' digits, optional exponent   (e.g. .5   0.5   .5E2)
//   digits followed by a mandatory exponent         (e.g. 3e10)
REAL_CONSTANT: DIGIT+ '.' DIGIT* MANTISSA_EXP?
             | DIGIT* '.' DIGIT+ MANTISSA_EXP?
             | DIGIT+ MANTISSA_EXP
             ;

// Character constant: exactly one CHAR_CONTENT between single quotes.
CHAR_CONSTANT: '\'' CHAR_CONTENT '\'';

// Line comment: from // up to and including the first \n or \r, or up to end of input. Discarded.
LINE_COMMENT: '//' .*? ('\n'|'\r'|EOF) -> skip ;
// Block comment: from /* to the first following */ (non-greedy, so comments do not nest). Discarded.
BLOCK_COMMENT: '/*' .*? '*/' -> skip;

// Blanks, line feeds, tabs and carriage returns only separate tokens and are discarded.
WHITE_SPACE: ' '+ -> skip;
NEW_LINE: '\n' -> skip;
TAB: '\t' -> skip;
CARRIAGE_RETURN: '\r'->skip;