// XGrammar: ANTLR4 combined grammar for an XPath/XQuery subset with a
// `join(...)` extension.
//
// Conventions used throughout this file:
//   * Every alternative carries a `# label`; the labels are kept exactly as
//     they were so that generated listener/visitor method names do not change.
//   * In the left-recursive rules, ANTLR4 gives EARLIER alternatives HIGHER
//     precedence. Alternative order in `rp`, `f`, `xq` and `cond` is therefore
//     significant.
//   * Comma-separated lists are written `item (COMMA item)*`, so a separator is
//     required between items and a trailing comma is rejected.
grammar XGrammar;

// Entry point: one or more queries followed by end of input.
prog
    : xq+ EOF
    ;

// Absolute path: a document reference followed by a relative path.
ap
    // relative to root node
    : doc SLASH rp      # ap_Slash
    // relative to any node
    | doc DSLASH rp     # ap_Dslash
    ;

// Relative path.
rp
    : TAGNAME           # rp_Tag
    | WILDCARD          # rp_Wild
    | PERIOD            # rp_Per
    | DPERIOD           # rp_Dper
    | TEXT_FUNC         # rp_Text
    | AT_SYM TAGNAME    # rp_Attr
    | '(' rp ')'        # rp_Par
    | rp SLASH rp       # rp_Slash
    | rp DSLASH rp      # rp_DSlash
    | rp '[' f ']'      # rp_Filter
    | rp COMMA rp       # rp_Concat
    ;

// Path filter (the expression inside `[...]`).
// Precedence, tightest first: NOT, AND, OR. NOT is listed before the binary
// operators so that `not a and b` groups as `(not a) and b`, and AND is listed
// before OR to match the ordering used by `cond`.
f
    : rp                    # f_Rp
    // by object value
    | rp EQ rp              # f_Eq
    // by object id
    | rp IS rp              # f_Is
    // rp = StringConstant, ensure that EQ is not 'eq'
    // NOTE(review): the EQ token matches both 'eq' and '='; the check
    // mentioned above is not enforced by this grammar.
    | rp EQ STRINGCONST     # f_Const
    | '(' f ')'             # f_Par
    | NOT f                 # f_Not
    | f AND f               # f_And
    | f OR f                # f_Or
    ;

// Document reference; the whole `doc("...")` form is a single lexer token.
doc
    : FILENAME
    ;

//collection: ;

// XQuery expression.
// The label `xp_Par` (not `xq_Par`) is kept as-is because renaming it would
// rename the generated context class and listener/visitor methods.
xq
    : ap                                                        # xq_Ap
    | '(' xq ')'                                                # xp_Par
    | xq SLASH rp                                               # xq_Slash
    | xq DSLASH rp                                              # xq_Dslash
    | xq COMMA xq                                               # xq_Concat
    | '<' TAGNAME '>' '{' xq '}' '<' SLASH TAGNAME '>'          # xq_Tag
    | '<' TAGNAME '>' xq '<' SLASH TAGNAME '>'                  # xq_Tag2
    | forClause (letClause)? (whereClause)? returnClause        # xq_FLWR
    | letClause xq                                              # xq_Let
    | FOR VAR IN join returnClause                              # xq_Join
    | VAR                                                       # xq_Var
    | STRINGCONST                                               # xq_Const
    ;

// FLWR clauses. Bindings are separated by exactly one COMMA; a trailing comma
// or a missing separator is a syntax error.
forClause
    : FOR VAR IN xq (COMMA VAR IN xq)*
    ;

letClause
    : LET VAR ASSIGN xq (COMMA VAR ASSIGN xq)*
    ;

whereClause
    : WHERE cond
    ;

returnClause
    : RETURN xq
    ;

//join rules

// Restricted return expression. It is referenced only by the commented-out
// `xqJoin` variant below.
returnClauseJoin
    : VAR                                                                   # join_return
    | returnClauseJoin COMMA returnClauseJoin                               # join_concat
    | '<' TAGNAME '>' '{' returnClauseJoin '}' '<' SLASH TAGNAME '>'        # join_tag_constr
    | '<' TAGNAME '>' returnClauseJoin '<' SLASH TAGNAME '>'                # join_tag_no_constr
    | ap                                                                    # join_ap
    ;

// Restricted condition: a conjunction of equalities over variables and string
// constants. The label `join_cond_var_const_eq` is intentionally shared by the
// two mixed VAR/STRINGCONST alternatives, so both map to one context class.
condClauseJoin
    : VAR EQ VAR                            # join_cond_var_eq
    | VAR EQ STRINGCONST                    # join_cond_var_const_eq
    | STRINGCONST EQ VAR                    # join_cond_var_const_eq
    | STRINGCONST EQ STRINGCONST            # join_cond_const_eq
    | condClauseJoin AND condClauseJoin     # join_cond_and
    ;

// Bracketed, comma-separated list of tag names, e.g. [a, b].
attrList
    : '[' TAGNAME (COMMA TAGNAME)* ']'
    ;

//xqJoin: forClause (WHERE condClauseJoin)* RETURN returnClauseJoin;
xqJoin
    : forClause (whereClause)? returnClause
    ;

// join(<left>, <right>, <attrList> {, <attrList>}) where each operand is either
// a nested join or an xqJoin.
join
    : JOIN '(' (join | xqJoin) COMMA (join | xqJoin) COMMA attrList (COMMA attrList)* ')'
    ;

// Condition used by `where` and `satisfies`.
// Precedence, tightest first: NOT, AND, OR. cond_Some stays below AND/OR, as
// before, so the condition after SATISFIES extends as far as possible.
cond
    : NOT cond                                              # cond_Not
    | cond AND cond                                         # cond_And
    | cond OR cond                                          # cond_Or
    | xq EQ xq                                              # cond_Eq
    | xq IS xq                                              # cond_Is
    | EMPTY '(' xq ')'                                      # cond_Empty
    | SOME VAR IN xq (COMMA VAR IN xq)* SATISFIES cond      # cond_Some
    | '(' cond ')'                                          # cond_Par
    ;

//path tokens
TEXT_FUNC: 'text()';
AT_SYM: '@';
SLASH: '/';
DSLASH: '//';
COMMA: ',';
DQUOTE: '"';
WILDCARD: '*';
PERIOD: '.';
DPERIOD: '..';
DOLLAR: '$';

//filter tokens - EQ is by value and IS is by object id
EQ: ('eq' | '=');
IS: ('==' | 'is');
AND: 'and';
OR: 'or';
NOT: 'not';

//XQuery Tokens
ASSIGN: ':=';
WHERE: 'where';
RETURN: 'return';
LET: 'let';
IN: 'in';
FOR: 'for';
SOME: 'some';
SATISFIES: 'satisfies';
EMPTY: 'empty';
VAR: DOLLAR TAGNAME;

//Join Tokens
JOIN: 'join';

// The TAGNAME rule needs to be after the above tokens so as to avoid matching
// 'and', 'not' etc. as TAGNAME: when two lexer rules match the same text, the
// rule defined first wins. The same tie-break lets PERIOD and DPERIOD win over
// TAGNAME for a bare '.' or '..'.
//letters, digits, hyphens, underscores, periods
TAGNAME: [a-zA-Z0-9._-]+;

// doc("..."), document("...") or collection("...") as one token.
// NOTE(review): the name inside doc/document is EITHER a TAGNAME OR a run of
// the listed special characters, never a mix of the two - confirm intended.
FILENAME
    : 'doc("' (TAGNAME | [ !@#$%^&()]+) '")'
    | 'document("' (TAGNAME | [ !@#$%^&()]+) '")'
    | 'collection("' (TAGNAME) '")'
    ;

// Backslash escape of a quote or backslash inside a string constant.
// NOTE(review): this is not declared as a `fragment`, so a stray escape outside
// a string is emitted as its own ESCAPE token. Left unchanged to keep the
// generated token vocabulary stable.
ESCAPE: '\\' (['"\\]);

// Single- or double-quoted string constant with backslash escapes.
STRINGCONST
    : '"' (ESCAPE | ~["\\])* '"'
    | '\'' (ESCAPE | ~['\\])* '\''
    ;

//TEXT: [a-zA-Z0-9_-]+; //must have at least one character

//skip whitespace
WS: [ \t\n\r]+ -> skip;