grammar XGrammar;
// Entry rule: one or more queries, then end of input.
prog
    : xq+ EOF
    ;
// Absolute path: a document reference followed by '/' or '//' and a relative path.
ap
    : doc SLASH  rp     # ap_Slash      // relative to root node
    | doc DSLASH rp     # ap_Dslash     // relative to any node
    ;
// Relative path.
// NOTE: the order of the left-recursive alternatives (slash, double slash,
// filter, comma) determines how ANTLR resolves precedence between them, so
// the alternatives must not be reordered.
rp
    : TAGNAME           # rp_Tag        // tag name
    | WILDCARD          # rp_Wild       // '*'
    | PERIOD            # rp_Per        // '.'
    | DPERIOD           # rp_Dper       // '..'
    | TEXT_FUNC         # rp_Text       // 'text()'
    | AT_SYM TAGNAME    # rp_Attr       // '@' followed by a name
    | '(' rp ')'        # rp_Par
    | rp SLASH rp       # rp_Slash
    | rp DSLASH rp      # rp_DSlash
    | rp '[' f ']'      # rp_Filter     // path restricted by a filter f
    | rp COMMA rp       # rp_Concat
    ;
// Filter expression, used inside rp '[' f ']'.
// For the left-recursive alternatives ANTLR gives earlier alternatives higher
// precedence, so AND is listed before OR: "a or b and c" groups as
// "a or (b and c)", matching the AND-before-OR order of the cond rule.
// NOT is listed last, so "not a and b" groups as "not (a and b)"; use
// parentheses to negate a single operand.
f
    : rp                    # f_Rp
    | rp EQ rp              # f_Eq      // by object value
    | rp IS rp              # f_Is      // by object id
    | rp EQ STRINGCONST     # f_Const   // rp = StringConstant, ensure that EQ is not 'eq'
    | '(' f ')'             # f_Par
    | f AND f               # f_And
    | f OR f                # f_Or
    | NOT f                 # f_Not
    ;
// Document reference: a single FILENAME token.
doc
    : FILENAME
    ;
//collection: ;
// XQuery expression.
// NOTE: alternative order is significant for the left-recursive alternatives
// (slash, double slash, comma) and must be preserved.
// NOTE: the label of the parenthesised alternative is spelled "xp_Par" (not
// "xq_Par"); it is kept as-is because generated context/visitor names are
// derived from the label.
xq
    : ap                                                    # xq_Ap
    | '(' xq ')'                                            # xp_Par
    | xq SLASH rp                                           # xq_Slash
    | xq DSLASH rp                                          # xq_Dslash
    | xq COMMA xq                                           # xq_Concat
    | '<' TAGNAME '>' '{' xq '}' '<' SLASH TAGNAME '>'      # xq_Tag    // element constructor with braces
    | '<' TAGNAME '>' xq '<' SLASH TAGNAME '>'              # xq_Tag2   // element constructor without braces
    | forClause letClause? whereClause? returnClause        # xq_FLWR
    | letClause xq                                          # xq_Let
    | FOR VAR IN join returnClause                          # xq_Join
    | VAR                                                   # xq_Var
    | STRINGCONST                                           # xq_Const
    ;
// Binding lists are comma-separated: exactly one COMMA between consecutive
// bindings and none after the last one. (The previous form,
// "(binding (COMMA)*)+", also accepted missing, repeated and trailing commas.)
// Because xq itself has an "xq COMMA xq" alternative, a comma followed by
// "VAR IN" / "VAR ASSIGN" is what distinguishes a new binding from a
// concatenation inside the bound expression.

// for $v1 in xq1, $v2 in xq2, ...
forClause
    : FOR VAR IN xq (COMMA VAR IN xq)*
    ;

// let $v1 := xq1, $v2 := xq2, ...
letClause
    : LET VAR ASSIGN xq (COMMA VAR ASSIGN xq)*
    ;
// where <condition>
whereClause
    : WHERE cond
    ;
// return <query>
returnClause
    : RETURN xq
    ;
//join rules
// Restricted form of a return expression for join queries.
// NOTE(review): in the visible grammar this rule is only mentioned by the
// commented-out xqJoin alternative; confirm whether it is still needed.
returnClauseJoin
    : VAR                                                               # join_return
    | returnClauseJoin COMMA returnClauseJoin                           # join_concat
    | '<' TAGNAME '>' '{' returnClauseJoin '}' '<' SLASH TAGNAME '>'    # join_tag_constr
    | '<' TAGNAME '>' returnClauseJoin '<' SLASH TAGNAME '>'            # join_tag_no_constr
    | ap                                                                # join_ap
    ;
// Restricted condition for join queries: equalities between variables and/or
// string constants, combined with AND.
// The two mixed variable/constant alternatives intentionally share one label,
// so they map to the same generated context.
condClauseJoin
    : VAR EQ VAR                            # join_cond_var_eq
    | VAR EQ STRINGCONST                    # join_cond_var_const_eq
    | STRINGCONST EQ VAR                    # join_cond_var_const_eq
    | STRINGCONST EQ STRINGCONST            # join_cond_const_eq
    | condClauseJoin AND condClauseJoin     # join_cond_and
    ;
// Bracketed, comma-separated list of one or more names: [a, b, c].
// Exactly one COMMA is required between names and none is allowed after the
// last one (the previous form also accepted missing, repeated and trailing
// commas).
attrList
    : '[' TAGNAME (COMMA TAGNAME)* ']'
    ;
//xqJoin: forClause (WHERE condClauseJoin)* RETURN returnClauseJoin;
// Operand of a join: a for clause, an optional where clause, and a return clause.
xqJoin
    : forClause whereClause? returnClause
    ;
// join(<left>, <right>, <attrList>, <attrList>, ...)
// Each operand is either a nested join or an xqJoin. The attribute lists are
// separated by exactly one COMMA, with none after the last list (the previous
// form also accepted missing, repeated and trailing commas). As before, one or
// more attribute lists are accepted; any check on their number is left to the
// code that consumes the parse tree.
join
    : JOIN '(' (join | xqJoin) COMMA (join | xqJoin) COMMA attrList (COMMA attrList)* ')'
    ;
// Condition of a where clause (also the body of some ... satisfies).
// Alternative order matters for the left-recursive alternatives: AND is listed
// before OR, so AND binds tighter than OR.
// In cond_Some the bindings are comma-separated: exactly one COMMA between
// consecutive bindings and none before SATISFIES (the previous form also
// accepted missing, repeated and trailing commas).
cond
    : cond AND cond                                         # cond_And
    | cond OR cond                                          # cond_Or
    | xq EQ xq                                              # cond_Eq
    | xq IS xq                                              # cond_Is
    | EMPTY '(' xq ')'                                      # cond_Empty
    | SOME VAR IN xq (COMMA VAR IN xq)* SATISFIES cond      # cond_Some
    | '(' cond ')'                                          # cond_Par
    | NOT cond                                              # cond_Not
    ;
//path tokens
// These literal tokens are declared before TAGNAME. TAGNAME's character set
// contains '.', so it can also match "." and ".."; being declared first is
// what lets PERIOD and DPERIOD win those equal-length matches.
TEXT_FUNC: 'text()';
AT_SYM: '@';
SLASH: '/';
DSLASH: '//';
COMMA: ',';
// NOTE(review): DQUOTE is not referenced by any rule in the visible grammar.
DQUOTE: '"';
WILDCARD: '*';
PERIOD: '.';
DPERIOD: '..';
// Used as the prefix of VAR; a lone '$' is also emitted as a DOLLAR token
// because this rule is not declared as a fragment.
DOLLAR: '$';
//filter tokens: EQ compares by value, IS compares by object id
EQ  : 'eq' | '=' ;
IS  : '==' | 'is' ;
AND : 'and' ;
OR  : 'or' ;
NOT : 'not' ;
//XQuery Tokens
// Keyword literals. They are declared before TAGNAME, which could otherwise
// match the same text.
ASSIGN: ':=';
WHERE: 'where';
RETURN: 'return';
LET: 'let';
IN: 'in';
FOR: 'for';
SOME: 'some';
SATISFIES: 'satisfies';
EMPTY: 'empty';
// Variable reference: '$' followed directly by a TAGNAME, e.g. $x or $my-var.1
VAR: DOLLAR TAGNAME;
//Join Tokens
// Keyword that introduces the join(...) construct parsed by the join rule.
JOIN: 'join';
// The TAGNAME rule needs to be after the above tokens so as to avoid matching 'and', 'not' etc. as TAGNAME
// One or more of: letters, digits, periods, underscores, hyphens
TAGNAME: [a-zA-Z0-9._-]+;
// Document/collection reference lexed as a single token, e.g. doc("books.xml").
// For doc(...) and document(...) the name is one or more characters drawn from
// the TAGNAME characters (letters, digits, '.', '_', '-') and the special
// characters space ! @ # $ % ^ & ( ), freely mixed. (Previously a name had to
// consist entirely of TAGNAME characters or entirely of special characters, so
// a name such as "my file.xml" was rejected.)
// For collection(...) the name is restricted to TAGNAME characters.
FILENAME
    : 'doc("'        [a-zA-Z0-9._ !@#$%^&()-]+ '")'
    | 'document("'   [a-zA-Z0-9._ !@#$%^&()-]+ '")'
    | 'collection("' TAGNAME '")'
    ;
// Escape sequence inside a string constant: a backslash followed by a single
// quote, a double quote, or another backslash.
// NOTE(review): ESCAPE is not declared as a fragment, so an escape sequence
// appearing outside a string is emitted as its own ESCAPE token; no parser
// rule in the visible grammar uses that token.
ESCAPE: '\\' (['"\\]);
// String constant in double or single quotes. The body is any mix of ESCAPE
// sequences and characters other than a backslash and the delimiting quote.
STRINGCONST: '"' (ESCAPE | ~["\\])* '"' | '\'' (ESCAPE | ~['\\])* '\'';
//TEXT: [a-zA-Z0-9_-]+; //must have at least one character
// Runs of spaces, tabs, newlines and carriage returns are discarded by the
// lexer and never reach the parser.
WS: [ \t\n\r]+ -> skip; //skip whitespace