// BNF grammar for sweet-expressions, an easy-to-read S-expression notation.
// (c) 2012-2013 David A. Wheeler, released under "MIT" license.

// This BNF is an LL(1) grammar, written using ANTLR version 3.
// More info on ANTLR is at "http://www.antlr.org/".
// It is written to be easy to translate to a recursive-descent parser,
// even one that doesn't do full tokenizing.

// The actions in "sweet.g" are written inside {...} using Java syntax,
// but invoke normal Scheme procedures (e.g., "car" and "cons") as follows:
// +------------------+------------+------------------------------------------+
// | Action Construct | Scheme     | Discussion                               |
// +------------------+------------+------------------------------------------+
// | cons(x, y)       | (cons x y) | Java syntax used for procedure calls     |
// | nullp(x)         | (null? x)  | "?" in Scheme procedure name becomes "p" |
// | null             | '()        | Java null represents the empty list      |
// | "somename"       | 'somename  | Java strings represent atoms             |
// +------------------+------------+------------------------------------------+
// The program "schemify" translates these actions into Scheme, assuming
// they meet certain formatting rules.

// This BNF specifically shows how non-indent whitespace is handled
// (SRFI-49's BNF is simpler but isn't specific about this, making it
// harder to interpret).  Note that this BNF presumes there's a preprocessor
// that does indent processing.  While indent processing is enabled, indents
// are marked with INDENT and dedents with DEDENT (one for each dedent).
// Lines with ONLY indent characters are considered blank lines.
// There is no "SAME" token to show "same indent level", but some rules
// use an empty "same" nonterminal to emphasize their lack of INDENT/DEDENT.
// Bad indentation emits BADDENT, which is never accepted.
// If there's an indent when there is NO preceding un-indented line, the
// preprocessor emits INITIAL_INDENT; this does *NOT* change the indent level
// (this is so initial indents will disable indentation processing,
// but only on that line).

// TODO:
// - (Maybe) Handle EOF in weird places.

grammar sweet;

options { k = 1; } // Force grammar to be LL(1).

// LEXER SECTION

tokens {
  BADDENT;  // Generated by indent processor, always illegal.
}

// ANTLR v3 does not allow the parser to communicate to the lexer
// (the lexer runs completely before the parser begins).
// The lexer *can* store and use state, which we have to do anyway
// if we're going to do indentation processing inside the lexer.
// Thus, in this implementation, the lexer
// tracks the parentheses enclosure level, so that \n is just a delimiter
// inside (...) but is handled specially in indentation processing,
// while symbols like "$" are just ordinary atoms inside (...).
// The lexer must also specially note "<*" and "*>" and specially modify
// the indentation levels when those are received.

// This ANTLR lexer does indentation processing, so it generates
// INDENT, DEDENT, and so on as appropriate.

@lexer::header {
import java.util.Deque;
}

@lexer::members {

  // Track enclosure level. "(" etc. adds 1, ")" etc. subtracts:
  long enclosure = 0;

  Boolean initial_indent = false;

  // Are we doing indent processing?:
  private Boolean indent_processing() {
    return (enclosure == 0) && !initial_indent;
  }

  // Permit sending multiple tokens per rule, per ANTLR FAQ.
  // This is necessary to support DEDENT, as a single token may cause
  // the generation of multiple DEDENTs.
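  // Illustrative sketch (not part of the original grammar): with an indent
  // stack holding "", "  ", and "    ", a following line that is flush left
  // makes the EOL rule below emit three tokens from a single lexer rule --
  // EOL, DEDENT, DEDENT.  emit() queues them all, and nextToken() then
  // hands them to the parser one at a time.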
  Deque<Token> tokens = new java.util.ArrayDeque<Token>();

  @Override
  public void emit(Token token) {
    state.token = token;
    tokens.addLast(token);
  }

  @Override
  public Token nextToken() {
    super.nextToken();
    if (tokens.isEmpty()) return Token.EOF_TOKEN;
    return tokens.removeFirst();
  }

  // This stack records the string indents; use push, pop, peek.
  Deque<String> indents = new java.util.ArrayDeque<String>();

  private Boolean outside_t_expr() {
    return indent_processing()
           && (indents.isEmpty() || indents.peek().equals(""));
  }

  private Boolean indentation_greater_than(String indent1, String indent2) {
    int len1 = indent1.length();
    int len2 = indent2.length();
    return (len1 > len2) && indent2.equals(indent1.substring(0, len2));
  }

  private void process_indent(String indent_text, Token t) {
    if (indents.isEmpty()) indents.push("");
    if (indents.peek().equals(indent_text)) {
      // no-op, aka "same"
    } else if (indentation_greater_than(indent_text, indents.peek())) {
      // System.out.print("Generate INDENT\n");
      t.setType(INDENT);
      emit(t);
      indents.push(indent_text);
    } else if (indentation_greater_than(indents.peek(), indent_text)) {
      while (!indents.isEmpty()
             && indentation_greater_than(indents.peek(), indent_text)) {
        indents.removeFirst(); // drop
        // System.out.print("Generate DEDENT(s)\n");
        t.setType(DEDENT);
        emit(t);
      }
      if (!indents.peek().equals(indent_text)) {
        // System.out.print("Generate BADDENT(s)\n");
        t.setType(BADDENT);
        emit(t);
      }
    } else {
      // System.out.print("Generate BADDENT\n");
      t.setType(BADDENT);
      emit(t);
    }
  }

  private void process_initial_indent(String indent_text, Token t) {
    t.setType(INITIAL_INDENT);
    emit(t);
    initial_indent = true;
  }

  private void restart_indent_level() {
    if (indents.isEmpty()) indents.push(""); // Initialize if needed
    indents.push(""); // Let's start over!
  }

  private void emit_type(int token_type) {
    CommonToken extra = new CommonToken(input, Token.INVALID_TOKEN_TYPE,
        Token.DEFAULT_CHANNEL, getCharIndex(), getCharIndex()-1);
    extra.setLine(getLine());
    extra.setCharPositionInLine(getCharPositionInLine());
    extra.setType(token_type);
    emit(extra);
  }

  private void process_collecting_end(Token t) {
    // Must send independent EOL token for ANTLR.
    emit_type(EOL);
    /* Dedent (if needed) */
    CommonToken dedent_it = new CommonToken(input, Token.INVALID_TOKEN_TYPE,
        Token.DEFAULT_CHANNEL, getCharIndex(), getCharIndex()-1);
    dedent_it.setLine(getLine());
    dedent_it.setCharPositionInLine(getCharPositionInLine());
    process_indent("", dedent_it); // Emit any dedents needed.
    t.setType(COLLECTING_END);
    emit(t);
    // Restore original indent stack.
    indents.removeFirst();
  }
}

@parser::header {
import scheme.*;
import static scheme.Pair.*;
}

@parser::members {

  // This is a bogus variable to work around an ANTLR bug.  ANTLR by default
  // is greedy, but warns on ambiguities unless "greedy=true", e.g.:
  //   part1 (options {greedy = true;} : hspace)* part2
  // But there's no greedy variable defined for the parser, so you can't use
  // the feature properly.  Here we create a settable variable, so you can
  // selectively disable unnecessary warnings.  If a later ANTLR also
  // defines this variable, then remove it:
  public Boolean greedy = true;

  private void generate_eof() {
    System.exit(0);
  }

  // These simulate basic Lisp/Scheme procedures:

  public static Object cadr(Pair x) {
    return car( (Pair) cdr(x));
  }
  public static Object cddr(Pair x) {
    return cdr( (Pair) cdr(x));
  }
  public static Boolean nullp(Object x) {   // Scheme "null?"
    return x == null;
  }
  public static Boolean pairp(Object x) {   // Scheme "pair?"
    return x instanceof Pair;
  }
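  // Illustrative sketch (not part of the original grammar) of how these
  // helpers mirror Scheme.  For example, the Scheme value '(a b) could be
  // built and inspected as:
  //   Object x = cons("a", cons("b", null));     // (a b)
  //   nullp(cdr((Pair) cdr((Pair) x)));          // true: list ends in '()
  //   string_datum(x);                           // "(a b)"
  // string_datum() is defined further below in this members block.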
  public static Boolean equalp(Object x, Object y) {   // Scheme "equal?"
    if ((x == null) && (y == null)) {
      return true;
    } else if ((x == null) || (y == null)) {
      return false;
    } else if (pairp(x) && pairp(y)) {
      Pair px = (Pair) x;
      Pair py = (Pair) y;
      return equalp(car(px), car(py)) && equalp(cdr(px), cdr(py));
    } else if (pairp(x) || pairp(y)) {
      return false;
    } else {
      return x.equals(y);
    }
  }

  // This does not check for cycles
  private static Boolean ends_in_null(Pair x) {
    if (nullp(cdr(x))) {
      return true;
    } else if (! pairp(cdr(x))) {
      return false;
    } else {
      return ends_in_null( (Pair) cdr(x));
    }
  }

  public static Boolean listp(Object x) {   // Scheme "list?"
    if (nullp(x)) return false;
    else if (! pairp(x)) return false;
    else return ends_in_null( (Pair) x);
  }

  public static Pair list(Object x) {
    return cons(x, null);
  }
  public static Pair list(Object x, Object y) {
    return cons(x, list(y));
  }

  // "y" really needs to be a Pair, but that makes it harder
  // to work with ANTLR
  public static Object append(Object x, Object y) {
    if (x == null) {
      return y;
    } else if (x instanceof Pair) {
      Pair p = (Pair) x;
      return cons(car(p), append(cdr(p), y));
    } else {
      throw new Error(); // for now.
    }
  }

  private static String string_datum_tail(Object x) {
    if (x == null) {
      return ")";
    } else if (x instanceof Pair) {
      Pair p = (Pair) x;
      return " " + string_datum(car(p)) + string_datum_tail(cdr(p));
    } else
      return " . " + string_datum(x) + ")";
  }

  public static String string_datum(Object x) {
    if (x == null) {
      return "()";
    } else if (x instanceof Pair) {
      Pair p = (Pair) x;
      return "(" + string_datum(car(p)) + string_datum_tail(cdr(p));
    } else {
      return x.toString();
    }
  }

  // ; Utility function:
  // ; If x is a 1-element list, return (car x), else return x
  // (define (monify x)
  //   (cond
  //     ((not (pair? x)) x)
  //     ((null? (cdr x)) (car x))
  //     (#t x)))
  public static Object monify(Object x) {
    if (! pairp(x)) {
      return x;
    } else if (nullp(cdr( (Pair) x))) {
      return car( (Pair) x);
    } else {
      return x;
    }
  }

  // This isn't an accurate implementation of "if" because it always
  // evaluates the t_branch AND f_branch.  But if t_branch and f_branch
  // have no side-effects, it's good enough.
  public static Object ifp(Boolean b, Object t_branch, Object f_branch) {
    if (b) return t_branch;
    else return f_branch;
  }

  // Is the parameter v a representation of the symbol "."?
  public static Boolean isperiodp(Object v) {
    if (!(v instanceof String)) return false;
    else return ((String) v).equals(".");
  }

  // ; --------------------------------------------------------
  // ; Curly-infix support procedures - converted from SRFI-105
  // ; --------------------------------------------------------

  // ; Return true if lyst has an even # of parameters, and the (alternating)
  // ; first parameters are "op".  Used to determine if a longer lyst is infix.
  // ; If passed empty list, returns true (so recursion works correctly).
  // (define (even-and-op-prefix? op lyst)
  //   (cond
  //     ((null? lyst) #t)
  //     ((not (pair? lyst)) #f)
  //     ((not (equal? op (car lyst))) #f) ; fail - operators not the same
  //     ((not (pair? (cdr lyst))) #f) ; Wrong # of parameters or improper
  //     (#t (even-and-op-prefix? op (cddr lyst))))) ; recurse.
  public static Boolean even_and_op_prefixp(Object op, Object lyst) {
    if (nullp(lyst)) {
      return true;
    } else if (! pairp(lyst)) {
      return false;
    } else if (! equalp(op, car( (Pair) lyst))) {
      return false;
    } else if (! pairp( cdr( (Pair) lyst))) {
      return false;
    } else {
      return even_and_op_prefixp(op, cddr( (Pair) lyst));
    }
  }
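  // Illustrative examples (not part of the original grammar) of how the
  // curly-infix helpers in this section classify lists, following SRFI-105:
  //   (even-and-op-prefix? '+ '(+ b + c))  => #t  ; alternating "+" operators
  //   (even-and-op-prefix? '+ '(* b))      => #f  ; operators differ
  //   {a + b + c}  is a simple infix list  => (+ a b c)
  //   {a + b * c}  is mixed infix          => ($nfx$ a + b * c)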
  // ; Return true if the lyst is in simple infix format
  // ; (and thus should be reordered at read time).
  // (define (simple-infix-list? lyst)
  //   (and
  //     (pair? lyst)        ; Must have list; '() doesn't count.
  //     (pair? (cdr lyst))  ; Must have a second argument.
  //     (pair? (cddr lyst)) ; Must have a third argument (we check it
  //                         ; this way for performance)
  //     (even-and-op-prefix? (cadr lyst) (cdr lyst)))) ; true if rest simple
  public static Boolean simple_infix_listp(Object lyst) {
    return pairp(lyst) &&
           pairp(cdr((Pair) lyst)) &&
           pairp(cddr((Pair) lyst)) &&
           even_and_op_prefixp(cadr((Pair) lyst), cdr((Pair) lyst));
  }

  // ; Return alternating parameters in a lyst (1st, 3rd, 5th, etc.)
  // (define (alternating-parameters lyst)
  //   (if (or (null? lyst) (null? (cdr lyst)))
  //     lyst
  //     (cons (car lyst) (alternating-parameters (cddr lyst)))))
  public static Pair alternating_parameters(Pair lyst) {
    if (nullp(lyst) || nullp(cdr(lyst))) {
      return lyst;
    } else {
      return cons(car(lyst), alternating_parameters((Pair) cddr(lyst)));
    }
  }

  // ; Not a simple infix list - transform it.  Written as a separate procedure
  // ; so that future experiments or SRFIs can easily replace just this piece.
  // Handle dollar sign specially for ANTLR.
  public static Pair transform_mixed_infix(Pair lyst) {
    return cons("$" + "nfx" + "$", lyst);
  }

  // ; Given curly-infix lyst, map it to its final internal format.
  // (define (process-curly lyst)
  //   (cond
  //     ((not (pair? lyst)) lyst) ; E.G., map {} to ().
  //     ((null? (cdr lyst)) ; Map {a} to a.
  //       (car lyst))
  //     ((and (pair? (cdr lyst)) (null? (cddr lyst))) ; Map {a b} to (a b).
  //       lyst)
  //     ((simple-infix-list? lyst) ; Map {a OP b [OP c...]} to (OP a b [c...])
  //       (cons (cadr lyst) (alternating-parameters lyst)))
  //     (#t (transform-mixed-infix lyst))))
  public static Object process_curly(Object lyst) {
    if (! pairp(lyst)) {
      return lyst;
    } else {
      Pair plyst = (Pair) lyst;
      if (nullp(cdr(plyst))) {
        return car(plyst);   // Map {a} to a.
      } else if (pairp(cdr(plyst)) && nullp(cddr(plyst))) {
        return plyst;        // Map {a b} to (a b).
      } else if (simple_infix_listp(plyst)) {
        return cons(cadr(plyst), alternating_parameters(plyst));
      } else {
        return transform_mixed_infix(plyst);
      }
    }
  }
}

// Define start symbol for parser (rest of parser is later).
start : t_expr {System.out.print(string_datum($t_expr.v) + "\n"); } ;

// LEXER SECTION.
// Lexical token (terminal) names are in all upper case

// INCLUDE IN SRFI

SPACE  : ' ';
TAB    : '\t';
PERIOD : '.';

// Special markers, which only have meaning outside (), [], {}.
GROUP_SPLIT    : {indent_processing()}? => '\\' '\\'; // GROUP/split symbol.
SUBLIST        : {indent_processing()}? => '$';
COLLECTING     : {indent_processing()}? => '<*' { restart_indent_level(); } ;
// This generates EOL + (any DEDENTs) + COLLECTING_END, and restores indents:
COLLECTING_END : {indent_processing()}? => t='*>' {process_collecting_end($t);};
RESERVED_TRIPLE_DOLLAR : {indent_processing()}? => '$$$'; // Reserved.

// Abbreviations followed by certain whitespace are special:
APOSW           : {indent_processing()}? => '\'' (SPACE | TAB) ;
QUASIQUOTEW     : {indent_processing()}? => '\`' (SPACE | TAB) ;
UNQUOTE_SPLICEW : {indent_processing()}? => ',@' (SPACE | TAB) ;
UNQUOTEW        : {indent_processing()}? => ',' (SPACE | TAB) ;

// Abbreviations followed by EOL also generate abbrevW:
APOS_EOL : {indent_processing()}? => '\'' EOL_SEQUENCE SPECIAL_IGNORED_LINE*
    i=INDENT_CHARS_PLUS
    {emit_type(APOSW); emit_type(EOL); process_indent($i.text, $i);};
QUASIQUOTE_EOL : {indent_processing()}? => '\`' EOL_SEQUENCE SPECIAL_IGNORED_LINE*
    i=INDENT_CHARS_PLUS
    {emit_type(QUASIQUOTEW); emit_type(EOL); process_indent($i.text, $i);};
UNQUOTE_SPLICE_EOL : {indent_processing()}? => ',@' EOL_SEQUENCE SPECIAL_IGNORED_LINE*
    i=INDENT_CHARS_PLUS
    {emit_type(UNQUOTE_SPLICEW); emit_type(EOL); process_indent($i.text, $i);};
UNQUOTE_EOL : {indent_processing()}? => ',' EOL_SEQUENCE SPECIAL_IGNORED_LINE*
    i=INDENT_CHARS_PLUS
    {emit_type(UNQUOTEW); emit_type(EOL); process_indent($i.text, $i);};

// Abbreviations not followed by horizontal space are ordinary:
APOS           : '\'';
QUASIQUOTE     : '\`';
UNQUOTE_SPLICE : ',@';
UNQUOTE        : ',';

// Special end-of-line character definitions.
fragment EOL_CHAR      : '\n' | '\r' ;
fragment NOT_EOL_CHAR  : (~ (EOL_CHAR));
fragment NOT_EOL_CHARS : NOT_EOL_CHAR*;
fragment EOL_SEQUENCE  : ('\r' '\n'? | '\n');

// Comments.  LCOMMENT=line comment, scomment=special comment.
LCOMMENT : ';' NOT_EOL_CHARS ;  // Line comment - doesn't include EOL
BLOCK_COMMENT : '#|'            // This is #| ... |#
    (options {greedy=false;} : (BLOCK_COMMENT | .))*
    '|#' ;
DATUM_COMMENT_START : '#;' ;

// SRFI-105 notes that "implementations could trivially support
// (simultaneously) markers beginning with #! followed by a letter
// (such as the one to identify support for curly-infix-expressions),
// the SRFI-22 #!+space marker as an ignored line, and the
// format #!/ ... !# and #!. ... !# as a multi-line comment."
// We'll implement that approach for maximum flexibility.
SRFI_22_COMMENT : '#! ' NOT_EOL_CHARS ;
SHARP_BANG_FILE : '#!' ('/' | '.')
    (options {greedy=false;} : .)*
    '!#' (SPACE|TAB)* ;
// These match #!fold-case, #!no-fold-case, #!sweet, and #!curly-infix;
// it also matches a lone "#!".  The "#!"+space case is handled above,
// in SRFI_22_COMMENT, overriding this one:
SHARP_BANG_DIRECTIVE : '#!'
    (('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'-')*)?
    (SPACE|TAB)* ;

// STOP INCLUDING IN SRFI

// Specially handle formfeed (\f) and vertical tab (\v).
// We support lone formfeeds on a line to support the GNU Coding Standards
// (http://www.gnu.org/prep/standards/standards.html), which says:
// "Please use formfeed characters (control-L) to divide the program
// into pages at logical places (but not within a function)...
// The formfeeds should appear alone on lines by themselves."
// Some argue against vertical tabs (http://prog21.dadgum.com/76.html).
// Any FF and VT must be at the beginning of a line, must be followed by
// EOL_SEQUENCE, and they terminate a t-expression.
FF : '\f'     {if (enclosure==0) initial_indent = true;} ; // Formfeed, \u000c
VT : '\u000b' {if (enclosure==0) initial_indent = true;} ; // Vertical tab, \v

// Various forms of comments - line comments and special comments.

// We'll specifically ignore lines that begin with ";"
LCOMMENT_LINE : {(getCharPositionInLine() == 0)}? =>
    ';' contents=NOT_EOL_CHARS EOL_SEQUENCE
    {
      // We can't directly use the ANTLR Lexer to generate ";" comments,
      // because the lexer doesn't run synchronously with the parser.
      // That's no problem, because the spec is designed to allow that
      // to be handled separately and not here.
      // if (outside_t_expr()) System.out.println(";" + $contents.text);
      skip();
    };

// The following implements INDENT/DEDENT tokens, semi-similar to Python in
// http://docs.python.org/2/reference/lexical_analysis.html#indentation
// Page 95 of "The Definitive ANTLR Reference" has a code outline for
// generating INDENT and DEDENT, but it is fundamentally wrong for us.
// That only acts when there are 1+ indent characters on a line, so it
// cannot implement all the DEDENTs necessary when it encounters a blank line.

// End-of-line (EOL) is extremely special in sweet-expressions.
// After reading it, we'll need to read in any following indent characters
// (if indent processing is active) to determine if we have an INDENT or
// DEDENTs.  As part of tokenizing, we'll consume any following lines that
// are ;-only lines, and treat indent-only lines as equivalent to blank lines.

fragment SPECIAL_IGNORED_LINE : (' ' | '\t' | '!' )* ';' NOT_EOL_CHARS
    EOL_SEQUENCE ;

fragment INDENT_CHAR       : (' ' | '\t' | '!');
fragment INDENT_CHARS      : INDENT_CHAR*;
fragment INDENT_CHARS_PLUS : INDENT_CHAR+;

// These are reported as fragments, not tokens, to silence
// spurious warnings:
fragment INITIAL_INDENT: ' ';
fragment INDENT: ' ';
fragment DEDENT: ' ';

// EOL after contents - may have a following indented/dedented line.
EOL : {enclosure==0 && ((getCharPositionInLine() != 0) && !initial_indent)}? =>
    e=EOL_SEQUENCE
    SPECIAL_IGNORED_LINE*
    i=INDENT_CHARS        // This is the indent for the next line
    extra=EOL_SEQUENCE*   // If this exists, the indents are useless.
    {
      $e.setType(EOL); emit($e);  // Emit the EOL token
      if ($extra != null || ($i.text).equals("")) {
        process_indent("", $i);   // Indented EOL = EOL
        emit($e);                 // Emit the extra EOL token
      } else {
        // Normal case: EOL, possibly followed by indent; process it.
        process_indent($i.text, $i);
      }
    } ;

BLANK_EOL : {enclosure==0 && (getCharPositionInLine() == 0)}? =>
    e=EOL_SEQUENCE
    { $e.setType(EOL); emit($e);
      process_indent("", $e); } ;

INITIAL_INDENT_EOL : {enclosure==0 && initial_indent}? =>
    e=EOL_SEQUENCE
    { $e.setType(EOL); emit($e);
      initial_indent = false; } ;

// Generate special initial indents for initial indents
// not preceded by lines with contents
INITIAL_INDENT_INIT : {(enclosure == 0) && (getCharPositionInLine() == 0) }? =>
    i=INDENT_CHARS_PLUS
    { process_initial_indent($i.text, $i); } ;

ENCLOSED_EOL_CHAR : {enclosure > 0}? => EOL_CHAR;

// Do not reference '\n' or '\r' inside a non-lexing rule in ANTLR.
// If you do, ANTLR will quietly create new lexical tokens for them, and
// those new tokens will interfere with EOL processing.  E.G., do NOT do this:
//   eolchar : '\n' | '\r';

// Other simple character or two-character sequences:
LPAREN           : '(' {enclosure++;} ;
VECTOR_START     : '#(' {enclosure++;};
BYTEVECTOR_START : '#u8(' {enclosure++;};
RPAREN           : ')' {if (enclosure>0) {enclosure--;};};
LBRACKET         : '[' {enclosure++;};
RBRACKET         : ']' {if (enclosure>0) {enclosure--;};};
LBRACE           : '{' {enclosure++;};
RBRACE           : '}' {if (enclosure>0) {enclosure--;};};

fragment BANG : '!';

// Here we give lexical definitions for the so-called simple datums,
// such as symbols and numbers... what many people would call "atoms".
// Anybody who says "Scheme doesn't have syntax" has never tried
// to parse Scheme identifiers or numbers.  Scheme's syntax for
// identifiers and numbers is extremely complicated; the number of
// productions needed is vast, even if (in our case) we're only trying
// to match a string instead of something more complex.
// They're not even all that easy to implement when you're trying to
// directly follow the spec.
// To prove the point, the following shows a typical lexical definition
// for identifiers and numbers in many languages:
//   fragment ID_INITIAL: ('A'..'Z' | 'a'..'z' | '_')
//   IDENTIFIER: ID_INITIAL (ID_INITIAL | '0'..'9')* ;
//   fragment EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
//   FLOAT : ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
//         | '.' ('0'..'9')+ EXPONENT?
//         | ('0'..'9')+ EXPONENT ;
//   INT : ('0'..'9')+ ;
//   number : INT | FLOAT ;
// Languages might have 1-3 more productions for various bases (2,8,16).
//
// In contrast, the productions below show how to parse Scheme identifiers
// and numbers, which are FULL of complications, and thus are chock-full
// of complicated productions.  Numbers, in particular, are frighteningly
// hard to get right.  In fact, the Scheme number spec (R7RS draft 8) has
// a number of problems that I had to correct (I have sent my comments
// to the spec authors).
// Note: The following maps most stuff into strings, because we don't
// need to do more than that for a translation.  The action rules can
// be changed to generate real values, not just string representations,
// but for our purposes that would add needless complications.
// Note: The spec requires that, in numbers, case is not relevant,
// so many of the productions show upper/lowercase.

IDENTIFIER :
    INITIAL SUBSEQUENT*
  | '|' SYMBOL_ELEMENT* '|'
  | PECULIAR_IDENTIFIER ;

fragment INITIAL : LETTER | SPECIAL_INITIAL | INLINE_HEX_ESCAPE ;
fragment LETTER  : 'a'..'z' | 'A'..'Z' ;
fragment SPECIAL_INITIAL : '!' | '$' | '%' | '&' | '*' | '/' | ':' | '<'
                         | '=' | '>' | '?' | '^' | '_' | '~';
fragment SUBSEQUENT : INITIAL | DIGIT | SPECIAL_SUBSEQUENT ;
fragment DIGIT      : '0' .. '9' ;
fragment HEX_DIGIT  : DIGIT | 'a'..'f' ;
fragment EXPLICIT_SIGN : '+' | '-' ;
fragment SPECIAL_SUBSEQUENT : EXPLICIT_SIGN | '.' | '@' ;
fragment INLINE_HEX_ESCAPE : '\\' 'x' HEX_SCALAR_VALUE ';' ;
fragment HEX_SCALAR_VALUE : HEX_DIGIT+ ;
fragment PECULIAR_IDENTIFIER :
    EXPLICIT_SIGN
      ( /* empty */
      | SIGN_SUBSEQUENT SUBSEQUENT*
      | '.' DOT_SUBSEQUENT SUBSEQUENT* )
  | '.' DOT_SUBSEQUENT SUBSEQUENT*
  // Note: This is a bogus extension for testing purposes, to make sure that
  // a \\ is not interpreted in an initial indent:
  | '\\' '\\' ;
fragment DOT_SUBSEQUENT  : SIGN_SUBSEQUENT | '.' ;
fragment SIGN_SUBSEQUENT : INITIAL | EXPLICIT_SIGN | '@' ;
// Annoyingly, SYMBOL_ELEMENT overlaps with STRING_ELEMENT in
// the R7RS Scheme draft 8 fix; this (below) fixes that:
fragment SYMBOL_ELEMENT :
    (~('|' | '\\'))
  | SPECIAL_STRING_ELEMENT
    // NOTE: Double-quote should NOT be here, already matched above.
  | '\\|' ;

// boolean, character, character_name
BOOLEAN : '#t' | '#f' | '#true' | '#false' ;
CHARACTER : '#\\'
    ((~ ('a'..'z'))
    | ('a'..'w' | 'y' | 'z') ('a'..'z')*
    | 'x' HEX_SCALAR_VALUE ) ;
STRING : '\"' STRING_ELEMENT* '\"' ;
fragment STRING_ELEMENT : ~( '"' | '\\' ) | SPECIAL_STRING_ELEMENT ;
fragment INTRALINE_WHITESPACE : (' ' | '\t')* EOL_SEQUENCE (' ' | '\t')* ;
fragment SPECIAL_STRING_ELEMENT :
    '\\' ('a' | 'b' | 't' | 'n' | 'r' | '"' | '\\' | INTRALINE_WHITESPACE )
  | INLINE_HEX_ESCAPE ;

// The following is structured so that exactness (if stated)
// can precede *OR* follow the radix (base) declaration, while
// (1) forbidding two exactness statements (only 1/number),
// (2) allowing exactness and radix declarations to both be optional without
//     creating a useless ambiguity.
// Instead of calling a "NUM_2", we explicitly make statements about
// exactness.
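// Illustrative examples (not part of the original grammar), assuming the
// usual R7RS reading of these prefixes: the NUMBER rule below accepts both
// "#e#b101" and "#b#e101" (exactness before or after the radix), plain
// "#b101", and unprefixed decimals such as "3.14" or "-4e6", while a
// doubled exactness prefix such as "#e#i5" does not match.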
NUMBER :
    (EXACTNESS
      (RADIX_2 COMPLEX_2 | RADIX_8 COMPLEX_8 | RADIX_10 COMPLEX_10
      | RADIX_16 COMPLEX_16 | COMPLEX_10))
  | RADIX_2  EXACTNESS? COMPLEX_2
  | RADIX_8  EXACTNESS? COMPLEX_8
  | RADIX_10 EXACTNESS? COMPLEX_10
  | RADIX_16 EXACTNESS? COMPLEX_16
  | COMPLEX_10 ;

fragment I : 'i' | 'I' ;

// These aren't used directly, they are templates.
// Note that UREAL_R is special for base 10 (supports decimal)
// The PREFIX_R has to be handled specially because EXACTNESS can be empty,
// as can RADIX_10.
//
// fragment NUM_R : PREFIX_R COMPLEX_R ;
// fragment COMPLEX_R : REAL_R
//                      ('@' REAL_R
//                      | ('+' | '-') UREAL_R? I
//                      | INFNAN I )?
//                    | ('+' | '-') UREAL_R I
//                    | INFNAN I
//                    | ('+' | '-') I ;
// fragment REAL_R : SIGN UREAL_R | INFNAN ;
// fragment UREAL_R : UINTEGER_R ('/' UINTEGER_R)?
//                  | DECIMAL_R;
// fragment UINTEGER_R : DIGIT_R+ ;
// fragment PREFIX_R : RADIX_R EXACTNESS | EXACTNESS RADIX_R ;

// fragment NUM_2 : PREFIX_2 COMPLEX_2 ;
fragment COMPLEX_2 : REAL_2
                     ('@' REAL_2
                     | ('+' | '-') UREAL_2? I
                     | INFNAN I )?
                   | ('+' | '-') UREAL_2 I
                   | INFNAN I
                   | ('+' | '-') I ;
fragment REAL_2 : SIGN UREAL_2 | INFNAN ;
fragment UREAL_2 : UINTEGER_2 ('/' UINTEGER_2)? ;
fragment UINTEGER_2 : DIGIT_2+ ;
// fragment PREFIX_2 : RADIX_2 EXACTNESS? ;

// fragment NUM_8 : PREFIX_8 COMPLEX_8 ;
fragment COMPLEX_8 : REAL_8
                     ('@' REAL_8
                     | ('+' | '-') UREAL_8? I
                     | INFNAN I )?
                   | ('+' | '-') UREAL_8 I
                   | INFNAN I
                   | ('+' | '-') I ;
fragment REAL_8 : SIGN UREAL_8 | INFNAN ;
fragment UREAL_8 : UINTEGER_8 ('/' UINTEGER_8)? ;
fragment UINTEGER_8 : DIGIT_8+ ;
// fragment PREFIX_8 : RADIX_8 EXACTNESS? ;

// fragment NUM_10 : PREFIX_10 COMPLEX_10 ;
fragment COMPLEX_10 : REAL_10
                      ('@' REAL_10
                      | ('+' | '-') UREAL_10? I
                      | INFNAN I )?
                    | ('+' | '-') UREAL_10 I
                    | INFNAN I
                    | ('+' | '-') I ;
fragment REAL_10 : SIGN UREAL_10 | INFNAN ;
// UREAL_10 has to be handled carefully.  The spec has this:
//   fragment UREAL_10 : UINTEGER_10 ('/' UINTEGER_10)? | DECIMAL_10;
//   fragment DECIMAL_10 : UINTEGER_10 SUFFIX
//                       | PERIOD DIGIT_10+ SUFFIX
//                       | DIGIT_10+ PERIOD DIGIT_10* SUFFIX ;
// which is ambiguous; for UREAL_10, "1" can match branch 1 or 2.
fragment UREAL_10 :
    /* SUFFIX can be empty, so making it optional creates unnecessary
       ambiguity */
    UINTEGER_10 ('/' UINTEGER_10 | SUFFIX | PERIOD DIGIT_10* SUFFIX)
  | PERIOD DIGIT_10+ SUFFIX ;
fragment UINTEGER_10 : DIGIT_10+ ;
// fragment PREFIX_10 : RADIX_10 EXACTNESS? ;

// fragment NUM_16 : PREFIX_16 COMPLEX_16 ;
fragment COMPLEX_16 : REAL_16
                      ('@' REAL_16
                      | ('+' | '-') UREAL_16? I
                      | INFNAN I )?
                    | ('+' | '-') UREAL_16 I
                    | INFNAN I
                    | ('+' | '-') I ;
fragment REAL_16 : SIGN UREAL_16 | INFNAN ;
fragment UREAL_16 : UINTEGER_16 ('/' UINTEGER_16)? ;
fragment UINTEGER_16 : DIGIT_16+ ;
// fragment PREFIX_16 : RADIX_16 EXACTNESS? ;

fragment INFNAN : ('+' | '-') ('inf.0' | 'nan.0') ;
fragment SUFFIX : /*empty*/ | EXPONENT_MARKER SIGN DIGIT_10+ ;
fragment EXPONENT_MARKER : 'e' | 's' | 'f' | 'd' | 'l'
                         | 'E' | 'S' | 'F' | 'D' | 'L' ;
fragment SIGN : /*empty*/ | '+' | '-' ;
// NOTE: This *requires* non-empty.
fragment EXACTNESS : '#i' | '#I' | '#e' | '#E' ;
fragment RADIX_2 : '#b' | '#B' ;
fragment RADIX_8 : '#o' | '#O' ;
// NOTE: This *requires* non-empty.
fragment RADIX_10 : '#d' | '#D';
fragment RADIX_16 : '#x' | '#X';
fragment DIGIT_2  : '0' | '1' ;
fragment DIGIT_8  : '0'..'7' ;
fragment DIGIT_10 : DIGIT ;
fragment DIGIT_16 : DIGIT_10 | 'a'..'f' | 'A'..'F';

// Define ERROR this way so we don't get a spurious ANTLR warning.
// This is used by the later "error" non-terminal.
fragment ERROR: ' ' ;

// PARSER SECTION

// Special non-terminals that act essentially as comments.
// They are used to clarify the grammar meaning, as follows:
// empty : ;  // This was used to identify an empty branch,
//            // but doing that interfered with ANTLR debugging info
same  : ;       // Emphasizes where neither indent nor dedent has occurred
error : ERROR;  // Specifically identifies an error branch.
// Note that errors can occur elsewhere, and an implementation
// may include an extension where an error is noted in this grammar.
// However, the error non-terminal makes it clear where an action is
// not defined, indicates where a parser might specifically check for
// errors, and also acts as a check on the grammar itself (to ensure that
// there isn't some valid interpretation for that sequence at that point).

// Here we create some non-terminals with the same names as terminals,
// just lowercase instead of uppercase.
// These renames are completely unnecessary, but they are helpful
// when using the ANTLR debugger.
// collecting_end : COLLECTING_END;
// indent : INDENT;
// dedent : DEDENT;

// This BNF uses the following slightly complicated pattern in some places:
//   from_n_expr ((hspace+ (stuff {action1} | /*empty*/ {action2} ))
//               | /*empty*/ {action2} )
// This is an expanded form of this BNF pattern (sans actions):
//   from_n_expr (hspace+ stuff?)?
// Note that this pattern quietly removes horizontal spaces at the
// end of the line correctly; that's important because you can't see them,
// so quietly handling them eliminates a source of hard-to-find and
// unnecessary errors.
// If from_n_expr (etc.) is as greedy as possible (it needs to be),
// we *could* instead accept this simpler BNF pattern:
//   from_n_expr hspace* stuff?
// but while that simpler BNF pattern would correctly accept *good* input,
// it would also accept *incorrect* input like "(x)q" or other n-expressions
// followed immediately by other n-expressions without intervening whitespace.
// We want to detect such situations as errors, so we'll use the
// more complex (and more persnickety) BNF pattern instead.

wspace : hspace | ENCLOSED_EOL_CHAR | FF | VT
       | LCOMMENT ; // Separators inside (...) etc.

list_contents_real returns [Object v]
  : n1=n_expr
      (wspace+
        (lc=list_contents_real {$v = cons($n1.v, $lc.v);}
        | /*empty*/            {$v = list($n1.v);})
      | /*empty*/              {$v = list($n1.v);})
  | PERIOD
      (wspace+
        (n2=n_expr wspace* {$v = $n2.v;}
        | /*empty*/        {$v = list(".");})
      | /*empty*/          {$v = list(".");}) ;

list_contents returns [Object v]
  : wspace*
    (list_contents_real {$v=$list_contents_real.v;}
    | /*empty*/         {$v = null;} ) ;

// The "greedy=true" option here forces n_expr_tail to look at the
// next character and forcibly consume in *this* production if it's
// an opening paren, bracket, or brace.  Without this,
// ANTLR notices ambiguities and complains with reports like:
//   Decision can match input such as "LPAREN" using multiple alternatives: 1, 4
// This is because there are constructs such as:
//   #;a#|hi|#(x)
// which technically are ambiguous; do we mean "#;a" or "#;a(x)"?
// The option breaks the ambiguity.
n_expr_tail[Object prefix] returns [Object v]
  : (options {greedy=true;}
    : LPAREN c1=list_contents RPAREN
      r1=n_expr_tail[cons(prefix, $c1.v)] {$v = $r1.v;}
    | LBRACKET c2=list_contents RBRACKET
      r2=n_expr_tail[cons("$" + "bracket-apply" + "$",
                          cons(prefix, $c2.v))] {$v = $r2.v;}
    | LBRACE c3=list_contents RBRACE
      // Map f{} to (f) and not (f ()).  Note that f{x} maps to (f x).
      r3=n_expr_tail[nullp($c3.v) ? list(prefix)
                                  : list(prefix, process_curly($c3.v))]
      {$v = $r3.v;}
    | /*empty*/ {$v = prefix;} ) ;
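// Illustrative examples (not part of the original grammar) of what
// n_expr_tail builds, following SRFI-105 neoteric expressions:
//   f(x y)   => (f x y)
//   f(x)(y)  => ((f x) y)
//   f[x]     => ($bracket-apply$ f x)
//   f{x + 1} => (f (+ x 1))
//   f{}      => (f)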
vector returns [Object v]
  : VECTOR_START list_contents RPAREN
    {$v = cons("vector", $list_contents.v); } ;

// Currently return value ignored because simple_datum ignores it.
bytevector returns [Object v]
  : BYTEVECTOR_START list_contents RPAREN
    { $v = cons("bytevector", $list_contents.v); } ;

simple_datum : BOOLEAN | NUMBER | CHARACTER | STRING | symbol | bytevector;

symbol : IDENTIFIER ;

compound_datum returns [Object v]
  : LPAREN norm=list_contents RPAREN          {$v = $norm.v;}
  | LBRACKET bracketed=list_contents RBRACKET {$v = $bracketed.v;}
  | LBRACE braced=list_contents RBRACE        {$v = process_curly($braced.v);}
  | vector                                    {$v = $vector.v;} ;

// Production "n_expr_prefix" is an n-expression prefix per SRFI-105:
n_expr_prefix returns [Object v]
  : simple_datum   {$v = $simple_datum.text;}
  | compound_datum {$v = $compound_datum.v;} ;

// Production "n_expr_noabbrev" is an n-expression as defined in SRFI-105,
// but has no leading abbreviations (see below):
n_expr_noabbrev returns [Object v]
  : n_expr_prefix n_expr_tail[$n_expr_prefix.v] {$v = $n_expr_tail.v;} ;

// INCLUDE IN SRFI

// IMPORTANT SUPPORTING PARSER DEFINITIONS for the BNF

hspace : SPACE | TAB ; // horizontal space

// This is for handling non-first n-expressions in initial indent.
// An implementation MAY implement this as "(hspace | '!')+"
// (e.g., to avoid retaining state when returning a value).
hspaces_maybe_bang : hspace+ ;

// Production "abbrevw" is an abbreviation with a following whitespace:
abbrevw returns [Object v]
  : APOSW           {$v = "quote";}
  | QUASIQUOTEW     {$v = "quasiquote";}
  | UNQUOTE_SPLICEW {$v = "unquote-splicing";}
  | UNQUOTEW        {$v = "unquote";} ;

// Production "abbrev_no_w" is an abbreviation without a following whitespace:
abbrev_no_w returns [Object v]
  : APOS           {$v = "quote";}
  | QUASIQUOTE     {$v = "quasiquote";}
  | UNQUOTE_SPLICE {$v = "unquote-splicing";}
  | UNQUOTE        {$v = "unquote";};

abbrev_all returns [Object v]
  : abbrevw     {$v = $abbrevw.v;}
  | abbrev_no_w {$v = $abbrev_no_w.v;} ;

// Production "n_expr" is a full neoteric-expression as defined in SRFI-105.
// n_expr does *not* consume any following horizontal space.
// Uses "n_expr_noabbrev", an n-expression with no leading abbreviations:
n_expr returns [Object v]
  : abbrev_all n1=n_expr {$v = list($abbrev_all.v, $n1.v);}
  | n_expr_noabbrev      {$v = $n_expr_noabbrev.v;} ;

// Production "n_expr_first" is a neoteric-expression, but leading
// abbreviations cannot have any whitespace afterwards (used by "head"):
n_expr_first returns [Object v]
  : abbrev_no_w n1=n_expr_first {$v = list($abbrev_no_w.v, $n1.v);}
  | n_expr_noabbrev             {$v = $n_expr_noabbrev.v;} ;

// Production "scomment" (special comment) defines comments other than ";":
sharp_bang_comments : SRFI_22_COMMENT | SHARP_BANG_FILE | SHARP_BANG_DIRECTIVE ;

scomment : BLOCK_COMMENT
         | DATUM_COMMENT_START (options {greedy=true;} : hspace)* n_expr
         | sharp_bang_comments ;

// Production "comment_eol" reads an optional ;-comment (if it exists),
// and then reads the end-of-line (EOL) sequence.  EOL processing consumes
// additional comment-only lines (if any) which may be indented.
comment_eol : LCOMMENT? EOL;
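// Illustrative examples (not part of the original grammar) of why the
// "abbrevw" / "abbrev_no_w" distinction matters, assuming the usual
// sweet-expression reading:
//   'foo bar    ; head is ((quote foo) bar) -- APOS binds to foo only
//   ' foo bar   ; reads as (quote (foo bar)) -- APOSW covers the whole line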
// KEY BNF PRODUCTIONS for sweet-expressions:

// Production "collecting_tail" returns a collecting list's contents.
// Precondition: At beginning of line.
// Postcondition: Consumed the matching COLLECTING_END.
// FF = formfeed (\f aka \u000c), VT = vertical tab (\v aka \u000b)
collecting_tail returns [Object v]
  : it_expr more=collecting_tail {$v = cons($it_expr.v, $more.v);}
  | comment_eol retry1=collecting_tail {$v = $retry1.v;}
  | INITIAL_INDENT /* Initial indent only allowed for lcomment */
    LCOMMENT EOL retry2=collecting_tail {$v = $retry2.v;}
  | (FF | VT)+ EOL retry3=collecting_tail {$v = $retry3.v;}
  | COLLECTING_END {$v = null;} ;

// Process line after ". hspace+" sequence.  Does not go past current line.
post_period returns [Object v]
  : scomment hspace* rpt=post_period {$v = $rpt.v;} // (scomment hspace*)*
  | pn=n_expr hspace* (scomment hspace*)* (n_expr error)? {$v = $pn.v;}
  | COLLECTING hspace* pc=collecting_tail hspace*
    (scomment hspace*)* (n_expr error)? {$v = $pc.v;}
  | /*empty*/ {$v = ".";} ;

// Production "head" reads 1+ n-expressions on one line; it will
// return the list of n-expressions on the line.  If there is one n-expression
// on the line, it returns a list of exactly one item; this makes it
// easy to append to later (if appropriate).  In some cases, we want
// single items to be themselves, not in a list; function monify does this.
// The "head" production never reads beyond the current line
// (except within a block comment), so it doesn't need to keep track
// of indentation, and indentation will NOT change within head.
// The "head" production only directly handles the first n-expression on the
// line, and then calls on "rest" to process the rest (if any); we do this
// because in a few cases it matters if an expression is the first one.
// Callers can depend on "head" and "rest" *not* changing indentation.
// On entry, all indentation/hspace must have already been read.
// On return, it will have consumed all hspace (spaces and tabs).
// Precondition: At beginning of line+indent
// Postcondition: At unconsumed EOL
head returns [Object v]
  : PERIOD /* Leading ".": escape following datum like an n-expression. */
      (hspace+ pp=post_period {$v = list($pp.v);}
      | /*empty*/ {$v = list(".");} )
  | COLLECTING hspace* collecting_tail hspace*
      (rr=rest {$v = cons($collecting_tail.v, $rr.v); }
      | /*empty*/ {$v = list($collecting_tail.v); } )
  | basic=n_expr_first /* Only match n_expr_first */
      ((hspace+
        (br=rest {$v = cons($basic.v, $br.v);}
        | /*empty*/ {$v = list($basic.v);} ))
      | /*empty*/ {$v = list($basic.v);} ) ;

// The "rest" production reads the rest of the expressions on a line
// (the "rest of the head"), after the first expression of the line.
// Like head, it consumes any hspace before it returns.
// The "rest" production is written this way so a non-tokenizing
// implementation can read an expression specially.  E.G., if it sees a period,
// read the expression directly and then see if it's just a period.
// Precondition: At beginning of non-first expression on line (past hspace)
// Postcondition: At unconsumed EOL
rest returns [Object v]
  : PERIOD /* Improper list */
      (hspace+ pp=post_period {$v = $pp.v;}
      | /*empty*/ {$v = list(".");})
  | scomment hspace*
      (sr=rest {$v = $sr.v;}
      | /*empty*/ {$v = null;} )
  | COLLECTING hspace* collecting_tail hspace*
      (rr=rest {$v = cons($collecting_tail.v, $rr.v);}
      | /*empty*/ {$v = list($collecting_tail.v);} )
  | basic=n_expr
      ((hspace+
        (br=rest {$v = cons($basic.v, $br.v);}
        | /*empty*/ {$v = list($basic.v);} ))
      | /*empty*/ {$v = list($basic.v);} ) ;
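// Illustrative example (not part of the original grammar), assuming the
// usual sweet-expression reading: for the single line
//   foo bar(1) "hi"
// "head" matches foo via n_expr_first and "rest" matches the remaining
// n-expressions, producing the list (foo (bar 1) "hi"); a line holding a
// lone n-expression such as
//   foo
// yields the one-element list (foo), which monify later reduces to foo.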
// Production "body" handles the sequence of 1+ child lines in an it_expr
// (e.g., after a "head"), each of which is itself an it_expr.
// It returns the list of expressions in the body.
// Note that an it-expr will consume any line comments or hspaces
// before it returns back to the "body" production.
// Since (list x) is simply (cons x '()), this production always does a
// cons of the first it_expr and another body [if it exists] or '() [if not].
body returns [Object v]
  : i=it_expr
    (same
      ( {isperiodp($i.v)}? => f=it_expr DEDENT {$v = $f.v;} // Improper list final value
      | {! isperiodp($i.v)}? => nxt=body {$v = cons($i.v, $nxt.v);} )
    | DEDENT {$v = list($i.v);} ) ;

// Production "it_expr" (indented sweet-expressions)
// is the main production for sweet-expressions in the usual case.
// Precondition: At beginning of line+indent
// Postcondition: it-expr ended by consuming EOL + examining indent
// Note: This BNF presumes that "*>" generates multiple tokens,
// "EOL DEDENT* COLLECTING_END", and resets the indentation list.
// You can change the BNF below to allow "head /*empty*/", and handle dedents
// by directly comparing values; then "*>" only needs to generate
// COLLECTING_END.  But this creates a bunch of ambiguities
// like a 'dangling else', which must all be disambiguated by accepting
// the first or the longer sequence first.  Either approach is needed to
// support "*>" as the non-first element so that the "head" can end
// without a literal EOL, e.g., as in "let <* y 5 *>".
it_expr returns [Object v]
  : head
    (options {greedy=true;} :
      ( GROUP_SPLIT hspace* /* Not initial; interpret as split */
        (options {greedy=true;} :
          // To allow \\ EOL as line-continuation, instead do:
          //   comment_eol same more=it_expr {$v = append($head.v, $more.v);}
          comment_eol error
        | /*empty*/ {$v = monify($head.v);} )
      | SUBLIST hspace* /* head SUBLIST ... case */
        (sub_i=it_expr {$v=append($head.v, list($sub_i.v));}
        | comment_eol error )
      | comment_eol // Normal case, handle child lines if any:
        (INDENT children=body {$v = append($head.v, $children.v);}
        | /*empty*/ {$v = monify($head.v);} /* No child lines */ )
        // If COLLECTING_END doesn't generate multiple tokens, can do:
        // | /*empty*/ {$v = monify($head.v);}
      ))
  | (GROUP_SPLIT | scomment) hspace* /* Initial; Interpret as group */
      (group_i=it_expr {$v = $group_i.v;} /* Ignore initial GROUP/scomment */
      | comment_eol
        (INDENT g_body=body {$v = $g_body.v;} /* Normal GROUP use */
        | same
          ( g_i=it_expr {$v = $g_i.v;} /* Plausible separator */
            /* Handle #!sweet EOL EOL t_expr */
          | comment_eol restart=t_expr {$v = $restart.v;} )
        | DEDENT error ))
  | SUBLIST hspace* /* "$" first on line */
      (is_i=it_expr {$v=list($is_i.v);}
      | comment_eol error )
  | abbrevw hspace*
      (comment_eol INDENT ab=body {$v = append(list($abbrevw.v), $ab.v);}
      | ai=it_expr {$v=list($abbrevw.v, $ai.v);} ) ;

// Production "t_expr" is the top-level production for sweet-expressions.
// This production handles special cases, then in the normal case
// drops to the it_expr production.
// Precondition: At beginning of line
// Postcondition: At beginning of line
// The rule for "indent processing disabled on initial top-level hspace"
// is a very simple (and clever) BNF construction by Alan Manuel K. Gloria.
// If there is an indent it simply reads a single n-expression and returns.
// If there is more than one on an initially-indented line, the later
// horizontal space will not have been read, so this production will
// fire again on the next invocation, doing the right thing.
t_expr returns [Object v]
  : comment_eol retry1=t_expr {$v=$retry1.v;}
  | (FF | VT)+ EOL retry2=t_expr {$v=$retry2.v;}
  | (INITIAL_INDENT | hspaces_maybe_bang) /* initial indent */
      (n_expr {$v = $n_expr.v;}
      | (scomment (options {greedy=true;} : hspace)* sretry=t_expr
         {$v=$sretry.v;})
      | comment_eol retry3=t_expr {$v=$retry3.v;} )
  | EOF {generate_eof();} /* End of file */
  | it_expr {$v = $it_expr.v;} /* Normal case */ ;
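// Illustrative end-to-end example (not part of the original grammar),
// assuming the usual sweet-expression reading.  The input
//   define factorial(n)
//     if {n <= 1}
//       1
//       {n * factorial{n - 1}}
// is read by t_expr/it_expr/head/body above as the s-expression
//   (define (factorial n)
//     (if (<= n 1)
//       1
//       (* n (factorial (- n 1)))))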