* Copyright (C) 2010 Robert Futrell
* robert_futrell at users.sourceforge.net
* http://fifesoft.com/rsyntaxtextarea
* This library is distributed under a modified BSD license. See the included
* RSTALanguageSupport.License.txt file for details.
package org.fife.rsta.ac.java.rjc.lexer;
import java.io.EOFException;
import java.io.IOException;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.Position;
* A scanner that allows the user to "push back" tokens. This scanner
* allows arbitrary lookahead.
private static final boolean DEBUG = false;
* The scanner we delegate to.
private SourceCodeScanner s;
* Stack of tokens that have been "pushed back."
* The depth in which we're in TypeArguments or TypeParameters.
private int typeArgLevel;
* If we are parsing text in a Swing <code>JTextComponent</code>, this
* should be the document of that component.
* The most recently lexed token, or <code>null</code> if EOS was
private Token mostRecentToken;
* Constructor. This scanner will return no tokens unless some are pushed
* onto it via {@link #yyPushback(Token)}.
* Constructor. This scanner will only return those tokens pushed onto it.
* @param tokens Tokens to return.
public Scanner(List tokens) {
// Walk the list from its last element to its first, so that the first
// element of the list is the last one pushed and ends up on top of the stack.
for (int i=tokens.size()-1; i>=0; i--) {
stack.push(tokens.get(i));
* @param r The stream to read from.
public Scanner(Reader r) {
// A null reader leaves the delegate scanner unset (s == null).
s = r!=null ? new SourceCodeScanner(r) : null;
// NOTE(review): if r is null, s is null at this point and this call would
// throw a NullPointerException, unless it is guarded by code not visible
// here -- confirm and add a null check if the null-reader case is supported.
s.setKeepLastDocComment(true);
* This method is just here for debugging purposes to make sure
* @param t A token to push onto the stack (non-<code>null</code>).
private void pushOntoStack(Token t) {
// Sanity check: complain when the token being pushed equals the token that
// is already on top of the stack.
if (t!=null && !stack.isEmpty() && t.equals(stack.peek())) {
System.err.println("ERROR: Token being duplicated: " + t);
// Diagnostic for a null token (the condition guarding this line is not
// visible in this excerpt).
System.err.println("ERROR: null token pushed onto stack");
* Decreases the depth in which we're in TypeArguments or TypeParameters.
* @see #increaseTypeArgumentsLevel()
* @see #getTypeArgumentsLevel()
public void decreaseTypeArgumentsLevel() {
throw new InternalError("typeArgLevel dipped below 0");
* Returns an offset into the source being parsed. This offset will be
* tracked if we are parsing code from a Swing <code>JTextComponent</code>.
* @param offs The offset.
* @return An object representing the offset.
* @see #setDocument(Document)
public Offset createOffset(final int offs) {
// Wrap a Swing Position created by the document for the given offset.
return new DocumentOffset(doc.createPosition(offs));
// createPosition() declares BadLocationException; the original author
// considers it unreachable here.
} catch (BadLocationException ble) { // Should never happen
private void debugPrintToken(Token t) {
System.out.println("... null");
System.out.println("... " + t);
* Returns the current column into the current line.
* @return The current column.
* Returns the last documentation comment parsed. The "last documentation
* comment" is cleared when this method returns.
* @return The last documentation comment parsed, or <code>null</code>
public String getLastDocComment() {
// Pure delegation to the underlying scanner.
// NOTE(review): other methods in this class guard against s being null
// (token-only scanners); this call is not guarded in the visible code --
// confirm it cannot be reached with a null s.
return s.getLastDocComment();
* Returns the current line into the document.
* @return The current line.
* Returns the most recently-lexed token.
* @return The token, or <code>null</code> if EOS was reached.
public Token getMostRecentToken() {
* Returns the current offset into the document.
* Eats through (possibly nested) paren pairs, e.g.:
* <pre>(int i=0; i<getFoo(getParam()); i++)</pre>.
* Blocks nested inside the paren pairs are also skipped.
* @throws IOException If an IO error occurs.
* @throws InternalError If the next token is not a '('.
public void eatParenPairs() throws IOException {
// t must be a '(' token; null (end of stream) or any other type is treated
// as an internal error rather than a recoverable parse error.
if (t==null || t.getType()!=TokenTypes.SEPARATOR_LPAREN) {
throw new InternalError("'(' expected, found: " + t);
// Keep consuming tokens until yylex() returns null.
while ((t=yylex())!=null) {
case TokenTypes.SEPARATOR_LBRACE:
case TokenTypes.SEPARATOR_RBRACE:
// Clamp at zero so an unmatched '}' cannot make the block depth negative.
blockDepth = Math.max(blockDepth-1, 0);
case TokenTypes.SEPARATOR_LPAREN:
case TokenTypes.SEPARATOR_RPAREN:
// Because of short-circuit &&, parenDepth is only decremented for a ')'
// seen outside of any brace block; the test succeeds when that ')'
// brings the paren depth back to zero.
if (blockDepth==0 && --parenDepth == 0) {
* Eats all tokens up to (and including) the next token of the specified
* type. This is useful, for example, to eat until the next semicolon.
* @param tokenType The type of token to eat through.
* @throws IOException If an IO error occurs.
public void eatThroughNext(int tokenType) throws IOException {
// Intentionally empty loop body: keep lexing until a token of the requested
// type has been consumed, or yylex() returns null.
while ((t=yylex())!=null && t.getType()!=tokenType);
* Eats all tokens up to (and including) the next token of the specified
* type, treating curly-brace tokens specially so that code blocks are
* skipped. This is useful, for example, to eat until the next semicolon.
* @param tokenType The type of token to eat through.
* @throws IOException If an IO error occurs.
* @see #eatThroughNextSkippingBlocks(int, int)
* @see #eatThroughNextSkippingBlocksAndStuffInParens(int, int)
public void eatThroughNextSkippingBlocks(int tokenType) throws IOException {
// Keep consuming tokens until yylex() returns null.
while ((t=yylex())!=null) {
// Brace tokens are tested first; the requested type is only compared when
// the token is neither '{' nor '}'.
if (type==TokenTypes.SEPARATOR_LBRACE) {
else if (type==TokenTypes.SEPARATOR_RBRACE) {
else if (type==tokenType) {
* Eats all tokens up to (and including) the next token of one of the
* specified types, treating curly-brace tokens specially so that code
* blocks are skipped. This is useful, for example, to eat until the next
* equal sign or semicolon.
* @param tokenType1 The type of token to eat through.
* @param tokenType2 Another type of token to eat through.
* @return The last token read. This will either be one of the two token
* types passed in, or <code>null</code> if the end of the stream
* @throws IOException If an IO error occurs.
* @see #eatThroughNextSkippingBlocksAndStuffInParens(int, int)
public Token eatThroughNextSkippingBlocks(int tokenType1,
int tokenType2) throws IOException {
// Keep consuming tokens until yylex() returns null.
while ((t=yylex())!=null) {
// Brace tokens are tested first; the two requested types are only compared
// when the token is neither '{' nor '}'.
if (type==TokenTypes.SEPARATOR_LBRACE) {
else if (type==TokenTypes.SEPARATOR_RBRACE) {
else if (type==tokenType1 || type==tokenType2) {
* Eats all tokens up to (and including) the next token of one of the
* specified types that is not nested inside a curly-brace block or a
* parenthesized group. This is useful, for example, to eat until the next
* equal sign or semicolon.
* @param tokenType1 The type of token to eat through.
* @param tokenType2 Another type of token to eat through.
* @return The last token read. This will either be one of the two token
* types passed in, or <code>null</code> if the end of the stream
* @throws IOException If an IO error occurs.
* @see #eatThroughNextSkippingBlocks(int, int)
public Token eatThroughNextSkippingBlocksAndStuffInParens(int tokenType1,
int tokenType2) throws IOException {
// Keep consuming tokens until yylex() returns null.
while ((t=yylex())!=null) {
// Braces and parens each get their own case (presumably maintaining
// blockDepth and parenDepth; the bodies are not visible here).
case TokenTypes.SEPARATOR_LBRACE:
case TokenTypes.SEPARATOR_RBRACE:
case TokenTypes.SEPARATOR_LPAREN:
case TokenTypes.SEPARATOR_RPAREN:
// A token of either requested type...
if (type==tokenType1 || type==tokenType2) {
// ...only counts when we are outside every brace block and paren group.
if (blockDepth<=0 && parenDepth<=0) {
public void eatUntilNext(int type1, int type2) throws IOException {
// Keep consuming tokens until yylex() returns null.
while ((t=yylex())!=null) {
// Stop condition: the token just lexed is one of the two requested types.
// NOTE(review): the name says "until" rather than "through", so presumably
// the matching token is pushed back -- not visible here, confirm.
if (type==type1 || type==type2) {
public void eatUntilNext(int type1, int type2, int type3) throws IOException {
// Keep consuming tokens until yylex() returns null.
while ((t=yylex())!=null) {
// Stop condition: the token just lexed is one of the three requested types.
// NOTE(review): the name says "until" rather than "through", so presumably
// the matching token is pushed back -- not visible here, confirm.
if (type==type1 || type==type2 || type==type3) {
* Returns the current TypeArgument/TypeParameter level.
* @return The current level.
* @see #increaseTypeArgumentsLevel()
* @see #decreaseTypeArgumentsLevel()
public int getTypeArgumentsLevel() {
* Increases the depth in which we're in TypeArguments or TypeParameters.
* @see #decreaseTypeArgumentsLevel()
* @see #getTypeArgumentsLevel()
public void increaseTypeArgumentsLevel() {
private Stack resetPositions;
private Stack currentResetTokenStack;
private int currentResetStartOffset;
public void markResetPosition() {
if (s!=null) { // Hack! We should really do something for token-only scanners
// Lazily create the stack of marks on first use.
if (resetPositions==null) {
resetPositions = new Stack();
// Each mark gets its own token stack, which becomes the current one;
// yylex() records tokens onto the current reset token stack.
currentResetTokenStack = new Stack();
resetPositions.push(currentResetTokenStack);
// Remember the underlying scanner's offset at the time of the mark.
currentResetStartOffset = s.getOffset();
public void resetToLastMarkedPosition() {
if (s!=null) { // Hack! We should really do something for token-only scanners
// Resetting without an active mark is treated as a programming error.
if (currentResetTokenStack==null) {
throw new InternalError("No resetTokenStack!");
// Remove tokens off the standard stack within the "marked" range
while (!stack.isEmpty()) {
Token t = (Token)stack.peek();
// Tokens at or after the mark's start offset belong to the marked range.
if (t.getOffset()>=currentResetStartOffset) {
// Add all tokens in the "marked" range to our stack
while (!currentResetTokenStack.isEmpty()) {
Token t = (Token)currentResetTokenStack.pop();
resetPositions.pop(); // Remove currentResetTokenStack
// Fall back to the enclosing mark's token stack, if there is one.
currentResetTokenStack = resetPositions.isEmpty() ? null : (Stack)resetPositions.peek();
// NOTE(review): the start offset is cleared to -1 even when an enclosing
// mark becomes current again; that mark's original start offset is not
// restored here -- confirm nested marks behave as intended.
currentResetStartOffset = -1;
public void clearResetPosition() {
if (s!=null) { // Hack! We should really do something for token-only scanners
// Clearing without an active mark is treated as a programming error.
if (currentResetTokenStack==null) {
throw new InternalError("No resetTokenStack!");
resetPositions.pop(); // Remove currentResetTokenStack
// Fall back to the enclosing mark's token stack, if there is one.
currentResetTokenStack = resetPositions.isEmpty() ? null : (Stack)resetPositions.peek();
// NOTE(review): as in resetToLastMarkedPosition(), the start offset of an
// enclosing mark is not restored -- confirm this is intended.
currentResetStartOffset = -1;
* Sets the Swing <code>Document</code> whose content is being parsed.
* This method should be called if we are parsing code inside a
* <code>JTextComponent</code>, as it will help our parsed code to track
* changes when the document is modified. If we are parsing source from a
* flat file, this method shouldn't be called.
* @param doc The document being parsed.
public void setDocument(Document doc) {
* Skips all bracket pairs ('[' followed by ']') in the stream.
* @return The number of bracket pairs skipped.
* @throws IOException If an IO error occurs.
public int skipBracketPairs() throws IOException {
// Continue only while the next token is '[' and the token right after it
// is ']' (peek depth 1 and peek depth 2 respectively).
while (yyPeekCheckType()==TokenTypes.SEPARATOR_LBRACKET &&
yyPeekCheckType(2)==TokenTypes.SEPARATOR_RBRACKET) {
* Returns the next token from the input stream.
* @return The next token.
* @throws IOException If an IO error occurs.
* NOTE: All other lex'ing methods should call into this one.
public Token yylex() throws IOException {
// Read from the delegate scanner when there is one; otherwise yield null.
t = s!=null ? s.yylex() : null;
// If we have nested TypeArguments ("Set<Map.Entry<String,String>>"),
// Prevent the ">>" from coming across as a single token.
if (typeArgLevel>0 && t!=null && t.isOperator()) {
String lexeme = t.getLexeme();
char ch = lexeme.charAt(0);
// Compound operators starting with '<': everything after the leading '<'
// becomes a separate "rest" token positioned one column/offset later.
case TokenTypes.OPERATOR_LTE:
rest = new TokenImpl(Token.OPERATOR_EQUALS, "=",
t.getLine(), t.getColumn()+1, t.getOffset()+1);
case TokenTypes.OPERATOR_LSHIFT:
rest = new TokenImpl(Token.OPERATOR_LT, "<",
t.getLine(), t.getColumn()+1, t.getOffset()+1);
case TokenTypes.OPERATOR_LSHIFT_EQUALS:
rest = new TokenImpl(Token.OPERATOR_LTE, "<=",
t.getLine(), t.getColumn()+1, t.getOffset()+1);
// The returned token is replaced by a lone '<' at the original position.
t = new TokenImpl(Token.OPERATOR_LT, "<",
t.getLine(), t.getColumn(), t.getOffset());
// Compound operators starting with '>': same treatment, splitting off the
// leading '>' and keeping the remainder as the "rest" token.
case TokenTypes.OPERATOR_GTE:
rest = new TokenImpl(Token.OPERATOR_EQUALS, "=",
t.getLine(), t.getColumn()+1, t.getOffset()+1);
case TokenTypes.OPERATOR_RSHIFT:
rest = new TokenImpl(Token.OPERATOR_GT, ">",
t.getLine(), t.getColumn()+1, t.getOffset()+1);
case TokenTypes.OPERATOR_RSHIFT2:
rest = new TokenImpl(Token.OPERATOR_RSHIFT, ">>",
t.getLine(), t.getColumn()+1, t.getOffset()+1);
case TokenTypes.OPERATOR_RSHIFT_EQUALS:
rest = new TokenImpl(Token.OPERATOR_GTE, ">=",
t.getLine(), t.getColumn()+1, t.getOffset()+1);
case TokenTypes.OPERATOR_RSHIFT2_EQUALS:
rest = new TokenImpl(Token.OPERATOR_RSHIFT_EQUALS, ">>=",
t.getLine(), t.getColumn()+1, t.getOffset()+1);
// The returned token is replaced by a lone '>' at the original position.
t = new TokenImpl(Token.OPERATOR_GT, ">",
t.getLine(), t.getColumn(), t.getOffset());
// While a reset mark is active, record the token on the mark's token stack
// (resetToLastMarkedPosition() later drains that stack).
if (currentResetTokenStack!=null) {
currentResetTokenStack.push(t);
if (t!=null) { // Don't let EOS corrupt most recent token
* Returns the next token from the input stream, or throws an exception
* if the end of stream is reached.
* @param error The error description for the exception if the end of
* @throws IOException If an IO error occurs or the end of stream is
public Token yylexNonNull(String error) throws IOException {
throw new EOFException(error);
* Returns the next token from the input stream, or throws an exception
* if the end of stream is reached or if the token is not of a given
* @param type The type the token must be.
* @param error The error description for the exception if the end of
* stream is reached, or if the token is of an unexpected type.
* @throws IOException If an IO error occurs or the end of stream is
* reached, or if the token is of the wrong type.
public Token yylexNonNull(int type, String error) throws IOException {
// -1 means "no second type": only the single given type is accepted.
return yylexNonNull(type, -1, error);
* Returns the next token from the input stream, or throws an exception
* if the end of stream is reached or if the token is not of two given
* @param type1 One type the token can be.
* @param type2 Another type the token can be, or <tt>-1</tt> if we
* should only check against <tt>type1</tt>.
* @param error The error description for the exception if the end of
* stream is reached, or if the token is of an unexpected type.
* @throws IOException If an IO error occurs or the end of stream is
* reached, or if the token is of a wrong type.
public Token yylexNonNull(int type1, int type2, String error)
// -1 means "no third type": only type1 and type2 are accepted.
return yylexNonNull(type1, type2, -1, error);
* Returns the next token from the input stream, or throws an exception
* if the end of stream is reached or if the token is not of three given
* @param type1 One type the token can be.
* @param type2 Another type the token can be, or <tt>-1</tt> if we
* should only check against <tt>type1</tt>.
* @param type3 Another type the token can be, or <tt>-1</tt> if we
* should only check against <tt>type1</tt> and <tt>type2</tt>.
* @param error The error description for the exception if the end of
* stream is reached, or if the token is of an unexpected type.
* @throws IOException If an IO error occurs or the end of stream is
* reached, or if the token is of a wrong type.
public Token yylexNonNull(int type1, int type2, int type3, String error)
// Failure with the bare error message (presumably the end-of-stream case;
// the guarding condition is not visible in this excerpt).
throw new IOException(error);
// Reject the token when it is not type1 and also matches neither of the
// optional alternatives; an alternative equal to -1 is disabled and never
// matches.
if (t.getType()!=type1 && (type2==-1 || t.getType()!=type2) &&
(type3==-1 || t.getType()!=type3)) {
// Include the offending lexeme so the message shows what was found.
throw new IOException(error + ", found '" + t.getLexeme() + "'");
* Returns the next token, but does not take it off of the stream. This
* is useful for lookahead.
* @return The next token.
* @throws IOException If an IO error occurs.
public Token yyPeek() throws IOException {
* Returns the <tt>depth</tt>-th token, but does not take anything off of the
* stream. This is useful for lookahead.
* @param depth The token to peek at, from <tt>1</tt> forward.
* @return The token, or <code>null</code> if that token index is past the
* @throws IOException If an IO error occurs.
public Token yyPeek(int depth) throws IOException {
// Peek depths are 1-based; anything smaller is a caller error.
throw new IllegalArgumentException("depth must be >= 1");
// Temporary stack holding the tokens read while looking ahead.
Stack read = new Stack();
for (int i=0; i<depth; i++) {
// Undo the lookahead: push back everything read so far, most recently
// read token first (presumably the early-exit path when the stream ends
// before depth tokens are available -- condition not visible here).
while (!read.isEmpty()) {
yyPushback((Token)read.pop());
// The top of the temporary stack is the depth-th token.
Token t = (Token)read.peek();
// Restore the stream by pushing back every token that was read.
while (!read.isEmpty()) {
yyPushback((Token)read.pop());
* Peeks at and returns the type of the next token on the stream.
* @return The type of the next token, or <tt>-1</tt> if the end of stream
* @throws IOException If an IO error occurs.
public int yyPeekCheckType() throws IOException {
// -1 stands in for "no token" when t is null.
return t!=null ? t.getType() : -1;
* Peeks at and returns the type of the specified token on the stream.
* @param index The index of the token to retrieve.
* @return The type of the token, or <tt>-1</tt> if the end of stream
* @throws IOException If an IO error occurs.
public int yyPeekCheckType(int index) throws IOException {
// -1 stands in for "no token" when t is null.
return t!=null ? t.getType() : -1;
* Returns the next token, but does not take it off of the stream. This
* is useful for lookahead.
* @return The next token.
* @throws IOException If an IO error occurs.
public Token yyPeekNonNull(String error) throws IOException {
throw new IOException(error);
* Returns the next token, but does not take it off of the stream. This
* is useful for lookahead.
* @param type The type the token must be.
* @return The next token.
* @throws IOException If an IO error occurs, or if EOS is reached, or
* if the token is not of the specified type.
public Token yyPeekNonNull(int type, String error) throws IOException {
// -1 means "no second type": only the single given type is accepted.
return yyPeekNonNull(type, -1, error);
* Returns the next token, but does not take it off of the stream. This
* is useful for lookahead.
* @param type1 One of the two types the token must be.
* @param type2 The other of the two types the token must be.
* @return The next token.
* @throws IOException If an IO error occurs, or if EOS is reached, or
* if the token is not of the specified type.
public Token yyPeekNonNull(int type1, int type2, String error)
// -1 means "no third type": only type1 and type2 are accepted.
return yyPeekNonNull(type1, type2, -1, error);
* Returns the next token, but does not take it off of the stream. This
* is useful for lookahead.
* @param type1 One of the three types the token must be.
* @param type2 Another of the three types the token must be.
* @param type3 The third of the types the token must be.
* @return The next token.
* @throws IOException If an IO error occurs, or if EOS is reached, or
* if the token is not of the specified type.
public Token yyPeekNonNull(int type1, int type2, int type3, String error)
// Failure with the bare error message (presumably the end-of-stream case;
// the guarding condition is not visible in this excerpt).
throw new IOException(error);
// Reject the token when it is not type1 and also matches neither of the
// optional alternatives; an alternative equal to -1 is disabled and never
// matches.
if (t.getType()!=type1 && (type2==-1 || t.getType()!=type2) &&
(type3==-1 || t.getType()!=type3)) {
// Include the offending lexeme so the message shows what was found.
throw new IOException(error + ", found '" + t.getLexeme() + "'");
* Pushes a token back onto the stream.
public void yyPushback(Token t) {
private class DocumentOffset implements Offset {
public DocumentOffset(Position pos) {