/*       Copyright (c) Eric Ledoux.  All rights reserved.       */
/* See http://www.dwell.net/terms for code sharing information. */

// TokenParser.cs
//
// Implements class TokenParser and related types.
//

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using DwellNet.CodeDoc;
using CodeDocApi.Properties;

namespace DwellNet.CodeDoc
{

/// <summary>
/// TokenParser class: parses <see cref="Token"/> objects from a
/// <see cref="TextReader"/>.
/// </summary>

internal abstract class TokenParser
{
    //////////////////////////////////////////////////////////////////////////
    // Protected Constants & Fields
    //

    
TokenParser.m_sourceFileLabel Field

A label (e.g. file name) to use for the source file for error reporting purposes.

    /// <summary>
    /// A label (e.g. file name) identifying the source file; it is passed to
    /// every ParsingException created by NewParsingException.
    /// </summary>
    protected string m_sourceFileLabel;

    
TokenParser.m_sourceFileReader Field

The stream to parse.

    /// <summary>
    /// The stream to parse.  Characters are pulled from it one at a time by
    /// ReadCharFromStream.
    /// </summary>
    protected TextReader m_sourceFileReader;

    
TokenParser.m_lastCharReadFromStream Field

The last character read from m_sourceFileReader. Set to 0 before the first call to m_sourceFileReader.Read.

    /// <summary>
    /// The last character read from m_sourceFileReader.  It is 0 before the
    /// first call to m_sourceFileReader.Read.  ReadCharFromStream consults it
    /// at the end of the stream to decide whether a "virtual" newline has to
    /// be supplied.
    /// </summary>
    protected int m_lastCharReadFromStream = 0;

    
TokenParser.TabWidth Field

Width of a tab stop.

    /// <summary>
    /// Width of a tab stop, in columns.  Advance expands a tab up to the next
    /// column index that is a multiple of this value.
    /// </summary>
    protected const int TabWidth = 4;

    
TokenParser.m_columnIndex Field

Zero-based column number, after expansion of tabs.

    /// <summary>
    /// Zero-based column number, after expansion of tabs.  Advance resets it
    /// to 0 whenever it consumes a newline.
    /// </summary>
    protected int m_columnIndex;

    
TokenParser.m_lineIndex Field

Zero-based line number.

    /// <summary>
    /// Zero-based line number.  Advance increments it for every newline it
    /// consumes; CurrentLineNumber exposes it as a 1-based value.
    /// </summary>
    protected int m_lineIndex;

    
TokenParser.m_tokenBuffer Field

Buffer to read a token into.

    /// <summary>
    /// Buffer to read a token into (created with an initial capacity of 200
    /// characters).
    /// </summary>
    protected StringBuilder m_tokenBuffer = new StringBuilder(200);

    
TokenParser.m_char1 Field

The next character in the stream.

    /// <summary>
    /// The next character in the stream (the first look-ahead character), or
    /// a negative value once the end of the stream has been reached.
    /// </summary>
    protected int m_char1;

    
TokenParser.m_char2 Field

The character in the stream after m_char1.

    /// <summary>
    /// The character in the stream after m_char1 (the second look-ahead
    /// character).
    /// </summary>
    protected int m_char2;

    
TokenParser.m_char3 Field

The character in the stream after m_char2.

    /// <summary>
    /// The character in the stream after m_char2 (the third look-ahead
    /// character).
    /// </summary>
    protected int m_char3;

    
TokenParser.m_tokenQueue Field

Contains zero or more tokens that were "prematurely" parsed and need to be returned from the next call(s) to ParseToken.

    /// <summary>
    /// Contains zero or more tokens that were "prematurely" parsed and need
    /// to be returned from the next call(s) to ParseToken (created with an
    /// initial capacity of 10 tokens).
    /// </summary>
    protected Queue<Token> m_tokenQueue = new Queue<Token>(10);

    ////////////////////////////////////////////////////////////////////////
    // Public Properties
    //

    
TokenParser.CurrentLineNumber Property

Gets the 1-based line number of the next token to parse.

    /// <summary>
    /// Gets the 1-based line number of the next token to parse.
    /// </summary>
    public int CurrentLineNumber
    {
        // m_lineIndex is zero-based; callers want the human-readable number
        get { return 1 + m_lineIndex; }
    }

    //////////////////////////////////////////////////////////////////////////
    // Internal Methods
    //

    
TokenParser Constructor

Initializes an instance of this class.

Parameters

sourceFileLabel

A label (e.g. file name) to use for the source file for error reporting purposes.

sourceFileReader

The stream onto the source file to parse.

    /// <summary>
    /// Initializes an instance of this class and fills the three-character
    /// look-ahead window (m_char1, m_char2, m_char3) from the stream.
    /// </summary>
    /// <param name="sourceFileLabel">A label (e.g. file name) to use for the
    ///     source file for error reporting purposes.</param>
    /// <param name="sourceFileReader">The stream onto the source file to
    ///     parse.</param>
    /// <exception cref="ArgumentNullException">
    ///     <paramref name="sourceFileReader"/> is null.</exception>
    internal TokenParser(string sourceFileLabel, TextReader sourceFileReader)
    {
        // fail with a descriptive exception here rather than with a
        // NullReferenceException inside ReadCharFromStream
        if (sourceFileReader == null)
            throw new ArgumentNullException("sourceFileReader");

        m_sourceFileLabel = sourceFileLabel;
        m_sourceFileReader = sourceFileReader;

        // prime the look-ahead window; ReadCharFromStream returns -1 for
        // positions past the end of the stream
        m_char1 = ReadCharFromStream();
        m_char2 = ReadCharFromStream();
        m_char3 = ReadCharFromStream();
    }

    
TokenParser.ParseToken Method

Parses one token from a TextReader.

Return Value

One token, or null if we've reached the end of the input stream.

    /// <summary>
    /// Parses one token from the input stream.
    /// </summary>
    /// <returns>One token, or null if we've reached the end of the input
    ///     stream.</returns>
    internal abstract Token ParseToken();

    
TokenParser.ParseWord Method

If m_char1 is positioned at the beginning of a "word" (i.e. an identifier, reserved word, numeric literal, etc.), parse and return the literal. On exit, m_char1 is positioned at the first character after the "word". null is returned if a "word" isn't next in the stream.

    /// <summary>
    /// If m_char1 is positioned at the beginning of a "word" (i.e. an
    /// identifier, reserved word, numeric literal, etc.), parses and returns
    /// it.  On exit, m_char1 is positioned at the first character after the
    /// "word".
    /// </summary>
    /// <returns>The parsed token, or null if a "word" isn't next in the
    ///     stream.</returns>
    internal abstract Token ParseWord();

    //////////////////////////////////////////////////////////////////////////
    // Protected Methods
    //

    
TokenParser.ReadWhiteSpace Method

Reads zero or more spaces and tabs starting at m_char1, and returns the count of equivalent spaces (after expanding tabs to spaces). On exit, m_char1 is the first non-tab/space character.

    /// <summary>
    /// Consumes the run of spaces and tabs (possibly empty) that starts at
    /// m_char1.  On exit, m_char1 is the first character that is neither a
    /// space nor a tab.
    /// </summary>
    /// <returns>The width of the consumed run measured in spaces, i.e. after
    ///     tabs have been expanded.</returns>
    protected int ReadWhiteSpace()
    {
        int equivalentSpaces = 0;

        // nothing is appended to the token buffer; Advance reports how many
        // output columns each consumed character occupied
        while ((m_char1 == ' ') || (m_char1 == '\t'))
        {
            equivalentSpaces += Advance(false);
        }

        return equivalentSpaces;
    }

    
TokenParser.ParseNewline Method

If m_char1 is positioned at a '\r', parse and return the newline; if '\r' is followed by '\n', the '\n' is parsed too. If m_char1 is positioned on a '\n', it is parsed on its own as a newline. On exit, m_char1 is positioned at the first character after the newline. null is returned if a newline isn't next in the stream, and also whenever returnToken is false (even if a newline was parsed).

Parameters

returnToken

If true, a newline Token is returned. If false, the newline sequence is parsed and internal state is updated but no Token is returned -- instead, '\n' is appended to m_tokenBuffer.

    /// <summary>
    /// Parses a newline (CR, LF, or CRLF) if one starts at m_char1.  On exit,
    /// m_char1 is positioned at the first character after the newline.
    /// </summary>
    /// <param name="returnToken">If true, a newline Token is returned.  If
    ///     false, the newline is consumed and internal state is updated, but
    ///     '\n' is appended to m_tokenBuffer instead of a Token being
    ///     returned.</param>
    /// <returns>A NewlineToken carrying the 1-based number of the line that
    ///     the newline ends; null if no newline is next in the stream or if
    ///     <paramref name="returnToken"/> is false.</returns>
    protected Token ParseNewline(bool returnToken)
    {
        bool atNewline = (m_char1 == '\r') || (m_char1 == '\n');
        if (!atNewline)
            return null;

        // remember the line being ended before Advance moves on to the next
        // line
        int endedLineNumber = CurrentLineNumber;

        // Advance consumes a full CRLF pair in one step; the newline goes to
        // the token buffer only when no token is being returned
        Advance(!returnToken);

        return returnToken ? new NewlineToken(endedLineNumber) : null;
    }

    
TokenParser.Advance Method

Advances one character in the input stream. The old value of m_char1 is discarded; the old value of m_char2 is moved into m_char1; and so on; m_char3 is set to a new character from the stream (or -1 if there are no more characters).

Parameters

appendToTokenBuffer

If true, the old value of m_char1 is appended to m_tokenBuffer.

Return Value

The number of output character positions. For non-tab characters, 1 is returned. For tab characters, the number of equivalent spaces is returned.

Exceptions
EndOfStreamException is thrown when m_char1 is at the end of the stream (i.e. -1) on entry.

Remarks

Tabs are expanded, so if appendToTokenBuffer is true then multiple spaces may be written to m_tokenBuffer. Also, CRLF sequences are converted to "\n", so it's possible Advance may advance more than one character position in the input stream.

    /// <summary>
    /// Advances one character in the input stream: the old m_char1 is
    /// consumed, m_char2 moves into m_char1, m_char3 into m_char2, and
    /// m_char3 is refilled from the stream (-1 if there are no more
    /// characters).  Line and column counters are updated along the way.
    /// </summary>
    /// <param name="appendToTokenBuffer">If true, the consumed character is
    ///     appended to m_tokenBuffer: a tab as the spaces it expands to, any
    ///     newline sequence as a single '\n', anything else unchanged.</param>
    /// <returns>The number of output character positions: the number of
    ///     equivalent spaces for a tab, 1 for everything else.</returns>
    /// <exception cref="EndOfStreamException">m_char1 is at the end of the
    ///     stream (i.e. negative) on entry.</exception>
    /// <remarks>
    /// A CRLF pair is consumed as a single newline, so one call may advance
    /// two character positions in the input stream.
    /// </remarks>
    protected int Advance(bool appendToTokenBuffer)
    {
        // there is nothing left to consume -- this is an error
        if (m_char1 < 0)
            throw new EndOfStreamException();

        int positions = 0;               // output character positions produced
        bool consumeLineFeedToo = false; // true when a CRLF pair is swallowed

        switch (m_char1)
        {
            case '\t':
                // expand to the next tab stop, always emitting at least one
                // space
                do
                {
                    m_columnIndex++;
                    positions++;
                    if (appendToTokenBuffer)
                        m_tokenBuffer.Append(' ');
                }
                while ((m_columnIndex % TabWidth) != 0);
                break;

            case '\r':
            case '\n':
                // CR, LF and CRLF all count as one newline; only a CR that is
                // directly followed by LF drags the second character along
                consumeLineFeedToo = (m_char1 == '\r') && (m_char2 == '\n');
                m_columnIndex = 0;
                m_lineIndex++;
                if (appendToTokenBuffer)
                    m_tokenBuffer.Append('\n');
                positions++;
                break;

            default:
                // ordinary character
                m_columnIndex++;
                positions++;
                if (appendToTokenBuffer)
                    m_tokenBuffer.Append((char) m_char1);
                break;
        }

        // slide the look-ahead window by one character, or by two for CRLF
        if (consumeLineFeedToo)
        {
            m_char1 = m_char3;
            m_char2 = ReadCharFromStream();
            m_char3 = ReadCharFromStream();
        }
        else
        {
            m_char1 = m_char2;
            m_char2 = m_char3;
            m_char3 = ReadCharFromStream();
        }

        return positions;
    }

    
TokenParser.Advance2 Method

Advances two characters in the input stream. The old values of m_char1 and m_char2 are discarded; the old value of m_char3 is moved into m_char1; and so on; m_char2 and m_char3 are set to new characters from the stream (or -1 if there are no more characters).

Parameters

appendToTokenBuffer

If true, the old values of m_char1 and m_char2 are appended to m_tokenBuffer.

Remarks

Tabs are expanded, so if appendToTokenBuffer is true then multiple spaces may be written to m_tokenBuffer. Also, CRLF sequences are converted to "\n", so it's possible Advance2 may advance more than two character positions in the input stream.

    /// <summary>
    /// Advances two characters in the input stream by calling Advance twice;
    /// the old values of m_char1 and m_char2 are consumed.
    /// </summary>
    /// <param name="appendToTokenBuffer">If true, both consumed characters
    ///     are appended to m_tokenBuffer (see Advance for how tabs and
    ///     newlines are written).</param>
    /// <remarks>
    /// Because Advance consumes a CRLF pair in one step, more than two
    /// character positions of the input stream may be consumed.
    /// </remarks>
    protected void Advance2(bool appendToTokenBuffer)
    {
        for (int remaining = 2; remaining > 0; remaining--)
        {
            Advance(appendToTokenBuffer);
        }
    }

    
TokenParser.ReadCharFromStream Method

Reads and returns a character from m_sourceFileReader. Returns -1 if the end of the file has been reached. If the last character of the file isn't a newline, one is added "virtually".

    /// <summary>
    /// Reads and returns a character from m_sourceFileReader.  If the last
    /// character of the file isn't a newline, one is added "virtually".
    /// </summary>
    /// <returns>The character read, or -1 if the end of the file has been
    ///     reached.</returns>
    /// <remarks>
    /// Once the end of the stream has been seen, m_lastCharReadFromStream is
    /// set to -1 and the reader is never asked for another character.
    /// </remarks>
    protected int ReadCharFromStream()
    {
        // check if we've already reached the end of the stream
        if (m_lastCharReadFromStream < 0)
            return -1;

        int iChar = m_sourceFileReader.Read();
        if (iChar >= 0)
        {
            m_lastCharReadFromStream = iChar;
            return iChar;
        }

        // end of the stream: decide whether the text already ends with a
        // newline; a lone '\r' counts, because Advance treats "\r" exactly
        // like "\r\n"
        bool endsWithNewline = (m_lastCharReadFromStream == '\n') ||
            (m_lastCharReadFromStream == '\r');

        // latch the end-of-stream state so that the guard at the top of this
        // method stops any further calls to Read
        m_lastCharReadFromStream = -1;

        // if the last line is unterminated, add a newline "virtually" -- a
        // "\r" is enough, since a "\r" alone is handled the same as "\r\n"
        return endsWithNewline ? -1 : '\r';
    }

    
TokenParser.AppendToToken Method (char)

Appends a character to m_tokenBuffer.

Parameters

ch

The character to append.

Remarks

The primary purpose of this method is to avoid casting errors; for example, m_tokenBuffer.Append(m_char1) would append an integer instead of a character to m_tokenBuffer.

    /// <summary>
    /// Appends a character to m_tokenBuffer.
    /// </summary>
    /// <param name="ch">The character to append.</param>
    /// <remarks>
    /// Going through this method (or its int overload) avoids casting
    /// errors; for example, m_tokenBuffer.Append(m_char1) would append the
    /// decimal text of an integer instead of a character.
    /// </remarks>
    protected void AppendToToken(char ch)
    {
        this.m_tokenBuffer.Append(ch);
    }

    
TokenParser.AppendToToken Method (int)

Appends a character to m_tokenBuffer.

Parameters

iChar

The character to append.

Remarks

The primary purpose of this method is to avoid casting errors; for example, m_tokenBuffer.Append(m_char1) would append an integer instead of a character to m_tokenBuffer.

    /// <summary>
    /// Appends a character, given as an integer character code, to
    /// m_tokenBuffer.
    /// </summary>
    /// <param name="iChar">The character to append; must not be negative
    ///     (checked by a debug assertion).</param>
    /// <remarks>
    /// The explicit conversion to char is the point of this method:
    /// m_tokenBuffer.Append(m_char1) would append the decimal text of an
    /// integer instead of a character.
    /// </remarks>
    protected void AppendToToken(int iChar)
    {
        Debug.Assert(iChar >= 0);

        char ch = (char) iChar;
        m_tokenBuffer.Append(ch);
    }

    
TokenParser.IsSign Method

Returns true iff a given character is '+' or '-'.

Parameters

iChar

The character to check.

    /// <summary>
    /// Returns true iff a given character is '+' or '-'.
    /// </summary>
    /// <param name="iChar">The character to check.</param>
    protected static bool IsSign(int iChar)
    {
        switch (iChar)
        {
            case '+':
            case '-':
                return true;

            default:
                return false;
        }
    }

    
TokenParser.IsDigit Method

Returns true iff a given character is a decimal digit.

Parameters

iChar

The character to check.

    /// <summary>
    /// Returns true iff a given character is a decimal digit, i.e. one of
    /// '0' through '9'.
    /// </summary>
    /// <param name="iChar">The character to check.</param>
    protected static bool IsDigit(int iChar)
    {
        // everything below '0' (including -1, the end-of-stream marker) is
        // rejected first
        if (iChar < '0')
            return false;

        return iChar <= '9';
    }

    
TokenParser.NewParsingException Method

Constructs and returns a new ParsingException referring to the current source file and line number.

Parameters

format

A formatting string for an error message to include with the exception.

args

Formatting arguments for the error message.

    /// <summary>
    /// Constructs and returns (without throwing) a new ParsingException
    /// referring to the current source file and line number.
    /// </summary>
    /// <param name="format">A formatting string for an error message to
    ///     include with the exception.</param>
    /// <param name="args">Formatting arguments for the error message.</param>
    /// <returns>The new exception, for the caller to throw.</returns>
    protected ParsingException NewParsingException(string format,
        params object[] args)
    {
        // exceptions report 1-based line numbers; m_lineIndex is zero-based
        int oneBasedLineNumber = m_lineIndex + 1;

        return new ParsingException(m_sourceFileLabel, oneBasedLineNumber,
            format, args);
    }
}

/// <summary>
/// Indicates an error parsing the source code.
/// </summary>
public class ParsingException : Exception
{
    //////////////////////////////////////////////////////////////////////////
    // Private Fields
    //

    /// <summary>
    /// Holds the value of the <see cref="FileLabel"/> property.
    /// </summary>
    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
    private string m_fileLabel;

    /// <summary>
    /// Holds the value of the <see cref="LineNumber"/> property.
    /// </summary>
    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
    private int m_lineNumber;

    /// <summary>
    /// Holds the value of the <see cref="BaseMessage"/> property.
    /// </summary>
    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
    private string m_baseMessage;

    //////////////////////////////////////////////////////////////////////////
    // Public Properties
    //

    /// <summary>
    /// A label referring to the source code file being parsed.  This is
    /// often a path to the file, if such a path is available.
    /// </summary>
    public string FileLabel
    {
        [DebuggerStepThrough]
        get { return m_fileLabel; }
    }

    /// <summary>
    /// The one-based line number of the line within the source file that
    /// contains the error.
    /// </summary>
    public int LineNumber
    {
        [DebuggerStepThrough]
        get { return m_lineNumber; }
    }

    /// <summary>
    /// The error message, not including <see cref="FileLabel"/> and
    /// <see cref="LineNumber"/>.
    /// </summary>
    public string BaseMessage
    {
        [DebuggerStepThrough]
        get { return m_baseMessage; }
    }

    /// <summary>
    /// The error message, including <see cref="FileLabel"/> and
    /// <see cref="LineNumber"/> and the word "Warning" (or a localized
    /// equivalent).
    /// </summary>
    public string WarningMessage
    {
        [DebuggerStepThrough]
        get
        {
            // Resources.Warning3 is the format string; it receives the file
            // label, the line number and the base message, in that order
            object[] parts = new object[] { FileLabel, LineNumber, BaseMessage };
            return String.Format(Resources.Warning3, parts);
        }
    }

    //////////////////////////////////////////////////////////////////////////
    // Public Methods
    //

    /// <summary>
    /// Initializes an instance of this class.  The exception's Message is
    /// "fileLabel(lineNumber): " followed by the formatted error message.
    /// </summary>
    /// <param name="fileLabel">A label referring to the source code file
    ///     being parsed.</param>
    /// <param name="lineNumber">The one-based line number of the line within
    ///     the source file that contains the error.</param>
    /// <param name="format">A formatting string for an error message to
    ///     include with the exception.</param>
    /// <param name="args">Formatting arguments for the error message.</param>
    public ParsingException(string fileLabel, int lineNumber, string format,
            params object[] args)
        : base(String.Format("{0}({1}): {2}", fileLabel, lineNumber,
            String.Format(format, args)))
    {
        // the message is formatted a second time here because the value
        // computed for the base constructor call is not available in the
        // constructor body
        m_baseMessage = String.Format(format, args);
        m_lineNumber = lineNumber;
        m_fileLabel = fileLabel;
    }
}

}