/* Copyright (c) Eric Ledoux. All rights reserved. */ /* See http://www.dwell.net/terms for code sharing information. */ // TokenParser.cs // // Implements class TokenParser and related types. // using System; using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Text; using System.Text.RegularExpressions; using DwellNet.CodeDoc; using CodeDocApi.Properties; namespace DwellNet.CodeDoc {
/// <summary>
/// Base class for character-level token parsers.  Characters are pulled from a
/// <see cref="TextReader"/> through a three-character lookahead window
/// (<c>m_char1</c> .. <c>m_char3</c>); the class tracks the current line and the
/// tab-expanded column, and accumulates token text in <c>m_tokenBuffer</c>.
/// Derived classes supply <c>ParseToken</c> and <c>ParseWord</c>.
/// </summary>
internal abstract class TokenParser {
    //////////////////////////////////////////////////////////////////////////
    // Protected Constants & Fields
    //
TokenParser.m_sourceFileLabel Field
A label (e.g. file name) to use for the source file for error reporting purposes. |
/// <summary>
/// Label identifying the source file in error messages.  Assigned by the
/// constructor and handed to <c>ParsingException</c> by
/// <c>NewParsingException</c>.
/// </summary>
protected string m_sourceFileLabel;
TokenParser.m_sourceFileReader Field
The stream to parse. |
/// <summary>
/// Reader that supplies the characters to parse.  Assigned by the constructor;
/// within this class it is read only by <c>ReadCharFromStream</c>.
/// </summary>
protected TextReader m_sourceFileReader;
TokenParser.m_lastCharReadFromStream Field
The last character read from m_sourceFileReader. Set to 0 before the first call to ReadCharFromStream. |
/// <summary>
/// Bookkeeping for <c>ReadCharFromStream</c>: the most recent character taken
/// from <c>m_sourceFileReader</c>, or 0 before anything has been read.  It is
/// consulted at end of stream to decide whether a trailing newline has to be
/// synthesized.
/// </summary>
protected int m_lastCharReadFromStream = 0;
TokenParser.TabWidth Field
Width of a tab stop. |
/// <summary>
/// Distance, in columns, between tab stops.  <c>Advance</c> expands a tab into
/// spaces up to the next column index that is a multiple of this value.
/// </summary>
protected const int TabWidth = 4;
TokenParser.m_columnIndex Field
Zero-based column number, after expansion of tabs. |
/// <summary>
/// Zero-based column of <c>m_char1</c>, counted after tab expansion.
/// <c>Advance</c> increments it for each output position and resets it to 0
/// when a newline is consumed.
/// </summary>
protected int m_columnIndex;
TokenParser.m_lineIndex Field
Zero-based line number. |
/// <summary>
/// Zero-based index of the current line.  <c>Advance</c> increments it each
/// time a newline (CR, LF or CRLF) is consumed; <c>CurrentLineNumber</c> and
/// <c>NewParsingException</c> report it as a one-based number.
/// </summary>
protected int m_lineIndex;
TokenParser.m_tokenBuffer Field
Buffer to read a token into. |
/// <summary>
/// Scratch buffer in which the text of a token is accumulated (initial
/// capacity 200 characters).  <c>Advance</c> and the <c>AppendToToken</c>
/// overloads append to it.
/// </summary>
protected StringBuilder m_tokenBuffer = new StringBuilder(200);
TokenParser.m_char1 Field
The next character in the stream. |
/// <summary>
/// First character of the lookahead window: the next unconsumed character of
/// the input, or a negative value once the input is exhausted.
/// </summary>
protected int m_char1;

/// <summary>
/// Second character of the lookahead window (the one following
/// <c>m_char1</c>).  <c>Advance</c> inspects it to recognize a CRLF pair.
/// </summary>
protected int m_char2;

/// <summary>
/// Third character of the lookahead window (the one following
/// <c>m_char2</c>).  Refilled from <c>ReadCharFromStream</c> whenever the
/// window shifts.
/// </summary>
protected int m_char3;
TokenParser.m_tokenQueue Field
Contains zero or more tokens that were "prematurely" parsed and need to be returned from the next call(s) to ParseToken. |
/// <summary>
/// Queue of tokens that have already been parsed but not yet returned, created
/// with an initial capacity of 10.  Nothing in this base class enqueues or
/// dequeues it; presumably derived parsers drain it from <c>ParseToken</c> --
/// TODO confirm against the implementations.
/// </summary>
protected Queue<Token> m_tokenQueue = new Queue<Token>(10);

////////////////////////////////////////////////////////////////////////
// Public Properties
//
TokenParser.CurrentLineNumber Property
Gets the 1-based line number of the next token to parse. |
/// <summary>
/// Gets the one-based number of the line on which the parser is currently
/// positioned, i.e. the line of the next token to be parsed.
/// </summary>
public int CurrentLineNumber
{
    get
    {
        // m_lineIndex counts from zero; line numbers are reported from one
        int oneBasedLine = m_lineIndex + 1;
        return oneBasedLine;
    }
}

//////////////////////////////////////////////////////////////////////////
// Internal Methods
//
TokenParser Constructor
Initializes an instance of this class.
Parameters
sourceFileLabel A label (e.g. file name) to use for the source file for error reporting purposes. sourceFileReader The stream onto the source file to parse. |
/// <summary>
/// Initializes the parser and primes the three-character lookahead window from
/// <paramref name="sourceFileReader"/>.
/// </summary>
/// <param name="sourceFileLabel">A label (e.g. a file name) used to identify
///     the source file in error messages.</param>
/// <param name="sourceFileReader">The reader that supplies the text to
///     parse.</param>
/// <exception cref="ArgumentNullException"><paramref name="sourceFileReader"/>
///     is null.</exception>
internal TokenParser(string sourceFileLabel, TextReader sourceFileReader)
{
    // fail fast with a descriptive exception; otherwise a null reader only
    // surfaces as a NullReferenceException inside ReadCharFromStream
    if (sourceFileReader == null)
        throw new ArgumentNullException("sourceFileReader");

    m_sourceFileLabel = sourceFileLabel;
    m_sourceFileReader = sourceFileReader;

    // fill the lookahead window; positions past the end of the input
    // receive -1
    m_char1 = ReadCharFromStream();
    m_char2 = ReadCharFromStream();
    m_char3 = ReadCharFromStream();
}
TokenParser.ParseToken Method
Parses one token from a TextReader.
Return Value
One token, or null if we've reached the end of the input stream. |
/// <summary>
/// Parses and returns the next token from the input.  Implemented by derived
/// classes; according to the accompanying documentation, null is returned once
/// the end of the input stream has been reached.
/// </summary>
internal abstract Token ParseToken();
/// <summary>
/// Implemented by derived classes.  NOTE(review): no contract is documented
/// for this member; presumably it parses a single word-like token -- confirm
/// against the implementations.
/// </summary>
internal abstract Token ParseWord();

//////////////////////////////////////////////////////////////////////////
// Protected Methods
//
/// <summary>
/// Consumes a run of spaces and tabs starting at <c>m_char1</c> without
/// appending anything to <c>m_tokenBuffer</c>.  Newlines are not consumed.
/// </summary>
/// <returns>The number of output character positions skipped; a tab counts as
///     the number of spaces it expands to.</returns>
protected int ReadWhiteSpace()
{
    int positionsSkipped = 0;

    // stop at the first character that is neither a space nor a tab
    while ((m_char1 == ' ') || (m_char1 == '\t'))
    {
        positionsSkipped += Advance(false);
    }

    return positionsSkipped;
}
TokenParser.ParseNewline Method
If m_char1 is positioned at a '\r', parse and return the newline; if '\r' is followed by '\n', the '\n' is parsed too. If m_char1 is positioned on a '\n', it is parsed on its own as a newline. On exit, m_char1 is positioned at the first character after the newline. null is returned if a newline isn't next in the stream.
Parameters
returnToken If true, a newline Token is returned. If false, the newline sequence is parsed and internal state is updated but no Token is returned -- instead, '\n' is appended to m_tokenBuffer. |
/// <summary>
/// Consumes a newline (CR, LF, or CRLF) if one starts at <c>m_char1</c>.
/// </summary>
/// <param name="returnToken">If true, a <c>NewlineToken</c> carrying the
///     one-based number of the line that the newline ends is returned.  If
///     false, the newline is consumed, '\n' is appended to
///     <c>m_tokenBuffer</c>, and null is returned.</param>
/// <returns>A newline token, or null if <paramref name="returnToken"/> is
///     false or the input is not positioned at a newline.</returns>
protected Token ParseNewline(bool returnToken)
{
    // nothing to do unless the window starts with CR or LF
    bool atNewline = (m_char1 == '\r') || (m_char1 == '\n');
    if (!atNewline)
        return null;

    // remember the line being ended before Advance moves to the next line
    int endedLineNumber = CurrentLineNumber;

    // consume the newline (Advance swallows the LF of a CRLF pair); when no
    // token is wanted the newline goes into the token buffer instead
    bool appendToBuffer = !returnToken;
    Advance(appendToBuffer);

    if (!returnToken)
        return null;
    return new NewlineToken(endedLineNumber);
}
TokenParser.Advance Method
Advances one character in the input stream. The old value of m_char1 is discarded; the old value of m_char2 is moved into m_char1; and so on; m_char3 is set to a new character from the stream (or -1 if there are no more characters).
Parameters
appendToTokenBuffer If true, the old value of m_char1 is appended to m_tokenBuffer.
Return Value
The number of output character positions. For non-tab characters, 1 is returned. For tab characters, the number of equivalent spaces is returned.
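Exceptions
EndOfStreamException The end of the input stream had already been reached (m_char1 is negative) when Advance was called.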
Remarks
Tabs are expanded, so if appendToTokenBuffer is true then multiple spaces may be written to m_tokenBuffer. Also, CRLF sequences are converted to "\n", so it's possible Advance may advance more than one character position in the input stream. |
/// <summary>
/// Consumes the character in <c>m_char1</c> and shifts the lookahead window:
/// <c>m_char2</c> moves into <c>m_char1</c>, <c>m_char3</c> into
/// <c>m_char2</c>, and a fresh character is read into <c>m_char3</c>.  Line
/// and column tracking are updated along the way.
/// </summary>
/// <param name="appendToTokenBuffer">If true, the consumed character is
///     appended to <c>m_tokenBuffer</c>: a tab is appended as the spaces it
///     expands to, and any newline (CR, LF or CRLF) as a single '\n'.</param>
/// <returns>The number of output character positions produced: 1 for
///     ordinary characters and newlines, or the number of spaces a tab
///     expands to.</returns>
/// <exception cref="EndOfStreamException"><c>m_char1</c> is negative, i.e. the
///     end of the input has already been reached.</exception>
protected int Advance(bool appendToTokenBuffer)
{
    // there is nothing left to consume at end of stream
    if (m_char1 < 0)
        throw new EndOfStreamException();

    int positions = 0;              // output character positions produced
    bool swallowLineFeed = false;   // true when a CRLF pair is consumed

    switch (m_char1)
    {
        case '\t':
            // expand the tab with spaces up to the next tab stop
            do
            {
                m_columnIndex++;
                positions++;
                if (appendToTokenBuffer)
                    m_tokenBuffer.Append(' ');
            }
            while ((m_columnIndex % TabWidth) != 0);
            break;

        case '\r':
        case '\n':
            // a CR immediately followed by LF counts as one newline
            swallowLineFeed = (m_char1 == '\r') && (m_char2 == '\n');
            m_columnIndex = 0;
            m_lineIndex++;
            if (appendToTokenBuffer)
                m_tokenBuffer.Append('\n');
            positions++;
            break;

        default:
            m_columnIndex++;
            positions++;
            if (appendToTokenBuffer)
                m_tokenBuffer.Append((char) m_char1);
            break;
    }

    // shift the window by one character, or by two for a CRLF pair
    int shifts = swallowLineFeed ? 2 : 1;
    for (int i = 0; i < shifts; i++)
    {
        m_char1 = m_char2;
        m_char2 = m_char3;
        m_char3 = ReadCharFromStream();
    }

    return positions;
}
TokenParser.Advance2 Method
Advances two characters in the input stream. The old values of m_char1 and m_char2 are discarded; the old value of m_char3 is moved into m_char1; and so on; m_char2 and m_char3 are set to new characters from the stream (or -1 if there are no more characters).
Parameters
appendToTokenBuffer If true, the old values of m_char1 and m_char2 are appended to m_tokenBuffer.
Remarks
Tabs are expanded, so if appendToTokenBuffer is true then multiple spaces may be written to m_tokenBuffer. Also, CRLF sequences are converted to "\n", so it's possible Advance2 may advance more than two character positions in the input stream. |
/// <summary>
/// Consumes two characters from the input by calling <c>Advance</c> twice.
/// </summary>
/// <param name="appendToTokenBuffer">If true, both consumed characters are
///     appended to <c>m_tokenBuffer</c>, subject to the tab and newline
///     conversions performed by <c>Advance</c>.</param>
protected void Advance2(bool appendToTokenBuffer)
{
    for (int step = 0; step < 2; step++)
    {
        Advance(appendToTokenBuffer);
    }
}
TokenParser.ReadCharFromStream Method
Reads and returns a character from m_sourceFileReader. Returns -1 if the end of the file has been reached. If the last character of the file isn't a newline, one is added "virtually". |
/// <summary>
/// Reads the next character from <c>m_sourceFileReader</c>.  If the input does
/// not end with '\n', a single '\r' is returned once at the end to act as a
/// "virtual" final newline.
/// </summary>
/// <returns>The character read, or -1 once the end of the input (including any
///     virtual newline) has been reached.</returns>
/// <remarks>
/// Once -1 has been returned the end-of-stream state is recorded in
/// <c>m_lastCharReadFromStream</c>, so later calls return -1 without touching
/// the reader again.
/// </remarks>
protected int ReadCharFromStream()
{
    // end of stream was already reported -- don't read from the reader again
    if (m_lastCharReadFromStream < 0)
        return -1;

    int iChar = m_sourceFileReader.Read();
    if (iChar >= 0)
    {
        m_lastCharReadFromStream = iChar;
        return iChar;
    }

    // the reader is exhausted; if the text didn't end with a newline, supply
    // one "virtually" -- a lone '\r' is used because the rest of this class
    // treats '\r' on its own the same as "\r\n"
    if (m_lastCharReadFromStream != '\n')
    {
        // pretend a newline was read so the next call reports end of stream
        m_lastCharReadFromStream = '\n';
        return '\r';
    }

    // record end of stream so the guard at the top takes effect from now on
    m_lastCharReadFromStream = -1;
    return -1;
}
TokenParser.AppendToToken Method (char)
Appends a character to m_tokenBuffer.
Parameters
ch The character to append.
Remarks
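The primary purpose of this method is to avoid casting errors; for example, passing an int character code directly to m_tokenBuffer.Append appends the number's decimal digits rather than the character. |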
/// <summary>
/// Appends a single character to the end of <c>m_tokenBuffer</c>.
/// </summary>
/// <param name="ch">The character to append.</param>
protected void AppendToToken(char ch)
{
    StringBuilder buffer = m_tokenBuffer;
    buffer.Append(ch);
}
TokenParser.AppendToToken Method (int)
Appends a character to m_tokenBuffer.
Parameters
iChar The character to append.
Remarks
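The primary purpose of this method is to avoid casting errors; for example, passing an int character code directly to m_tokenBuffer.Append appends the number's decimal digits rather than the character. |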
/// <summary>
/// Appends the character with the given code to the end of
/// <c>m_tokenBuffer</c>.
/// </summary>
/// <param name="iChar">The character code to append.  Must not be negative
///     (checked with a debug assertion), so the end-of-stream value -1 is not
///     a valid argument.</param>
protected void AppendToToken(int iChar)
{
    Debug.Assert(iChar >= 0);

    // convert explicitly so the character itself is appended, not the
    // decimal representation of its code
    char ch = (char) iChar;
    m_tokenBuffer.Append(ch);
}
TokenParser.IsSign Method
Returns true iff a given character is '+' or '-'.
Parameters
iChar The character to check. |
/// <summary>
/// Determines whether a character code is a plus or minus sign.
/// </summary>
/// <param name="iChar">The character code to check; may be negative (end of
///     stream), in which case false is returned.</param>
/// <returns>true if <paramref name="iChar"/> is '+' or '-'; otherwise
///     false.</returns>
protected static bool IsSign(int iChar)
{
    switch (iChar)
    {
        case '+':
        case '-':
            return true;

        default:
            return false;
    }
}
TokenParser.IsDigit Method
Returns true iff a given character is a decimal digit.
Parameters
iChar The character to check. |
/// <summary>
/// Determines whether a character code is one of the decimal digits '0'
/// through '9'.
/// </summary>
/// <param name="iChar">The character code to check; may be negative (end of
///     stream), in which case false is returned.</param>
/// <returns>true if <paramref name="iChar"/> is a decimal digit; otherwise
///     false.</returns>
protected static bool IsDigit(int iChar)
{
    // reject anything outside the contiguous range '0'..'9'
    if ((iChar < '0') || (iChar > '9'))
        return false;
    return true;
}
TokenParser.NewParsingException Method
Constructs and returns a new ParsingException referring to the current source file and line number.
Parameters
format A formatting string for an error message to include with the exception. args Formatting arguments for the error message. |
/// <summary>
/// Creates (but does not throw) a <c>ParsingException</c> that refers to the
/// source file label and the current one-based line number.
/// </summary>
/// <param name="format">A composite format string for the error
///     message.</param>
/// <param name="args">Formatting arguments for
///     <paramref name="format"/>.</param>
/// <returns>The newly constructed exception.</returns>
protected ParsingException NewParsingException(string format,
    params object[] args)
{
    // m_lineIndex is zero-based; the exception reports one-based lines
    int oneBasedLine = m_lineIndex + 1;

    ParsingException exception = new ParsingException(
        m_sourceFileLabel, oneBasedLine, format, args);
    return exception;
}
}
ParsingException Class
Indicates an error parsing the source code. |
/// <summary>
/// Exception describing an error found while parsing source code.  Besides
/// the combined <c>Message</c> ("label(line): text"), it exposes the file
/// label, the one-based line number, and the formatted message text as
/// separate properties.
/// </summary>
public class ParsingException : Exception {
    //////////////////////////////////////////////////////////////////////////
    // Private Fields
    //
/// <summary>
/// Backing field of <c>FileLabel</c>; assigned once by the constructor.
/// </summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
private readonly string m_fileLabel;

/// <summary>
/// Backing field of <c>LineNumber</c>; assigned once by the constructor.
/// </summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
private readonly int m_lineNumber;

/// <summary>
/// Backing field of <c>BaseMessage</c>; assigned once by the constructor.
/// </summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
private readonly string m_baseMessage;
//////////////////////////////////////////////////////////////////////////
// Public Properties
//
ParsingException.FileLabel Property
A label referring to the source code file being parsed. This is often a path to the file, if such a path is available. |
/// <summary>
/// Gets the label that identifies the source file being parsed, as supplied
/// to the constructor.
/// </summary>
public string FileLabel
{
    [DebuggerStepThrough]
    get
    {
        return this.m_fileLabel;
    }
}
ParsingException.LineNumber Property
The one-based line number of the line within the source file that contains the error. |
/// <summary>
/// Gets the line number, as supplied to the constructor, of the source line
/// that contains the error.
/// </summary>
public int LineNumber
{
    [DebuggerStepThrough]
    get
    {
        return this.m_lineNumber;
    }
}
/// <summary>
/// Gets the formatted error message on its own, without the file label and
/// line number prefix that <c>Message</c> carries.
/// </summary>
public string BaseMessage
{
    [DebuggerStepThrough]
    get
    {
        return this.m_baseMessage;
    }
}
ParsingException.WarningMessage Property
The error message, including FileLabel and LineNumber and the word "Warning" (or a localized equivalent). |
/// <summary>
/// Gets the error message rendered through the <c>Resources.Warning3</c>
/// format string, which receives <c>FileLabel</c>, <c>LineNumber</c> and
/// <c>BaseMessage</c> as arguments 0, 1 and 2.
/// </summary>
public string WarningMessage
{
    [DebuggerStepThrough]
    get
    {
        string template = Resources.Warning3;
        return String.Format(template, FileLabel, LineNumber, BaseMessage);
    }
}
ParsingException Constructor
Initializes an instance of this class.
Parameters
fileLabel A label referring to the source code file being parsed. lineNumber The one-based line number of the line within the source file that contains the error. format A formatting string for an error message to include with the exception. args Formatting arguments for the error message. |
/// <summary>
/// Initializes an instance of this class.  <c>Message</c> is set to
/// "fileLabel(lineNumber): text", where text is <paramref name="format"/>
/// formatted with <paramref name="args"/>; the same text is available from
/// <c>BaseMessage</c>.
/// </summary>
/// <param name="fileLabel">A label referring to the source code file being
///     parsed.</param>
/// <param name="lineNumber">The one-based line number of the line within the
///     source file that contains the error.</param>
/// <param name="format">A composite format string for the error
///     message.</param>
/// <param name="args">Formatting arguments for
///     <paramref name="format"/>.</param>
public ParsingException(string fileLabel, int lineNumber, string format,
    params object[] args)
    : this(fileLabel, lineNumber, String.Format(format, args))
{
    // all work is done by the private constructor so that the message is
    // formatted only once
}

/// <summary>
/// Initializes an instance from an already-formatted message.  Private so
/// that the public constructor remains the only externally visible overload.
/// </summary>
/// <param name="fileLabel">A label referring to the source code file being
///     parsed.</param>
/// <param name="lineNumber">The one-based line number of the error.</param>
/// <param name="baseMessage">The fully formatted error message.</param>
private ParsingException(string fileLabel, int lineNumber, string baseMessage)
    : base(String.Format("{0}({1}): {2}", fileLabel, lineNumber, baseMessage))
{
    m_fileLabel = fileLabel;
    m_lineNumber = lineNumber;
    m_baseMessage = baseMessage;
}
}
}