2024-03-15 12:00:47 +08:00
|
|
|
|
using System.Text;
|
2024-04-18 16:34:32 +08:00
|
|
|
|
using Canon.Core.Abstractions;
|
2024-03-15 12:00:47 +08:00
|
|
|
|
using Canon.Core.Enums;
|
2024-04-04 21:25:11 +08:00
|
|
|
|
using Canon.Core.Exceptions;
|
2024-03-10 19:48:34 +08:00
|
|
|
|
|
2024-03-15 12:00:47 +08:00
|
|
|
|
namespace Canon.Core.LexicalParser;
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
public class Lexer : ILexer
|
2024-03-10 19:48:34 +08:00
|
|
|
|
{
|
2024-04-18 16:34:32 +08:00
|
|
|
|
// Token bookkeeping: the token most recently built, the buffer collecting the
// characters of the lexeme being scanned, and the tokens produced so far.
private SemanticToken? _semanticToken;
private readonly StringBuilder _tokenBuilder = new();
private List<SemanticToken> _tokens = [];

// State machine: current state, the character under examination, and a flag
// that is set once the reader reports that no more characters are available.
private StateType _state = StateType.Start;
private char _ch;
private bool _finish;

// Source reading: the reader plus the line/position reported for the last
// character that was read successfully.
// _reader is assigned at the start of Tokenize; "null!" states that late
// initialisation explicitly instead of leaving a CS8618 nullable warning.
private ISourceReader _reader = null!;
private uint _line = 1;
private uint _chPos;
|
2024-04-04 21:25:11 +08:00
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Runs the lexer state machine over <paramref name="reader"/> and returns the tokens it
/// recognises, followed by <c>SemanticToken.End</c>.
/// </summary>
/// <param name="reader">Character source to scan.</param>
/// <returns>The recognised tokens in source order, terminated by the end token.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
/// <exception cref="LexemeException">The input contains a lexeme the state machine rejects.</exception>
public IEnumerable<SemanticToken> Tokenize(ISourceReader reader)
{
    ArgumentNullException.ThrowIfNull(reader);

    // Reset every piece of per-run state. Without clearing _finish a second call on the
    // same instance would stop immediately, because GetChar() does nothing once it is set.
    _reader = reader;
    _tokens = [];
    _semanticToken = null;
    _tokenBuilder.Clear();
    _state = StateType.Start;
    _ch = char.MinValue;
    _finish = false;
    _line = 1;
    _chPos = 0;

    while (_state != StateType.Done)
    {
        switch (_state)
        {
            case StateType.Start:
                HandleStartState();
                break;

            case StateType.Comment:
                // The start state leaves the character that identifies the comment kind in _ch.
                if (_ch == '{')
                {
                    HandleCommentStateBig();
                }
                else if (_ch == '*')
                {
                    HandleCommentStateSmall();
                }
                else
                {
                    HandleCommentSingleLine();
                }

                break;

            case StateType.Num:
                HandleNumState();
                break;

            case StateType.Word:
                HandleWordState();
                break;

            case StateType.Delimiter:
                HandleDelimiterState();
                break;

            case StateType.Operator:
                HandleOperatorState();
                break;

            case StateType.BreakPoint:
                // Skip the whole run of break-point characters, then hand back the first
                // character that is not one so the start state reads it again.
                while (LexRules.IsBreakPoint(_ch))
                {
                    GetChar();
                }

                Retract();
                _state = StateType.Start;
                break;

            case StateType.Unknown:
                throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos,
                    "Illegal lexeme.");

            case StateType.Done:
                break;
        }
    }

    _tokens.Add(SemanticToken.End);
    return _tokens;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Start state: clears the token buffer, reads the next character and chooses the state
/// that scans the lexeme beginning with that character.
/// </summary>
private void HandleStartState()
{
    ResetTokenBuilder();
    GetChar();

    // Nothing left to read: stop the state machine.
    if (_finish)
    {
        _state = StateType.Done;
        return;
    }

    // Characters whose meaning depends on themselves or on one character of lookahead.
    switch (_ch)
    {
        case '{':
            // "{" opens a comment.
            _state = StateType.Comment;
            return;

        case '(':
            // "(*" opens a comment; any other "(" is a delimiter.
            if (PeekNextChar() != '*')
            {
                _state = StateType.Delimiter;
                return;
            }

            GetChar(); // leave '*' in _ch so the comment dispatch can identify the comment kind
            _state = StateType.Comment;
            return;

        case '/':
            // "//" opens a single-line comment; any other "/" is an operator.
            if (PeekNextChar() != '/')
            {
                _state = StateType.Operator;
                return;
            }

            GetChar();
            _state = StateType.Comment;
            return;

        case '.':
            // "." followed by a digit starts a number, otherwise it is a delimiter.
            _state = PeekNextChar() is >= '0' and <= '9' ? StateType.Num : StateType.Delimiter;
            return;
    }

    // Everything else is classified through LexRules, in this fixed priority order.
    if (LexRules.IsLetter(_ch))
    {
        // A letter starts a keyword or an identifier.
        _state = StateType.Word;
    }
    else if (LexRules.IsDigit(_ch) || _ch == '$')
    {
        // A digit or "$" starts a numeric literal.
        _state = StateType.Num;
    }
    else if (LexRules.IsDelimiter(_ch))
    {
        _state = StateType.Delimiter;
    }
    else if (LexRules.IsOperator(_ch))
    {
        _state = StateType.Operator;
    }
    else if (LexRules.IsBreakPoint(_ch))
    {
        _state = StateType.BreakPoint;
    }
    else
    {
        _state = StateType.Unknown;
    }
}
|
2024-03-15 12:00:47 +08:00
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Skips a comment terminated by <c>}</c>, then returns the state machine to the start state.
/// </summary>
/// <exception cref="LexemeException">The input ends before the closing <c>}</c> is found.</exception>
private void HandleCommentStateBig()
{
    while (_ch != '}')
    {
        GetChar();

        if (!_finish)
        {
            continue;
        }

        // Ran out of input while still inside the comment.
        throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos,
            "The comment is not closed.");
    }

    _state = StateType.Start;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Skips a <c>(* ... *)</c> comment, then returns the state machine to the start state.
/// </summary>
/// <remarks>
/// The closing marker is found by remembering whether the previous character was <c>*</c>,
/// so a run of stars directly before <c>)</c> (for example <c>**)</c>) closes the comment.
/// </remarks>
/// <exception cref="LexemeException">The input ends before the closing <c>*)</c> is found.</exception>
private void HandleCommentStateSmall()
{
    // On entry _ch holds the '*' of the opening "(*"; it must not pair with a ')' that
    // follows immediately, so the flag starts out false.
    bool previousWasStar = false;

    while (true)
    {
        GetChar();
        if (_finish)
        {
            throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos,
                "The comment is not closed.");
        }

        if (previousWasStar && _ch == ')')
        {
            break;
        }

        previousWasStar = _ch == '*';
    }

    _state = StateType.Start;
}
|
2024-03-15 12:00:47 +08:00
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Skips the rest of the current line (a single-line comment), then returns the state
/// machine to the start state.
/// </summary>
private void HandleCommentSingleLine()
{
    // Also stop at end of input: once _finish is set GetChar() no longer changes _ch,
    // so a comment on the last line without a trailing newline would otherwise loop forever.
    while (_ch != '\n' && !_finish)
    {
        GetChar();
    }

    _state = StateType.Start;
}
|
2024-03-15 12:00:47 +08:00
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Scans a run of letters and digits and emits it as a keyword token when
/// <c>LexRules</c> recognises it, otherwise as an identifier token.
/// </summary>
private void HandleWordState()
{
    // Collect characters while they are digits or letters.
    for (; LexRules.IsDigit(_ch) || LexRules.IsLetter(_ch); GetChar())
    {
        Cat();
    }

    // The character that ended the word belongs to the next lexeme.
    Retract();

    string word = GetCurrentTokenString();
    SemanticToken token;
    if (!LexRules.GetKeywordTypeByKeywprd(word, out KeywordType keyword))
    {
        token = LexemeFactory.MakeToken(SemanticTokenType.Identifier, word, _line, _chPos);
    }
    else
    {
        token = LexemeFactory.MakeToken(keyword, word, _line, _chPos);
    }

    _semanticToken = token;
    AddToTokens(token);
    _state = StateType.Start;
}
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Scans a numeric literal starting at the current character and emits it as a token:
/// a <c>$</c>-prefixed hexadecimal number, an integer, or a real number (with a fractional
/// part and/or an exponent).
/// </summary>
/// <exception cref="LexemeException">The literal contains a character that is not allowed in a number.</exception>
private void HandleNumState()
{
    NumberType numberType = NumberType.Integer;

    if (_ch == '$')
    {
        // Hexadecimal literal.
        ProcessHex();
        numberType = NumberType.Hex;
    }
    else if (LexRules.IsDigit(_ch) || _ch == '.')
    {
        // NumberShouldBreak() retracts the reader itself when it reports the end of the literal.
        while (!NumberShouldBreak())
        {
            if (_ch == '.')
            {
                if (PeekNextChar() == '.')
                {
                    // This dot is the first half of "..": push it back so that it is rescanned
                    // from the start state, and finish the digits collected so far.
                    Retract();
                    break;
                }

                ProcessFraction();
                numberType = NumberType.Real;
                break;
            }

            if (_ch is 'e' or 'E')
            {
                // Exponent without a fractional part.
                ProcessE();
                numberType = NumberType.Real;
                break;
            }

            if (!LexRules.IsDigit(_ch))
            {
                throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
            }

            // Still an integer so far.
            Cat();
            GetChar();
        }
    }

    _semanticToken = LexemeFactory.MakeToken(numberType, GetCurrentTokenString(), _line, _chPos);
    AddToTokens(_semanticToken);
    _state = StateType.Start;
}

/// <summary>
/// Scans the part of a real literal that starts at the decimal point held in the current
/// character: the point itself, the characters after it, and an optional exponent.
/// </summary>
/// <exception cref="LexemeException">
/// The literal ends directly after the decimal point, or contains an illegal character.
/// </exception>
private void ProcessFraction()
{
    Cat(); // keep the "."

    // The literal must not end directly after the decimal point.
    GetChar();
    if (NumberShouldBreak())
    {
        throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
            "Illegal numbers!");
    }

    // Read the digits after the decimal point.
    while (!NumberShouldBreak())
    {
        if (LexRules.IsDigit(_ch))
        {
            Cat();
            GetChar();
        }
        else if (_ch is 'e' or 'E')
        {
            ProcessE();
            return;
        }
        else
        {
            throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
                "Illegal number.");
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Scans a hexadecimal literal. On entry the current character is the <c>$</c> prefix;
/// the prefix and all following hexadecimal digits are appended to the token buffer.
/// </summary>
/// <exception cref="LexemeException">
/// The prefix is not followed by a hexadecimal digit, or the literal contains a character
/// that is neither a hexadecimal digit nor a number boundary.
/// </exception>
private void ProcessHex()
{
    Cat(); // keep the "$" prefix
    GetChar();

    // A bare "$" is not a number: at least one hexadecimal digit is required.
    if (!LexRules.IsHexDigit(_ch))
    {
        throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
            "Illegal hex numbers!");
    }

    // NumberShouldBreak() retracts the reader itself when it reports the end of the literal.
    while (!NumberShouldBreak())
    {
        if (!LexRules.IsHexDigit(_ch))
        {
            throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
                "Illegal hex numbers!");
        }

        Cat();
        GetChar();
    }
}
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Scans the exponent of a real literal. On entry the current character is <c>e</c> or
/// <c>E</c>; the marker, an optional sign and the exponent digits are appended to the
/// token buffer.
/// </summary>
/// <exception cref="LexemeException">
/// The marker (or its sign) is not followed by a digit, or a non-digit appears inside the exponent.
/// </exception>
private void ProcessE()
{
    Cat(); // keep the "e" / "E"
    GetChar();

    if (_ch is '+' or '-')
    {
        Cat();
        GetChar();

        // A sign alone is not an exponent: at least one digit must follow it.
        if (!LexRules.IsDigit(_ch))
        {
            throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
        }
    }
    else if (!LexRules.IsDigit(_ch))
    {
        throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
    }

    // Read the exponent digits. NumberShouldBreak() retracts the reader itself when it
    // reports the end of the literal.
    while (!NumberShouldBreak())
    {
        if (!LexRules.IsDigit(_ch))
        {
            throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
        }

        Cat();
        GetChar();
    }
}
|
2024-03-15 12:00:47 +08:00
|
|
|
|
|
2024-04-04 21:25:11 +08:00
|
|
|
|
/// <summary>
/// Tells whether the current character ends a numeric literal: a space, line feed, tab or
/// carriage return, any delimiter except <c>.</c>, any operator, or the end of the input.
/// </summary>
/// <remarks>
/// This check has a side effect: when it returns <c>true</c> it has already called
/// <c>Retract()</c>, so callers must not retract again.
/// </remarks>
/// <returns><c>true</c> if the literal ends here (the reader has been retracted); otherwise <c>false</c>.</returns>
private bool NumberShouldBreak()
{
    bool atBoundary = _ch is ' ' or '\n' or '\t' or '\r'
                      || (LexRules.IsDelimiter(_ch) && _ch != '.')
                      || LexRules.IsOperator(_ch)
                      || _finish;

    if (!atBoundary)
    {
        return false;
    }

    Retract();
    return true;
}
|
|
|
|
|
|
2024-04-04 21:25:11 +08:00
|
|
|
|
/// <summary>
/// Tells whether the most recently emitted token is an identifier. The delimiter state uses
/// this to choose between the <c>Dot</c> and <c>Period</c> delimiter types for a single ".".
/// </summary>
/// <returns><c>true</c> if at least one token exists and the last one is an identifier.</returns>
private bool IsDot()
{
    if (_tokens.Count == 0)
    {
        return false;
    }

    return _tokens[^1].TokenType == SemanticTokenType.Identifier;
}
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Scans the delimiter that starts at the current character and emits its token. This covers
/// <c>.</c> / <c>..</c>, quoted text, <c>,</c>, <c>:</c> (and the <c>:=</c> assignment
/// operator), <c>;</c>, parentheses and square brackets.
/// </summary>
/// <exception cref="LexemeException">Quoted text is not closed before the end of the line or of the input.</exception>
/// <exception cref="InvalidOperationException">The current character is not a delimiter handled here.</exception>
private void HandleDelimiterState()
{
    // Forget the previous token. Otherwise the guard after the switch could never fire and
    // an unhandled character would silently emit the previous token a second time.
    _semanticToken = null;

    Cat();
    switch (_ch)
    {
        case '.':
            GetChar();
            if (_ch == '.')
            {
                Cat();
                _semanticToken = LexemeFactory.MakeToken(DelimiterType.DoubleDots, "..", _line, _chPos);
            }
            else
            {
                // Single ".": hand the lookahead character back to the reader.
                Retract();
                _semanticToken = IsDot()
                    ? LexemeFactory.MakeToken(DelimiterType.Dot, ".", _line, _chPos)
                    : LexemeFactory.MakeToken(DelimiterType.Period, ".", _line, _chPos);
            }

            break;

        case '\'':
        case '\"':
        {
            // Remember which quote opened the text so that only the same character closes it.
            char quote = _ch;

            // Drop the opening quote from the buffer; only the content is collected.
            ResetTokenBuilder();

            GetChar(); // first character of the content
            while (_ch != quote)
            {
                // Checked before collecting, so a line break directly after the opening
                // quote is reported as well.
                if (_finish || _ch == '\n')
                {
                    throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos,
                        "The String is not closed.");
                }

                Cat();
                GetChar();
            }

            _semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Character,
                GetCurrentTokenString(), _line, _chPos);
            ResetTokenBuilder();
            break;
        }

        case ',':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.Comma, ",", _line, _chPos);
            break;

        case ':':
            if (PeekNextChar() == '=')
            {
                // ":=" is the assignment operator.
                GetChar();
                Cat();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.Assign, ":=", _line, _chPos);
            }
            else
            {
                _semanticToken = LexemeFactory.MakeToken(DelimiterType.Colon, ":", _line, _chPos);
            }

            break;

        case ';':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.Semicolon, ";", _line, _chPos);
            break;

        case '(':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.LeftParenthesis, "(", _line, _chPos);
            break;

        case ')':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.RightParenthesis, ")", _line, _chPos);
            break;

        case '[':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.LeftSquareBracket, "[", _line, _chPos);
            break;

        case ']':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.RightSquareBracket, "]", _line, _chPos);
            break;
    }

    if (_semanticToken is null)
    {
        throw new InvalidOperationException();
    }

    _tokens.Add(_semanticToken);
    _state = StateType.Start;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Scans the operator that starts at the current character and emits its token:
/// <c>+ - * / =</c>, <c>&lt;</c>, <c>&lt;=</c>, <c>&lt;&gt;</c>, <c>&gt;</c> and <c>&gt;=</c>.
/// </summary>
/// <exception cref="LexemeException">The current character is not one of the operators above.</exception>
private void HandleOperatorState()
{
    switch (_ch)
    {
        case '+':
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Plus, "+", _line, _chPos);
            AddToTokens(_semanticToken);
            break;

        case '-':
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Minus, "-", _line, _chPos);
            AddToTokens(_semanticToken);
            break;

        case '*':
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Multiply, "*", _line, _chPos);
            AddToTokens(_semanticToken);
            break;

        case '/':
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Divide, "/", _line, _chPos);
            AddToTokens(_semanticToken);
            break;

        case '=':
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Equal, "=", _line, _chPos);
            AddToTokens(_semanticToken);
            break;

        case '<':
            Cat();
            GetChar();
            if (_ch == '=')
            {
                // "<="
                Cat();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.LessEqual, "<=", _line, _chPos);
            }
            else if (_ch == '>')
            {
                // "<>": pass the full two-character literal, as the other two-character
                // operators do (previously only ">" was passed).
                Cat();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.NotEqual, "<>", _line, _chPos);
            }
            else
            {
                // Plain "<": hand the lookahead character back to the reader.
                Retract();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.Less, "<", _line, _chPos);
            }

            AddToTokens(_semanticToken);
            break;

        case '>':
            Cat();
            GetChar();
            if (_ch == '=')
            {
                // ">="
                Cat();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.GreaterEqual, ">=", _line, _chPos);
            }
            else
            {
                // Plain ">": hand the lookahead character back to the reader.
                Retract();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.Greater, ">", _line, _chPos);
            }

            AddToTokens(_semanticToken);
            break;

        default:
            throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos, "Illegal lexeme.");
    }

    _state = StateType.Start;
}
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>Appends <paramref name="semanticToken"/> to the list of tokens produced so far.</summary>
/// <param name="semanticToken">The token to record.</param>
private void AddToTokens(SemanticToken semanticToken) => _tokens.Add(semanticToken);
|
|
|
|
|
|
2024-03-15 12:00:47 +08:00
|
|
|
|
/// <summary>Appends the current character to the token buffer.</summary>
private void Cat() => _tokenBuilder.Append(_ch);
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>Returns the text collected in the token buffer so far.</summary>
/// <returns>The buffered characters as a string.</returns>
private string GetCurrentTokenString() => _tokenBuilder.ToString();
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>Empties the token buffer so that it can be reused for the next lexeme.</summary>
private void ResetTokenBuilder() => _tokenBuilder.Clear();
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Returns the character the reader offers through <c>TryPeekChar</c>, without consuming it.
/// </summary>
/// <returns>The peeked character, or <see cref="char.MinValue"/> when the reader has none to offer.</returns>
private char PeekNextChar()
{
    return _reader.TryPeekChar(out char? peeked) ? peeked.Value : char.MinValue;
}
|
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Advances the reader by one character and stores it, together with its line and position,
/// in <c>_ch</c>, <c>_line</c> and <c>_chPos</c>.
/// </summary>
/// <remarks>
/// When the reader cannot advance, <c>_finish</c> is set and <c>_ch</c> becomes
/// <see cref="char.MinValue"/>; the line and position keep their previous values.
/// Once <c>_finish</c> is set, further calls do nothing.
/// </remarks>
private void GetChar()
{
    if (_finish)
    {
        return;
    }

    if (_reader.MoveNext())
    {
        _ch = _reader.Current;
        _line = _reader.Line;
        _chPos = _reader.Pos;
    }
    else
    {
        _finish = true;
        _ch = char.MinValue;
    }
}
|
2024-03-15 12:00:47 +08:00
|
|
|
|
|
2024-04-18 16:34:32 +08:00
|
|
|
|
/// <summary>
/// Asks the reader to retract. The cached <c>_ch</c>, <c>_line</c> and <c>_chPos</c>
/// are left unchanged.
/// </summary>
private void Retract() => _reader.Retract();
|
|
|
|
|
}
|