parent
a95987b3ce
commit
4353fb0c01
|
@ -1,74 +1,76 @@
|
||||||
namespace Canon.Core.LexicalParser;
|
using Canon.Core.Enums;
|
||||||
|
|
||||||
|
namespace Canon.Core.LexicalParser;
|
||||||
|
|
||||||
public static class LexRules
|
public static class LexRules
|
||||||
{
|
{
|
||||||
// 保留关键字
|
// 保留关键字
|
||||||
private static readonly string[] _keywords =
|
private static readonly Dictionary<string, KeywordType> s_keywordTypes =
|
||||||
[
|
new(StringComparer.OrdinalIgnoreCase)
|
||||||
"Program", "Const", "Var", "Procedure",
|
{
|
||||||
"Function", "Begin", "End", "Array",
|
{ "program", KeywordType.Program },
|
||||||
"Of", "If", "Then", "Else",
|
{ "const", KeywordType.Const },
|
||||||
"For", "To", "Do", "Integer",
|
{ "var", KeywordType.Var },
|
||||||
"Real", "Boolean", "Character", "Divide",
|
{ "procedure", KeywordType.Procedure },
|
||||||
"Not", "Mod", "And", "Or"
|
{ "function", KeywordType.Function },
|
||||||
];
|
{ "begin", KeywordType.Begin },
|
||||||
|
{ "end", KeywordType.End },
|
||||||
|
{ "array", KeywordType.Array },
|
||||||
|
{ "of", KeywordType.Of },
|
||||||
|
{ "if", KeywordType.If },
|
||||||
|
{ "then", KeywordType.Then },
|
||||||
|
{ "else", KeywordType.Else },
|
||||||
|
{ "for", KeywordType.For },
|
||||||
|
{ "to", KeywordType.To },
|
||||||
|
{ "do", KeywordType.Do },
|
||||||
|
{ "integer", KeywordType.Integer },
|
||||||
|
{ "real", KeywordType.Real },
|
||||||
|
{ "boolean", KeywordType.Boolean },
|
||||||
|
{ "char", KeywordType.Character },
|
||||||
|
{ "div", KeywordType.Divide }, // 注意: Pascal 使用 'div' 而不是 '/'
|
||||||
|
{ "not", KeywordType.Not },
|
||||||
|
{ "mod", KeywordType.Mod },
|
||||||
|
{ "and", KeywordType.And },
|
||||||
|
{ "or", KeywordType.Or }
|
||||||
|
};
|
||||||
|
|
||||||
private static readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]", "'", "\"", ".."];
|
public static bool GetKeywordTypeByKeywprd(string keyword, out KeywordType type)
|
||||||
|
=> s_keywordTypes.TryGetValue(keyword, out type);
|
||||||
|
|
||||||
private static readonly string[] _operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="];
|
|
||||||
|
private static readonly HashSet<char> s_delimiter = [';', ',', ':', '.', '(', ')', '[', ']', '\'', '"'];
|
||||||
|
|
||||||
|
private static readonly HashSet<string> s_operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="];
|
||||||
|
|
||||||
// 判断字符
|
// 判断字符
|
||||||
public static bool IsDigit(char _ch) {
|
public static bool IsDigit(char ch)
|
||||||
if (_ch >= '0' && _ch <= '9') return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static bool IsHexDigit(char _ch)
|
|
||||||
{
|
{
|
||||||
if ((_ch >= '0' && _ch <= '9') || (_ch<= 'F' && _ch >= 'A')) return true;
|
if (ch is >= '0' and <= '9') return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool IsLetter(char _ch) {
|
public static bool IsHexDigit(char ch)
|
||||||
if ((_ch >= 'A' && _ch <= 'Z') || (_ch >= 'a' && _ch <= 'z' || _ch == '_')) {
|
{
|
||||||
|
if (ch is >= '0' and <= '9' || ch is <= 'F' and >= 'A') return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static bool IsLetter(char ch)
|
||||||
|
{
|
||||||
|
if (ch is >= 'A' and <= 'Z' || (ch is >= 'a' and <= 'z' || ch == '_'))
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool IsKeyword(string tokenString)
|
|
||||||
{
|
|
||||||
|
|
||||||
foreach (var t in _keywords)
|
|
||||||
{
|
|
||||||
if (string.Equals(tokenString, t, StringComparison.OrdinalIgnoreCase)) return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public static bool IsDelimiter(char ch)
|
public static bool IsDelimiter(char ch)
|
||||||
{
|
=> s_delimiter.Contains(ch);
|
||||||
foreach (var delimiter in _delimiter)
|
|
||||||
{
|
|
||||||
if (delimiter.Contains(ch))
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static bool IsOperator(char ch)
|
public static bool IsOperator(char ch)
|
||||||
{
|
{
|
||||||
foreach (var o in _operator)
|
return s_operator.Any(op => op.Contains(ch));
|
||||||
{
|
|
||||||
if (o.Contains(ch))
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool IsBreakPoint(char ch)
|
public static bool IsBreakPoint(char ch)
|
||||||
|
|
|
@ -231,10 +231,8 @@ public class Lexer : ILexer
|
||||||
Retract();
|
Retract();
|
||||||
|
|
||||||
string tokenString = GetCurrentTokenString();
|
string tokenString = GetCurrentTokenString();
|
||||||
if (LexRules.IsKeyword(tokenString))
|
if (LexRules.GetKeywordTypeByKeywprd(tokenString, out KeywordType keywordType))
|
||||||
{
|
{
|
||||||
KeywordType keywordType =
|
|
||||||
KeywordSemanticToken.GetKeywordTypeByKeyword(GetCurrentTokenString());
|
|
||||||
|
|
||||||
_semanticToken = LexemeFactory.MakeToken(keywordType, tokenString, _line, _chPos);
|
_semanticToken = LexemeFactory.MakeToken(keywordType, tokenString, _line, _chPos);
|
||||||
}
|
}
|
||||||
|
|
|
@ -105,39 +105,6 @@ public class DelimiterSemanticToken : SemanticToken
|
||||||
|
|
||||||
public required DelimiterType DelimiterType { get; init; }
|
public required DelimiterType DelimiterType { get; init; }
|
||||||
|
|
||||||
public static bool TryParse(uint linePos, uint characterPos, LinkedListNode<char> now,
|
|
||||||
out DelimiterSemanticToken? token)
|
|
||||||
{
|
|
||||||
Dictionary<char, DelimiterType> delimiterMap = new()
|
|
||||||
{
|
|
||||||
{ ',', DelimiterType.Comma },
|
|
||||||
{ '.', DelimiterType.Period },
|
|
||||||
{ ':', DelimiterType.Colon },
|
|
||||||
{ ';', DelimiterType.Semicolon },
|
|
||||||
{ '(', DelimiterType.LeftParenthesis },
|
|
||||||
{ ')', DelimiterType.RightParenthesis },
|
|
||||||
{ '[', DelimiterType.LeftSquareBracket },
|
|
||||||
{ ']', DelimiterType.RightSquareBracket },
|
|
||||||
{ '\'', DelimiterType.SingleQuotation },
|
|
||||||
{ '\"', DelimiterType.DoubleQuotation }
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!delimiterMap.TryGetValue(now.Value, out DelimiterType value))
|
|
||||||
{
|
|
||||||
token = null;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
token = new DelimiterSemanticToken
|
|
||||||
{
|
|
||||||
LinePos = linePos,
|
|
||||||
CharacterPos = characterPos,
|
|
||||||
LiteralValue = new string([now.Value]),
|
|
||||||
DelimiterType = value
|
|
||||||
};
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public override int GetHashCode()
|
public override int GetHashCode()
|
||||||
{
|
{
|
||||||
return base.GetHashCode() ^ DelimiterType.GetHashCode();
|
return base.GetHashCode() ^ DelimiterType.GetHashCode();
|
||||||
|
@ -153,50 +120,9 @@ public class KeywordSemanticToken : SemanticToken
|
||||||
|
|
||||||
public required KeywordType KeywordType { get; init; }
|
public required KeywordType KeywordType { get; init; }
|
||||||
|
|
||||||
public static readonly Dictionary<string, KeywordType> KeywordTypes =
|
|
||||||
new Dictionary<string, KeywordType>(StringComparer.OrdinalIgnoreCase)
|
|
||||||
{
|
|
||||||
{ "program", KeywordType.Program },
|
|
||||||
{ "const", KeywordType.Const },
|
|
||||||
{ "var", KeywordType.Var },
|
|
||||||
{ "procedure", KeywordType.Procedure },
|
|
||||||
{ "function", KeywordType.Function },
|
|
||||||
{ "begin", KeywordType.Begin },
|
|
||||||
{ "end", KeywordType.End },
|
|
||||||
{ "array", KeywordType.Array },
|
|
||||||
{ "of", KeywordType.Of },
|
|
||||||
{ "if", KeywordType.If },
|
|
||||||
{ "then", KeywordType.Then },
|
|
||||||
{ "else", KeywordType.Else },
|
|
||||||
{ "for", KeywordType.For },
|
|
||||||
{ "to", KeywordType.To },
|
|
||||||
{ "do", KeywordType.Do },
|
|
||||||
{ "integer", KeywordType.Integer },
|
|
||||||
{ "real", KeywordType.Real },
|
|
||||||
{ "boolean", KeywordType.Boolean },
|
|
||||||
{ "character", KeywordType.Character },
|
|
||||||
{ "div", KeywordType.Divide }, // 注意: Pascal 使用 'div' 而不是 '/'
|
|
||||||
{ "not", KeywordType.Not },
|
|
||||||
{ "mod", KeywordType.Mod },
|
|
||||||
{ "and", KeywordType.And },
|
|
||||||
{ "or", KeywordType.Or }
|
|
||||||
};
|
|
||||||
|
|
||||||
public static KeywordType GetKeywordTypeByKeyword(string keyword)
|
|
||||||
{
|
|
||||||
if (KeywordTypes.TryGetValue(keyword, out var keywordType))
|
|
||||||
{
|
|
||||||
return keywordType;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw new ArgumentException($"Unknown keyword: {keyword}");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public override int GetHashCode()
|
public override int GetHashCode()
|
||||||
{
|
{
|
||||||
return base.GetHashCode() ^ this.KeywordType.GetHashCode();
|
return base.GetHashCode() ^ KeywordType.GetHashCode();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -209,33 +135,6 @@ public class OperatorSemanticToken : SemanticToken
|
||||||
|
|
||||||
public required OperatorType OperatorType { get; init; }
|
public required OperatorType OperatorType { get; init; }
|
||||||
|
|
||||||
public static readonly Dictionary<string, OperatorType> OperatorTypes = new Dictionary<string, OperatorType>
|
|
||||||
{
|
|
||||||
{ "=", OperatorType.Equal },
|
|
||||||
{ "<>", OperatorType.NotEqual },
|
|
||||||
{ "<", OperatorType.Less },
|
|
||||||
{ "<=", OperatorType.LessEqual },
|
|
||||||
{ ">", OperatorType.Greater },
|
|
||||||
{ ">=", OperatorType.GreaterEqual },
|
|
||||||
{ "+", OperatorType.Plus },
|
|
||||||
{ "-", OperatorType.Minus },
|
|
||||||
{ "*", OperatorType.Multiply },
|
|
||||||
{ "/", OperatorType.Divide },
|
|
||||||
{ ":=", OperatorType.Assign }
|
|
||||||
};
|
|
||||||
|
|
||||||
public static OperatorType GetOperatorTypeByOperator(string operatorSymbol)
|
|
||||||
{
|
|
||||||
if (OperatorTypes.TryGetValue(operatorSymbol, out var operatorType))
|
|
||||||
{
|
|
||||||
return operatorType;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw new ArgumentException($"Unknown operator: {operatorSymbol}");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public override int GetHashCode()
|
public override int GetHashCode()
|
||||||
{
|
{
|
||||||
return base.GetHashCode() ^ OperatorType.GetHashCode();
|
return base.GetHashCode() ^ OperatorType.GetHashCode();
|
||||||
|
@ -270,6 +169,9 @@ public class IdentifierSemanticToken : SemanticToken
|
||||||
public string IdentifierName => LiteralValue.ToLower();
|
public string IdentifierName => LiteralValue.ToLower();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// 终结符记号
|
||||||
|
/// </summary>
|
||||||
public class EndSemanticToken : SemanticToken
|
public class EndSemanticToken : SemanticToken
|
||||||
{
|
{
|
||||||
public override SemanticTokenType TokenType => SemanticTokenType.End;
|
public override SemanticTokenType TokenType => SemanticTokenType.End;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user