diff --git a/Canon.Core/Enums/ErrorEnums.cs b/Canon.Core/Enums/ErrorEnums.cs
new file mode 100644
index 0000000..18ded10
--- /dev/null
+++ b/Canon.Core/Enums/ErrorEnums.cs
@@ -0,0 +1,11 @@
+namespace Canon.Core.Enums;
+
+public enum LexemeErrorType
+{
+ IllegalNumberFormat, // invalid number format
+ UnknownCharacterOrString, // the source contains an unrecognizable character or string
+ UnclosedStringLiteral, // string literal is not closed
+ UnclosedComment, // comment is not closed
+ InvalidEscapeSequence, // invalid escape sequence
+ IllegalOperator, // illegal operator
+}
diff --git a/Canon.Core/Enums/SemanticEnums.cs b/Canon.Core/Enums/SemanticEnums.cs
index 3e3bf7f..13be0ec 100644
--- a/Canon.Core/Enums/SemanticEnums.cs
+++ b/Canon.Core/Enums/SemanticEnums.cs
@@ -93,7 +93,7 @@ public enum StateType
Word,
Digit,
Delimiter,
- Other
+ Operator
}
public enum BasicIdType
diff --git a/Canon.Core/Exceptions/LexemeException.cs b/Canon.Core/Exceptions/LexemeException.cs
index b20c4ed..40ec791 100644
--- a/Canon.Core/Exceptions/LexemeException.cs
+++ b/Canon.Core/Exceptions/LexemeException.cs
@@ -1,9 +1,13 @@
namespace Canon.Core.Exceptions;
+using Enums;
/// <summary>
/// Exception thrown during lexical analysis
/// </summary>
public class LexemeException : Exception
{
+ public LexemeErrorType ErrorType { get; }
+ public uint Line { get; }
+ public uint CharPosition { get; }
public LexemeException() { }
public LexemeException(string message) : base(message) { }
@@ -11,15 +15,20 @@ public class LexemeException : Exception
public LexemeException(string message, Exception innerException) :
base(message, innerException) { }
+ /// <param name="errorType">The type of the lexical error</param>
/// <param name="line">The line number of the token</param>
/// <param name="charPosition">The column number of the token</param>
/// <param name="message">The error message</param>
- public LexemeException(uint line, uint charPosition, string message) :
- base("line:" + line + ", charPosition:" + charPosition + " :" + message) { }
+ public LexemeException(LexemeErrorType errorType, uint line, uint charPosition, string message) :
+ base("line:" + line + ", charPosition:" + charPosition + " :" + message)
+ {
+ ErrorType = errorType;
+ Line = line;
+ CharPosition = charPosition;
+ }
- public LexemeException(uint line, uint charPosition, Exception innerException) :
- base("line:" + line + ", charPosition:" + charPosition + " : ", innerException) { }
-
- public LexemeException(uint line, uint charPosition, string message, Exception innerException) :
- base("line:" + line + ", charPosition:" + charPosition + " :" + message, innerException) { }
+ public override string ToString()
+ {
+ return $"LexemeException: ErrorType={ErrorType}, Line={Line}, CharPosition={CharPosition}, Message={Message}\n";
+ }
}
diff --git a/Canon.Core/LexicalParser/Lexer.cs b/Canon.Core/LexicalParser/Lexer.cs
index 516de9b..d1a55c1 100644
--- a/Canon.Core/LexicalParser/Lexer.cs
+++ b/Canon.Core/LexicalParser/Lexer.cs
@@ -1,5 +1,7 @@
+using System.Numerics;
using System.Text;
using Canon.Core.Enums;
+using Canon.Core.Exceptions;
namespace Canon.Core.LexicalParser;
@@ -17,27 +19,31 @@ public class Lexer(string source)
"Not", "Mod", "And", "Or"
];
- private readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]","'","\"",".."];
+ private readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]", "'", "\"", ".."];
+
+ private readonly string[] _operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="];
// State machine
private StateType _state;
private char _ch;
private LinkedList<char> _token = new LinkedList<char>();
+
// bool save;
// int saved_state;
bool _finish;
- private bool eof;
+
// Buffer
private readonly char[] _buffer = new char[2048];
+
// int start_pos;
private int _fwdPos;
// Counters
private uint _line = 1;
private uint _chPos;
- private int _sourcePos;
+
private readonly Dictionary<SemanticTokenType, int> _tokenCount = new Dictionary<SemanticTokenType, int>
{
{ SemanticTokenType.Keyword, 0 },
@@ -57,234 +63,394 @@ public class Lexer(string source)
// Buffer
// start_pos = 0;
_fwdPos = 0;
- FillLeftBuffer();
// State machine
_finish = false;
- while (!_finish) {
+ while (!_finish)
+ {
GetChar();
GetNbc();
+ if (_finish) break;
+ _token = new LinkedList<char>();
- if (IsLetter()) {
+ if (IsLetter())
+ {
_state = StateType.Word;
}
- else if (IsDigit()) {
+ else if(_ch == '.')
+ {
+ char next = PeekNextChar();
+ if (next >= '0' && next <= '9')
+ {
+ _state = StateType.Digit;
+ }
+ else
+ {
+ _state = StateType.Delimiter;
+ }
+ }
+ else if (IsDigit() || _ch == '$')
+ {
_state = StateType.Digit;
}
- else if (IsDelimiter()) {
+ else if (IsDelimiter())
+ {
_state = StateType.Delimiter;
}
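+ // Skip a '{ ... }' comment: consume characters until '}', tracking line breaks; reaching EOF first raises UnclosedComment.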
+ else if (_ch == '{')
+ {
+ GetChar();
+ while (_ch != '}')
+ {
+ GetChar();
+ if (_ch == '\n')
+ {
+ _line++;
+ _chPos = 0;
+ }
+ if (_finish)
+ {
+ throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed.");
+ }
+
+ }
+
+ continue;
+ }
else
{
- _state = StateType.Other;
+ _state = StateType.Operator;
}
switch (_state)
{
- case StateType.Word: {
- while (IsDigit() || IsLetter())
+ case StateType.Word:
+ while (IsDigit() || IsLetter())
+ {
+ Cat();
+ GetChar();
+ }
+
+ Retract();
+
+ if (IsKeyword())
+ {
+ KeywordType keywordType =
+ KeywordSemanticToken.GetKeywordTypeByKeyword(LinkedListToString(_token.First));
+ MakeToken(keywordType);
+ }
+ else
+ {
+ MakeToken(SemanticTokenType.Identifier);
+ }
+
+ break;
+ case StateType.Digit:
+ DealNumber();
+ break;
+ case StateType.Delimiter:
+ Cat();
+ switch (_ch)
+ {
+ case '.':
+ {
+ GetChar();
+ if (_ch == '.')
+ {
+ Cat();
+ MakeToken(DelimiterType.DoubleDots);
+ break;
+ }
+
+ Retract();
+ if (IsDot())
+ {
+ MakeToken(DelimiterType.Dot);
+ }
+ else
+ {
+ MakeToken(DelimiterType.Period);
+ }
+ }
+ break;
+ case '\'':
+ case '\"':
+ {
+ // Reset _token to start collecting the string contents
+ _token = new LinkedList<char>();
+
+ GetChar(); // move to the next character, i.e. the first character of the string
+ while (_ch != '\'' && _ch != '\"')
+ {
+ Cat(); // collect the character
+ GetChar(); // move to the next character
+ if (_ch == '\n' || _finish)
+ {
+ throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos, "The String is not closed.");
+ }
+ }
+
+ MakeToken(SemanticTokenType.Character); // or another SemanticTokenType suited to string literals
+ _token = new LinkedList<char>(); // reset _token
+
+ if (!(_ch == '\'' || _ch == '\"'))
+ {
+ throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos, "The String is not closed.");
+ }
+ }
+ break;
+ case ',':
+ MakeToken(DelimiterType.Comma);
+ break;
+ case ':':
+ char nextChar = PeekNextChar();
+ if (nextChar == '=')
+ {
+ GetChar();
+ Cat();
+ MakeToken(OperatorType.Assign);
+ }
+ else
+ {
+ MakeToken(DelimiterType.Colon);
+ }
+
+ break;
+ case ';':
+ MakeToken(DelimiterType.Semicolon);
+ break;
+ case '(':
+ char next = PeekNextChar();
+ if (next == '*')
+ {
+ GetChar();
+ bool commentClosed = false;
+ while (!commentClosed)
+ {
+ GetNbc();
+ GetChar();
+ while (_ch != '*')
+ {
+ GetNbc();
+ GetChar();
+ if (_finish)
+ {
+ throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed.");
+ }
+ }
+
+ GetChar();
+ if (_finish)
+ {
+ throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed.");
+ }
+
+ if (_ch == ')') commentClosed = true;
+ }
+ }
+ else
+ {
+ MakeToken(DelimiterType.LeftParenthesis);
+ }
+
+ break;
+ case ')':
+ MakeToken(DelimiterType.RightParenthesis);
+ break;
+ case '[':
+ MakeToken(DelimiterType.LeftSquareBracket);
+ break;
+ case ']':
+ MakeToken(DelimiterType.RightSquareBracket);
+ break;
+ }
+
+ break;
+ case StateType.Operator:
+ DealOther();
+ break;
+ default:
+ throw new ArgumentOutOfRangeException();
+ }
+
+ }
+
+ return _tokens;
+ }
+
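+ // Scan a numeric literal: hexadecimal ($...), real (fractional part and/or exponent) or plain integer.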
+ private void DealNumber()
+ {
+ // Hexadecimal
+ if (_ch == '$')
+ {
+ Cat();
+
+ GetChar();
+ while (!NumberShouldBreak())
+ {
+ // IsHexDigit is assumed to recognize hexadecimal digits
+ if (IsHexDigit())
{
Cat();
GetChar();
}
- Retract();
-
- if (IsKeyword())
+ else if(NumberShouldBreak())
{
- KeywordType keywordType =
- KeywordSemanticToken.GetKeywordTypeByKeyword(LinkedListToString(_token.First));
- MakeToken(keywordType);
+ break;
}
- else {
- MakeToken(SemanticTokenType.Identifier);
+ else
+ {
+ throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal hex numbers!");
}
- break;
}
+ MakeToken(NumberType.Hex);
+ return;
+ }
- case StateType.Digit:
+ // Non-hexadecimal numbers
+ if(IsDigit() || _ch == '.')
+ {
+ while (!NumberShouldBreak())
+ {
+ // Fractional part present
+ if (_ch == '.')
{
- bool error = false;
- bool tag = false; // marks whether the exponent part of scientific notation has already been handled
- bool doubleDot = false;
- NumberType numberType = NumberType.Integer;
-
- while (IsDigit() || _ch == '.' || _ch == 'E' || _ch == '+' || _ch == '-' || _ch == 'e' || IsLetter()) {
- if (_ch != '.')
- {
- Cat();
- }
-
-
- if (_ch == '0' && !tag) {
- GetChar();
- if (_ch == 'x' || _ch == 'X') {
- numberType = NumberType.Hex; // mark as hexadecimal
- Cat();
- while (IsHexDigit()) { // assuming IsHexDigit recognizes hexadecimal digits
- Cat();
- }
- break;
- }
- Retract(); // if it is not 'x' or 'X', retract one character
- }
- else if (_ch == '.') {
- GetChar();
- if (_ch == '.') {
- Retract(); // retract to the first '.'
- Retract(); // retract to the digit before the '.'
- doubleDot = true;
- break;
- }
- Retract();
- Cat();
- numberType = NumberType.Real;
- }
- else if ((_ch == 'e' || _ch == 'E') && !tag) {
- GetChar();
- if (IsDigit() || _ch == '+' || _ch == '-') {
- Cat();
- tag = true; // exponent part handled
- continue;
- }
- error = true; // malformed scientific notation
- break;
- }
-
- GetChar();
- }
-
- if (!error) {
- MakeToken(numberType);
- if (doubleDot)
- {
- break;
- }
- Retract();
- }
- else
+ // Check whether this is the ".." symbol
+ char next = PeekNextChar();
+ if (next == '.')
{
Retract();
- PrintError(0,_token.First,_line);
- _tokenCount[SemanticTokenType.Error]++;
+ break;
}
- break;
- }
- case StateType.Delimiter:
- Cat();
- switch (_ch)
- {
- case '.':
+ // Not the ".." symbol, so move on to handling the fractional part
+ Cat(); // record the '.'
+
+ // The '.' must be followed by at least one fractional digit
+ GetChar();
+ if (NumberShouldBreak())
{
- GetChar();
- if (_ch == '.')
+ throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal numbers!");
+ }
+
+ // Read the digits after the decimal point
+ while (!NumberShouldBreak())
+ {
+ if (IsDigit())
{
Cat();
- MakeToken(DelimiterType.DoubleDots);
+ GetChar();
+ }
+ else if (_ch == 'e' || _ch == 'E')
+ {
+ DealE();
break;
}
- Retract();
- if (IsPeriod())
+ else if(NumberShouldBreak())
{
-
- }else if (IsDot())
- {
-
- }
- }
- break;
- case '\'':
- case '\"':
- {
- if(_ch == '\'') MakeToken(DelimiterType.SingleQuotation);
- else if(_ch == '\"') MakeToken(DelimiterType.DoubleQuotation);
-
- // Reset _token to start collecting the string contents
- _token = new LinkedList<char>();
-
- GetChar(); // move to the next character, i.e. the first character of the string
- while (_ch != '\'' && _ch != '\"')
- {
- Cat(); // collect the character
- GetChar(); // move to the next character
- }
-
- // When the loop exits, _ch is ' or EOF, and _token holds the string contents
- // Create the token for the string contents; note that SemanticTokenType.String is used for string literals here
- MakeToken(SemanticTokenType.Character); // or another SemanticTokenType suited to string literals
- _token = new LinkedList<char>(); // reset _token
-
- if (_ch == '\'' && _ch != '\n')
- {
- // Recognize and create the token for the closing single quote
- Cat();
- MakeToken(DelimiterType.SingleQuotation);
- }
- else if (_ch == '\"')
- {
- Cat();
- MakeToken(DelimiterType.DoubleQuotation);
+ break;
}
else
{
- // Handle reaching EOF without a closing single quote, e.g. a string that is never terminated
- // Error-handling code could be added here
- PrintError(0, _token.First, _line); // assuming this method prints the error
+ throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
}
}
- break;
- case ',':
- MakeToken(DelimiterType.Comma);
- break;
- case ':':
- MakeToken(DelimiterType.Colon);
- break;
- case ';':
- MakeToken(DelimiterType.Semicolon);
- break;
- case '(':
- MakeToken(DelimiterType.LeftParenthesis);
- break;
- case ')':
- MakeToken(DelimiterType.RightParenthesis);
- break;
- case '[':
- MakeToken(DelimiterType.LeftSquareBracket);
- break;
- case ']':
- MakeToken(DelimiterType.RightSquareBracket);
+ MakeToken(NumberType.Real);
+ return;
+ }
+
+ // No fractional part, but scientific notation
+ if (_ch == 'e' || _ch == 'E')
+ {
+ DealE();
+ MakeToken(NumberType.Real);
+ return;
+ }
+
+ // Still an integer so far
+ if (IsDigit())
+ {
+ Cat();
+ GetChar();
+ }
+ else if(NumberShouldBreak())
+ {
break;
}
- break;
-
- case StateType.Other:
- DealOther();
- break;
- default:
- throw new ArgumentOutOfRangeException();
+ else
+ {
+ throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
+ }
}
-
+ MakeToken(NumberType.Integer);
}
- PrintResult();
- return _tokens;
+
+ }
+
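+ // Scan the exponent of a real literal: 'e'/'E', an optional sign, then its digits.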
+ private void DealE()
+ {
+ Cat();
+ GetChar();
+ if (IsDigit() || _ch == '+' || _ch == '-')
+ {
+ Cat();
+ }
+ else
+ {
+ throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
+ }
+
+ // Read the digits after 'e'
+ GetChar();
+ while (!NumberShouldBreak())
+ {
+ if (IsDigit())
+ {
+ Cat();
+ GetChar();
+ }
+ else
+ {
+ throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
+ }
+ }
+ }
+
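+ // A number ends at whitespace, a delimiter other than '.', an operator character or end of input; the terminator is pushed back.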
+ bool NumberShouldBreak()
+ {
+ if (_ch == ' ' || _ch == '\n' || _ch == '\t' || _ch == '\r' || (IsDelimiter() && _ch!='.') || IsOperator() || _finish)
+ {
+ Retract();
+ return true;
+ }
+
+ return false;
+ }
+
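+ // True when the current character occurs in any operator lexeme; used to terminate numbers.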
+ private bool IsOperator()
+ {
+ foreach (var o in _operator)
+ {
+ if (o.Contains(_ch))
+ {
+ return true;
+ }
+ }
+ return false;
}
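+ // A '.' that immediately follows an identifier is classified as Dot; any other '.' becomes a Period.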
private bool IsDot()
{
- SemanticToken tokenBefore = _tokens.Last();
- if (tokenBefore.TokenType == SemanticTokenType.Identifier) return true;
+ if (_tokens.Count != 0)
+ {
+ SemanticToken tokenBefore = _tokens.Last();
+ if (tokenBefore.TokenType == SemanticTokenType.Identifier) return true;
+ }
return false;
}
- private bool IsPeriod()
- {
- SemanticToken tokenBefore = _tokens.Last();
- if (tokenBefore.TokenType == SemanticTokenType.Keyword) return true;
- return false;
- }
private void DealOther()
{
@@ -348,28 +514,8 @@ public class Lexer(string source)
MakeToken(OperatorType.Greater);
}
break;
- case ':':
- Cat();
- GetChar();
- if (_ch == '=')
- {
- // 识别 :=
- Cat();
- MakeToken(OperatorType.Assign);
- }
- else
- {
- // logically this should be recognized as a delimiter
- Cat();
- PrintError(1, _token.First, _line);
- _tokenCount[SemanticTokenType.Error]++;
- }
- break;
default:
- Cat();
- PrintError(1, _token.First, _line);
- _tokenCount[SemanticTokenType.Error]++;
- break;
+ throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos, "Illegal lexeme.");
}
}
@@ -396,14 +542,6 @@ public class Lexer(string source)
};
token = identifierSemanticToken;
break;
- case SemanticTokenType.Error:
- ErrorSemanticToken errorSemanticToken = new ErrorSemanticToken()
- {
- LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First),
- };
- token = errorSemanticToken;
- break;
-
default:
throw new ArgumentOutOfRangeException(nameof(tokenType), tokenType, null);
}
@@ -449,6 +587,32 @@ public class Lexer(string source)
Console.WriteLine(LinkedListToString(_token.First));
}
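+ // Build a number token; a '$...' hexadecimal literal is normalized to a '0x...' literal value.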
+ private void MakeToken(NumberType numberType)
+ {
+ string temp = LinkedListToString(_token.First);
+ string result;
+ if (numberType == NumberType.Hex)
+ {
+ result = string.Concat("0x", temp.AsSpan(1, temp.Length - 1));
+ }
+ else
+ {
+ result = temp;
+ }
+
+ NumberSemanticToken numberSemanticToken = new NumberSemanticToken()
+ {
+ LinePos = _line,
+ CharacterPos = _chPos,
+ LiteralValue = result,
+ NumberType = numberType
+ };
+ _tokens.Add(numberSemanticToken);
+ _tokenCount[SemanticTokenType.Number]++;
+ Console.WriteLine($"<{SemanticTokenType.Number}> <{numberType}>");
+ Console.WriteLine(LinkedListToString(_token.First));
+ }
+
private void MakeToken(OperatorType operatorType)
{
OperatorSemanticToken operatorSemanticToken = new OperatorSemanticToken()
@@ -464,88 +628,20 @@ public class Lexer(string source)
Console.WriteLine(LinkedListToString(_token.First));
}
- private void MakeToken(NumberType numberType)
- {
- NumberSemanticToken numberSemanticToken = new NumberSemanticToken()
- {
- LinePos = _line,
- CharacterPos = _chPos,
- LiteralValue = LinkedListToString(_token.First),
- NumberType = numberType
- };
- _tokens.Add(numberSemanticToken);
- _tokenCount[SemanticTokenType.Number]++;
- Console.WriteLine($"<{SemanticTokenType.Number}> <{numberType}>");
- Console.WriteLine(LinkedListToString(_token.First));
- }
-
- // Buffer filling operations
- private void FillLeftBuffer() {
- //cout << "fill left" << endl;
- for (int i = 0; i < _buffer.Length / 2; i++) {
- _buffer[i] = '$';
- }
-
- // Make sure the source string is long enough to avoid going out of range
- int lengthToCopy = Math.Min(_buffer.Length / 2 - 1, source.Length - _sourcePos);
-
- // Use Array.Copy
- Array.Copy(source.ToCharArray(), _sourcePos, _buffer, 0, lengthToCopy);
-
- _sourcePos += lengthToCopy;
-
- if (_sourcePos == source.Length) {
- eof = true;
- }
- }
-
- private void FillRightBuffer() {
- //cout << "fill right" << endl;
- for (int i = _buffer.Length / 2; i < _buffer.Length; i++) {
- _buffer[i] = '$';
- }
-
- // Make sure the source string is long enough to avoid going out of range
- int lengthToCopy = Math.Min(_buffer.Length / 2 - 1, source.Length - _sourcePos);
-
- // Use Array.Copy
- Array.Copy(source.ToCharArray(), _sourcePos, _buffer, _buffer.Length / 2, lengthToCopy);
-
- _sourcePos += lengthToCopy;
-
- if (_sourcePos == source.Length) {
- eof = true;
- }
- }
-
- private void PrintBuffer() {
- for (int i = 0; i < _buffer.Length; i++) {
- Console.WriteLine($"[{i}] {_buffer[i]}");
- }
- }
-
- void DealEof() {
- if (eof) _finish = true;
- else if (_fwdPos < _buffer.Length / 2) {
- FillRightBuffer();
- _fwdPos = _buffer.Length / 2;
- }
- else {
- FillLeftBuffer();
- // start_pos = 0;
- _fwdPos = 0;
- }
- }
-
- // Buffer read operation
+ // Character read operation
void GetChar() {
- if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos];
- _chPos++;
- if (_ch == '$') {
- DealEof();
- if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos];
+ if (_fwdPos >= 0 && _fwdPos < source.Length)
+ {
+ _ch = source[_fwdPos];
+ _chPos++;
+ _fwdPos++;
+ }
+ else if (_fwdPos == source.Length)
+ {
+ _ch = '\0';
+ _chPos++;
+ _finish = true;
}
- if (_fwdPos < _buffer.Length) _fwdPos++;
}
private void GetNbc() {
@@ -622,24 +718,25 @@ public class Lexer(string source)
{
if (delimiter.Contains(_ch))
{
- if (_ch != ':')
- {
- return true;
- }
-
- GetChar();
- if (_ch == '=')
- {
- Retract();
- return false;
- }
-
return true;
}
}
return false;
}
+ private char PeekNextChar()
+ {
+ // Check whether the next position is still within the source
+ if (_fwdPos < source.Length)
+ {
+ return source[_fwdPos];
+ }
+ return '\0';
+
+ }
+
private void PrintToken(SemanticTokenType type, LinkedListNode<char> token, uint line)
{
string tokenString = LinkedListToString(token);
diff --git a/Canon.Core/LexicalParser/SemanticToken.cs b/Canon.Core/LexicalParser/SemanticToken.cs
index 8e42917..878630b 100644
--- a/Canon.Core/LexicalParser/SemanticToken.cs
+++ b/Canon.Core/LexicalParser/SemanticToken.cs
@@ -4,12 +4,10 @@ namespace Canon.Core.LexicalParser;
using Enums;
-using System.Text;
-
/// <summary>
/// Base class for lexical tokens
/// </summary>
-public abstract class SemanticToken
+public abstract class SemanticToken : IEquatable<SemanticToken>
{
public abstract SemanticTokenType TokenType { get; }
@@ -59,7 +57,34 @@ public abstract class SemanticToken
LinePos = 0, CharacterPos = 0, LiteralValue = string.Empty
};
- public override string ToString() => LiteralValue;
+ public override string ToString()
+ {
+ return $"LinePos: {LinePos}, CharacterPos: {CharacterPos}, LiteralValue: {LiteralValue}, TokenType: {TokenType}";
+ }
+
+ public bool Equals(SemanticToken? other)
+ {
+ if (other == null)
+ return false;
+
+ return LinePos == other.LinePos &&
+ CharacterPos == other.CharacterPos &&
+ LiteralValue == other.LiteralValue &&
+ TokenType == other.TokenType;
+ }
+
+ public override bool Equals(object? obj)
+ {
+ return obj is SemanticToken semanticTokenObj && Equals(semanticTokenObj);
+ }
+
+ public override int GetHashCode()
+ {
+ return LinePos.GetHashCode() ^
+ CharacterPos.GetHashCode() ^
+ LiteralValue.GetHashCode() ^
+ TokenType.GetHashCode();
+ }
}
///
@@ -118,6 +143,11 @@ public class DelimiterSemanticToken : SemanticToken
};
return true;
}
+
+ public override int GetHashCode()
+ {
+ return base.GetHashCode() ^ this.DelimiterType.GetHashCode();
+ }
}
///
@@ -218,6 +248,11 @@ public class KeywordSemanticToken : SemanticToken
token = null;
return false;
}
+
+ public override int GetHashCode()
+ {
+ return base.GetHashCode() ^ this.KeywordType.GetHashCode();
+ }
}
///
@@ -229,12 +264,44 @@ public class OperatorSemanticToken : SemanticToken
public required OperatorType OperatorType { get; init; }
+ public static readonly Dictionary<string, OperatorType> OperatorTypes = new Dictionary<string, OperatorType>
+ {
+ { "=", OperatorType.Equal },
+ { "<>", OperatorType.NotEqual },
+ { "<", OperatorType.Less },
+ { "<=", OperatorType.LessEqual },
+ { ">", OperatorType.Greater },
+ { ">=", OperatorType.GreaterEqual },
+ { "+", OperatorType.Plus },
+ { "-", OperatorType.Minus },
+ { "*", OperatorType.Multiply },
+ { "/", OperatorType.Divide },
+ { ":=", OperatorType.Assign }
+ };
+
+ public static OperatorType GetOperatorTypeByOperator(string operatorSymbol)
+ {
+ if (OperatorTypes.TryGetValue(operatorSymbol, out var operatorType))
+ {
+ return operatorType;
+ }
+ else
+ {
+ throw new ArgumentException($"Unknown operator: {operatorSymbol}");
+ }
+ }
+
public static bool TryParse(uint linePos, uint characterPos, LinkedListNode<char> now,
out OperatorSemanticToken? token)
{
token = null;
return false;
}
+
+ public override int GetHashCode()
+ {
+ return base.GetHashCode() ^ this.OperatorType.GetHashCode();
+ }
}
///
@@ -245,65 +312,10 @@ public class NumberSemanticToken : SemanticToken
public override SemanticTokenType TokenType => SemanticTokenType.Number;
public required NumberType NumberType { get; init; }
- public double Value { get; private init; }
- public static bool TryParse(uint linePos, uint characterPos, LinkedListNode<char> now,
- out NumberSemanticToken? token)
+ public override int GetHashCode()
{
- StringBuilder buffer = new();
-
- bool hasDecimalPoint = false;
- bool hasExponent = false;
- bool hasMinusSign = false;
-
- while (now != null && (char.IsDigit(now.Value) || now.Value == '.' || now.Value == 'e' || now.Value == 'E' || now.Value == '-' || now.Value == '+'))
- {
- if (now.Value == '.')
- {
- if (hasDecimalPoint)
- {
- break;
- }
- hasDecimalPoint = true;
- }
-
- if (now.Value == 'e' || now.Value == 'E')
- {
- if (hasExponent)
- {
- break;
- }
- hasExponent = true;
- }
-
- if (now.Value == '-' || now.Value == '+')
- {
- if (hasMinusSign)
- {
- break;
- }
- hasMinusSign = true;
- }
-
- buffer.Append(now.Value);
- now = now.Next;
- }
-
- if (double.TryParse(buffer.ToString(), out double value))
- {
- token = new NumberSemanticToken
- {
- LinePos = linePos,
- CharacterPos = characterPos,
- LiteralValue = buffer.ToString(),
- Value = value,
- NumberType = hasDecimalPoint || hasExponent ? NumberType.Real : NumberType.Integer
- };
- return true;
- }
-
- token = null;
- return false;
+ return base.GetHashCode() ^ this.NumberType.GetHashCode();
}
}
diff --git a/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs b/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs
new file mode 100644
index 0000000..e37c692
--- /dev/null
+++ b/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs
@@ -0,0 +1,54 @@
+using Canon.Core.Enums;
+using Canon.Core.LexicalParser;
+using Xunit.Abstractions;
+using Canon.Core.Exceptions;
+
+namespace Canon.Tests.LexicalParserTests
+{
+ public class CharacterTypeTests
+ {
+ private readonly ITestOutputHelper _testOutputHelper;
+
+ public CharacterTypeTests(ITestOutputHelper testOutputHelper)
+ {
+ _testOutputHelper = testOutputHelper;
+ }
+
+ [Theory]
+ [InlineData("'a'", "a")]
+ [InlineData("'Hello, World!'", "Hello, World!")]
+
+ public void TestCharacterType(string input, string? expectedResult)
+ {
+ Lexer lexer = new(input);
+ if (expectedResult == null)
+ {
+ Assert.Throws<LexemeException>(() => lexer.Tokenize());
+ }
+ else
+ {
+ List<SemanticToken> tokens = lexer.Tokenize();
+ _testOutputHelper.WriteLine(tokens[0].LiteralValue);
+ Assert.Single(tokens);
+ Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType);
+ Assert.Equal(expectedResult, tokens[0].LiteralValue);
+ }
+ }
+
+ [Theory]
+ //[InlineData("'\\x'", 1, 2, LexemeErrorType.InvalidEscapeSequence)]
+ [InlineData("'This is an unclosed string literal", 1, 36, LexemeErrorType.UnclosedStringLiteral)]
+ [InlineData("'This", 1, 6, LexemeErrorType.UnclosedStringLiteral)]
+ [InlineData("x @", 1, 3, LexemeErrorType.UnknownCharacterOrString)]
+ //[InlineData("\"x\'", 1, 3, LexemeErrorType.UnclosedStringLiteral)]
+ public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
+ {
+ Lexer lexer = new(input);
+ var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+ _testOutputHelper.WriteLine(ex.ToString());
+ Assert.Equal(expectedErrorType, ex.ErrorType);
+ Assert.Equal(expectedLine, ex.Line);
+ Assert.Equal(expectedCharPosition, ex.CharPosition);
+ }
+ }
+}
diff --git a/Canon.Tests/LexicalParserTests/DelimiterTests.cs b/Canon.Tests/LexicalParserTests/DelimiterTests.cs
index 5b653db..934742f 100644
--- a/Canon.Tests/LexicalParserTests/DelimiterTests.cs
+++ b/Canon.Tests/LexicalParserTests/DelimiterTests.cs
@@ -7,7 +7,7 @@ public class DelimiterTests
{
[Theory]
[InlineData(",123", DelimiterType.Comma)]
- [InlineData(".123", DelimiterType.Period)]
+ // [InlineData(".123", DelimiterType.Period)]
[InlineData(":123", DelimiterType.Colon)]
[InlineData(";123", DelimiterType.Semicolon)]
[InlineData("(123)", DelimiterType.LeftParenthesis)]
diff --git a/Canon.Tests/LexicalParserTests/ErrorSingleTests.cs b/Canon.Tests/LexicalParserTests/ErrorSingleTests.cs
new file mode 100644
index 0000000..bb4603a
--- /dev/null
+++ b/Canon.Tests/LexicalParserTests/ErrorSingleTests.cs
@@ -0,0 +1,32 @@
+using Canon.Core.LexicalParser;
+using Canon.Core.Exceptions;
+using Xunit.Abstractions;
+using Canon.Core.Enums;
+
+namespace Canon.Tests.LexicalParserTests
+{
+ public class ErrorSingleTests
+ {
+ private readonly ITestOutputHelper _testOutputHelper;
+ public ErrorSingleTests(ITestOutputHelper testOutputHelper)
+ {
+ _testOutputHelper = testOutputHelper;
+ }
+
+ [Theory]
+ [InlineData("program main; var a: integer; begin a := 3#; end.", 1, 43, LexemeErrorType.IllegalNumberFormat)]
+ [InlineData("char c = 'abc;", 1, 15, LexemeErrorType.UnclosedStringLiteral)]
+ [InlineData("x := 10 @;", 1, 9, LexemeErrorType.UnknownCharacterOrString)]
+ [InlineData("identifier_with_special_chars@#",1, 30, LexemeErrorType.UnknownCharacterOrString)]
+ public void TestUnknownCharacterError(string pascalProgram, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
+ {
+ var lexer = new Lexer(pascalProgram);
+
+ var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+ _testOutputHelper.WriteLine(ex.ToString());
+ Assert.Equal(expectedErrorType, ex.ErrorType);
+ Assert.Equal(expectedLine, ex.Line);
+ Assert.Equal(expectedCharPosition, ex.CharPosition);
+ }
+ }
+}
diff --git a/Canon.Tests/LexicalParserTests/IndentifierTests.cs b/Canon.Tests/LexicalParserTests/IndentifierTypeTests.cs
similarity index 68%
rename from Canon.Tests/LexicalParserTests/IndentifierTests.cs
rename to Canon.Tests/LexicalParserTests/IndentifierTypeTests.cs
index a46a10b..d453be9 100644
--- a/Canon.Tests/LexicalParserTests/IndentifierTests.cs
+++ b/Canon.Tests/LexicalParserTests/IndentifierTypeTests.cs
@@ -1,6 +1,5 @@
using Canon.Core.Enums;
using Canon.Core.LexicalParser;
-using Xunit;
namespace Canon.Tests.LexicalParserTests
{
@@ -10,20 +9,15 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("identifier", true)]
[InlineData("_identifier", true)]
[InlineData("identifier123", true)]
- [InlineData("123identifier", false)]
[InlineData("identifier_with_underscores", true)]
[InlineData("IdentifierWithCamelCase", true)]
- [InlineData("identifier-with-hyphen", false)]
- [InlineData("identifier with spaces", false)]
- [InlineData("identifier_with_special_chars@#", false)]
- [InlineData("", false)]
- [InlineData(" ", false)]
- [InlineData("andand", false)]
+ [InlineData("andand", true)]
public void TestParseIdentifier(string input, bool expectedResult)
{
Lexer lexer = new(input);
List<SemanticToken> tokens = lexer.Tokenize();
+ Assert.Single(tokens);
Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier);
}
}
diff --git a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs
index fd22191..725b83b 100644
--- a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs
+++ b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs
@@ -21,6 +21,7 @@ public class KeywordTypeTests
[InlineData("for", KeywordType.For)]
[InlineData("to", KeywordType.To)]
[InlineData("do", KeywordType.Do)]
+ [InlineData("DO", KeywordType.Do)]
public void SmokeTest(string input, KeywordType type)
{
Lexer lexer = new(input);
diff --git a/Canon.Tests/LexicalParserTests/LexicalFileTests.cs b/Canon.Tests/LexicalParserTests/LexicalFileTests.cs
new file mode 100644
index 0000000..6027e35
--- /dev/null
+++ b/Canon.Tests/LexicalParserTests/LexicalFileTests.cs
@@ -0,0 +1,312 @@
+using System.Text.RegularExpressions;
+using Canon.Core.Enums;
+using Canon.Core.Exceptions;
+using Canon.Core.LexicalParser;
+using Xunit.Abstractions;
+
+namespace Canon.Tests.LexicalParserTests;
+
+public class LexicalFileTests
+{
+ private readonly ITestOutputHelper _testOutputHelper;
+
+ public LexicalFileTests(ITestOutputHelper testOutputHelper)
+ {
+ _testOutputHelper = testOutputHelper;
+ }
+
+ //TODO: positions are found by plain string matching, so one identifier must not be contained in another. Hand-write a test file that covers such containment cases.
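+ // Returns the 1-based line number and the column of the last character of the (occurrence + 1)-th match of target.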
+ private static (int, int) FindNthPosition(string pascalProgram, string target, int occurrence)
+ {
+ int lineNumber = 0;
+ (int, int) nthPosition = (0, 0);
+ int foundCount = 0;
+ occurrence = occurrence + 1;
+
+ using (StringReader sr = new StringReader(pascalProgram))
+ {
+ string line;
+ while ((line = sr.ReadLine()) != null)
+ {
+ lineNumber++;
+ int columnNumber = -1;
+
+ // line = Regex.Replace(line, "'[^']*'", "$");
+
+ while ((columnNumber = line.IndexOf(target, columnNumber + 1, StringComparison.Ordinal)) != -1)
+ {
+ foundCount++;
+ if (foundCount == occurrence)
+ {
+ nthPosition = (lineNumber, columnNumber + target.Length);
+ return nthPosition;
+ }
+ }
+ }
+ }
+
+ if (nthPosition == (0, 0))
+ {
+ throw new Exception($"'{target}' not found in program.");
+ }
+
+ return nthPosition;
+ }
+
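+ // Builds the expected token list from (literal, token type, occurrence) triples and compares it with the lexer output token by token.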
+ private void TestLexicalAnalysis(string pascalProgram, List<(string, SemanticTokenType, int)> stringLiterals)
+ {
+ var expectedTokens = new List<SemanticToken>();
+
+ foreach (var (literal, tokenType, skipCount) in stringLiterals)
+ {
+ var (line, column) = FindNthPosition(pascalProgram, literal, skipCount);
+ switch (tokenType)
+ {
+ case SemanticTokenType.Keyword:
+ expectedTokens.Add(new KeywordSemanticToken
+ {
+ LinePos = (uint)line,
+ CharacterPos = (uint)column,
+ LiteralValue = literal,
+ KeywordType = KeywordSemanticToken.GetKeywordTypeByKeyword(literal)
+ });
+ break;
+ case SemanticTokenType.Identifier:
+ expectedTokens.Add(new IdentifierSemanticToken
+ {
+ LinePos = (uint)line, CharacterPos = (uint)column, LiteralValue = literal
+ });
+ break;
+ case SemanticTokenType.Delimiter:
+ if (DelimiterSemanticToken.TryParse((uint)line, (uint)column, new LinkedListNode<char>(literal[0]),
+ out var delimiterToken))
+ {
+ if (delimiterToken != null)
+ {
+ expectedTokens.Add(delimiterToken);
+ }
+ }
+
+ break;
+ case SemanticTokenType.Operator:
+ expectedTokens.Add(new OperatorSemanticToken
+ {
+ LinePos = (uint)line,
+ CharacterPos = (uint)column,
+ LiteralValue = literal,
+ OperatorType = OperatorSemanticToken.GetOperatorTypeByOperator(literal)
+ });
+ break;
+ case SemanticTokenType.Character:
+ expectedTokens.Add(new CharacterSemanticToken
+ {
+ LinePos = (uint)line, CharacterPos = (uint)column, LiteralValue = literal
+ });
+ break;
+ case SemanticTokenType.Number:
+ expectedTokens.Add(new NumberSemanticToken
+ {
+ LinePos = (uint)line,
+ CharacterPos = (uint)column,
+ LiteralValue = literal,
+ NumberType = NumberType.Integer
+ });
+ break;
+ }
+ }
+
+ expectedTokens = expectedTokens.OrderBy(token => token.LinePos).ThenBy(token => token.CharacterPos).ToList();
+ expectedTokens = expectedTokens.Select(token =>
+ token is CharacterSemanticToken characterToken && characterToken.LiteralValue == "hello, world!"
+ ? new CharacterSemanticToken
+ {
+ LinePos = characterToken.LinePos,
+ CharacterPos = characterToken.CharacterPos + 1,
+ LiteralValue = characterToken.LiteralValue
+ }
+ : token).ToList();
+
+ var lexer = new Lexer(pascalProgram);
+ var actualTokens = lexer.Tokenize();
+ for (int i = 0; i < expectedTokens.Count; i++)
+ {
+ _testOutputHelper.WriteLine($"Expect: {expectedTokens[i]}");
+ _testOutputHelper.WriteLine($"Actual: {actualTokens[i]}");
+ _testOutputHelper.WriteLine("----");
+ Assert.Equal(expectedTokens[i], actualTokens[i]);
+ }
+
+ Assert.Equal(expectedTokens, actualTokens);
+ }
+
+ [Fact]
+ public void TestLexicalAnalysisFirst()
+ {
+ string pascalProgram = """
+ program HelloWorld;
+ var
+ message: string;
+ begin
+ message := 'hello, world!';
+ writeln(message);
+ end.
+ """;
+
+ var stringLiterals = new List<(string, SemanticTokenType, int)>
+ {
+ ("program", SemanticTokenType.Keyword, 0),
+ ("HelloWorld", SemanticTokenType.Identifier, 0),
+ (";", SemanticTokenType.Delimiter, 0),
+ ("var", SemanticTokenType.Keyword, 0),
+ ("message", SemanticTokenType.Identifier, 0),
+ (":", SemanticTokenType.Delimiter, 0),
+ ("string", SemanticTokenType.Identifier, 0),
+ (";", SemanticTokenType.Delimiter, 1),
+ ("begin", SemanticTokenType.Keyword, 0),
+ ("message", SemanticTokenType.Identifier, 1),
+ (":=", SemanticTokenType.Operator, 0),
+ ("hello, world!", SemanticTokenType.Character, 0),
+ (";", SemanticTokenType.Delimiter, 2),
+ ("writeln", SemanticTokenType.Identifier, 0),
+ ("(", SemanticTokenType.Delimiter, 0),
+ ("message", SemanticTokenType.Identifier, 2),
+ (")", SemanticTokenType.Delimiter, 0),
+ (";", SemanticTokenType.Delimiter, 3),
+ ("end", SemanticTokenType.Keyword, 0),
+ (".", SemanticTokenType.Delimiter, 0)
+ };
+ TestLexicalAnalysis(pascalProgram, stringLiterals);
+ }
+
+ [Fact]
+ public void TestLexicalAnalysisSecond()
+ {
+ string pascalProgram = """
+ program main;
+ var
+ ab: integer;
+ begin
+ ab := 3;
+ write(ab);
+ end.
+ """;
+
+ var stringLiterals = new List<(string, SemanticTokenType, int)>
+ {
+ ("program", SemanticTokenType.Keyword, 0),
+ ("main", SemanticTokenType.Identifier, 0),
+ (";", SemanticTokenType.Delimiter, 0),
+ ("var", SemanticTokenType.Keyword, 0),
+ ("ab", SemanticTokenType.Identifier, 0),
+ (":", SemanticTokenType.Delimiter, 0),
+ ("integer", SemanticTokenType.Keyword, 0),
+ (";", SemanticTokenType.Delimiter, 1),
+ ("begin", SemanticTokenType.Keyword, 0),
+ ("ab", SemanticTokenType.Identifier, 1),
+ (":=", SemanticTokenType.Operator, 0),
+ ("3", SemanticTokenType.Number, 0),
+ (";", SemanticTokenType.Delimiter, 2),
+ ("write", SemanticTokenType.Identifier, 0),
+ ("(", SemanticTokenType.Delimiter, 0),
+ ("ab", SemanticTokenType.Identifier, 2),
+ (")", SemanticTokenType.Delimiter, 0),
+ (";", SemanticTokenType.Delimiter, 3),
+ ("end", SemanticTokenType.Keyword, 0),
+ (".", SemanticTokenType.Delimiter, 0)
+ };
+ TestLexicalAnalysis(pascalProgram, stringLiterals);
+ }
+
+ // Test with a comment in the source
+ [Fact]
+ public void TestLexicalAnalysisThird()
+ {
+ string pascalProgram = """
+ {test}
+ program main;
+ var
+ ab, ba: integer;
+ begin
+ ab := 3;
+ ba := 5;
+ ab := 5;
+ write(ab + ba);
+ end.
+ """;
+
+ var stringLiterals = new List<(string, SemanticTokenType, int)>
+ {
+ ("program", SemanticTokenType.Keyword, 0),
+ ("main", SemanticTokenType.Identifier, 0),
+ (";", SemanticTokenType.Delimiter, 0),
+ ("var", SemanticTokenType.Keyword, 0),
+ ("ab", SemanticTokenType.Identifier, 0),
+ (",", SemanticTokenType.Delimiter, 0),
+ ("ba", SemanticTokenType.Identifier, 0),
+ (":", SemanticTokenType.Delimiter, 0),
+ ("integer", SemanticTokenType.Keyword, 0),
+ (";", SemanticTokenType.Delimiter, 1),
+ ("begin", SemanticTokenType.Keyword, 0),
+ ("ab", SemanticTokenType.Identifier, 1),
+ (":=", SemanticTokenType.Operator, 0),
+ ("3", SemanticTokenType.Number, 0),
+ (";", SemanticTokenType.Delimiter, 2),
+ ("ba", SemanticTokenType.Identifier, 1),
+ (":=", SemanticTokenType.Operator, 1),
+ ("5", SemanticTokenType.Number, 0),
+ (";", SemanticTokenType.Delimiter, 3),
+ ("ab", SemanticTokenType.Identifier, 2),
+ (":=", SemanticTokenType.Operator, 2),
+ ("5", SemanticTokenType.Number, 1),
+ (";", SemanticTokenType.Delimiter, 4),
+ ("write", SemanticTokenType.Identifier, 0),
+ ("(", SemanticTokenType.Delimiter, 0),
+ ("ab", SemanticTokenType.Identifier, 3),
+ ("+", SemanticTokenType.Operator, 0),
+ ("ba", SemanticTokenType.Identifier, 2),
+ (")", SemanticTokenType.Delimiter, 0),
+ (";", SemanticTokenType.Delimiter, 5),
+ ("end", SemanticTokenType.Keyword, 0),
+ (".", SemanticTokenType.Delimiter, 0)
+ };
+ TestLexicalAnalysis(pascalProgram, stringLiterals);
+ }
+
+ [Fact]
+ public void UnclosedCommentFirst()
+ {
+ string pascalProgram = """
+ (* This is an example of an unclosed comment
+ program CommentError;
+ var
+ x: integer;
+ begin
+ x := 42;
+ end.
+ """;
+ var lexer = new Lexer(pascalProgram);
+ var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+ // Print the exception information
+ _testOutputHelper.WriteLine(ex.ToString());
+ Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
+ Assert.Equal((uint)7, ex.Line);
+ Assert.Equal((uint)5, ex.CharPosition);
+ }
+
+ [Fact]
+ public void UnclosedCommentSecond()
+ {
+ string pascalProgram = """
+ {
+ This is a block comment that does not close.
+
+ program CommentNotClosed;
+ """;
+ var lexer = new Lexer(pascalProgram);
+ var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+ _testOutputHelper.WriteLine(ex.ToString());
+ Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
+ Assert.Equal((uint)4, ex.Line);
+ Assert.Equal((uint)26, ex.CharPosition);
+ }
+}
diff --git a/Canon.Tests/LexicalParserTests/NumberTests.cs b/Canon.Tests/LexicalParserTests/NumberTests.cs
index f2d7d0f..28764bf 100644
--- a/Canon.Tests/LexicalParserTests/NumberTests.cs
+++ b/Canon.Tests/LexicalParserTests/NumberTests.cs
@@ -1,46 +1,58 @@
using Canon.Core.Enums;
using Canon.Core.LexicalParser;
+using Canon.Core.Exceptions;
+using Xunit.Abstractions;
namespace Canon.Tests.LexicalParserTests
{
+
public class NumberTests
{
+ private readonly ITestOutputHelper _testOutputHelper;
+ public NumberTests(ITestOutputHelper testOutputHelper)
+ {
+ _testOutputHelper = testOutputHelper;
+ }
+
[Theory]
- [InlineData("123", 123, NumberType.Integer)]
- [InlineData("0", 0, NumberType.Integer)]
- [InlineData("-123", -123, NumberType.Integer)]
- [InlineData("1.23", 1.23, NumberType.Real)]
- [InlineData("-1.23", -1.23, NumberType.Real)]
- [InlineData("0.0", 0.0, NumberType.Real)]
- [InlineData("1e7", 1e7, NumberType.Real)]
- [InlineData("1E7", 1E7, NumberType.Real)]
- [InlineData("1.23e-7", 1.23e-7, NumberType.Real)]
- [InlineData("1.23E-7", 1.23E-7, NumberType.Real)]
- [InlineData("1234567890", 1234567890, NumberType.Integer)]
- [InlineData("1234567890.1234567890", 1234567890.1234567890, NumberType.Real)]
- [InlineData("-1234567890", -1234567890, NumberType.Integer)]
- [InlineData("-1234567890.1234567890", -1234567890.1234567890, NumberType.Real)]
- [InlineData("1e-7", 1e-7, NumberType.Real)]
- [InlineData("1E-7", 1E-7, NumberType.Real)]
- [InlineData("1E", 0, NumberType.Real, false)]
- [InlineData("abc", 0, NumberType.Integer, false)]
- [InlineData("123abc", 123, NumberType.Integer, true)]
- public void TestParseNumber(string input, double expected, NumberType expectedNumberType,
- bool expectedResult = true)
+ [InlineData("123", "123", NumberType.Integer)]
+ [InlineData("0", "0", NumberType.Integer)]
+ [InlineData("1.23", "1.23", NumberType.Real)]
+ [InlineData("0.0", "0.0", NumberType.Real)]
+ [InlineData("1e7", "1e7", NumberType.Real)]
+ [InlineData("1E7", "1E7", NumberType.Real)]
+ [InlineData("1.23e-7", "1.23e-7", NumberType.Real)]
+ [InlineData("1.23E-7", "1.23E-7", NumberType.Real)]
+ [InlineData("1234567890", "1234567890", NumberType.Integer)]
+ [InlineData("1234567890.1234567890", "1234567890.1234567890", NumberType.Real)]
+ [InlineData("1e-7", "1e-7", NumberType.Real)]
+ [InlineData("1E-7", "1E-7", NumberType.Real)]
+ [InlineData(".67",".67", NumberType.Real)]
+ [InlineData("$123", "0x123", NumberType.Hex)]
+ public void TestParseNumber(string input, string expected, NumberType expectedNumberType)
{
Lexer lexer = new(input);
List<SemanticToken> tokens = lexer.Tokenize();
-
SemanticToken token = tokens[0];
- if (!expectedResult)
- {
- Assert.NotEqual(SemanticTokenType.Keyword, token.TokenType);
- return;
- }
Assert.Equal(SemanticTokenType.Number, token.TokenType);
NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token;
Assert.Equal(expectedNumberType, numberSemanticToken.NumberType);
- Assert.Equal(expected, numberSemanticToken.Value);
+ Assert.Equal(expected, numberSemanticToken.LiteralValue);
+ }
+
+ [Theory]
+ [InlineData("1E", 1, 3, LexemeErrorType.IllegalNumberFormat)]
+ [InlineData("123abc", 1, 4, LexemeErrorType.IllegalNumberFormat)]
+ [InlineData("123.45.67", 1, 7, LexemeErrorType.IllegalNumberFormat)]
+ [InlineData("123identifier", 1, 4, LexemeErrorType.IllegalNumberFormat)]
+ public void TestParseNumberError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
+ {
+ Lexer lexer = new(input);
+ var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+ _testOutputHelper.WriteLine(ex.ToString());
+ Assert.Equal(expectedErrorType, ex.ErrorType);
+ Assert.Equal(expectedLine, ex.Line);
+ Assert.Equal(expectedCharPosition, ex.CharPosition);
}
}
}
diff --git a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs
index b90a2c3..c9fa587 100644
--- a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs
+++ b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs
@@ -6,38 +6,33 @@ namespace Canon.Tests.LexicalParserTests;
public class OperatorTypeTests
{
[Theory]
- [InlineData("+ 123", OperatorType.Plus)]
- [InlineData("+123", OperatorType.Plus)]
- [InlineData("-123", OperatorType.Minus)]
- [InlineData("*123", OperatorType.Multiply)]
- [InlineData("/123", OperatorType.Divide)]
- [InlineData("=123", OperatorType.Equal)]
- [InlineData("<123", OperatorType.Less)]
- [InlineData(">123", OperatorType.Greater)]
- [InlineData("<=123", OperatorType.LessEqual)]
- [InlineData(">=123", OperatorType.GreaterEqual)]
- [InlineData("<>123", OperatorType.NotEqual)]
- [InlineData(":=123", OperatorType.Assign)]
- public void ParseTest(string input, OperatorType result)
+ [InlineData("+ 123", OperatorType.Plus, true)]
+ [InlineData("+123", OperatorType.Plus, true)]
+ [InlineData("-123", OperatorType.Minus, true)]
+ [InlineData("*123", OperatorType.Multiply, true)]
+ [InlineData("/123", OperatorType.Divide, true)]
+ [InlineData("=123", OperatorType.Equal, true)]
+ [InlineData("<123", OperatorType.Less, true)]
+ [InlineData(">123", OperatorType.Greater, true)]
+ [InlineData("<=123", OperatorType.LessEqual, true)]
+ [InlineData(">=123", OperatorType.GreaterEqual, true)]
+ [InlineData("<>123", OperatorType.NotEqual, true)]
+ [InlineData(":=123", OperatorType.Assign, true)]
+ [InlineData("1 + 123", OperatorType.Plus, false)]
+ [InlineData("m +123", OperatorType.Plus, false)]
+ public void ParseTest(string input, OperatorType result, bool expectedResult)
{
Lexer lexer = new(input);
List<SemanticToken> tokens = lexer.Tokenize();
SemanticToken token = tokens[0];
+ if (!expectedResult)
+ {
+ Assert.NotEqual(SemanticTokenType.Operator, token.TokenType);
+ return;
+ }
Assert.Equal(SemanticTokenType.Operator, token.TokenType);
OperatorSemanticToken operatorSemanticToken = (OperatorSemanticToken)token;
Assert.Equal(result, operatorSemanticToken.OperatorType);
}
-
- [Theory]
- [InlineData("1 + 123")]
- [InlineData("m +123")]
- public void ParseFailedTest(string input)
- {
- Lexer lexer = new(input);
- List<SemanticToken> tokens = lexer.Tokenize();
-
- SemanticToken token = tokens[0];
- Assert.NotEqual(SemanticTokenType.Operator, token.TokenType);
- }
}