Canon/Canon.Tests/LexicalParserTests/LexicalFileTests.cs
Huaps c4189fd1b2 lexical-parser (#15)
add: 词法分析器剩下数字、标识符的细节处理以及错误处理

Co-authored-by: duqoo <92306417+duqoo@users.noreply.github.com>
Reviewed-on: PostGuard/Canon#15
Co-authored-by: Huaps <1183155719@qq.com>
Co-committed-by: Huaps <1183155719@qq.com>
2024-04-04 21:25:11 +08:00

313 lines
12 KiB
C#

using System.Text.RegularExpressions;
using Canon.Core.Enums;
using Canon.Core.Exceptions;
using Canon.Core.LexicalParser;
using Xunit.Abstractions;
namespace Canon.Tests.LexicalParserTests;
public class LexicalFileTests
{
/// <summary>
/// Output helper that the tests in this class write expected/actual tokens and
/// exception details to.
/// </summary>
private readonly ITestOutputHelper _testOutputHelper;

/// <summary>
/// Creates the test class and keeps the supplied output helper for later logging.
/// </summary>
/// <param name="testOutputHelper">Output helper stored in <c>_testOutputHelper</c>.</param>
public LexicalFileTests(ITestOutputHelper testOutputHelper) =>
    _testOutputHelper = testOutputHelper;
// TODO: This is plain substring matching, so a lexeme must not be contained inside another
// lexeme of the program (for example one variable name inside a longer one). Hand-write a
// test program in which such containment occurs.
/// <summary>
/// Finds the position of a given occurrence of <paramref name="target"/> inside
/// <paramref name="pascalProgram"/> using ordinal substring search, line by line.
/// </summary>
/// <param name="pascalProgram">Program text that is searched.</param>
/// <param name="target">Non-empty text to look for.</param>
/// <param name="occurrence">
/// Zero-based index of the wanted match: <c>0</c> is the first match, <c>1</c> the second, and so on.
/// Matches are counted across the whole program in reading order; overlapping matches on the
/// same line are counted separately.
/// </param>
/// <returns>
/// A tuple <c>(line, column)</c>. <c>line</c> is the 1-based line number of the match and
/// <c>column</c> is the zero-based start index of the match plus <c>target.Length</c>, i.e. the
/// 1-based column of the last character of the match.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="target"/> is null or empty.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="occurrence"/> is negative.</exception>
/// <exception cref="InvalidOperationException">
/// The program contains fewer than <c>occurrence + 1</c> matches.
/// </exception>
private static (int, int) FindNthPosition(string pascalProgram, string target, int occurrence)
{
    // An empty target "matches" at every index, so no meaningful position exists for it.
    if (string.IsNullOrEmpty(target))
    {
        throw new ArgumentException("The search target must not be empty.", nameof(target));
    }

    if (occurrence < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(occurrence), occurrence,
            "The occurrence index must not be negative.");
    }

    int matchesToSkip = occurrence;
    int lineNumber = 0;

    using var reader = new StringReader(pascalProgram);
    while (reader.ReadLine() is { } line)
    {
        lineNumber++;

        int matchStart = line.IndexOf(target, StringComparison.Ordinal);
        while (matchStart >= 0)
        {
            if (matchesToSkip == 0)
            {
                return (lineNumber, matchStart + target.Length);
            }

            matchesToSkip--;
            // Resume one character after the previous match start so overlapping matches count.
            matchStart = line.IndexOf(target, matchStart + 1, StringComparison.Ordinal);
        }
    }

    throw new InvalidOperationException($"'{target}' not found in program.");
}
/// <summary>
/// Builds the expected token list for <paramref name="pascalProgram"/> from a list of lexeme
/// descriptions, tokenizes the program with <see cref="Lexer"/> and asserts that the lexer
/// output equals the expected list.
/// </summary>
/// <param name="pascalProgram">Pascal source text handed to the lexer.</param>
/// <param name="stringLiterals">
/// One entry per expected token: the lexeme text, its token type, and the number of earlier
/// occurrences of the same text that must be skipped to reach it (passed to
/// <c>FindNthPosition</c> as the occurrence index). Entries may be given in any order; the
/// expected tokens are sorted by position before comparison.
/// </param>
/// <exception cref="InvalidOperationException">
/// An entry marked as a delimiter cannot be parsed as one.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// An entry uses a token type this helper cannot build an expected token for.
/// </exception>
private void TestLexicalAnalysis(string pascalProgram, List<(string, SemanticTokenType, int)> stringLiterals)
{
    var expectedTokens = new List<SemanticToken>();
    foreach (var (literal, tokenType, skipCount) in stringLiterals)
    {
        var (line, column) = FindNthPosition(pascalProgram, literal, skipCount);
        uint linePos = (uint)line;
        uint characterPos = (uint)column;

        switch (tokenType)
        {
            case SemanticTokenType.Keyword:
                expectedTokens.Add(new KeywordSemanticToken
                {
                    LinePos = linePos,
                    CharacterPos = characterPos,
                    LiteralValue = literal,
                    KeywordType = KeywordSemanticToken.GetKeywordTypeByKeyword(literal)
                });
                break;

            case SemanticTokenType.Identifier:
                expectedTokens.Add(new IdentifierSemanticToken
                {
                    LinePos = linePos, CharacterPos = characterPos, LiteralValue = literal
                });
                break;

            case SemanticTokenType.Delimiter:
                // Fail loudly instead of silently dropping the entry: a missing expected token
                // would otherwise only surface later as a confusing mismatch.
                if (!DelimiterSemanticToken.TryParse(linePos, characterPos,
                        new LinkedListNode<char>(literal[0]), out var delimiterToken)
                    || delimiterToken is null)
                {
                    throw new InvalidOperationException(
                        $"'{literal}' could not be parsed as a delimiter.");
                }

                expectedTokens.Add(delimiterToken);
                break;

            case SemanticTokenType.Operator:
                expectedTokens.Add(new OperatorSemanticToken
                {
                    LinePos = linePos,
                    CharacterPos = characterPos,
                    LiteralValue = literal,
                    OperatorType = OperatorSemanticToken.GetOperatorTypeByOperator(literal)
                });
                break;

            case SemanticTokenType.Character:
                expectedTokens.Add(new CharacterSemanticToken
                {
                    LinePos = linePos,
                    // The search locates the text without its quotes; the expected position is
                    // one further (presumably the closing quote). This shift used to be applied
                    // only to the literal "hello, world!" and now covers every character token.
                    CharacterPos = characterPos + 1,
                    LiteralValue = literal
                });
                break;

            case SemanticTokenType.Number:
                expectedTokens.Add(new NumberSemanticToken
                {
                    LinePos = linePos,
                    CharacterPos = characterPos,
                    LiteralValue = literal,
                    // Only integer literals can be described through this helper.
                    NumberType = NumberType.Integer
                });
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(stringLiterals), tokenType,
                    "No expected token can be built for this token type.");
        }
    }

    expectedTokens = expectedTokens
        .OrderBy(token => token.LinePos)
        .ThenBy(token => token.CharacterPos)
        .ToList();

    var lexer = new Lexer(pascalProgram);
    var actualTokens = lexer.Tokenize();

    // Zip stops at the shorter sequence, so a lexer that returns too few tokens cannot cause an
    // index error here; the length difference is reported by the final assertion instead.
    foreach (var (expected, actual) in expectedTokens.Zip(actualTokens))
    {
        _testOutputHelper.WriteLine($"Expect: {expected}");
        _testOutputHelper.WriteLine($"Actual: {actual}");
        _testOutputHelper.WriteLine("----");
        Assert.Equal(expected, actual);
    }

    Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// Tokenizes a small program that assigns the character literal <c>'hello, world!'</c> to a
/// variable and passes it to <c>writeln</c>, and checks every produced token.
/// </summary>
[Fact]
public void TestLexicalAnalysisFirst()
{
    const string source = """
        program HelloWorld;
        var
        message: string;
        begin
        message := 'hello, world!';
        writeln(message);
        end.
        """;

    // Each entry: lexeme text, expected token type, number of earlier occurrences to skip.
    List<(string, SemanticTokenType, int)> lexemes = new()
    {
        // program HelloWorld;
        ("program", SemanticTokenType.Keyword, 0),
        ("HelloWorld", SemanticTokenType.Identifier, 0),
        (";", SemanticTokenType.Delimiter, 0),
        // var
        ("var", SemanticTokenType.Keyword, 0),
        // message: string;
        ("message", SemanticTokenType.Identifier, 0),
        (":", SemanticTokenType.Delimiter, 0),
        ("string", SemanticTokenType.Identifier, 0),
        (";", SemanticTokenType.Delimiter, 1),
        // begin
        ("begin", SemanticTokenType.Keyword, 0),
        // message := 'hello, world!';
        ("message", SemanticTokenType.Identifier, 1),
        (":=", SemanticTokenType.Operator, 0),
        ("hello, world!", SemanticTokenType.Character, 0),
        (";", SemanticTokenType.Delimiter, 2),
        // writeln(message);
        ("writeln", SemanticTokenType.Identifier, 0),
        ("(", SemanticTokenType.Delimiter, 0),
        ("message", SemanticTokenType.Identifier, 2),
        (")", SemanticTokenType.Delimiter, 0),
        (";", SemanticTokenType.Delimiter, 3),
        // end.
        ("end", SemanticTokenType.Keyword, 0),
        (".", SemanticTokenType.Delimiter, 0)
    };

    TestLexicalAnalysis(source, lexemes);
}
/// <summary>
/// Tokenizes a small program that declares an integer variable, assigns <c>3</c> to it and
/// writes it, and checks every produced token.
/// </summary>
[Fact]
public void TestLexicalAnalysisSecond()
{
    const string source = """
        program main;
        var
        ab: integer;
        begin
        ab := 3;
        write(ab);
        end.
        """;

    // Each entry: lexeme text, expected token type, number of earlier occurrences to skip.
    List<(string, SemanticTokenType, int)> lexemes = new()
    {
        // program main;
        ("program", SemanticTokenType.Keyword, 0),
        ("main", SemanticTokenType.Identifier, 0),
        (";", SemanticTokenType.Delimiter, 0),
        // var
        ("var", SemanticTokenType.Keyword, 0),
        // ab: integer;
        ("ab", SemanticTokenType.Identifier, 0),
        (":", SemanticTokenType.Delimiter, 0),
        ("integer", SemanticTokenType.Keyword, 0),
        (";", SemanticTokenType.Delimiter, 1),
        // begin
        ("begin", SemanticTokenType.Keyword, 0),
        // ab := 3;
        ("ab", SemanticTokenType.Identifier, 1),
        (":=", SemanticTokenType.Operator, 0),
        ("3", SemanticTokenType.Number, 0),
        (";", SemanticTokenType.Delimiter, 2),
        // write(ab);
        ("write", SemanticTokenType.Identifier, 0),
        ("(", SemanticTokenType.Delimiter, 0),
        ("ab", SemanticTokenType.Identifier, 2),
        (")", SemanticTokenType.Delimiter, 0),
        (";", SemanticTokenType.Delimiter, 3),
        // end.
        ("end", SemanticTokenType.Keyword, 0),
        (".", SemanticTokenType.Delimiter, 0)
    };

    TestLexicalAnalysis(source, lexemes);
}
/// <summary>
/// Test with a comment: the program starts with the brace comment <c>{test}</c>, and the
/// expected token list contains no token for it.
/// </summary>
[Fact]
public void TestLexicalAnalysisThird()
{
    const string source = """
        {test}
        program main;
        var
        ab, ba: integer;
        begin
        ab := 3;
        ba := 5;
        ab := 5;
        write(ab + ba);
        end.
        """;

    // Each entry: lexeme text, expected token type, number of earlier occurrences to skip.
    List<(string, SemanticTokenType, int)> lexemes = new()
    {
        // program main;
        ("program", SemanticTokenType.Keyword, 0),
        ("main", SemanticTokenType.Identifier, 0),
        (";", SemanticTokenType.Delimiter, 0),
        // var
        ("var", SemanticTokenType.Keyword, 0),
        // ab, ba: integer;
        ("ab", SemanticTokenType.Identifier, 0),
        (",", SemanticTokenType.Delimiter, 0),
        ("ba", SemanticTokenType.Identifier, 0),
        (":", SemanticTokenType.Delimiter, 0),
        ("integer", SemanticTokenType.Keyword, 0),
        (";", SemanticTokenType.Delimiter, 1),
        // begin
        ("begin", SemanticTokenType.Keyword, 0),
        // ab := 3;
        ("ab", SemanticTokenType.Identifier, 1),
        (":=", SemanticTokenType.Operator, 0),
        ("3", SemanticTokenType.Number, 0),
        (";", SemanticTokenType.Delimiter, 2),
        // ba := 5;
        ("ba", SemanticTokenType.Identifier, 1),
        (":=", SemanticTokenType.Operator, 1),
        ("5", SemanticTokenType.Number, 0),
        (";", SemanticTokenType.Delimiter, 3),
        // ab := 5;
        ("ab", SemanticTokenType.Identifier, 2),
        (":=", SemanticTokenType.Operator, 2),
        ("5", SemanticTokenType.Number, 1),
        (";", SemanticTokenType.Delimiter, 4),
        // write(ab + ba);
        ("write", SemanticTokenType.Identifier, 0),
        ("(", SemanticTokenType.Delimiter, 0),
        ("ab", SemanticTokenType.Identifier, 3),
        ("+", SemanticTokenType.Operator, 0),
        ("ba", SemanticTokenType.Identifier, 2),
        (")", SemanticTokenType.Delimiter, 0),
        (";", SemanticTokenType.Delimiter, 5),
        // end.
        ("end", SemanticTokenType.Keyword, 0),
        (".", SemanticTokenType.Delimiter, 0)
    };

    TestLexicalAnalysis(source, lexemes);
}
/// <summary>
/// A <c>(*</c> comment that is never closed must make <c>Tokenize</c> throw a
/// <see cref="LexemeException"/> of type <c>UnclosedComment</c> reported at line 7,
/// character position 5.
/// </summary>
[Fact]
public void UnclosedCommentFirst()
{
    const string source = """
        (* This is an example of an unclosed comment
        program CommentError;
        var
        x: integer;
        begin
        x := 42;
        end.
        """;

    Lexer lexer = new(source);

    LexemeException exception = Assert.Throws<LexemeException>(() => lexer.Tokenize());

    // Print the exception details.
    _testOutputHelper.WriteLine(exception.ToString());
    Assert.Equal(LexemeErrorType.UnclosedComment, exception.ErrorType);
    Assert.Equal(7u, exception.Line);
    Assert.Equal(5u, exception.CharPosition);
}
/// <summary>
/// A <c>{</c> comment that is never closed must make <c>Tokenize</c> throw a
/// <see cref="LexemeException"/> of type <c>UnclosedComment</c> reported at line 4,
/// character position 26.
/// </summary>
[Fact]
public void UnclosedCommentSecond()
{
    const string source = """
        {
        This is a block comment that does not close.
        program CommentNotClosed;
        """;

    Lexer lexer = new(source);

    LexemeException exception = Assert.Throws<LexemeException>(() => lexer.Tokenize());

    _testOutputHelper.WriteLine(exception.ToString());
    Assert.Equal(LexemeErrorType.UnclosedComment, exception.ErrorType);
    Assert.Equal(4u, exception.Line);
    Assert.Equal(26u, exception.CharPosition);
}
}