using Canon.Core.Abstractions;
using Canon.Core.Enums;
using Canon.Core.Exceptions;
using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Xunit.Abstractions;

namespace Canon.Tests.LexicalParserTests;
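
/// <summary>
/// Lexer tests that tokenize complete Pascal programs end to end.
/// </summary>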
public class LexicalFileTests(ITestOutputHelper testOutputHelper)
{
    private readonly ILexer _lexer = new Lexer();

    [Fact]
    public void TestLexicalAnalysisFirst()
    {
        string pascalProgram = """
            program HelloWorld;
            var
            message: char;
            begin
            message := 'h';
            writeln(message);
            end.
            """;

        IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(pascalProgram));

        ValidateSemanticTokens(tokens, [
            SemanticTokenType.Keyword,      // program
            SemanticTokenType.Identifier,   // HelloWorld
            SemanticTokenType.Delimiter,    // ;
            SemanticTokenType.Keyword,      // var
            SemanticTokenType.Identifier,   // message
            SemanticTokenType.Delimiter,    // :
            SemanticTokenType.Keyword,      // char
            SemanticTokenType.Delimiter,    // ;
            SemanticTokenType.Keyword,      // begin
            SemanticTokenType.Identifier,   // message
            SemanticTokenType.Operator,     // :=
            SemanticTokenType.Character,    // 'h'
            SemanticTokenType.Delimiter,    // ;
            SemanticTokenType.Identifier,   // writeln
            SemanticTokenType.Delimiter,    // (
            SemanticTokenType.Identifier,   // message
            SemanticTokenType.Delimiter,    // )
            SemanticTokenType.Delimiter,    // ;
            SemanticTokenType.Keyword,      // end
            SemanticTokenType.Delimiter     // .
        ]);
    }

    [Fact]
    public void TestLexicalAnalysisSecond()
    {
        string pascalProgram = """
            program main;
            var
            ab: integer;
            begin
            ab := 3;
            write(ab);
            end.
            """;

        IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(pascalProgram));

        ValidateSemanticTokens(tokens, [
            SemanticTokenType.Keyword,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Keyword,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Keyword,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Keyword,
            SemanticTokenType.Identifier,
            SemanticTokenType.Operator,
            SemanticTokenType.Number,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Keyword,
            SemanticTokenType.Delimiter
        ]);
    }

    // Test lexing a program that contains a comment
    [Fact]
    public void TestLexicalAnalysisThird()
    {
        string pascalProgram = """
            {test}
            program main;
            var
            ab, ba: integer;
            begin
            ab := 3;
            ba := 5;
            ab := 5;
            write(ab + ba);
            end.
            """;

        IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(pascalProgram));

        ValidateSemanticTokens(tokens, [
            SemanticTokenType.Keyword,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Keyword,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Keyword,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Keyword,
            SemanticTokenType.Identifier,
            SemanticTokenType.Operator,
            SemanticTokenType.Number,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Identifier,
            SemanticTokenType.Operator,
            SemanticTokenType.Number,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Identifier,
            SemanticTokenType.Operator,
            SemanticTokenType.Number,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Identifier,
            SemanticTokenType.Operator,
            SemanticTokenType.Identifier,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Delimiter,
            SemanticTokenType.Keyword,
            SemanticTokenType.Delimiter
        ]);
    }
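
    // An unterminated comment should surface as a LexemeException that records
    // the error type and the line/character position where the lexer stopped.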
    [Fact]
    public void UnclosedCommentFirst()
    {
        string pascalProgram = """
            (* This is an example of an unclosed comment
            program CommentError;
            var
            x: integer;
            begin
            x := 42;
            end.
            """;

        var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());

        // Print the exception details
        testOutputHelper.WriteLine(ex.ToString());

        Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
        Assert.Equal((uint)7, ex.Line);
        Assert.Equal((uint)4, ex.CharPosition);
    }

    [Fact]
    public void UnclosedCommentSecond()
    {
        string pascalProgram = """
            {
            This is a block comment that does not close.

            program CommentNotClosed;
            """;

        var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
        testOutputHelper.WriteLine(ex.ToString());

        Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
        Assert.Equal((uint)4, ex.Line);
        Assert.Equal((uint)25, ex.CharPosition);
    }
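
    // Properly closed comments, both (* ... *) and { ... }, should be skipped
    // so that tokenization completes without throwing.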
    [Fact]
    public void ClosedCommentFirst()
    {
        string pascalProgram = """
            program exFunction;
            var
            a, b, ret : integer;

            begin
            a := 100;
            b := 200;
            (* calling a function to get max value
            *)
            ret := a - b;

            end.
            """;

        IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
        List<SemanticToken> tokens = tokensEnumerable.ToList();
        Assert.NotNull(tokens);
    }

    [Fact]
    public void ClosedCommentSecond()
    {
        string pascalProgram = """
            program exFunction;
            var
            a, b, ret : integer;

            begin
            a := 100;
            b := 200;
            (* calling a function to get max valued *)
            ret := a - b;

            end.
            """;

        IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
        List<SemanticToken> tokens = tokensEnumerable.ToList();
        Assert.NotNull(tokens);
    }

    [Fact]
    public void ClosedCommentThird()
    {
        string pascalProgram = """
            {
            This is a block comment that does closed.
            }
            program CommentClosed;
            """;

        IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
        List<SemanticToken> tokens = tokensEnumerable.ToList();
        Assert.NotNull(tokens);
    }

    [Fact]
    public void ClosedCommentFourth()
    {
        string pascalProgram = """
            {}
            program CommentClosed;
            """;

        IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
        List<SemanticToken> tokens = tokensEnumerable.ToList();
        Assert.NotNull(tokens);
    }

    [Fact]
    public void ClosedCommentFifth()
    {
        string pascalProgram = """
            {
            }
            program CommentClosed;
            """;

        IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
        List<SemanticToken> tokens = tokensEnumerable.ToList();
        Assert.NotNull(tokens);
    }

    [Fact]
    public void ClosedCommentSixth()
    {
        string pascalProgram = """
            (**)
            """;

        IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
        List<SemanticToken> tokens = tokensEnumerable.ToList();
        Assert.NotNull(tokens);
    }
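
    /// <summary>
    /// Asserts that the lexer produced exactly the expected token types, in order,
    /// followed by the terminating <see cref="SemanticTokenType.End"/> token.
    /// </summary>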
    private static void ValidateSemanticTokens(IEnumerable<SemanticToken> actualTokens,
        IEnumerable<SemanticTokenType> expectedTypes)
    {
        List<SemanticTokenType> types = [..expectedTypes, SemanticTokenType.End];
        List<SemanticToken> tokens = actualTokens.ToList();

        Assert.Equal(types.Count, tokens.Count);
        foreach ((SemanticTokenType type, SemanticToken token) in types.Zip(tokens))
        {
            Assert.Equal(type, token.TokenType);
        }
    }
}