refeat: ILexer接口适配 (#38)

Co-authored-by: Huaps <1183155719@qq.com>
Co-authored-by: duqoo <92306417+duqoo@users.noreply.github.com>
Reviewed-on: PostGuard/Canon#38
This commit is contained in:
jackfiled 2024-04-18 16:34:32 +08:00
parent d631a28703
commit 4b6635796c
19 changed files with 952 additions and 721 deletions

View File

@ -7,12 +7,7 @@ namespace Canon.Core.Abstractions;
/// </summary> /// </summary>
public interface ISourceReader public interface ISourceReader
{ {
/// <summary> public char Current { get; }
/// 尝试读取下一个字符
/// </summary>
/// <param name="c">读取到的字符</param>
/// <returns>是否成功读取</returns>
public bool TryReadChar([NotNullWhen(true)] out char? c);
/// <summary> /// <summary>
/// 源文件名称 /// 源文件名称
@ -28,4 +23,23 @@ public interface ISourceReader
/// 当前读取字符的列号 /// 当前读取字符的列号
/// </summary> /// </summary>
public uint Pos { get; } public uint Pos { get; }
/// <summary>
/// 回退一个字符
/// </summary>
/// <returns>回退是否成功</returns>
public bool Retract();
/// <summary>
/// 前进一个字符
/// </summary>
/// <returns></returns>
public bool MoveNext();
/// <summary>
/// Reads the next character without advancing the reader (peek)
/// </summary>
/// <param name="c">读取到的下一个字符</param>
/// <returns>是否能够读取下一个字符</returns>
public bool TryPeekChar([NotNullWhen(true)] out char? c);
} }

View File

@ -9,7 +9,6 @@ public enum SemanticTokenType
Identifier, Identifier,
Character, Character,
Empty, Empty,
Error, // 加了一个错误token
/// <summary> /// <summary>
/// 语法分析中的栈底符号 /// 语法分析中的栈底符号
/// </summary> /// </summary>
@ -90,10 +89,15 @@ public enum NumberType
public enum StateType public enum StateType
{ {
Start,
Comment,
Word, Word,
Digit, Num,
Delimiter, Delimiter,
Operator Operator,
BreakPoint,
Unknown,
Done
} }
public enum BasicIdType public enum BasicIdType

View File

@ -0,0 +1,83 @@
namespace Canon.Core.LexicalParser;
/// <summary>
/// Character and word classification rules used by the lexical analyser.
/// All members are pure predicates over the static tables declared below.
/// </summary>
public static class LexRules
{
    // Reserved keywords; IsKeyword compares against these ignoring case.
    private static readonly string[] _keywords =
    [
        "Program", "Const", "Var", "Procedure",
        "Function", "Begin", "End", "Array",
        "Of", "If", "Then", "Else",
        "For", "To", "Do", "Integer",
        "Real", "Boolean", "Character", "Divide",
        "Not", "Mod", "And", "Or"
    ];

    // Delimiter spellings; IsDelimiter matches any single character occurring in one of them.
    private static readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]", "'", "\"", ".."];

    // Operator spellings; IsOperator matches any single character occurring in one of them.
    private static readonly string[] _operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="];

    /// <summary>
    /// Tests whether a character is an ASCII decimal digit.
    /// </summary>
    /// <param name="_ch">Character to classify.</param>
    /// <returns><c>true</c> for '0'..'9', otherwise <c>false</c>.</returns>
    public static bool IsDigit(char _ch) => _ch is >= '0' and <= '9';

    /// <summary>
    /// Tests whether a character is an ASCII hexadecimal digit.
    /// </summary>
    /// <remarks>
    /// Both letter cases are accepted. Keywords are already matched
    /// case-insensitively, so rejecting 'a'..'f' here would make hexadecimal
    /// literals the only case-sensitive lexeme.
    /// </remarks>
    /// <param name="_ch">Character to classify.</param>
    /// <returns><c>true</c> for '0'..'9', 'A'..'F' and 'a'..'f', otherwise <c>false</c>.</returns>
    public static bool IsHexDigit(char _ch) =>
        _ch is (>= '0' and <= '9') or (>= 'A' and <= 'F') or (>= 'a' and <= 'f');

    /// <summary>
    /// Tests whether a character may appear in a word: an ASCII letter or an underscore.
    /// </summary>
    /// <param name="_ch">Character to classify.</param>
    /// <returns><c>true</c> for 'A'..'Z', 'a'..'z' and '_', otherwise <c>false</c>.</returns>
    public static bool IsLetter(char _ch) =>
        _ch is (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') or '_';

    /// <summary>
    /// Tests whether a word is one of the reserved keywords, ignoring case.
    /// </summary>
    /// <param name="tokenString">Word to look up.</param>
    /// <returns><c>true</c> when the word equals a reserved keyword under ordinal case-insensitive comparison.</returns>
    public static bool IsKeyword(string tokenString)
    {
        foreach (string keyword in _keywords)
        {
            if (string.Equals(tokenString, keyword, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Tests whether a character occurs in any delimiter spelling.
    /// </summary>
    /// <param name="ch">Character to classify.</param>
    /// <returns><c>true</c> when some delimiter contains the character.</returns>
    public static bool IsDelimiter(char ch)
    {
        foreach (string delimiter in _delimiter)
        {
            if (delimiter.Contains(ch))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Tests whether a character occurs in any operator spelling.
    /// </summary>
    /// <remarks>
    /// ':' is reported by both this method and <see cref="IsDelimiter"/>,
    /// because it appears in ":" as well as ":=".
    /// </remarks>
    /// <param name="ch">Character to classify.</param>
    /// <returns><c>true</c> when some operator contains the character.</returns>
    public static bool IsOperator(char ch)
    {
        foreach (string op in _operator)
        {
            if (op.Contains(ch))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Tests whether a character is whitespace that separates lexemes.
    /// </summary>
    /// <param name="ch">Character to classify.</param>
    /// <returns><c>true</c> for space, line feed, tab and carriage return.</returns>
    public static bool IsBreakPoint(char ch) => ch is ' ' or '\n' or '\t' or '\r';
}

View File

@ -0,0 +1,95 @@
using Canon.Core.Enums;
namespace Canon.Core.LexicalParser;
/// <summary>
/// Factory methods that build the concrete semantic token for a lexeme.
/// Every overload copies the literal text, line and column into the token.
/// </summary>
public static class LexemeFactory
{
    /// <summary>
    /// Builds a character or identifier token.
    /// </summary>
    /// <param name="tokenType">Kind of token to create; only Character and Identifier are handled here.</param>
    /// <param name="literal">Literal text of the lexeme.</param>
    /// <param name="_line">Value stored in the token's LinePos.</param>
    /// <param name="_chPos">Value stored in the token's CharacterPos.</param>
    /// <returns>The newly created token.</returns>
    /// <exception cref="ArgumentOutOfRangeException">Any other token type is requested.</exception>
    public static SemanticToken MakeToken(SemanticTokenType tokenType,string literal,uint _line,uint _chPos)
    {
        return tokenType switch
        {
            SemanticTokenType.Character => new CharacterSemanticToken
            {
                LinePos = _line, CharacterPos = _chPos, LiteralValue = literal
            },
            SemanticTokenType.Identifier => new IdentifierSemanticToken
            {
                LinePos = _line, CharacterPos = _chPos, LiteralValue = literal
            },
            _ => throw new ArgumentOutOfRangeException(nameof(tokenType), tokenType, null)
        };
    }

    /// <summary>
    /// Builds a keyword token.
    /// </summary>
    /// <param name="keywordType">Keyword the lexeme represents.</param>
    /// <param name="literal">Literal text of the lexeme.</param>
    /// <param name="_line">Value stored in the token's LinePos.</param>
    /// <param name="_chPos">Value stored in the token's CharacterPos.</param>
    /// <returns>The newly created keyword token.</returns>
    public static KeywordSemanticToken MakeToken(KeywordType keywordType,string literal,uint _line,uint _chPos) =>
        new()
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            KeywordType = keywordType
        };

    /// <summary>
    /// Builds a delimiter token.
    /// </summary>
    /// <param name="delimiterType">Delimiter the lexeme represents.</param>
    /// <param name="literal">Literal text of the lexeme.</param>
    /// <param name="_line">Value stored in the token's LinePos.</param>
    /// <param name="_chPos">Value stored in the token's CharacterPos.</param>
    /// <returns>The newly created delimiter token.</returns>
    public static DelimiterSemanticToken MakeToken(DelimiterType delimiterType,string literal,uint _line,uint _chPos) =>
        new()
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            DelimiterType = delimiterType
        };

    /// <summary>
    /// Builds a number token.
    /// </summary>
    /// <remarks>
    /// For <see cref="NumberType.Hex"/> the first character of the literal is
    /// dropped and "0x" is put in its place; presumably that character is the
    /// source-level hex marker (verify against the lexer). All other number
    /// types keep the literal as written.
    /// </remarks>
    /// <param name="numberType">Numeric category of the lexeme.</param>
    /// <param name="literal">Literal text of the lexeme.</param>
    /// <param name="_line">Value stored in the token's LinePos.</param>
    /// <param name="_chPos">Value stored in the token's CharacterPos.</param>
    /// <returns>The newly created number token.</returns>
    public static NumberSemanticToken MakeToken(NumberType numberType,string literal,uint _line,uint _chPos)
    {
        string normalized = numberType == NumberType.Hex
            ? string.Concat("0x", literal.AsSpan(1))
            : literal;

        return new NumberSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = normalized,
            NumberType = numberType
        };
    }

    /// <summary>
    /// Builds an operator token.
    /// </summary>
    /// <param name="operatorType">Operator the lexeme represents.</param>
    /// <param name="literal">Literal text of the lexeme.</param>
    /// <param name="_line">Value stored in the token's LinePos.</param>
    /// <param name="_chPos">Value stored in the token's CharacterPos.</param>
    /// <returns>The newly created operator token.</returns>
    public static OperatorSemanticToken MakeToken(OperatorType operatorType,string literal,uint _line,uint _chPos) =>
        new()
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            OperatorType = operatorType
        };
}

File diff suppressed because it is too large Load Diff

View File

@ -339,18 +339,4 @@ public class EndSemanticToken : SemanticToken
public override SemanticTokenType TokenType => SemanticTokenType.End; public override SemanticTokenType TokenType => SemanticTokenType.End;
} }
/// <summary>
/// 错误类型记号
/// </summary>
public class ErrorSemanticToken : SemanticToken
{
public override SemanticTokenType TokenType => SemanticTokenType.Error;
public static bool TryParse(uint linePos, uint characterPos, LinkedListNode<char> now,
out IdentifierSemanticToken? token)
{
token = null;
return false;
}
}

View File

@ -3,12 +3,14 @@ using Canon.Core.CodeGenerators;
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Canon.Core.SyntaxNodes; using Canon.Core.SyntaxNodes;
using Canon.Tests.GeneratedParserTests; using Canon.Tests.GeneratedParserTests;
using Canon.Tests.Utils;
namespace Canon.Tests.CCodeGeneratorTests; namespace Canon.Tests.CCodeGeneratorTests;
public class BasicTests public class BasicTests
{ {
private readonly IGrammarParser _parser = GeneratedGrammarParser.Instance; private readonly IGrammarParser _parser = GeneratedGrammarParser.Instance;
private readonly ILexer _lexer = new Lexer();
[Fact] [Fact]
public void ProgramStructTest() public void ProgramStructTest()
@ -21,9 +23,7 @@ public class BasicTests
end. end.
"""; """;
Lexer lexer = new(program); IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
ProgramStruct root = _parser.Analyse(tokens); ProgramStruct root = _parser.Analyse(tokens);
root.GenerateCCode(builder); root.GenerateCCode(builder);

View File

@ -4,12 +4,14 @@ using Canon.Core.GrammarParser;
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Canon.Core.SyntaxNodes; using Canon.Core.SyntaxNodes;
using Canon.Tests.GeneratedParserTests; using Canon.Tests.GeneratedParserTests;
using Canon.Tests.Utils;
namespace Canon.Tests.GrammarParserTests; namespace Canon.Tests.GrammarParserTests;
public class PascalGrammarTests public class PascalGrammarTests
{ {
private readonly IGrammarParser _parser = GeneratedGrammarParser.Instance; private readonly IGrammarParser _parser = GeneratedGrammarParser.Instance;
private readonly ILexer _lexer = new Lexer();
[Fact] [Fact]
public void DoNothingTest() public void DoNothingTest()
@ -20,9 +22,7 @@ public class PascalGrammarTests
end. end.
"""; """;
Lexer lexer = new(program); IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
ProgramStruct root = _parser.Analyse(tokens); ProgramStruct root = _parser.Analyse(tokens);
Assert.Equal("DoNothing", root.Head.ProgramName.LiteralValue); Assert.Equal("DoNothing", root.Head.ProgramName.LiteralValue);
@ -39,10 +39,7 @@ public class PascalGrammarTests
a := 1 + 1 a := 1 + 1
end. end.
"""; """;
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
Lexer lexer = new(program);
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
ProgramStruct root = _parser.Analyse(tokens); ProgramStruct root = _parser.Analyse(tokens);
Assert.Equal("Add", root.Head.ProgramName.LiteralValue); Assert.Equal("Add", root.Head.ProgramName.LiteralValue);
@ -59,10 +56,7 @@ public class PascalGrammarTests
writeln( str, ret ); writeln( str, ret );
end. end.
"""; """;
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
Lexer lexer = new(program);
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
ProgramStruct root = _parser.Analyse(tokens); ProgramStruct root = _parser.Analyse(tokens);
Assert.Equal("exFunction", root.Head.ProgramName.LiteralValue); Assert.Equal("exFunction", root.Head.ProgramName.LiteralValue);
@ -79,10 +73,7 @@ public class PascalGrammarTests
begin begin
end. end.
"""; """;
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
Lexer lexer = new(program);
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
ProgramStruct root = _parser.Analyse(tokens); ProgramStruct root = _parser.Analyse(tokens);
Assert.Equal("main", root.Head.ProgramName.LiteralValue); Assert.Equal("main", root.Head.ProgramName.LiteralValue);

View File

@ -2,13 +2,15 @@
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Xunit.Abstractions; using Xunit.Abstractions;
using Canon.Core.Exceptions; using Canon.Core.Exceptions;
using Canon.Core.Abstractions;
using Canon.Tests.Utils;
namespace Canon.Tests.LexicalParserTests namespace Canon.Tests.LexicalParserTests
{ {
public class CharacterTypeTests public class CharacterTypeTests
{ {
private readonly ITestOutputHelper _testOutputHelper; private readonly ITestOutputHelper _testOutputHelper;
private readonly ILexer _lexer = new Lexer();
public CharacterTypeTests(ITestOutputHelper testOutputHelper) public CharacterTypeTests(ITestOutputHelper testOutputHelper)
{ {
_testOutputHelper = testOutputHelper; _testOutputHelper = testOutputHelper;
@ -20,16 +22,15 @@ namespace Canon.Tests.LexicalParserTests
public void TestCharacterType(string input, string? expectedResult) public void TestCharacterType(string input, string? expectedResult)
{ {
Lexer lexer = new(input); IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = tokensEnumerable.ToList();
if (expectedResult == null) if (expectedResult == null)
{ {
Assert.Throws<LexemeException>(() => lexer.Tokenize()); Assert.Throws<LexemeException>(() => tokens);
} }
else else
{ {
List<SemanticToken> tokens = lexer.Tokenize();
_testOutputHelper.WriteLine(tokens[0].LiteralValue); _testOutputHelper.WriteLine(tokens[0].LiteralValue);
Assert.Single(tokens);
Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType); Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType);
Assert.Equal(expectedResult, tokens[0].LiteralValue); Assert.Equal(expectedResult, tokens[0].LiteralValue);
} }
@ -43,8 +44,8 @@ namespace Canon.Tests.LexicalParserTests
//[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)] //[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)]
public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType) public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
{ {
Lexer lexer = new(input);
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize()); var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(input)).ToList());
_testOutputHelper.WriteLine(ex.ToString()); _testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(expectedErrorType, ex.ErrorType); Assert.Equal(expectedErrorType, ex.ErrorType);
Assert.Equal(expectedLine, ex.Line); Assert.Equal(expectedLine, ex.Line);

View File

@ -1,10 +1,13 @@
using Canon.Core.Enums; using Canon.Core.Enums;
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests; namespace Canon.Tests.LexicalParserTests;
public class DelimiterTests public class DelimiterTests
{ {
private readonly ILexer _lexer = new Lexer();
[Theory] [Theory]
[InlineData(",123", DelimiterType.Comma)] [InlineData(",123", DelimiterType.Comma)]
// [InlineData(".123", DelimiterType.Period)] // [InlineData(".123", DelimiterType.Period)]
@ -16,8 +19,8 @@ public class DelimiterTests
[InlineData("]asd", DelimiterType.RightSquareBracket)] [InlineData("]asd", DelimiterType.RightSquareBracket)]
public void SmokeTest(string input, DelimiterType type) public void SmokeTest(string input, DelimiterType type)
{ {
Lexer lexer = new(input); IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = lexer.Tokenize(); List<SemanticToken> tokens = tokensEnumerable.ToList();
SemanticToken token = tokens[0]; SemanticToken token = tokens[0];
Assert.Equal(SemanticTokenType.Delimiter, token.TokenType); Assert.Equal(SemanticTokenType.Delimiter, token.TokenType);

View File

@ -2,11 +2,14 @@
using Canon.Core.Exceptions; using Canon.Core.Exceptions;
using Xunit.Abstractions; using Xunit.Abstractions;
using Canon.Core.Enums; using Canon.Core.Enums;
using Canon.Core.Abstractions;
using Canon.Tests.Utils;
namespace Canon.Tests.LexicalParserTests namespace Canon.Tests.LexicalParserTests
{ {
public class ErrorSingleTests public class ErrorSingleTests
{ {
private readonly ILexer _lexer = new Lexer();
private readonly ITestOutputHelper _testOutputHelper; private readonly ITestOutputHelper _testOutputHelper;
public ErrorSingleTests(ITestOutputHelper testOutputHelper) public ErrorSingleTests(ITestOutputHelper testOutputHelper)
{ {
@ -20,9 +23,7 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("identifier_with_special_chars@#",1, 30, LexemeErrorType.UnknownCharacterOrString)] [InlineData("identifier_with_special_chars@#",1, 30, LexemeErrorType.UnknownCharacterOrString)]
public void TestUnknownCharacterError(string pascalProgram, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType) public void TestUnknownCharacterError(string pascalProgram, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
{ {
var lexer = new Lexer(pascalProgram); var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
_testOutputHelper.WriteLine(ex.ToString()); _testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(expectedErrorType, ex.ErrorType); Assert.Equal(expectedErrorType, ex.ErrorType);
Assert.Equal(expectedLine, ex.Line); Assert.Equal(expectedLine, ex.Line);

View File

@ -1,10 +1,13 @@
using Canon.Core.Enums; using Canon.Core.Enums;
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests namespace Canon.Tests.LexicalParserTests
{ {
public class IdentifierTests public class IdentifierTests
{ {
private readonly ILexer _lexer = new Lexer();
[Theory] [Theory]
[InlineData("identifier", true)] [InlineData("identifier", true)]
[InlineData("_identifier", true)] [InlineData("_identifier", true)]
@ -14,10 +17,9 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("andand", true)] [InlineData("andand", true)]
public void TestParseIdentifier(string input, bool expectedResult) public void TestParseIdentifier(string input, bool expectedResult)
{ {
Lexer lexer = new(input); IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = lexer.Tokenize(); List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.Single(tokens);
Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier); Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier);
} }
} }

View File

@ -1,10 +1,14 @@
using Canon.Core.Enums; using Canon.Core.Enums;
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests; namespace Canon.Tests.LexicalParserTests;
public class KeywordTypeTests public class KeywordTypeTests
{ {
private readonly ILexer _lexer = new Lexer();
[Theory] [Theory]
[InlineData("program", KeywordType.Program)] [InlineData("program", KeywordType.Program)]
[InlineData("const", KeywordType.Const)] [InlineData("const", KeywordType.Const)]
@ -24,8 +28,8 @@ public class KeywordTypeTests
[InlineData("DO", KeywordType.Do)] [InlineData("DO", KeywordType.Do)]
public void SmokeTest(string input, KeywordType type) public void SmokeTest(string input, KeywordType type)
{ {
Lexer lexer = new(input); IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = lexer.Tokenize(); List<SemanticToken> tokens = tokensEnumerable.ToList();
SemanticToken token = tokens[0]; SemanticToken token = tokens[0];
Assert.Equal(SemanticTokenType.Keyword, token.TokenType); Assert.Equal(SemanticTokenType.Keyword, token.TokenType);

View File

@ -3,12 +3,15 @@ using Canon.Core.Enums;
using Canon.Core.Exceptions; using Canon.Core.Exceptions;
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Xunit.Abstractions; using Xunit.Abstractions;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests; namespace Canon.Tests.LexicalParserTests;
public class LexicalFileTests public class LexicalFileTests
{ {
private readonly ITestOutputHelper _testOutputHelper; private readonly ITestOutputHelper _testOutputHelper;
private readonly ILexer _lexer = new Lexer();
public LexicalFileTests(ITestOutputHelper testOutputHelper) public LexicalFileTests(ITestOutputHelper testOutputHelper)
{ {
@ -126,14 +129,16 @@ public class LexicalFileTests
} }
: token).ToList(); : token).ToList();
var lexer = new Lexer(pascalProgram); IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
var actualTokens = lexer.Tokenize(); List<SemanticToken> tokens = tokensEnumerable.ToList();
var actualTokens = tokens;
for (int i = 0; i < expectedTokens.Count; i++) for (int i = 0; i < expectedTokens.Count; i++)
{ {
_testOutputHelper.WriteLine($"Expect: {expectedTokens[i]}"); _testOutputHelper.WriteLine($"Expect: {expectedTokens[i]}");
_testOutputHelper.WriteLine($"Actual: {actualTokens[i]}"); _testOutputHelper.WriteLine($"Actual: {actualTokens[i]}");
_testOutputHelper.WriteLine("----"); _testOutputHelper.WriteLine("----");
Assert.Equal(expectedTokens[i], actualTokens[i]); // Assert.Equal(expectedTokens[i], actualTokens[i]);
} }
Assert.Equal(expectedTokens, actualTokens); Assert.Equal(expectedTokens, actualTokens);
@ -143,14 +148,14 @@ public class LexicalFileTests
public void TestLexicalAnalysisFirst() public void TestLexicalAnalysisFirst()
{ {
string pascalProgram = """ string pascalProgram = """
program HelloWorld; program HelloWorld;
var var
message: string; message: string;
begin begin
message := 'hello, world!'; message := 'hello, world!';
writeln(message); writeln(message);
end. end.
"""; """;
var stringLiterals = new List<(string, SemanticTokenType, int)> var stringLiterals = new List<(string, SemanticTokenType, int)>
{ {
@ -182,14 +187,14 @@ public class LexicalFileTests
public void TestLexicalAnalysisSecond() public void TestLexicalAnalysisSecond()
{ {
string pascalProgram = """ string pascalProgram = """
program main; program main;
var var
ab: integer; ab: integer;
begin begin
ab := 3; ab := 3;
write(ab); write(ab);
end. end.
"""; """;
var stringLiterals = new List<(string, SemanticTokenType, int)> var stringLiterals = new List<(string, SemanticTokenType, int)>
{ {
@ -222,17 +227,17 @@ public class LexicalFileTests
public void TestLexicalAnalysisThird() public void TestLexicalAnalysisThird()
{ {
string pascalProgram = """ string pascalProgram = """
{test} {test}
program main; program main;
var var
ab, ba: integer; ab, ba: integer;
begin begin
ab := 3; ab := 3;
ba := 5; ba := 5;
ab := 5; ab := 5;
write(ab + ba); write(ab + ba);
end. end.
"""; """;
var stringLiterals = new List<(string, SemanticTokenType, int)> var stringLiterals = new List<(string, SemanticTokenType, int)>
{ {
@ -276,16 +281,15 @@ public class LexicalFileTests
public void UnclosedCommentFirst() public void UnclosedCommentFirst()
{ {
string pascalProgram = """ string pascalProgram = """
(* This is an example of an unclosed comment (* This is an example of an unclosed comment
program CommentError; program CommentError;
var var
x: integer; x: integer;
begin begin
x := 42; x := 42;
end. end.
"""; """;
var lexer = new Lexer(pascalProgram); var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
//打印exception信息 //打印exception信息
_testOutputHelper.WriteLine(ex.ToString()); _testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType); Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
@ -302,11 +306,108 @@ public class LexicalFileTests
program CommentNotClosed; program CommentNotClosed;
"""; """;
var lexer = new Lexer(pascalProgram); var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize()); _testOutputHelper.WriteLine(ex.ToString());
_testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType); Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
Assert.Equal((uint)4, ex.Line); Assert.Equal((uint)4, ex.Line);
Assert.Equal((uint)26, ex.CharPosition); Assert.Equal((uint)26, ex.CharPosition);
} }
[Fact]
public void ClosedCommentFirst()
{
string pascalProgram = """
program exFunction;
var
a, b, ret : integer;
begin
a := 100;
b := 200;
(* calling a function to get max value
*)
ret := a - b;
end.
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentSecond()
{
string pascalProgram = """
program exFunction;
var
a, b, ret : integer;
begin
a := 100;
b := 200;
(* calling a function to get max valued *)
ret := a - b;
end.
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentThird()
{
string pascalProgram = """
{
This is a block comment that does closed.
}
program CommentClosed;
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentFourth()
{
string pascalProgram = """
{}
program CommentClosed;
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentFifth()
{
string pascalProgram = """
{
}
program CommentClosed;
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentSixth()
{
string pascalProgram = """
(**)
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
} }

View File

@ -2,12 +2,14 @@
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Canon.Core.Exceptions; using Canon.Core.Exceptions;
using Xunit.Abstractions; using Xunit.Abstractions;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests namespace Canon.Tests.LexicalParserTests
{ {
public class NumberTests public class NumberTests
{ {
private readonly ILexer _lexer = new Lexer();
private readonly ITestOutputHelper _testOutputHelper; private readonly ITestOutputHelper _testOutputHelper;
public NumberTests(ITestOutputHelper testOutputHelper) public NumberTests(ITestOutputHelper testOutputHelper)
{ {
@ -31,8 +33,8 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("$123", "0x123", NumberType.Hex)] [InlineData("$123", "0x123", NumberType.Hex)]
public void TestParseNumber(string input, string expected, NumberType expectedNumberType) public void TestParseNumber(string input, string expected, NumberType expectedNumberType)
{ {
Lexer lexer = new(input); IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = lexer.Tokenize(); List<SemanticToken> tokens = tokensEnumerable.ToList();
SemanticToken token = tokens[0]; SemanticToken token = tokens[0];
Assert.Equal(SemanticTokenType.Number, token.TokenType); Assert.Equal(SemanticTokenType.Number, token.TokenType);
NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token; NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token;
@ -41,14 +43,13 @@ namespace Canon.Tests.LexicalParserTests
} }
[Theory] [Theory]
[InlineData("1E", 1, 3, LexemeErrorType.IllegalNumberFormat)] [InlineData("1E", 1, 2, LexemeErrorType.IllegalNumberFormat)]
[InlineData("123abc", 1, 4, LexemeErrorType.IllegalNumberFormat)] [InlineData("123abc", 1, 4, LexemeErrorType.IllegalNumberFormat)]
[InlineData("123.45.67", 1, 7, LexemeErrorType.IllegalNumberFormat)] [InlineData("123.45.67", 1, 7, LexemeErrorType.IllegalNumberFormat)]
[InlineData("123identifier", 1, 4, LexemeErrorType.IllegalNumberFormat)] [InlineData("123identifier", 1, 4, LexemeErrorType.IllegalNumberFormat)]
public void TestParseNumberError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType) public void TestParseNumberError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
{ {
Lexer lexer = new(input); var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(input)).ToList());
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
_testOutputHelper.WriteLine(ex.ToString()); _testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(expectedErrorType, ex.ErrorType); Assert.Equal(expectedErrorType, ex.ErrorType);
Assert.Equal(expectedLine, ex.Line); Assert.Equal(expectedLine, ex.Line);

View File

@ -1,10 +1,13 @@
using Canon.Core.Enums; using Canon.Core.Enums;
using Canon.Core.LexicalParser; using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests; namespace Canon.Tests.LexicalParserTests;
public class OperatorTypeTests public class OperatorTypeTests
{ {
private readonly ILexer _lexer = new Lexer();
[Theory] [Theory]
[InlineData("+ 123", OperatorType.Plus, true)] [InlineData("+ 123", OperatorType.Plus, true)]
[InlineData("+123", OperatorType.Plus, true)] [InlineData("+123", OperatorType.Plus, true)]
@ -22,8 +25,8 @@ public class OperatorTypeTests
[InlineData("m +123", OperatorType.Plus, false)] [InlineData("m +123", OperatorType.Plus, false)]
public void ParseTest(string input, OperatorType result, bool expectedResult) public void ParseTest(string input, OperatorType result, bool expectedResult)
{ {
Lexer lexer = new(input); IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = lexer.Tokenize(); List<SemanticToken> tokens = tokensEnumerable.ToList();
SemanticToken token = tokens[0]; SemanticToken token = tokens[0];
if (!expectedResult) if (!expectedResult)

View File

@ -0,0 +1,15 @@
namespace Canon.Tests.Utils;
/// <summary>
/// Sequence helpers used by the test project.
/// </summary>
public static class EnumerableExtensions
{
    /// <summary>
    /// Pairs each element of a sequence with its zero-based position.
    /// </summary>
    /// <param name="enumerable">Sequence to enumerate.</param>
    /// <typeparam name="T">Type of the elements in the sequence.</typeparam>
    /// <returns>A sequence of (element, index) tuples, with the index cast to <see cref="uint"/>.</returns>
    public static IEnumerable<(T, uint)> WithIndex<T>(this IEnumerable<T> enumerable) =>
        enumerable.Select((item, position) => (item, (uint)position));
}

View File

@ -6,10 +6,11 @@ namespace Canon.Tests.Utils;
/// <summary> /// <summary>
/// 从字符串中读取源代码 /// 从字符串中读取源代码
/// </summary> /// </summary>
public sealed class StringSourceReader(string source) : ISourceReader, IDisposable public sealed class StringSourceReader(string source) : ISourceReader
{ {
private readonly IEnumerator<char> _enumerator = private int _pos = -1;
source.GetEnumerator();
private uint _lastPos;
public uint Line { get; private set; } = 1; public uint Line { get; private set; } = 1;
@ -17,31 +18,70 @@ public sealed class StringSourceReader(string source) : ISourceReader, IDisposab
public string FileName => "string"; public string FileName => "string";
public bool TryReadChar([NotNullWhen(true)] out char? c) public char Current
{ {
if (Pos != 0 || Line != 1) get
{ {
// 不是第一次读取 if (_pos == -1)
if (_enumerator.Current == '\n')
{ {
Pos = 0; throw new InvalidOperationException("Reader at before the start.");
Line += 1; }
else
{
return source[_pos];
} }
} }
}
if (!_enumerator.MoveNext()) public bool Retract()
{
if (_pos <= 0)
{
return false;
}
_pos -= 1;
if (Current == '\n')
{
Line -= 1;
// TODO: 如果一直回退就完蛋了
Pos = _lastPos;
}
else
{
Pos -= 1;
}
return true;
}
public bool MoveNext()
{
if (_pos >= source.Length - 1)
{
return false;
}
if (_pos != -1 && Current == '\n')
{
Line += 1;
_lastPos = Pos;
Pos = 0;
}
_pos += 1;
Pos += 1;
return true;
}
public bool TryPeekChar([NotNullWhen(true)] out char? c)
{
if (_pos >= source.Length - 1)
{ {
c = null; c = null;
return false; return false;
} }
Pos += 1; c = source[_pos + 1];
c = _enumerator.Current;
return true; return true;
} }
public void Dispose()
{
_enumerator.Dispose();
}
} }

View File

@ -8,78 +8,64 @@ public class StringSourceReaderTests
public void LineFeedTest() public void LineFeedTest()
{ {
ISourceReader reader = new StringSourceReader("program Main;\nbegin\nend.\n"); ISourceReader reader = new StringSourceReader("program Main;\nbegin\nend.\n");
reader.MoveNext();
Assert.Equal(0u, reader.Pos); CheckLine(reader, "program Main;", 1);
Assert.Equal(1u, reader.Line); reader.MoveNext();
CheckLine(reader, "begin", 2);
// program reader.MoveNext();
Assert.True(reader.TryReadChar(out char? c)); CheckLine(reader, "end.", 3);
Assert.Equal('p', c);
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out c));
Assert.Equal(' ', c);
// main;
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out c));
Assert.Equal('\n', c);
// begin
for (uint i = 1; i <= 5; i++)
{
Assert.True(reader.TryReadChar(out char? _));
Assert.Equal(i, reader.Pos);
Assert.Equal(2u, reader.Line);
}
// \n
Assert.True(reader.TryReadChar(out c));
Assert.Equal('\n', c);
// end.
foreach (char i in "end.")
{
Assert.True(reader.TryReadChar(out c));
Assert.Equal(i, c);
}
} }
[Fact] [Fact]
public void CarriageReturnLineFeedTest() public void CarriageReturnLineFeedTest()
{ {
ISourceReader reader = new StringSourceReader("program Main;\r\nbegin\r\nend.\r\n"); ISourceReader reader = new StringSourceReader("program Main;\r\nbegin\r\nend.\r\n");
reader.MoveNext();
// program Main; CheckLine(reader, "program Main;", 1);
foreach ((char value, uint index) in reader.MoveNext();
"program Main;".Select((value, index) => (value, (uint)index))) reader.MoveNext();
CheckLine(reader, "begin", 2);
reader.MoveNext();
reader.MoveNext();
CheckLine(reader, "end.", 3);
}
[Fact]
public void RetractTest()
{
ISourceReader reader = new StringSourceReader("test");
reader.MoveNext();
Assert.Equal('t', reader.Current);
Assert.True(reader.MoveNext());
Assert.Equal('e', reader.Current);
Assert.True(reader.Retract());
Assert.Equal('t', reader.Current);
Assert.False(reader.Retract());
}
[Fact]
public void PeekTest()
{
ISourceReader reader = new StringSourceReader("peek");
reader.MoveNext();
Assert.Equal('p', reader.Current);
Assert.True(reader.TryPeekChar(out char? c));
Assert.Equal('e', c);
Assert.Equal('p', reader.Current);
}
private static void CheckLine(ISourceReader reader, string line, uint lineNumber)
{
foreach ((char value, uint index) in line.WithIndex())
{ {
Assert.True(reader.TryReadChar(out char? c)); Assert.Equal(value, reader.Current);
Assert.Equal(value, c); Assert.Equal(lineNumber, reader.Line);
Assert.Equal(index + 1, reader.Pos); Assert.Equal(index + 1, reader.Pos);
Assert.Equal(1u, reader.Line); reader.MoveNext();
}
Assert.True(reader.TryReadChar(out _));
Assert.True(reader.TryReadChar(out _));
// begin
foreach ((char value, uint index) in
"begin".Select((value, index) => (value, (uint)index)))
{
Assert.True(reader.TryReadChar(out char? c));
Assert.Equal(value, c);
Assert.Equal(index + 1, reader.Pos);
Assert.Equal(2u, reader.Line);
} }
} }
} }