refactor: ILexer接口适配 (#38)

Co-authored-by: Huaps <1183155719@qq.com>
Co-authored-by: duqoo <92306417+duqoo@users.noreply.github.com>
Reviewed-on: PostGuard/Canon#38
This commit is contained in:
jackfiled 2024-04-18 16:34:32 +08:00
parent d631a28703
commit 4b6635796c
19 changed files with 952 additions and 721 deletions

View File

@ -7,12 +7,7 @@ namespace Canon.Core.Abstractions;
/// </summary>
public interface ISourceReader
{
/// <summary>
/// 尝试读取下一个字符
/// </summary>
/// <param name="c">读取到的字符</param>
/// <returns>是否成功读取</returns>
public bool TryReadChar([NotNullWhen(true)] out char? c);
public char Current { get; }
/// <summary>
/// 源文件名称
@ -28,4 +23,23 @@ public interface ISourceReader
/// 当前读取字符的列号
/// </summary>
public uint Pos { get; }
/// <summary>
/// 回退一个字符
/// </summary>
/// <returns>回退是否成功</returns>
public bool Retract();
/// <summary>
/// 前进一个字符
/// </summary>
/// <returns></returns>
public bool MoveNext();
/// <summary>
/// 读取下一个字符但不移进
/// </summary>
/// <param name="c">读取到的下一个字符</param>
/// <returns>是否能够读取下一个字符</returns>
public bool TryPeekChar([NotNullWhen(true)] out char? c);
}

View File

@ -9,7 +9,6 @@ public enum SemanticTokenType
Identifier,
Character,
Empty,
Error, // 加了一个错误token
/// <summary>
/// 语法分析中的栈底符号
/// </summary>
@ -90,10 +89,15 @@ public enum NumberType
public enum StateType
{
Start,
Comment,
Word,
Digit,
Num,
Delimiter,
Operator
Operator,
BreakPoint,
Unknown,
Done
}
public enum BasicIdType

View File

@ -0,0 +1,83 @@
namespace Canon.Core.LexicalParser;
/// <summary>
/// Character and word classification rules shared by the lexer.
/// </summary>
public static class LexRules
{
    // Reserved keywords; matched case-insensitively by IsKeyword.
    private static readonly string[] _keywords =
    [
        "Program", "Const", "Var", "Procedure",
        "Function", "Begin", "End", "Array",
        "Of", "If", "Then", "Else",
        "For", "To", "Do", "Integer",
        "Real", "Boolean", "Character", "Divide",
        "Not", "Mod", "And", "Or"
    ];

    // Delimiters; multi-character entries (e.g. "..") match any character they contain.
    private static readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]", "'", "\"", ".."];

    // Operators; multi-character entries (e.g. ":=") match any character they contain.
    private static readonly string[] _operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="];

    /// <summary>
    /// Returns true if the character is a decimal digit ('0'..'9').
    /// </summary>
    public static bool IsDigit(char _ch) => _ch is >= '0' and <= '9';

    /// <summary>
    /// Returns true if the character is a hexadecimal digit.
    /// NOTE(review): only uppercase 'A'..'F' are accepted, not 'a'..'f' — confirm intended.
    /// </summary>
    public static bool IsHexDigit(char _ch) =>
        _ch is (>= '0' and <= '9') or (>= 'A' and <= 'F');

    /// <summary>
    /// Returns true if the character can start or continue an identifier
    /// (ASCII letter or underscore).
    /// </summary>
    public static bool IsLetter(char _ch) =>
        _ch is (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') or '_';

    /// <summary>
    /// Returns true if the given word is a reserved keyword (case-insensitive).
    /// </summary>
    public static bool IsKeyword(string tokenString) =>
        _keywords.Any(k => string.Equals(tokenString, k, StringComparison.OrdinalIgnoreCase));

    /// <summary>
    /// Returns true if the character occurs in any delimiter lexeme.
    /// </summary>
    public static bool IsDelimiter(char ch) => _delimiter.Any(d => d.Contains(ch));

    /// <summary>
    /// Returns true if the character occurs in any operator lexeme.
    /// </summary>
    public static bool IsOperator(char ch) => _operator.Any(o => o.Contains(ch));

    /// <summary>
    /// Returns true if the character is whitespace that separates tokens
    /// (space, newline, tab or carriage return).
    /// </summary>
    public static bool IsBreakPoint(char ch) => ch is ' ' or '\n' or '\t' or '\r';
}

View File

@ -0,0 +1,95 @@
using Canon.Core.Enums;
namespace Canon.Core.LexicalParser;
/// <summary>
/// Factory helpers that build concrete <c>SemanticToken</c> instances
/// from a lexeme's literal text and source position.
/// </summary>
public static class LexemeFactory
{
    /// <summary>
    /// Creates a character or identifier token.
    /// </summary>
    /// <param name="tokenType">Must be <c>Character</c> or <c>Identifier</c>.</param>
    /// <param name="literal">The lexeme text.</param>
    /// <param name="_line">1-based source line of the lexeme.</param>
    /// <param name="_chPos">Column position of the lexeme.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown for any other <see cref="SemanticTokenType"/>.
    /// </exception>
    public static SemanticToken MakeToken(SemanticTokenType tokenType,string literal,uint _line,uint _chPos)
    {
        // Switch expression: no nullable temporary needed, every arm yields a token or throws.
        return tokenType switch
        {
            SemanticTokenType.Character => new CharacterSemanticToken
            {
                LinePos = _line, CharacterPos = _chPos, LiteralValue = literal,
            },
            SemanticTokenType.Identifier => new IdentifierSemanticToken
            {
                LinePos = _line, CharacterPos = _chPos, LiteralValue = literal,
            },
            _ => throw new ArgumentOutOfRangeException(nameof(tokenType), tokenType, null)
        };
    }

    /// <summary>
    /// Creates a keyword token.
    /// </summary>
    public static KeywordSemanticToken MakeToken(KeywordType keywordType,string literal,uint _line,uint _chPos)
    {
        return new KeywordSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            KeywordType = keywordType
        };
    }

    /// <summary>
    /// Creates a delimiter token.
    /// </summary>
    public static DelimiterSemanticToken MakeToken(DelimiterType delimiterType,string literal,uint _line,uint _chPos)
    {
        return new DelimiterSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            DelimiterType = delimiterType
        };
    }

    /// <summary>
    /// Creates a number token. Hexadecimal literals arrive with a one-character
    /// prefix (presumably Pascal's '$' — confirm against the lexer) which is
    /// replaced by the C-style "0x" prefix in the stored literal value.
    /// </summary>
    public static NumberSemanticToken MakeToken(NumberType numberType,string literal,uint _line,uint _chPos)
    {
        // For hex numbers, drop the leading prefix character and prepend "0x".
        string result = numberType == NumberType.Hex
            ? string.Concat("0x", literal.AsSpan(1))
            : literal;

        return new NumberSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = result,
            NumberType = numberType
        };
    }

    /// <summary>
    /// Creates an operator token.
    /// </summary>
    public static OperatorSemanticToken MakeToken(OperatorType operatorType,string literal,uint _line,uint _chPos)
    {
        return new OperatorSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            OperatorType = operatorType
        };
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -339,18 +339,4 @@ public class EndSemanticToken : SemanticToken
public override SemanticTokenType TokenType => SemanticTokenType.End;
}
/// <summary>
/// 错误类型记号
/// </summary>
public class ErrorSemanticToken : SemanticToken
{
public override SemanticTokenType TokenType => SemanticTokenType.Error;
public static bool TryParse(uint linePos, uint characterPos, LinkedListNode<char> now,
out IdentifierSemanticToken? token)
{
token = null;
return false;
}
}

View File

@ -3,12 +3,14 @@ using Canon.Core.CodeGenerators;
using Canon.Core.LexicalParser;
using Canon.Core.SyntaxNodes;
using Canon.Tests.GeneratedParserTests;
using Canon.Tests.Utils;
namespace Canon.Tests.CCodeGeneratorTests;
public class BasicTests
{
private readonly IGrammarParser _parser = GeneratedGrammarParser.Instance;
private readonly ILexer _lexer = new Lexer();
[Fact]
public void ProgramStructTest()
@ -21,9 +23,7 @@ public class BasicTests
end.
""";
Lexer lexer = new(program);
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
ProgramStruct root = _parser.Analyse(tokens);
root.GenerateCCode(builder);

View File

@ -4,12 +4,14 @@ using Canon.Core.GrammarParser;
using Canon.Core.LexicalParser;
using Canon.Core.SyntaxNodes;
using Canon.Tests.GeneratedParserTests;
using Canon.Tests.Utils;
namespace Canon.Tests.GrammarParserTests;
public class PascalGrammarTests
{
private readonly IGrammarParser _parser = GeneratedGrammarParser.Instance;
private readonly ILexer _lexer = new Lexer();
[Fact]
public void DoNothingTest()
@ -20,9 +22,7 @@ public class PascalGrammarTests
end.
""";
Lexer lexer = new(program);
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
ProgramStruct root = _parser.Analyse(tokens);
Assert.Equal("DoNothing", root.Head.ProgramName.LiteralValue);
@ -39,10 +39,7 @@ public class PascalGrammarTests
a := 1 + 1
end.
""";
Lexer lexer = new(program);
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
ProgramStruct root = _parser.Analyse(tokens);
Assert.Equal("Add", root.Head.ProgramName.LiteralValue);
@ -59,10 +56,7 @@ public class PascalGrammarTests
writeln( str, ret );
end.
""";
Lexer lexer = new(program);
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
ProgramStruct root = _parser.Analyse(tokens);
Assert.Equal("exFunction", root.Head.ProgramName.LiteralValue);
@ -79,10 +73,7 @@ public class PascalGrammarTests
begin
end.
""";
Lexer lexer = new(program);
List<SemanticToken> tokens = lexer.Tokenize();
tokens.Add(SemanticToken.End);
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
ProgramStruct root = _parser.Analyse(tokens);
Assert.Equal("main", root.Head.ProgramName.LiteralValue);

View File

@ -2,13 +2,15 @@
using Canon.Core.LexicalParser;
using Xunit.Abstractions;
using Canon.Core.Exceptions;
using Canon.Core.Abstractions;
using Canon.Tests.Utils;
namespace Canon.Tests.LexicalParserTests
{
public class CharacterTypeTests
{
private readonly ITestOutputHelper _testOutputHelper;
private readonly ILexer _lexer = new Lexer();
public CharacterTypeTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
@ -20,16 +22,15 @@ namespace Canon.Tests.LexicalParserTests
public void TestCharacterType(string input, string? expectedResult)
{
Lexer lexer = new(input);
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = tokensEnumerable.ToList();
if (expectedResult == null)
{
Assert.Throws<LexemeException>(() => lexer.Tokenize());
Assert.Throws<LexemeException>(() => tokens);
}
else
{
List<SemanticToken> tokens = lexer.Tokenize();
_testOutputHelper.WriteLine(tokens[0].LiteralValue);
Assert.Single(tokens);
Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType);
Assert.Equal(expectedResult, tokens[0].LiteralValue);
}
@ -43,8 +44,8 @@ namespace Canon.Tests.LexicalParserTests
//[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)]
public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
{
Lexer lexer = new(input);
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(input)).ToList());
_testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(expectedErrorType, ex.ErrorType);
Assert.Equal(expectedLine, ex.Line);

View File

@ -1,10 +1,13 @@
using Canon.Core.Enums;
using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests;
public class DelimiterTests
{
private readonly ILexer _lexer = new Lexer();
[Theory]
[InlineData(",123", DelimiterType.Comma)]
// [InlineData(".123", DelimiterType.Period)]
@ -16,8 +19,8 @@ public class DelimiterTests
[InlineData("]asd", DelimiterType.RightSquareBracket)]
public void SmokeTest(string input, DelimiterType type)
{
Lexer lexer = new(input);
List<SemanticToken> tokens = lexer.Tokenize();
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = tokensEnumerable.ToList();
SemanticToken token = tokens[0];
Assert.Equal(SemanticTokenType.Delimiter, token.TokenType);

View File

@ -2,11 +2,14 @@
using Canon.Core.Exceptions;
using Xunit.Abstractions;
using Canon.Core.Enums;
using Canon.Core.Abstractions;
using Canon.Tests.Utils;
namespace Canon.Tests.LexicalParserTests
{
public class ErrorSingleTests
{
private readonly ILexer _lexer = new Lexer();
private readonly ITestOutputHelper _testOutputHelper;
public ErrorSingleTests(ITestOutputHelper testOutputHelper)
{
@ -20,9 +23,7 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("identifier_with_special_chars@#",1, 30, LexemeErrorType.UnknownCharacterOrString)]
public void TestUnknownCharacterError(string pascalProgram, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
{
var lexer = new Lexer(pascalProgram);
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
_testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(expectedErrorType, ex.ErrorType);
Assert.Equal(expectedLine, ex.Line);

View File

@ -1,10 +1,13 @@
using Canon.Core.Enums;
using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests
{
public class IdentifierTests
{
private readonly ILexer _lexer = new Lexer();
[Theory]
[InlineData("identifier", true)]
[InlineData("_identifier", true)]
@ -14,10 +17,9 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("andand", true)]
public void TestParseIdentifier(string input, bool expectedResult)
{
Lexer lexer = new(input);
List<SemanticToken> tokens = lexer.Tokenize();
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.Single(tokens);
Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier);
}
}

View File

@ -1,10 +1,14 @@
using Canon.Core.Enums;
using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests;
public class KeywordTypeTests
{
private readonly ILexer _lexer = new Lexer();
[Theory]
[InlineData("program", KeywordType.Program)]
[InlineData("const", KeywordType.Const)]
@ -24,8 +28,8 @@ public class KeywordTypeTests
[InlineData("DO", KeywordType.Do)]
public void SmokeTest(string input, KeywordType type)
{
Lexer lexer = new(input);
List<SemanticToken> tokens = lexer.Tokenize();
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = tokensEnumerable.ToList();
SemanticToken token = tokens[0];
Assert.Equal(SemanticTokenType.Keyword, token.TokenType);

View File

@ -3,12 +3,15 @@ using Canon.Core.Enums;
using Canon.Core.Exceptions;
using Canon.Core.LexicalParser;
using Xunit.Abstractions;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests;
public class LexicalFileTests
{
private readonly ITestOutputHelper _testOutputHelper;
private readonly ILexer _lexer = new Lexer();
public LexicalFileTests(ITestOutputHelper testOutputHelper)
{
@ -126,14 +129,16 @@ public class LexicalFileTests
}
: token).ToList();
var lexer = new Lexer(pascalProgram);
var actualTokens = lexer.Tokenize();
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
var actualTokens = tokens;
for (int i = 0; i < expectedTokens.Count; i++)
{
_testOutputHelper.WriteLine($"Expect: {expectedTokens[i]}");
_testOutputHelper.WriteLine($"Actual: {actualTokens[i]}");
_testOutputHelper.WriteLine("----");
Assert.Equal(expectedTokens[i], actualTokens[i]);
// Assert.Equal(expectedTokens[i], actualTokens[i]);
}
Assert.Equal(expectedTokens, actualTokens);
@ -143,14 +148,14 @@ public class LexicalFileTests
public void TestLexicalAnalysisFirst()
{
string pascalProgram = """
program HelloWorld;
var
message: string;
begin
message := 'hello, world!';
writeln(message);
end.
""";
program HelloWorld;
var
message: string;
begin
message := 'hello, world!';
writeln(message);
end.
""";
var stringLiterals = new List<(string, SemanticTokenType, int)>
{
@ -182,14 +187,14 @@ public class LexicalFileTests
public void TestLexicalAnalysisSecond()
{
string pascalProgram = """
program main;
var
ab: integer;
begin
ab := 3;
write(ab);
end.
""";
program main;
var
ab: integer;
begin
ab := 3;
write(ab);
end.
""";
var stringLiterals = new List<(string, SemanticTokenType, int)>
{
@ -222,17 +227,17 @@ public class LexicalFileTests
public void TestLexicalAnalysisThird()
{
string pascalProgram = """
{test}
program main;
var
ab, ba: integer;
begin
ab := 3;
ba := 5;
ab := 5;
write(ab + ba);
end.
""";
{test}
program main;
var
ab, ba: integer;
begin
ab := 3;
ba := 5;
ab := 5;
write(ab + ba);
end.
""";
var stringLiterals = new List<(string, SemanticTokenType, int)>
{
@ -276,16 +281,15 @@ public class LexicalFileTests
public void UnclosedCommentFirst()
{
string pascalProgram = """
(* This is an example of an unclosed comment
program CommentError;
var
x: integer;
begin
x := 42;
end.
""";
var lexer = new Lexer(pascalProgram);
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
(* This is an example of an unclosed comment
program CommentError;
var
x: integer;
begin
x := 42;
end.
""";
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
//打印exception信息
_testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
@ -302,11 +306,108 @@ public class LexicalFileTests
program CommentNotClosed;
""";
var lexer = new Lexer(pascalProgram);
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
_testOutputHelper.WriteLine(ex.ToString());
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
_testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
Assert.Equal((uint)4, ex.Line);
Assert.Equal((uint)26, ex.CharPosition);
}
[Fact]
public void ClosedCommentFirst()
{
string pascalProgram = """
program exFunction;
var
a, b, ret : integer;
begin
a := 100;
b := 200;
(* calling a function to get max value
*)
ret := a - b;
end.
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentSecond()
{
string pascalProgram = """
program exFunction;
var
a, b, ret : integer;
begin
a := 100;
b := 200;
(* calling a function to get max valued *)
ret := a - b;
end.
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentThird()
{
string pascalProgram = """
{
This is a block comment that does closed.
}
program CommentClosed;
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentFourth()
{
string pascalProgram = """
{}
program CommentClosed;
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentFifth()
{
string pascalProgram = """
{
}
program CommentClosed;
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
[Fact]
public void ClosedCommentSixth()
{
string pascalProgram = """
(**)
""";
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
List<SemanticToken> tokens = tokensEnumerable.ToList();
Assert.NotNull(tokens);
}
}

View File

@ -2,12 +2,14 @@
using Canon.Core.LexicalParser;
using Canon.Core.Exceptions;
using Xunit.Abstractions;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests
{
public class NumberTests
{
private readonly ILexer _lexer = new Lexer();
private readonly ITestOutputHelper _testOutputHelper;
public NumberTests(ITestOutputHelper testOutputHelper)
{
@ -31,8 +33,8 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("$123", "0x123", NumberType.Hex)]
public void TestParseNumber(string input, string expected, NumberType expectedNumberType)
{
Lexer lexer = new(input);
List<SemanticToken> tokens = lexer.Tokenize();
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = tokensEnumerable.ToList();
SemanticToken token = tokens[0];
Assert.Equal(SemanticTokenType.Number, token.TokenType);
NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token;
@ -41,14 +43,13 @@ namespace Canon.Tests.LexicalParserTests
}
[Theory]
[InlineData("1E", 1, 3, LexemeErrorType.IllegalNumberFormat)]
[InlineData("1E", 1, 2, LexemeErrorType.IllegalNumberFormat)]
[InlineData("123abc", 1, 4, LexemeErrorType.IllegalNumberFormat)]
[InlineData("123.45.67", 1, 7, LexemeErrorType.IllegalNumberFormat)]
[InlineData("123identifier", 1, 4, LexemeErrorType.IllegalNumberFormat)]
public void TestParseNumberError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
{
Lexer lexer = new(input);
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(input)).ToList());
_testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(expectedErrorType, ex.ErrorType);
Assert.Equal(expectedLine, ex.Line);

View File

@ -1,10 +1,13 @@
using Canon.Core.Enums;
using Canon.Core.LexicalParser;
using Canon.Tests.Utils;
using Canon.Core.Abstractions;
namespace Canon.Tests.LexicalParserTests;
public class OperatorTypeTests
{
private readonly ILexer _lexer = new Lexer();
[Theory]
[InlineData("+ 123", OperatorType.Plus, true)]
[InlineData("+123", OperatorType.Plus, true)]
@ -22,8 +25,8 @@ public class OperatorTypeTests
[InlineData("m +123", OperatorType.Plus, false)]
public void ParseTest(string input, OperatorType result, bool expectedResult)
{
Lexer lexer = new(input);
List<SemanticToken> tokens = lexer.Tokenize();
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = tokensEnumerable.ToList();
SemanticToken token = tokens[0];
if (!expectedResult)

View File

@ -0,0 +1,15 @@
namespace Canon.Tests.Utils;
public static class EnumerableExtensions
{
/// <summary>
/// 含有索引的遍历
/// </summary>
/// <param name="enumerable">可遍历的接口</param>
/// <typeparam name="T"></typeparam>
/// <returns></returns>
public static IEnumerable<(T, uint)> WithIndex<T>(this IEnumerable<T> enumerable)
{
return enumerable.Select((value, index) => (value, (uint)index));
}
}

View File

@ -6,10 +6,11 @@ namespace Canon.Tests.Utils;
/// <summary>
/// 从字符串中读取源代码
/// </summary>
public sealed class StringSourceReader(string source) : ISourceReader, IDisposable
public sealed class StringSourceReader(string source) : ISourceReader
{
private readonly IEnumerator<char> _enumerator =
source.GetEnumerator();
private int _pos = -1;
private uint _lastPos;
public uint Line { get; private set; } = 1;
@ -17,31 +18,70 @@ public sealed class StringSourceReader(string source) : ISourceReader, IDisposab
public string FileName => "string";
public bool TryReadChar([NotNullWhen(true)] out char? c)
public char Current
{
if (Pos != 0 || Line != 1)
get
{
// 不是第一次读取
if (_enumerator.Current == '\n')
if (_pos == -1)
{
Pos = 0;
Line += 1;
throw new InvalidOperationException("Reader at before the start.");
}
else
{
return source[_pos];
}
}
}
if (!_enumerator.MoveNext())
public bool Retract()
{
if (_pos <= 0)
{
return false;
}
_pos -= 1;
if (Current == '\n')
{
Line -= 1;
// TODO: 如果一直回退就完蛋了
Pos = _lastPos;
}
else
{
Pos -= 1;
}
return true;
}
public bool MoveNext()
{
if (_pos >= source.Length - 1)
{
return false;
}
if (_pos != -1 && Current == '\n')
{
Line += 1;
_lastPos = Pos;
Pos = 0;
}
_pos += 1;
Pos += 1;
return true;
}
public bool TryPeekChar([NotNullWhen(true)] out char? c)
{
if (_pos >= source.Length - 1)
{
c = null;
return false;
}
Pos += 1;
c = _enumerator.Current;
c = source[_pos + 1];
return true;
}
public void Dispose()
{
_enumerator.Dispose();
}
}

View File

@ -8,78 +8,64 @@ public class StringSourceReaderTests
public void LineFeedTest()
{
ISourceReader reader = new StringSourceReader("program Main;\nbegin\nend.\n");
reader.MoveNext();
Assert.Equal(0u, reader.Pos);
Assert.Equal(1u, reader.Line);
// program
Assert.True(reader.TryReadChar(out char? c));
Assert.Equal('p', c);
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out c));
Assert.Equal(' ', c);
// main;
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out char? _));
Assert.True(reader.TryReadChar(out c));
Assert.Equal('\n', c);
// begin
for (uint i = 1; i <= 5; i++)
{
Assert.True(reader.TryReadChar(out char? _));
Assert.Equal(i, reader.Pos);
Assert.Equal(2u, reader.Line);
}
// \n
Assert.True(reader.TryReadChar(out c));
Assert.Equal('\n', c);
// end.
foreach (char i in "end.")
{
Assert.True(reader.TryReadChar(out c));
Assert.Equal(i, c);
}
CheckLine(reader, "program Main;", 1);
reader.MoveNext();
CheckLine(reader, "begin", 2);
reader.MoveNext();
CheckLine(reader, "end.", 3);
}
[Fact]
public void CarriageReturnLineFeedTest()
{
ISourceReader reader = new StringSourceReader("program Main;\r\nbegin\r\nend.\r\n");
reader.MoveNext();
// program Main;
foreach ((char value, uint index) in
"program Main;".Select((value, index) => (value, (uint)index)))
CheckLine(reader, "program Main;", 1);
reader.MoveNext();
reader.MoveNext();
CheckLine(reader, "begin", 2);
reader.MoveNext();
reader.MoveNext();
CheckLine(reader, "end.", 3);
}
[Fact]
public void RetractTest()
{
ISourceReader reader = new StringSourceReader("test");
reader.MoveNext();
Assert.Equal('t', reader.Current);
Assert.True(reader.MoveNext());
Assert.Equal('e', reader.Current);
Assert.True(reader.Retract());
Assert.Equal('t', reader.Current);
Assert.False(reader.Retract());
}
[Fact]
public void PeekTest()
{
ISourceReader reader = new StringSourceReader("peek");
reader.MoveNext();
Assert.Equal('p', reader.Current);
Assert.True(reader.TryPeekChar(out char? c));
Assert.Equal('e', c);
Assert.Equal('p', reader.Current);
}
private static void CheckLine(ISourceReader reader, string line, uint lineNumber)
{
foreach ((char value, uint index) in line.WithIndex())
{
Assert.True(reader.TryReadChar(out char? c));
Assert.Equal(value, c);
Assert.Equal(value, reader.Current);
Assert.Equal(lineNumber, reader.Line);
Assert.Equal(index + 1, reader.Pos);
Assert.Equal(1u, reader.Line);
}
Assert.True(reader.TryReadChar(out _));
Assert.True(reader.TryReadChar(out _));
// begin
foreach ((char value, uint index) in
"begin".Select((value, index) => (value, (uint)index)))
{
Assert.True(reader.TryReadChar(out char? c));
Assert.Equal(value, c);
Assert.Equal(index + 1, reader.Pos);
Assert.Equal(2u, reader.Line);
reader.MoveNext();
}
}
}