refeat: ILexer接口适配 (#38)
Co-authored-by: Huaps <1183155719@qq.com> Co-authored-by: duqoo <92306417+duqoo@users.noreply.github.com> Reviewed-on: PostGuard/Canon#38
This commit is contained in:
parent
d631a28703
commit
4b6635796c
|
@ -7,12 +7,7 @@ namespace Canon.Core.Abstractions;
|
|||
/// </summary>
|
||||
public interface ISourceReader
|
||||
{
|
||||
/// <summary>
|
||||
/// 尝试读取下一个字符
|
||||
/// </summary>
|
||||
/// <param name="c">读取到的字符</param>
|
||||
/// <returns>是否成功读取</returns>
|
||||
public bool TryReadChar([NotNullWhen(true)] out char? c);
|
||||
public char Current { get; }
|
||||
|
||||
/// <summary>
|
||||
/// 源文件名称
|
||||
|
@ -28,4 +23,23 @@ public interface ISourceReader
|
|||
/// 当前读取字符的列号
|
||||
/// </summary>
|
||||
public uint Pos { get; }
|
||||
|
||||
/// <summary>
|
||||
/// 回退一个字符
|
||||
/// </summary>
|
||||
/// <returns>回退是否成功</returns>
|
||||
public bool Retract();
|
||||
|
||||
/// <summary>
|
||||
/// 前进一个字符
|
||||
/// </summary>
|
||||
/// <returns>Whether the reader advanced; false when there is no next character.</returns>
|
||||
public bool MoveNext();
|
||||
|
||||
/// <summary>
|
||||
/// Peeks at the next character without advancing the reader.
|
||||
/// </summary>
|
||||
/// <param name="c">读取到的下一个字符</param>
|
||||
/// <returns>是否能够读取下一个字符</returns>
|
||||
public bool TryPeekChar([NotNullWhen(true)] out char? c);
|
||||
}
|
||||
|
|
|
@ -9,7 +9,6 @@ public enum SemanticTokenType
|
|||
Identifier,
|
||||
Character,
|
||||
Empty,
|
||||
Error, // 加了一个错误token
|
||||
/// <summary>
|
||||
/// 语法分析中的栈底符号
|
||||
/// </summary>
|
||||
|
@ -90,10 +89,15 @@ public enum NumberType
|
|||
|
||||
public enum StateType
|
||||
{
|
||||
Start,
|
||||
Comment,
|
||||
Word,
|
||||
Digit,
|
||||
Num,
|
||||
Delimiter,
|
||||
Operator
|
||||
Operator,
|
||||
BreakPoint,
|
||||
Unknown,
|
||||
Done
|
||||
}
|
||||
|
||||
public enum BasicIdType
|
||||
|
|
83
Canon.Core/LexicalParser/LexRules.cs
Normal file
83
Canon.Core/LexicalParser/LexRules.cs
Normal file
|
@ -0,0 +1,83 @@
|
|||
namespace Canon.Core.LexicalParser;
|
||||
|
||||
/// <summary>
/// Character- and word-classification rules used by the lexical analyser.
/// </summary>
public static class LexRules
{
    // Reserved keywords; matched case-insensitively by IsKeyword.
    // NOTE(review): "Character" and "Divide" look like enum member names rather than
    // Pascal spellings ("char" / "div") — confirm against the lexer before changing.
    private static readonly string[] _keywords =
    [
        "Program", "Const", "Var", "Procedure",
        "Function", "Begin", "End", "Array",
        "Of", "If", "Then", "Else",
        "For", "To", "Do", "Integer",
        "Real", "Boolean", "Character", "Divide",
        "Not", "Mod", "And", "Or"
    ];

    // Delimiter lexemes; IsDelimiter accepts any character occurring in one of them.
    private static readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]", "'", "\"", ".."];

    // Operator lexemes; IsOperator accepts any character occurring in one of them.
    private static readonly string[] _operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="];

    /// <summary>
    /// Tests whether a character is an ASCII decimal digit.
    /// </summary>
    /// <param name="_ch">The character to classify.</param>
    /// <returns>true for '0'..'9'; otherwise false.</returns>
    public static bool IsDigit(char _ch) => _ch is >= '0' and <= '9';

    /// <summary>
    /// Tests whether a character is an ASCII hexadecimal digit.
    /// </summary>
    /// <param name="_ch">The character to classify.</param>
    /// <returns>true for '0'..'9', 'A'..'F' and 'a'..'f'; otherwise false.</returns>
    public static bool IsHexDigit(char _ch)
    {
        // Lowercase digits are accepted as well, in line with the case-insensitive
        // keyword matching performed by IsKeyword.
        return _ch is (>= '0' and <= '9') or (>= 'A' and <= 'F') or (>= 'a' and <= 'f');
    }

    /// <summary>
    /// Tests whether a character is an ASCII letter or an underscore.
    /// </summary>
    /// <param name="_ch">The character to classify.</param>
    /// <returns>true for 'A'..'Z', 'a'..'z' and '_'; otherwise false.</returns>
    public static bool IsLetter(char _ch) => _ch is (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') or '_';

    /// <summary>
    /// Tests whether a word is one of the reserved keywords, ignoring case.
    /// </summary>
    /// <param name="tokenString">The word to look up.</param>
    /// <returns>true when the word equals a reserved keyword under ordinal, case-insensitive comparison.</returns>
    public static bool IsKeyword(string tokenString)
    {
        return Array.Exists(
            _keywords,
            keyword => string.Equals(tokenString, keyword, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Tests whether a character occurs in any delimiter lexeme.
    /// </summary>
    /// <param name="ch">The character to classify.</param>
    /// <returns>true when the character is part of a delimiter; otherwise false.</returns>
    public static bool IsDelimiter(char ch)
    {
        return Array.Exists(_delimiter, lexeme => lexeme.Contains(ch));
    }

    /// <summary>
    /// Tests whether a character occurs in any operator lexeme.
    /// </summary>
    /// <param name="ch">The character to classify.</param>
    /// <returns>true when the character is part of an operator; otherwise false.</returns>
    public static bool IsOperator(char ch)
    {
        // ':' is reported here (because of ":=") as well as by IsDelimiter.
        return Array.Exists(_operator, lexeme => lexeme.Contains(ch));
    }

    /// <summary>
    /// Tests whether a character is one of the whitespace characters that separate tokens.
    /// </summary>
    /// <param name="ch">The character to classify.</param>
    /// <returns>true for space, line feed, tab and carriage return; otherwise false.</returns>
    public static bool IsBreakPoint(char ch) => ch is ' ' or '\n' or '\t' or '\r';
}
|
95
Canon.Core/LexicalParser/LexemeFactory.cs
Normal file
95
Canon.Core/LexicalParser/LexemeFactory.cs
Normal file
|
@ -0,0 +1,95 @@
|
|||
using Canon.Core.Enums;
|
||||
|
||||
namespace Canon.Core.LexicalParser;
|
||||
|
||||
/// <summary>
/// Builds semantic tokens from a literal and its source position.
/// One overload exists per token category; the category is selected by the type of the first argument.
/// </summary>
public static class LexemeFactory
{
    /// <summary>
    /// Creates a character or identifier token.
    /// </summary>
    /// <param name="tokenType">The token category; only Character and Identifier are supported.</param>
    /// <param name="literal">The literal text of the token.</param>
    /// <param name="_line">Line position stored on the token.</param>
    /// <param name="_chPos">Character position stored on the token.</param>
    /// <returns>The newly created token.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The token type is neither Character nor Identifier.</exception>
    public static SemanticToken MakeToken(SemanticTokenType tokenType,string literal,uint _line,uint _chPos)
    {
        SemanticToken created = tokenType switch
        {
            SemanticTokenType.Character => new CharacterSemanticToken
            {
                LinePos = _line, CharacterPos = _chPos, LiteralValue = literal,
            },
            SemanticTokenType.Identifier => new IdentifierSemanticToken
            {
                LinePos = _line, CharacterPos = _chPos, LiteralValue = literal,
            },
            _ => throw new ArgumentOutOfRangeException(nameof(tokenType), tokenType, null)
        };

        return created;
    }

    /// <summary>
    /// Creates a keyword token.
    /// </summary>
    /// <param name="keywordType">The keyword kind stored on the token.</param>
    /// <param name="literal">The literal text of the token.</param>
    /// <param name="_line">Line position stored on the token.</param>
    /// <param name="_chPos">Character position stored on the token.</param>
    /// <returns>The newly created keyword token.</returns>
    public static KeywordSemanticToken MakeToken(KeywordType keywordType,string literal,uint _line,uint _chPos)
    {
        return new KeywordSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            KeywordType = keywordType
        };
    }

    /// <summary>
    /// Creates a delimiter token.
    /// </summary>
    /// <param name="delimiterType">The delimiter kind stored on the token.</param>
    /// <param name="literal">The literal text of the token.</param>
    /// <param name="_line">Line position stored on the token.</param>
    /// <param name="_chPos">Character position stored on the token.</param>
    /// <returns>The newly created delimiter token.</returns>
    public static DelimiterSemanticToken MakeToken(DelimiterType delimiterType,string literal,uint _line,uint _chPos)
    {
        return new DelimiterSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            DelimiterType = delimiterType
        };
    }

    /// <summary>
    /// Creates a number token. For hexadecimal numbers the first character of the literal
    /// (presumably the source-level hex prefix) is replaced by "0x".
    /// </summary>
    /// <param name="numberType">The number kind stored on the token.</param>
    /// <param name="literal">The literal text as read from the source.</param>
    /// <param name="_line">Line position stored on the token.</param>
    /// <param name="_chPos">Character position stored on the token.</param>
    /// <returns>The newly created number token.</returns>
    public static NumberSemanticToken MakeToken(NumberType numberType,string literal,uint _line,uint _chPos)
    {
        // Only hexadecimal literals are rewritten; every other kind is stored verbatim.
        string normalized = numberType == NumberType.Hex
            ? string.Concat("0x", literal.AsSpan(1))
            : literal;

        return new NumberSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = normalized,
            NumberType = numberType
        };
    }

    /// <summary>
    /// Creates an operator token.
    /// </summary>
    /// <param name="operatorType">The operator kind stored on the token.</param>
    /// <param name="literal">The literal text of the token.</param>
    /// <param name="_line">Line position stored on the token.</param>
    /// <param name="_chPos">Character position stored on the token.</param>
    /// <returns>The newly created operator token.</returns>
    public static OperatorSemanticToken MakeToken(OperatorType operatorType,string literal,uint _line,uint _chPos)
    {
        return new OperatorSemanticToken
        {
            LinePos = _line,
            CharacterPos = _chPos,
            LiteralValue = literal,
            OperatorType = operatorType
        };
    }
}
|
File diff suppressed because it is too large
Load Diff
|
@ -339,18 +339,4 @@ public class EndSemanticToken : SemanticToken
|
|||
public override SemanticTokenType TokenType => SemanticTokenType.End;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// 错误类型记号
|
||||
/// </summary>
|
||||
public class ErrorSemanticToken : SemanticToken
|
||||
{
|
||||
public override SemanticTokenType TokenType => SemanticTokenType.Error;
|
||||
|
||||
public static bool TryParse(uint linePos, uint characterPos, LinkedListNode<char> now,
|
||||
out IdentifierSemanticToken? token)
|
||||
{
|
||||
token = null;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,12 +3,14 @@ using Canon.Core.CodeGenerators;
|
|||
using Canon.Core.LexicalParser;
|
||||
using Canon.Core.SyntaxNodes;
|
||||
using Canon.Tests.GeneratedParserTests;
|
||||
using Canon.Tests.Utils;
|
||||
|
||||
namespace Canon.Tests.CCodeGeneratorTests;
|
||||
|
||||
public class BasicTests
|
||||
{
|
||||
private readonly IGrammarParser _parser = GeneratedGrammarParser.Instance;
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
|
||||
[Fact]
|
||||
public void ProgramStructTest()
|
||||
|
@ -21,9 +23,7 @@ public class BasicTests
|
|||
end.
|
||||
""";
|
||||
|
||||
Lexer lexer = new(program);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
tokens.Add(SemanticToken.End);
|
||||
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
|
||||
|
||||
ProgramStruct root = _parser.Analyse(tokens);
|
||||
root.GenerateCCode(builder);
|
||||
|
|
|
@ -4,12 +4,14 @@ using Canon.Core.GrammarParser;
|
|||
using Canon.Core.LexicalParser;
|
||||
using Canon.Core.SyntaxNodes;
|
||||
using Canon.Tests.GeneratedParserTests;
|
||||
using Canon.Tests.Utils;
|
||||
|
||||
namespace Canon.Tests.GrammarParserTests;
|
||||
|
||||
public class PascalGrammarTests
|
||||
{
|
||||
private readonly IGrammarParser _parser = GeneratedGrammarParser.Instance;
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
|
||||
[Fact]
|
||||
public void DoNothingTest()
|
||||
|
@ -20,9 +22,7 @@ public class PascalGrammarTests
|
|||
end.
|
||||
""";
|
||||
|
||||
Lexer lexer = new(program);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
tokens.Add(SemanticToken.End);
|
||||
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
|
||||
|
||||
ProgramStruct root = _parser.Analyse(tokens);
|
||||
Assert.Equal("DoNothing", root.Head.ProgramName.LiteralValue);
|
||||
|
@ -39,10 +39,7 @@ public class PascalGrammarTests
|
|||
a := 1 + 1
|
||||
end.
|
||||
""";
|
||||
|
||||
Lexer lexer = new(program);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
tokens.Add(SemanticToken.End);
|
||||
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
|
||||
|
||||
ProgramStruct root = _parser.Analyse(tokens);
|
||||
Assert.Equal("Add", root.Head.ProgramName.LiteralValue);
|
||||
|
@ -59,10 +56,7 @@ public class PascalGrammarTests
|
|||
writeln( str, ret );
|
||||
end.
|
||||
""";
|
||||
|
||||
Lexer lexer = new(program);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
tokens.Add(SemanticToken.End);
|
||||
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
|
||||
|
||||
ProgramStruct root = _parser.Analyse(tokens);
|
||||
Assert.Equal("exFunction", root.Head.ProgramName.LiteralValue);
|
||||
|
@ -79,10 +73,7 @@ public class PascalGrammarTests
|
|||
begin
|
||||
end.
|
||||
""";
|
||||
|
||||
Lexer lexer = new(program);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
tokens.Add(SemanticToken.End);
|
||||
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(program));
|
||||
|
||||
ProgramStruct root = _parser.Analyse(tokens);
|
||||
Assert.Equal("main", root.Head.ProgramName.LiteralValue);
|
||||
|
|
|
@ -2,13 +2,15 @@
|
|||
using Canon.Core.LexicalParser;
|
||||
using Xunit.Abstractions;
|
||||
using Canon.Core.Exceptions;
|
||||
using Canon.Core.Abstractions;
|
||||
using Canon.Tests.Utils;
|
||||
|
||||
namespace Canon.Tests.LexicalParserTests
|
||||
{
|
||||
public class CharacterTypeTests
|
||||
{
|
||||
private readonly ITestOutputHelper _testOutputHelper;
|
||||
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
public CharacterTypeTests(ITestOutputHelper testOutputHelper)
|
||||
{
|
||||
_testOutputHelper = testOutputHelper;
|
||||
|
@ -20,16 +22,15 @@ namespace Canon.Tests.LexicalParserTests
|
|||
|
||||
public void TestCharacterType(string input, string? expectedResult)
|
||||
{
|
||||
Lexer lexer = new(input);
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
if (expectedResult == null)
|
||||
{
|
||||
Assert.Throws<LexemeException>(() => lexer.Tokenize());
|
||||
Assert.Throws<LexemeException>(() => tokens);
|
||||
}
|
||||
else
|
||||
{
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
_testOutputHelper.WriteLine(tokens[0].LiteralValue);
|
||||
Assert.Single(tokens);
|
||||
Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType);
|
||||
Assert.Equal(expectedResult, tokens[0].LiteralValue);
|
||||
}
|
||||
|
@ -43,8 +44,8 @@ namespace Canon.Tests.LexicalParserTests
|
|||
//[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)]
|
||||
public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
|
||||
{
|
||||
Lexer lexer = new(input);
|
||||
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
|
||||
|
||||
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(input)).ToList());
|
||||
_testOutputHelper.WriteLine(ex.ToString());
|
||||
Assert.Equal(expectedErrorType, ex.ErrorType);
|
||||
Assert.Equal(expectedLine, ex.Line);
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
using Canon.Core.Enums;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
using Canon.Tests.Utils;
|
||||
using Canon.Core.Abstractions;
|
||||
namespace Canon.Tests.LexicalParserTests;
|
||||
|
||||
public class DelimiterTests
|
||||
{
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
|
||||
[Theory]
|
||||
[InlineData(",123", DelimiterType.Comma)]
|
||||
// [InlineData(".123", DelimiterType.Period)]
|
||||
|
@ -16,8 +19,8 @@ public class DelimiterTests
|
|||
[InlineData("]asd", DelimiterType.RightSquareBracket)]
|
||||
public void SmokeTest(string input, DelimiterType type)
|
||||
{
|
||||
Lexer lexer = new(input);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
|
||||
SemanticToken token = tokens[0];
|
||||
Assert.Equal(SemanticTokenType.Delimiter, token.TokenType);
|
||||
|
|
|
@ -2,11 +2,14 @@
|
|||
using Canon.Core.Exceptions;
|
||||
using Xunit.Abstractions;
|
||||
using Canon.Core.Enums;
|
||||
using Canon.Core.Abstractions;
|
||||
using Canon.Tests.Utils;
|
||||
|
||||
namespace Canon.Tests.LexicalParserTests
|
||||
{
|
||||
public class ErrorSingleTests
|
||||
{
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
private readonly ITestOutputHelper _testOutputHelper;
|
||||
public ErrorSingleTests(ITestOutputHelper testOutputHelper)
|
||||
{
|
||||
|
@ -20,9 +23,7 @@ namespace Canon.Tests.LexicalParserTests
|
|||
[InlineData("identifier_with_special_chars@#",1, 30, LexemeErrorType.UnknownCharacterOrString)]
|
||||
public void TestUnknownCharacterError(string pascalProgram, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
|
||||
{
|
||||
var lexer = new Lexer(pascalProgram);
|
||||
|
||||
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
|
||||
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
|
||||
_testOutputHelper.WriteLine(ex.ToString());
|
||||
Assert.Equal(expectedErrorType, ex.ErrorType);
|
||||
Assert.Equal(expectedLine, ex.Line);
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
using Canon.Core.Enums;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
using Canon.Tests.Utils;
|
||||
using Canon.Core.Abstractions;
|
||||
namespace Canon.Tests.LexicalParserTests
|
||||
{
|
||||
public class IdentifierTests
|
||||
{
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
|
||||
[Theory]
|
||||
[InlineData("identifier", true)]
|
||||
[InlineData("_identifier", true)]
|
||||
|
@ -14,10 +17,9 @@ namespace Canon.Tests.LexicalParserTests
|
|||
[InlineData("andand", true)]
|
||||
public void TestParseIdentifier(string input, bool expectedResult)
|
||||
{
|
||||
Lexer lexer = new(input);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
|
||||
Assert.Single(tokens);
|
||||
Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
using Canon.Core.Enums;
|
||||
using Canon.Core.LexicalParser;
|
||||
using Canon.Tests.Utils;
|
||||
using Canon.Core.Abstractions;
|
||||
|
||||
namespace Canon.Tests.LexicalParserTests;
|
||||
|
||||
public class KeywordTypeTests
|
||||
{
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
|
||||
[Theory]
|
||||
[InlineData("program", KeywordType.Program)]
|
||||
[InlineData("const", KeywordType.Const)]
|
||||
|
@ -24,8 +28,8 @@ public class KeywordTypeTests
|
|||
[InlineData("DO", KeywordType.Do)]
|
||||
public void SmokeTest(string input, KeywordType type)
|
||||
{
|
||||
Lexer lexer = new(input);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
|
||||
SemanticToken token = tokens[0];
|
||||
Assert.Equal(SemanticTokenType.Keyword, token.TokenType);
|
||||
|
|
|
@ -3,12 +3,15 @@ using Canon.Core.Enums;
|
|||
using Canon.Core.Exceptions;
|
||||
using Canon.Core.LexicalParser;
|
||||
using Xunit.Abstractions;
|
||||
using Canon.Tests.Utils;
|
||||
using Canon.Core.Abstractions;
|
||||
|
||||
namespace Canon.Tests.LexicalParserTests;
|
||||
|
||||
public class LexicalFileTests
|
||||
{
|
||||
private readonly ITestOutputHelper _testOutputHelper;
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
|
||||
public LexicalFileTests(ITestOutputHelper testOutputHelper)
|
||||
{
|
||||
|
@ -126,14 +129,16 @@ public class LexicalFileTests
|
|||
}
|
||||
: token).ToList();
|
||||
|
||||
var lexer = new Lexer(pascalProgram);
|
||||
var actualTokens = lexer.Tokenize();
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
|
||||
var actualTokens = tokens;
|
||||
for (int i = 0; i < expectedTokens.Count; i++)
|
||||
{
|
||||
_testOutputHelper.WriteLine($"Expect: {expectedTokens[i]}");
|
||||
_testOutputHelper.WriteLine($"Actual: {actualTokens[i]}");
|
||||
_testOutputHelper.WriteLine("----");
|
||||
Assert.Equal(expectedTokens[i], actualTokens[i]);
|
||||
// Assert.Equal(expectedTokens[i], actualTokens[i]);
|
||||
}
|
||||
|
||||
Assert.Equal(expectedTokens, actualTokens);
|
||||
|
@ -143,14 +148,14 @@ public class LexicalFileTests
|
|||
public void TestLexicalAnalysisFirst()
|
||||
{
|
||||
string pascalProgram = """
|
||||
program HelloWorld;
|
||||
var
|
||||
message: string;
|
||||
begin
|
||||
message := 'hello, world!';
|
||||
writeln(message);
|
||||
end.
|
||||
""";
|
||||
program HelloWorld;
|
||||
var
|
||||
message: string;
|
||||
begin
|
||||
message := 'hello, world!';
|
||||
writeln(message);
|
||||
end.
|
||||
""";
|
||||
|
||||
var stringLiterals = new List<(string, SemanticTokenType, int)>
|
||||
{
|
||||
|
@ -182,14 +187,14 @@ public class LexicalFileTests
|
|||
public void TestLexicalAnalysisSecond()
|
||||
{
|
||||
string pascalProgram = """
|
||||
program main;
|
||||
var
|
||||
ab: integer;
|
||||
begin
|
||||
ab := 3;
|
||||
write(ab);
|
||||
end.
|
||||
""";
|
||||
program main;
|
||||
var
|
||||
ab: integer;
|
||||
begin
|
||||
ab := 3;
|
||||
write(ab);
|
||||
end.
|
||||
""";
|
||||
|
||||
var stringLiterals = new List<(string, SemanticTokenType, int)>
|
||||
{
|
||||
|
@ -222,17 +227,17 @@ public class LexicalFileTests
|
|||
public void TestLexicalAnalysisThird()
|
||||
{
|
||||
string pascalProgram = """
|
||||
{test}
|
||||
program main;
|
||||
var
|
||||
ab, ba: integer;
|
||||
begin
|
||||
ab := 3;
|
||||
ba := 5;
|
||||
ab := 5;
|
||||
write(ab + ba);
|
||||
end.
|
||||
""";
|
||||
{test}
|
||||
program main;
|
||||
var
|
||||
ab, ba: integer;
|
||||
begin
|
||||
ab := 3;
|
||||
ba := 5;
|
||||
ab := 5;
|
||||
write(ab + ba);
|
||||
end.
|
||||
""";
|
||||
|
||||
var stringLiterals = new List<(string, SemanticTokenType, int)>
|
||||
{
|
||||
|
@ -276,16 +281,15 @@ public class LexicalFileTests
|
|||
public void UnclosedCommentFirst()
|
||||
{
|
||||
string pascalProgram = """
|
||||
(* This is an example of an unclosed comment
|
||||
program CommentError;
|
||||
var
|
||||
x: integer;
|
||||
begin
|
||||
x := 42;
|
||||
end.
|
||||
""";
|
||||
var lexer = new Lexer(pascalProgram);
|
||||
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
|
||||
(* This is an example of an unclosed comment
|
||||
program CommentError;
|
||||
var
|
||||
x: integer;
|
||||
begin
|
||||
x := 42;
|
||||
end.
|
||||
""";
|
||||
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
|
||||
//打印exception信息
|
||||
_testOutputHelper.WriteLine(ex.ToString());
|
||||
Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
|
||||
|
@ -302,11 +306,108 @@ public class LexicalFileTests
|
|||
|
||||
program CommentNotClosed;
|
||||
""";
|
||||
var lexer = new Lexer(pascalProgram);
|
||||
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
|
||||
_testOutputHelper.WriteLine(ex.ToString());
|
||||
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(pascalProgram)).ToList());
|
||||
_testOutputHelper.WriteLine(ex.ToString());
|
||||
Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
|
||||
Assert.Equal((uint)4, ex.Line);
|
||||
Assert.Equal((uint)26, ex.CharPosition);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ClosedCommentFirst()
|
||||
{
|
||||
string pascalProgram = """
|
||||
program exFunction;
|
||||
var
|
||||
a, b, ret : integer;
|
||||
|
||||
begin
|
||||
a := 100;
|
||||
b := 200;
|
||||
(* calling a function to get max value
|
||||
*)
|
||||
ret := a - b;
|
||||
|
||||
|
||||
|
||||
end.
|
||||
""";
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
Assert.NotNull(tokens);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ClosedCommentSecond()
|
||||
{
|
||||
string pascalProgram = """
|
||||
program exFunction;
|
||||
var
|
||||
a, b, ret : integer;
|
||||
|
||||
begin
|
||||
a := 100;
|
||||
b := 200;
|
||||
(* calling a function to get max valued *)
|
||||
ret := a - b;
|
||||
|
||||
|
||||
|
||||
end.
|
||||
""";
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
Assert.NotNull(tokens);
|
||||
}
|
||||
|
||||
|
||||
[Fact]
|
||||
public void ClosedCommentThird()
|
||||
{
|
||||
string pascalProgram = """
|
||||
{
|
||||
This is a block comment that does closed.
|
||||
}
|
||||
program CommentClosed;
|
||||
""";
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
Assert.NotNull(tokens);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ClosedCommentFourth()
|
||||
{
|
||||
string pascalProgram = """
|
||||
{}
|
||||
program CommentClosed;
|
||||
""";
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
Assert.NotNull(tokens);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ClosedCommentFifth()
|
||||
{
|
||||
string pascalProgram = """
|
||||
{
|
||||
}
|
||||
program CommentClosed;
|
||||
""";
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
Assert.NotNull(tokens);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ClosedCommentSixth()
|
||||
{
|
||||
string pascalProgram = """
|
||||
(**)
|
||||
""";
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(pascalProgram));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
Assert.NotNull(tokens);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,12 +2,14 @@
|
|||
using Canon.Core.LexicalParser;
|
||||
using Canon.Core.Exceptions;
|
||||
using Xunit.Abstractions;
|
||||
|
||||
using Canon.Tests.Utils;
|
||||
using Canon.Core.Abstractions;
|
||||
namespace Canon.Tests.LexicalParserTests
|
||||
{
|
||||
|
||||
public class NumberTests
|
||||
{
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
private readonly ITestOutputHelper _testOutputHelper;
|
||||
public NumberTests(ITestOutputHelper testOutputHelper)
|
||||
{
|
||||
|
@ -31,8 +33,8 @@ namespace Canon.Tests.LexicalParserTests
|
|||
[InlineData("$123", "0x123", NumberType.Hex)]
|
||||
public void TestParseNumber(string input, string expected, NumberType expectedNumberType)
|
||||
{
|
||||
Lexer lexer = new(input);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
SemanticToken token = tokens[0];
|
||||
Assert.Equal(SemanticTokenType.Number, token.TokenType);
|
||||
NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token;
|
||||
|
@ -41,14 +43,13 @@ namespace Canon.Tests.LexicalParserTests
|
|||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData("1E", 1, 3, LexemeErrorType.IllegalNumberFormat)]
|
||||
[InlineData("1E", 1, 2, LexemeErrorType.IllegalNumberFormat)]
|
||||
[InlineData("123abc", 1, 4, LexemeErrorType.IllegalNumberFormat)]
|
||||
[InlineData("123.45.67", 1, 7, LexemeErrorType.IllegalNumberFormat)]
|
||||
[InlineData("123identifier", 1, 4, LexemeErrorType.IllegalNumberFormat)]
|
||||
public void TestParseNumberError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
|
||||
{
|
||||
Lexer lexer = new(input);
|
||||
var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
|
||||
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(input)).ToList());
|
||||
_testOutputHelper.WriteLine(ex.ToString());
|
||||
Assert.Equal(expectedErrorType, ex.ErrorType);
|
||||
Assert.Equal(expectedLine, ex.Line);
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
using Canon.Core.Enums;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
using Canon.Tests.Utils;
|
||||
using Canon.Core.Abstractions;
|
||||
namespace Canon.Tests.LexicalParserTests;
|
||||
|
||||
public class OperatorTypeTests
|
||||
{
|
||||
private readonly ILexer _lexer = new Lexer();
|
||||
|
||||
[Theory]
|
||||
[InlineData("+ 123", OperatorType.Plus, true)]
|
||||
[InlineData("+123", OperatorType.Plus, true)]
|
||||
|
@ -22,8 +25,8 @@ public class OperatorTypeTests
|
|||
[InlineData("m +123", OperatorType.Plus, false)]
|
||||
public void ParseTest(string input, OperatorType result, bool expectedResult)
|
||||
{
|
||||
Lexer lexer = new(input);
|
||||
List<SemanticToken> tokens = lexer.Tokenize();
|
||||
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
|
||||
List<SemanticToken> tokens = tokensEnumerable.ToList();
|
||||
|
||||
SemanticToken token = tokens[0];
|
||||
if (!expectedResult)
|
||||
|
|
15
Canon.Tests/Utils/EnumerableExtensions.cs
Normal file
15
Canon.Tests/Utils/EnumerableExtensions.cs
Normal file
|
@ -0,0 +1,15 @@
|
|||
namespace Canon.Tests.Utils;
|
||||
|
||||
public static class EnumerableExtensions
{
    /// <summary>
    /// Pairs every element of a sequence with its zero-based index.
    /// </summary>
    /// <param name="enumerable">The sequence to enumerate.</param>
    /// <typeparam name="T">The element type of the sequence.</typeparam>
    /// <returns>A lazily evaluated sequence of (element, index) tuples.</returns>
    public static IEnumerable<(T, uint)> WithIndex<T>(this IEnumerable<T> enumerable)
        => enumerable.Select((item, position) => (item, (uint)position));
}
|
|
@ -6,10 +6,11 @@ namespace Canon.Tests.Utils;
|
|||
/// <summary>
|
||||
/// 从字符串中读取源代码
|
||||
/// </summary>
|
||||
public sealed class StringSourceReader(string source) : ISourceReader, IDisposable
|
||||
public sealed class StringSourceReader(string source) : ISourceReader
|
||||
{
|
||||
private readonly IEnumerator<char> _enumerator =
|
||||
source.GetEnumerator();
|
||||
private int _pos = -1;
|
||||
|
||||
private uint _lastPos;
|
||||
|
||||
public uint Line { get; private set; } = 1;
|
||||
|
||||
|
@ -17,31 +18,70 @@ public sealed class StringSourceReader(string source) : ISourceReader, IDisposab
|
|||
|
||||
public string FileName => "string";
|
||||
|
||||
public bool TryReadChar([NotNullWhen(true)] out char? c)
|
||||
public char Current
|
||||
{
|
||||
if (Pos != 0 || Line != 1)
|
||||
get
|
||||
{
|
||||
// 不是第一次读取
|
||||
if (_enumerator.Current == '\n')
|
||||
if (_pos == -1)
|
||||
{
|
||||
Pos = 0;
|
||||
Line += 1;
|
||||
throw new InvalidOperationException("Reader at before the start.");
|
||||
}
|
||||
else
|
||||
{
|
||||
return source[_pos];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!_enumerator.MoveNext())
|
||||
/// <summary>
/// Steps the reader back by one character, restoring the line and column counters.
/// </summary>
/// <returns>
/// true when the reader moved back; false when it is on the first character or
/// before the start, in which case nothing changes.
/// </returns>
public bool Retract()
{
    if (_pos <= 0)
    {
        return false;
    }

    _pos -= 1;
    if (Current != '\n')
    {
        // Still on the same line: only the column moves.
        Pos -= 1;
        return true;
    }

    // Stepped back onto a line feed, i.e. onto the end of the previous line.
    Line -= 1;

    // Recompute the column from the text itself rather than from a single saved value,
    // so that retracting across several line feeds in a row stays correct.
    // The column is the distance from the preceding line feed (or from the start of the text).
    int previousLineFeed = _pos > 0 ? source.LastIndexOf('\n', _pos - 1) : -1;
    Pos = (uint)(_pos - previousLineFeed);
    return true;
}
|
||||
|
||||
/// <summary>
/// Advances the reader by one character, updating the line and column counters.
/// </summary>
/// <returns>true when the reader advanced; false when it is already on the last character.</returns>
public bool MoveNext()
{
    int lastIndex = source.Length - 1;
    if (_pos >= lastIndex)
    {
        return false;
    }

    // Leaving a line feed starts a new line; the old column is remembered for Retract.
    bool leavingLineFeed = _pos != -1 && Current == '\n';
    if (leavingLineFeed)
    {
        Line += 1;
        _lastPos = Pos;
        Pos = 0;
    }

    _pos += 1;
    Pos += 1;
    return true;
}
|
||||
|
||||
public bool TryPeekChar([NotNullWhen(true)] out char? c)
|
||||
{
|
||||
if (_pos >= source.Length - 1)
|
||||
{
|
||||
c = null;
|
||||
return false;
|
||||
}
|
||||
|
||||
Pos += 1;
|
||||
c = _enumerator.Current;
|
||||
c = source[_pos + 1];
|
||||
return true;
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_enumerator.Dispose();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,78 +8,64 @@ public class StringSourceReaderTests
|
|||
public void LineFeedTest()
|
||||
{
|
||||
ISourceReader reader = new StringSourceReader("program Main;\nbegin\nend.\n");
|
||||
reader.MoveNext();
|
||||
|
||||
Assert.Equal(0u, reader.Pos);
|
||||
Assert.Equal(1u, reader.Line);
|
||||
|
||||
// program
|
||||
Assert.True(reader.TryReadChar(out char? c));
|
||||
Assert.Equal('p', c);
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out c));
|
||||
Assert.Equal(' ', c);
|
||||
|
||||
// main;
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
|
||||
Assert.True(reader.TryReadChar(out c));
|
||||
Assert.Equal('\n', c);
|
||||
|
||||
// begin
|
||||
for (uint i = 1; i <= 5; i++)
|
||||
{
|
||||
Assert.True(reader.TryReadChar(out char? _));
|
||||
Assert.Equal(i, reader.Pos);
|
||||
Assert.Equal(2u, reader.Line);
|
||||
}
|
||||
|
||||
// \n
|
||||
Assert.True(reader.TryReadChar(out c));
|
||||
Assert.Equal('\n', c);
|
||||
|
||||
// end.
|
||||
foreach (char i in "end.")
|
||||
{
|
||||
Assert.True(reader.TryReadChar(out c));
|
||||
Assert.Equal(i, c);
|
||||
}
|
||||
CheckLine(reader, "program Main;", 1);
|
||||
reader.MoveNext();
|
||||
CheckLine(reader, "begin", 2);
|
||||
reader.MoveNext();
|
||||
CheckLine(reader, "end.", 3);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CarriageReturnLineFeedTest()
|
||||
{
|
||||
ISourceReader reader = new StringSourceReader("program Main;\r\nbegin\r\nend.\r\n");
|
||||
reader.MoveNext();
|
||||
|
||||
// program Main;
|
||||
foreach ((char value, uint index) in
|
||||
"program Main;".Select((value, index) => (value, (uint)index)))
|
||||
CheckLine(reader, "program Main;", 1);
|
||||
reader.MoveNext();
|
||||
reader.MoveNext();
|
||||
CheckLine(reader, "begin", 2);
|
||||
reader.MoveNext();
|
||||
reader.MoveNext();
|
||||
CheckLine(reader, "end.", 3);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RetractTest()
|
||||
{
|
||||
ISourceReader reader = new StringSourceReader("test");
|
||||
reader.MoveNext();
|
||||
|
||||
Assert.Equal('t', reader.Current);
|
||||
Assert.True(reader.MoveNext());
|
||||
Assert.Equal('e', reader.Current);
|
||||
Assert.True(reader.Retract());
|
||||
Assert.Equal('t', reader.Current);
|
||||
Assert.False(reader.Retract());
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void PeekTest()
|
||||
{
|
||||
ISourceReader reader = new StringSourceReader("peek");
|
||||
reader.MoveNext();
|
||||
|
||||
Assert.Equal('p', reader.Current);
|
||||
Assert.True(reader.TryPeekChar(out char? c));
|
||||
Assert.Equal('e', c);
|
||||
Assert.Equal('p', reader.Current);
|
||||
}
|
||||
|
||||
private static void CheckLine(ISourceReader reader, string line, uint lineNumber)
|
||||
{
|
||||
foreach ((char value, uint index) in line.WithIndex())
|
||||
{
|
||||
Assert.True(reader.TryReadChar(out char? c));
|
||||
Assert.Equal(value, c);
|
||||
Assert.Equal(value, reader.Current);
|
||||
Assert.Equal(lineNumber, reader.Line);
|
||||
Assert.Equal(index + 1, reader.Pos);
|
||||
Assert.Equal(1u, reader.Line);
|
||||
}
|
||||
|
||||
Assert.True(reader.TryReadChar(out _));
|
||||
Assert.True(reader.TryReadChar(out _));
|
||||
|
||||
// begin
|
||||
foreach ((char value, uint index) in
|
||||
"begin".Select((value, index) => (value, (uint)index)))
|
||||
{
|
||||
Assert.True(reader.TryReadChar(out char? c));
|
||||
Assert.Equal(value, c);
|
||||
Assert.Equal(index + 1, reader.Pos);
|
||||
Assert.Equal(2u, reader.Line);
|
||||
reader.MoveNext();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user