feat: 正则词法识别器 (#1)

Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/1
Co-authored-by: jackfiled <xcrenchangjun@outlook.com>
Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
This commit is contained in:
2024-07-29 16:59:29 +08:00
committed by 任昌骏
parent 6ff8622906
commit 3c0d51cec5
11 changed files with 587 additions and 6 deletions

View File

@@ -0,0 +1,25 @@
using CanonSharp.Common.Abstractions;
using StringReader = CanonSharp.Common.Reader.StringReader;
namespace CanonSharp.Tests.LexicalAnalyzerTests;
public class ReaderTests
{
    /// <summary>
    /// Walks a <c>StringReader</c> over the input "ab" and checks, for each character in turn,
    /// that two consecutive <c>TryPeek</c> calls both succeed with that character and that the
    /// following <c>Read</c> returns it. Finally checks that <c>TryPeek</c> fails once both
    /// characters have been read.
    /// </summary>
    [Fact]
    public void StringReaderTest()
    {
        const string input = "ab";
        StringReader reader = new(input);

        foreach (char expected in input)
        {
            // Peek twice before reading: the second peek must still see the same character.
            for (int attempt = 0; attempt < 2; attempt++)
            {
                Assert.True(reader.TryPeek(out char peeked));
                Assert.Equal(expected, peeked);
            }

            Assert.Equal(expected, reader.Read());
        }

        // Both characters have been read, so there is nothing left to peek at.
        Assert.False(reader.TryPeek(out char _));
    }
}

View File

@@ -43,6 +43,15 @@ public class RegularExpressionTests
s => s.Transactions.ContainsKey(new EmptyChar('a')));
}
/// <summary>
/// Converts the range expression 'a'..'z' to an NFA and checks that the start state
/// carries 26 transactions, i.e. one per character in the range.
/// </summary>
[Fact]
public void RangeTest()
{
    // Number of characters from 'a' to 'z' inclusive; evaluates to 26 at compile time.
    const int expectedCount = 'z' - 'a' + 1;

    RegularExpression lowercase = RegularExpression.Range('a', 'z');
    NondeterministicFiniteAutomation automation = lowercase.Convert2Nfa();

    Assert.Equal(expectedCount, automation.Start.Transactions.Count);
}
[Fact]
public void ConvertTest()
{
@@ -74,4 +83,22 @@ public class RegularExpressionTests
Assert.Equal('a', map[new NondeterministicStateSet([key2, key1])]);
}
/// <summary>
/// Builds the alternation of "string" and "string1" (the first literal is a strict prefix
/// of the second), converts it to an NFA and feeds that NFA to
/// <c>DeterministicFiniteAutomation.Create</c>.
/// </summary>
[Fact]
public void PrefixConvertTest()
{
    var shorter = RegularExpression.String("string");
    var longer = RegularExpression.String("string1");
    RegularExpression alternation = RegularExpression.Alternate(shorter, longer);

    NondeterministicFiniteAutomation automation = alternation.Convert2Nfa();

    // No assertion on the result: the test passes as long as the conversion does not throw.
    DeterministicFiniteAutomation.Create(automation);
}
/// <summary>
/// Converts the expression of <c>LexicalToken.WhiteSpace</c> to an NFA and feeds that NFA to
/// <c>DeterministicFiniteAutomation.Create</c>.
/// </summary>
[Fact]
public void WhiteSpaceConvertTest()
{
    var whiteSpaceExpression = LexicalToken.WhiteSpace.Expression;
    NondeterministicFiniteAutomation automation = whiteSpaceExpression.Convert2Nfa();

    // No assertion on the result: the test passes as long as the conversion does not throw.
    DeterministicFiniteAutomation.Create(automation);
}
}

View File

@@ -0,0 +1,71 @@
using CanonSharp.Common.LexicalAnalyzer;
using StringReader = CanonSharp.Common.Reader.StringReader;
namespace CanonSharp.Tests.LexicalAnalyzerTests;
public class ScanTests
{
    /// <summary>
    /// Registers a single token for the literal "ab" on an empty builder, scans the input "ab"
    /// and checks that the token read equals the registered one and carries the literal
    /// value "ab".
    /// </summary>
    [Fact]
    public void ScanTest1()
    {
        LexicalScannerBuilder scannerBuilder = LexicalScanner.CreateEmptyBuilder();
        LexicalToken abToken = new(RegularExpression.String("ab"), 1);
        scannerBuilder.DefineToken(abToken);

        LexicalScanner scanner = scannerBuilder.Build(new StringReader("ab"));

        Assert.True(scanner.TryRead(out LexicalToken? scanned));
        Assert.Equal(abToken, scanned);
        Assert.Equal("ab", scanned.LiteralValue);
    }

    /// <summary>
    /// Scans a two-line snippet with a keyword, three delimiters/operators and an identifier
    /// token registered on the default builder, and checks the exact token sequence produced.
    /// </summary>
    /// <remarks>
    /// The word "string" also matches the identifier pattern ([a-z][a-z]*), yet the expected
    /// sequence lists the keyword token for it. The keyword is constructed with 100 and the
    /// identifier with 0 as the second argument — presumably a priority; confirm against
    /// <c>LexicalToken</c>. The expected sequence contains no token for the blanks and the
    /// line break in the input — presumably the default builder takes care of whitespace;
    /// confirm against <c>LexicalScanner.CreateDefaultBuilder</c>.
    /// </remarks>
    [Fact]
    public void ScanTest2()
    {
        LexicalScannerBuilder scannerBuilder = LexicalScanner.CreateDefaultBuilder();

        LexicalToken stringKeyword = new(RegularExpression.String("string"), 100);
        LexicalToken periodDelimiter = new(RegularExpression.Single('.'), 100);
        LexicalToken semiColonDelimiter = new(RegularExpression.Single(';'), 100);
        // One lowercase letter followed by any number of lowercase letters.
        LexicalToken identifier = new(
            RegularExpression.Concatenate(
                RegularExpression.Range('a', 'z'),
                RegularExpression.Kleene(RegularExpression.Range('a', 'z'))),
            0);
        LexicalToken assigner = new(RegularExpression.String(":="), 100);

        // Registration order is kept identical to the declaration order used before the rewrite.
        LexicalToken[] definitions =
        [
            stringKeyword,
            periodDelimiter,
            semiColonDelimiter,
            identifier,
            assigner
        ];
        foreach (LexicalToken definition in definitions)
        {
            scannerBuilder.DefineToken(definition);
        }

        StringReader source = new("""
            string value := origin;
            string abc := value.
            """);
        LexicalScanner scanner = scannerBuilder.Build(source);

        IEnumerable<LexicalToken> expectedSequence =
        [
            // string value := origin;
            stringKeyword, identifier, assigner, identifier, semiColonDelimiter,
            // string abc := value.
            stringKeyword, identifier, assigner, identifier, periodDelimiter
        ];
        Validate(scanner, expectedSequence);

        // After the last expected token the scanner must report that nothing is left.
        Assert.False(scanner.TryRead(out _));
    }

    /// <summary>
    /// Reads one token from <paramref name="scanner"/> per element of
    /// <paramref name="expectedTokens"/> and asserts that each read succeeds, yields a
    /// non-null token, and that this token equals the expected one.
    /// </summary>
    /// <param name="scanner">Scanner to read tokens from.</param>
    /// <param name="expectedTokens">Tokens expected from the scanner, in order.</param>
    private static void Validate(LexicalScanner scanner, IEnumerable<LexicalToken> expectedTokens)
    {
        foreach (LexicalToken expected in expectedTokens)
        {
            bool hasToken = scanner.TryRead(out LexicalToken? actual);

            Assert.True(hasToken);
            Assert.NotNull(actual);
            Assert.Equal(expected, actual);
        }
    }
}