feat: 正则词法识别器 (#1)
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/1 Co-authored-by: jackfiled <xcrenchangjun@outlook.com> Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
This commit is contained in:
parent
6ff8622906
commit
3c0d51cec5
17
CanonSharp.Common/Abstractions/ISourceReader.cs
Normal file
17
CanonSharp.Common/Abstractions/ISourceReader.cs
Normal file
|
@ -0,0 +1,17 @@
|
|||
namespace CanonSharp.Common.Abstractions;
|
||||
|
||||
/// <summary>
/// Character-level input source offering one character of lookahead.
/// </summary>
public interface ISourceReader
{
    /// <summary>
    /// Consumes the next character and returns it.
    /// </summary>
    /// <returns>The character that was consumed.</returns>
    char Read();

    /// <summary>
    /// Looks at the next character without consuming it.
    /// </summary>
    /// <param name="c">Receives the next character when one is available.</param>
    /// <returns><c>true</c> when a character was available; otherwise <c>false</c>.</returns>
    bool TryPeek(out char c);
}
|
|
@ -1,11 +1,13 @@
|
|||
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||
|
||||
public class DeterministicState : IEquatable<DeterministicState>
|
||||
public class DeterministicState(HashSet<NondeterministicState> closure) : IEquatable<DeterministicState>
|
||||
{
|
||||
public Guid Id { get; } = Guid.NewGuid();
|
||||
|
||||
public Dictionary<char, DeterministicState> Transaction { get; } = [];
|
||||
|
||||
public HashSet<NondeterministicState> Closure { get; } = closure;
|
||||
|
||||
public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id);
|
||||
|
||||
public override bool Equals(object? obj) => obj is DeterministicState other && Equals(other);
|
||||
|
@ -35,7 +37,7 @@ public class DeterministicFiniteAutomation
|
|||
HashSet<DeterministicState> finalStates = [];
|
||||
|
||||
HashSet<NondeterministicState> startClosure = nfa.Start.CalculateEmptyClosure();
|
||||
DeterministicState start = new();
|
||||
DeterministicState start = new(startClosure);
|
||||
map.Add(new NondeterministicStateSet(startClosure), start);
|
||||
queue.Enqueue(new Pair(startClosure, start));
|
||||
|
||||
|
@ -50,7 +52,7 @@ public class DeterministicFiniteAutomation
|
|||
|
||||
foreach (NondeterministicState state in pair.States)
|
||||
{
|
||||
foreach (KeyValuePair<EmptyChar,HashSet<NondeterministicState>> transaction in
|
||||
foreach (KeyValuePair<EmptyChar, HashSet<NondeterministicState>> transaction in
|
||||
state.Transactions.Where(p => !p.Key.IsEmpty))
|
||||
{
|
||||
HashSet<NondeterministicState> closure = [];
|
||||
|
@ -64,16 +66,19 @@ public class DeterministicFiniteAutomation
|
|||
{
|
||||
n.UnionWith(closure);
|
||||
}
|
||||
else
|
||||
{
|
||||
next.Add(transaction.Key.Char, closure);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach (KeyValuePair<char,HashSet<NondeterministicState>> transaction in next)
|
||||
foreach (KeyValuePair<char, HashSet<NondeterministicState>> transaction in next)
|
||||
{
|
||||
NondeterministicStateSet set = new(transaction.Value);
|
||||
if (!map.TryGetValue(set, out DeterministicState? nextState))
|
||||
{
|
||||
nextState = new DeterministicState();
|
||||
nextState = new DeterministicState(transaction.Value);
|
||||
map.Add(set, nextState);
|
||||
}
|
||||
|
||||
|
|
92
CanonSharp.Common/LexicalAnalyzer/LexicalScanner.cs
Normal file
92
CanonSharp.Common/LexicalAnalyzer/LexicalScanner.cs
Normal file
|
@ -0,0 +1,92 @@
|
|||
using System.Diagnostics.CodeAnalysis;
|
||||
using CanonSharp.Common.Abstractions;
|
||||
|
||||
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||
|
||||
/// <summary>
/// Runs a deterministic finite automaton over an <see cref="ISourceReader"/> and
/// turns the consumed characters into <see cref="LexicalToken"/> instances.
/// </summary>
/// <param name="startState">State the automaton starts in and returns to after every token.</param>
/// <param name="finalStateMap">Maps each accepting state to the token definition it recognises.</param>
/// <param name="skippedTokens">Definitions that are recognised but never returned from <see cref="TryRead"/>.</param>
/// <param name="reader">Character source that is scanned.</param>
public class LexicalScanner(
    DeterministicState startState,
    Dictionary<DeterministicState, LexicalToken> finalStateMap,
    HashSet<LexicalToken> skippedTokens,
    ISourceReader reader)
{
    private readonly DeterministicState _startState = startState;

    // Characters consumed so far for the lexeme that is currently being recognised.
    private readonly List<char> _readHistory = [];

    private DeterministicState _currentState = startState;

    /// <summary>
    /// Reads the next token that is not listed in the skipped set.
    /// </summary>
    /// <param name="token">The token that was read, or <c>null</c> when the input is exhausted.</param>
    /// <returns><c>true</c> when a token was produced; <c>false</c> at end of input.</returns>
    /// <exception cref="InvalidOperationException">The input cannot be tokenised.</exception>
    public bool TryRead([NotNullWhen(true)] out LexicalToken? token)
    {
        while (TryReadInternal(out token))
        {
            if (!skippedTokens.Contains(token))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Reads exactly one token, skipped or not, by following transitions for as long as possible.
    /// </summary>
    private bool TryReadInternal([NotNullWhen(true)] out LexicalToken? token)
    {
        // Set when scanning stopped on a character that has no outgoing transition;
        // stays null when scanning stopped because the reader ran out of input.
        char? blocker = null;

        while (reader.TryPeek(out char c))
        {
            if (!_currentState.Transaction.TryGetValue(c, out DeterministicState? nextState))
            {
                blocker = c;
                break;
            }

            // A transition exists: move on and consume the character.
            _currentState = nextState;
            _readHistory.Add(reader.Read());
        }

        if (_readHistory.Count == 0)
        {
            // Nothing was consumed. Emitting a token here would never advance the reader
            // (an accepting start state would yield empty tokens forever), so this is
            // either a clean end of input or a character that cannot begin any token.
            if (blocker is char unexpected)
            {
                throw new InvalidOperationException(
                    $"No token can start with character U+{(int)unexpected:X4}.");
            }

            token = null;
            return false;
        }

        string lexeme = new(_readHistory.ToArray());

        if (!finalStateMap.TryGetValue(_currentState, out LexicalToken? definition))
        {
            // Characters were consumed but the automaton is not in an accepting state.
            throw new InvalidOperationException($"Input \"{lexeme}\" is not a complete token.");
        }

        token = new LexicalToken(definition, lexeme);

        // Reset for the next token.
        _readHistory.Clear();
        _currentState = _startState;
        return true;
    }

    /// <summary>
    /// Creates a builder that already defines <see cref="LexicalToken.LineBreaker"/> and
    /// <see cref="LexicalToken.WhiteSpace"/> and marks both of them as skipped.
    /// </summary>
    public static LexicalScannerBuilder CreateDefaultBuilder()
    {
        LexicalScannerBuilder builder = new();

        builder.DefineToken(LexicalToken.LineBreaker);
        builder.DefineToken(LexicalToken.WhiteSpace);

        builder.AddSkippedToken(LexicalToken.LineBreaker);
        builder.AddSkippedToken(LexicalToken.WhiteSpace);

        return builder;
    }

    /// <summary>
    /// Creates a builder without any predefined tokens.
    /// </summary>
    public static LexicalScannerBuilder CreateEmptyBuilder() => new();
}
|
62
CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs
Normal file
62
CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs
Normal file
|
@ -0,0 +1,62 @@
|
|||
using CanonSharp.Common.Abstractions;
|
||||
|
||||
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||
|
||||
/// <summary>
/// Collects token definitions and builds a <see cref="LexicalScanner"/> that recognises them.
/// </summary>
public class LexicalScannerBuilder
{
    // Final state of each per-token NFA -> the token definition it accepts.
    private readonly Dictionary<NondeterministicState, LexicalToken> _finalStateMap = [];
    private readonly List<NondeterministicFiniteAutomation> _nondeterministicFiniteAutomations = [];
    private readonly HashSet<LexicalToken> _skippedTokens = [];

    internal LexicalScannerBuilder()
    {
    }

    /// <summary>
    /// Registers a token definition: its expression is converted to an NFA and every
    /// final state of that NFA is associated with <paramref name="token"/>.
    /// </summary>
    /// <param name="token">The definition to register.</param>
    public void DefineToken(LexicalToken token)
    {
        ArgumentNullException.ThrowIfNull(token);

        NondeterministicFiniteAutomation automation = token.Expression.Convert2Nfa();
        _nondeterministicFiniteAutomations.Add(automation);

        foreach (NondeterministicState state in automation.FinalStates)
        {
            _finalStateMap.Add(state, token);
        }
    }

    /// <summary>
    /// Marks a token definition as skipped by the scanners built afterwards.
    /// </summary>
    /// <param name="token">The definition to skip.</param>
    public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token);

    /// <summary>
    /// Combines all registered definitions into one automaton, converts it to a DFA and
    /// creates a scanner over <paramref name="reader"/>.
    /// </summary>
    /// <param name="reader">Character source the scanner reads from.</param>
    /// <returns>A scanner for the definitions registered so far.</returns>
    public LexicalScanner Build(ISourceReader reader)
    {
        ArgumentNullException.ThrowIfNull(reader);

        NondeterministicFiniteAutomation combined = Combine();
        DeterministicFiniteAutomation deterministic = DeterministicFiniteAutomation.Create(combined);

        Dictionary<DeterministicState, LexicalToken> finalTokenMap = [];

        foreach (DeterministicState state in deterministic.FinalStates)
        {
            finalTokenMap.Add(state, SelectToken(state));
        }

        // The automaton above is a snapshot of the definitions; hand over a snapshot of the
        // skipped set as well so later builder calls cannot alter an existing scanner.
        return new LexicalScanner(
            deterministic.Start,
            finalTokenMap,
            new HashSet<LexicalToken>(_skippedTokens),
            reader);
    }

    /// <summary>
    /// Picks the definition with the highest priority among the NFA final states contained
    /// in the closure of <paramref name="state"/>; on equal priority the first one found wins.
    /// </summary>
    private LexicalToken SelectToken(DeterministicState state)
    {
        LexicalToken? best = null;

        foreach (NondeterministicState candidate in state.Closure)
        {
            if (_finalStateMap.TryGetValue(candidate, out LexicalToken? definition)
                && (best is null || definition.Priority > best.Priority))
            {
                best = definition;
            }
        }

        return best ?? throw new InvalidOperationException(
            "A final deterministic state does not contain the final state of any defined token.");
    }

    /// <summary>
    /// Creates a new start state with an empty transition to the start of every registered
    /// automaton; the final states of the result are the union of all their final states.
    /// </summary>
    private NondeterministicFiniteAutomation Combine()
    {
        NondeterministicState head = new();
        NondeterministicFiniteAutomation result = new(head, []);

        foreach (NondeterministicFiniteAutomation automation in _nondeterministicFiniteAutomations)
        {
            head.AddTransaction(EmptyChar.Empty, automation.Start);
            result.FinalStates.UnionWith(automation.FinalStates);
        }

        return result;
    }
}
|
56
CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs
Normal file
56
CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs
Normal file
|
@ -0,0 +1,56 @@
|
|||
using System.Globalization;
|
||||
|
||||
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||
|
||||
/// <summary>
/// A token definition (regular expression plus priority) or, when created by the scanner,
/// an occurrence of such a definition together with the text that matched.
/// Equality is based on the definition identity only, so an occurrence equals its definition.
/// </summary>
public class LexicalToken : IEquatable<LexicalToken>
{
    // Identity shared by a definition and every occurrence derived from it.
    private readonly Guid _tokenId;

    /// <summary>Expression that describes the lexemes of this token.</summary>
    public RegularExpression Expression { get; }

    /// <summary>
    /// Priority of the definition; the scanner builder prefers the definition with the
    /// highest value when several of them accept the same input.
    /// </summary>
    public int Priority { get; }

    /// <summary>Matched text; empty for a definition.</summary>
    public string LiteralValue { get; }

    /// <summary>
    /// Creates a new token definition with a fresh identity.
    /// </summary>
    /// <param name="expression">Expression that describes the lexemes of the token.</param>
    /// <param name="priority">Priority used to choose between competing definitions.</param>
    public LexicalToken(RegularExpression expression, int priority)
    {
        _tokenId = Guid.NewGuid();
        Expression = expression;
        Priority = priority;
        LiteralValue = string.Empty;
    }

    /// <summary>
    /// Creates an occurrence of <paramref name="definition"/> that carries the matched text.
    /// </summary>
    internal LexicalToken(LexicalToken definition, string literalValue)
    {
        _tokenId = definition._tokenId;
        Expression = definition.Expression;
        Priority = definition.Priority;
        LiteralValue = literalValue;
    }

    public bool Equals(LexicalToken? other) => other is not null && _tokenId == other._tokenId;

    public override bool Equals(object? obj) => obj is LexicalToken other && Equals(other);

    public override int GetHashCode() => _tokenId.GetHashCode();

    // The operators accept null on either side: two nulls are equal, null never equals a token.
    public static bool operator ==(LexicalToken? a, LexicalToken? b) => a is null ? b is null : a.Equals(b);

    public static bool operator !=(LexicalToken? a, LexicalToken? b) => !(a == b);

    /// <summary>
    /// Matches a single white-space character: any character in the Unicode SpaceSeparator
    /// category, or U+0009, U+000B, U+000C.
    /// </summary>
    public static readonly LexicalToken WhiteSpace = new(
        RegularExpression.Alternate(
            RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator),
            RegularExpression.CharSetOf("\u0009\u000B\u000C")), int.MinValue);

    /// <summary>
    /// Matches a line break: one of U+000D, U+000A, U+0085, U+2028, U+2029, or the
    /// two-character sequence "\r\n".
    /// </summary>
    public static readonly LexicalToken LineBreaker = new(
        RegularExpression.Alternate(
            RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029"),
            RegularExpression.String("\r\n")), int.MinValue);
}
|
|
@ -4,17 +4,76 @@ public abstract class RegularExpression
|
|||
{
|
||||
/// <summary>
/// Builds a nondeterministic finite automaton for this expression.
/// </summary>
/// <returns>The automaton produced by the concrete expression type.</returns>
public abstract NondeterministicFiniteAutomation Convert2Nfa();
|
||||
|
||||
/// <summary>
/// Gets an expression that matches the empty string.
/// Every access creates a new <see cref="EmptyExpression"/>.
/// </summary>
public static RegularExpression Empty
{
    get { return new EmptyExpression(); }
}
|
||||
|
||||
/// <summary>
/// Creates an expression that matches exactly one character (pattern <c>c</c>).
/// </summary>
/// <param name="c">The character to match.</param>
/// <returns>A <see cref="SymbolExpression"/> for <paramref name="c"/>.</returns>
public static RegularExpression Single(char c)
{
    return new SymbolExpression(c);
}
|
||||
|
||||
/// <summary>
/// Creates the alternation of two expressions (pattern <c>left|right</c>).
/// </summary>
/// <param name="left">First alternative.</param>
/// <param name="right">Second alternative.</param>
/// <returns>An <see cref="AlternationExpression"/> over both operands.</returns>
public static RegularExpression Alternate(RegularExpression left, RegularExpression right)
{
    return new AlternationExpression(left, right);
}
|
||||
|
||||
/// <summary>
/// Creates the concatenation of two expressions.
/// </summary>
/// <param name="first">Leading operand.</param>
/// <param name="second">Trailing operand.</param>
/// <returns>A <see cref="ConcatenationExpression"/> over both operands.</returns>
public static RegularExpression Concatenate(RegularExpression first, RegularExpression second)
{
    return new ConcatenationExpression(first, second);
}
|
||||
|
||||
/// <summary>
/// Creates the Kleene closure of an expression (pattern <c>inner*</c>).
/// </summary>
/// <param name="inner">The expression to repeat.</param>
/// <returns>A <see cref="KleeneExpression"/> wrapping <paramref name="inner"/>.</returns>
public static RegularExpression Kleene(RegularExpression inner)
{
    return new KleeneExpression(inner);
}
|
||||
|
||||
/// <summary>
/// Creates an expression that matches the literal text <paramref name="value"/>.
/// </summary>
/// <param name="value">The text to match.</param>
/// <returns>A <see cref="StringExpression"/> for <paramref name="value"/>.</returns>
public static RegularExpression String(string value)
{
    return new StringExpression(value);
}
|
||||
|
||||
/// <summary>
/// Creates a character set from the characters of <paramref name="value"/>.
/// </summary>
/// <param name="value">Text whose characters become the members of the set.</param>
/// <returns>A <see cref="CharSetExpression"/> over those characters.</returns>
public static RegularExpression CharSetOf(string value)
{
    char[] members = value.ToCharArray();
    return new CharSetExpression(members);
}
|
||||
|
||||
/// <summary>
/// Creates a character set from the characters accepted by <paramref name="predicate"/>.
/// Candidates are the characters produced by <c>Iterate(char.MinValue, char.MaxValue)</c>.
/// </summary>
/// <param name="predicate">Returns <c>true</c> for characters that belong to the set.</param>
/// <returns>A <see cref="CharSetExpression"/> over the accepted characters.</returns>
public static RegularExpression CharSetOf(Func<char, bool> predicate)
{
    IEnumerable<char> candidates = Iterate(char.MinValue, char.MaxValue);
    char[] members = candidates.Where(predicate).ToArray();
    return new CharSetExpression(members);
}
|
||||
|
||||
/// <summary>
/// Creates a character range (pattern <c>[a-b]</c>) from the characters produced by
/// <c>Iterate(a, b)</c>.
/// </summary>
/// <param name="a">First character of the range.</param>
/// <param name="b">Last character of the range.</param>
/// <returns>A <see cref="CharSetExpression"/> over the range.</returns>
public static RegularExpression Range(char a, char b)
{
    char[] members = Iterate(a, b).ToArray();
    return new CharSetExpression(members);
}
|
||||
|
||||
/// <summary>
/// Enumerates every character from <paramref name="a"/> through <paramref name="b"/>,
/// both ends included. Yields nothing when <paramref name="a"/> is greater than
/// <paramref name="b"/>.
/// </summary>
/// <param name="a">First character to yield.</param>
/// <param name="b">Last character to yield.</param>
/// <returns>The characters of the range in ascending order.</returns>
private static IEnumerable<char> Iterate(char a, char b)
{
    // Count with an int: a char counter wraps from U+FFFF back to U+0000, which would
    // never terminate when b is char.MaxValue. With an int the upper bound can be
    // yielded like any other character.
    for (int code = a; code <= b; code++)
    {
        yield return (char)code;
    }
}
|
||||
}
|
||||
|
||||
public class EmptyExpression : RegularExpression
|
||||
|
@ -107,3 +166,59 @@ public class KleeneExpression(RegularExpression inner) : RegularExpression
|
|||
return new NondeterministicFiniteAutomation(final, [final]);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Expression that matches exactly one character out of a fixed, non-empty set.
/// </summary>
public class CharSetExpression : RegularExpression
{
    /// <summary>Members of the set, copied from the constructor argument.</summary>
    public char[] Set { get; }

    /// <summary>
    /// Creates the expression from the given characters.
    /// </summary>
    /// <param name="set">Characters that make up the set.</param>
    /// <exception cref="InvalidOperationException"><paramref name="set"/> is empty.</exception>
    public CharSetExpression(Span<char> set)
    {
        if (set.IsEmpty)
        {
            throw new InvalidOperationException();
        }

        Set = set.ToArray();
    }

    /// <summary>
    /// Builds a two-state automaton with one transition from the start state to the single
    /// final state for every member of <see cref="Set"/>.
    /// </summary>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        NondeterministicState entry = new();
        NondeterministicState accept = new();

        for (int i = 0; i < Set.Length; i++)
        {
            entry.AddTransaction(new EmptyChar(Set[i]), accept);
        }

        return new NondeterministicFiniteAutomation(entry, [accept]);
    }
}
|
||||
|
||||
/// <summary>
/// Expression that matches one fixed, non-empty piece of text.
/// </summary>
public class StringExpression : RegularExpression
{
    /// <summary>The text to match.</summary>
    public string Word { get; }

    /// <summary>
    /// Creates the expression for <paramref name="word"/>.
    /// </summary>
    /// <param name="word">The text to match.</param>
    /// <exception cref="InvalidOperationException"><paramref name="word"/> is null or empty.</exception>
    public StringExpression(string word)
    {
        if (string.IsNullOrEmpty(word))
        {
            throw new InvalidOperationException();
        }

        Word = word;
    }

    /// <summary>
    /// Builds a chain of states with one transition per character of <see cref="Word"/>;
    /// the last state of the chain is the only final state.
    /// </summary>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        NondeterministicState start = new();
        NondeterministicState tail = start;

        foreach (char symbol in Word)
        {
            NondeterministicState successor = new();
            tail.AddTransaction(new EmptyChar(symbol), successor);
            tail = successor;
        }

        return new NondeterministicFiniteAutomation(start, [tail]);
    }
}
|
||||
|
|
78
CanonSharp.Common/Reader/SourceReader.cs
Normal file
78
CanonSharp.Common/Reader/SourceReader.cs
Normal file
|
@ -0,0 +1,78 @@
|
|||
using CanonSharp.Common.Abstractions;
|
||||
|
||||
namespace CanonSharp.Common.Reader;
|
||||
|
||||
/// <summary>
/// <see cref="ISourceReader"/> that reads characters from a file in chunks of up to
/// 1024 characters. The underlying <see cref="StreamReader"/> holds the file open, so
/// dispose the reader when scanning is finished.
/// </summary>
public class SourceReader : ISourceReader, IDisposable
{
    private readonly StreamReader _reader;

    // Chunk buffer: _length is the number of valid characters, _count the next index to hand out.
    private readonly char[] _buffer = new char[1024];
    private int _length;
    private int _count;

    // Character that was peeked but not consumed yet.
    private char? _lookAhead;

    /// <summary>
    /// Opens <paramref name="filename"/> for reading.
    /// </summary>
    /// <param name="filename">Path of the source file.</param>
    /// <exception cref="InvalidOperationException">The file does not exist.</exception>
    public SourceReader(string filename)
    {
        FileInfo source = new(filename);

        if (!source.Exists)
        {
            throw new InvalidOperationException($"Source file '{filename}' does not exist.");
        }

        _reader = new StreamReader(filename);
    }

    /// <summary>
    /// Consumes and returns the next character.
    /// </summary>
    /// <exception cref="InvalidOperationException">The end of the file has been reached.</exception>
    public char Read()
    {
        if (_lookAhead.HasValue)
        {
            // Hand out the peeked character first.
            char result = _lookAhead.Value;
            _lookAhead = null;
            return result;
        }

        if (!TryFetchChar(out char c))
        {
            throw new InvalidOperationException("Cannot read past the end of the source file.");
        }

        return c;
    }

    /// <summary>
    /// Returns the next character without consuming it.
    /// </summary>
    /// <param name="c">Receives the next character when one is available.</param>
    /// <returns><c>true</c> when a character was available; <c>false</c> at end of file.</returns>
    public bool TryPeek(out char c)
    {
        if (_lookAhead.HasValue)
        {
            c = _lookAhead.Value;
            return true;
        }

        if (!TryFetchChar(out c))
        {
            return false;
        }

        _lookAhead = c;
        return true;
    }

    /// <summary>
    /// Closes the underlying file.
    /// </summary>
    public void Dispose()
    {
        _reader.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Takes the next character from the buffer, refilling the buffer from the file when
    /// all buffered characters have been handed out.
    /// </summary>
    private bool TryFetchChar(out char c)
    {
        if (_length == _count)
        {
            _length = _reader.Read(_buffer);
            _count = 0;
        }

        if (_length == 0)
        {
            // The refill returned nothing: end of file.
            c = char.MinValue;
            return false;
        }

        c = _buffer[_count];
        _count += 1;
        return true;
    }
}
|
33
CanonSharp.Common/Reader/StringReader.cs
Normal file
33
CanonSharp.Common/Reader/StringReader.cs
Normal file
|
@ -0,0 +1,33 @@
|
|||
using CanonSharp.Common.Abstractions;
|
||||
|
||||
namespace CanonSharp.Common.Reader;
|
||||
|
||||
/// <summary>
/// <see cref="ISourceReader"/> over an in-memory string.
/// </summary>
/// <param name="source">The text to read.</param>
public class StringReader(string source) : ISourceReader
{
    // Index of the next character to hand out.
    private int _pos;

    /// <summary>
    /// Returns the character at the current position without advancing.
    /// </summary>
    /// <param name="c">The next character, or <see cref="char.MinValue"/> at the end of the text.</param>
    /// <returns><c>true</c> when a character was available; otherwise <c>false</c>.</returns>
    public bool TryPeek(out char c)
    {
        bool available = _pos < source.Length;
        c = available ? source[_pos] : char.MinValue;
        return available;
    }

    /// <summary>
    /// Returns the character at the current position and advances by one.
    /// </summary>
    /// <exception cref="InvalidOperationException">The end of the text has been reached.</exception>
    public char Read()
    {
        if (_pos < source.Length)
        {
            return source[_pos++];
        }

        throw new InvalidOperationException();
    }
}
|
25
CanonSharp.Tests/LexicalAnalyzerTests/ReaderTests.cs
Normal file
25
CanonSharp.Tests/LexicalAnalyzerTests/ReaderTests.cs
Normal file
|
@ -0,0 +1,25 @@
|
|||
using CanonSharp.Common.Abstractions;
|
||||
using StringReader = CanonSharp.Common.Reader.StringReader;
|
||||
|
||||
namespace CanonSharp.Tests.LexicalAnalyzerTests;
|
||||
|
||||
public class ReaderTests
{
    /// <summary>
    /// Peeking must be repeatable and must not consume; reading must consume in order.
    /// </summary>
    [Fact]
    public void StringReaderTest()
    {
        const string text = "ab";
        StringReader reader = new(text);

        foreach (char expected in text)
        {
            // Two peeks in a row have to report the same character.
            for (int attempt = 0; attempt < 2; attempt++)
            {
                Assert.True(reader.TryPeek(out char peeked));
                Assert.Equal(expected, peeked);
            }

            Assert.Equal(expected, reader.Read());
        }

        Assert.False(reader.TryPeek(out _));
    }
}
|
|
@ -43,6 +43,15 @@ public class RegularExpressionTests
|
|||
s => s.Transactions.ContainsKey(new EmptyChar('a')));
|
||||
}
|
||||
|
||||
/// <summary>
/// The range a-z must produce one transition per letter out of the start state.
/// </summary>
[Fact]
public void RangeTest()
{
    NondeterministicFiniteAutomation automaton = RegularExpression.Range('a', 'z').Convert2Nfa();

    int outgoing = automaton.Start.Transactions.Count;

    Assert.Equal(26, outgoing);
}
|
||||
|
||||
[Fact]
|
||||
public void ConvertTest()
|
||||
{
|
||||
|
@ -74,4 +83,22 @@ public class RegularExpressionTests
|
|||
|
||||
Assert.Equal('a', map[new NondeterministicStateSet([key2, key1])]);
|
||||
}
|
||||
|
||||
/// <summary>
/// Smoke test: converting two alternatives where one is a prefix of the other must not throw.
/// </summary>
[Fact]
public void PrefixConvertTest()
{
    RegularExpression shorter = RegularExpression.String("string");
    RegularExpression longer = RegularExpression.String("string1");
    RegularExpression either = RegularExpression.Alternate(shorter, longer);

    DeterministicFiniteAutomation.Create(either.Convert2Nfa());
}
|
||||
|
||||
/// <summary>
/// Smoke test: the predefined white-space token must convert to a DFA without throwing.
/// </summary>
[Fact]
public void WhiteSpaceConvertTest()
{
    RegularExpression whiteSpace = LexicalToken.WhiteSpace.Expression;

    DeterministicFiniteAutomation.Create(whiteSpace.Convert2Nfa());
}
|
||||
}
|
||||
|
|
71
CanonSharp.Tests/LexicalAnalyzerTests/ScanTests.cs
Normal file
71
CanonSharp.Tests/LexicalAnalyzerTests/ScanTests.cs
Normal file
|
@ -0,0 +1,71 @@
|
|||
using CanonSharp.Common.LexicalAnalyzer;
|
||||
using StringReader = CanonSharp.Common.Reader.StringReader;
|
||||
|
||||
namespace CanonSharp.Tests.LexicalAnalyzerTests;
|
||||
|
||||
public class ScanTests
{
    /// <summary>
    /// A single definition must be recognised together with its matched text.
    /// </summary>
    [Fact]
    public void ScanTest1()
    {
        LexicalToken token1 = new(RegularExpression.String("ab"), 1);

        LexicalScannerBuilder builder = LexicalScanner.CreateEmptyBuilder();
        builder.DefineToken(token1);

        LexicalScanner scanner = builder.Build(new StringReader("ab"));

        Assert.True(scanner.TryRead(out LexicalToken? result));
        Assert.Equal(token1, result);
        Assert.Equal("ab", result.LiteralValue);
    }

    /// <summary>
    /// Keywords, delimiters and identifiers on two lines; white space and line breaks
    /// are skipped by the default builder.
    /// </summary>
    [Fact]
    public void ScanTest2()
    {
        LexicalToken stringKeyword = new(RegularExpression.String("string"), 100);
        LexicalToken periodDelimiter = new(RegularExpression.Single('.'), 100);
        LexicalToken semiColonDelimiter = new(RegularExpression.Single(';'), 100);
        LexicalToken identifier = new(
            RegularExpression.Concatenate(
                RegularExpression.Range('a', 'z'),
                RegularExpression.Kleene(RegularExpression.Range('a', 'z'))),
            0);
        LexicalToken assigner = new(RegularExpression.String(":="), 100);

        LexicalScannerBuilder builder = LexicalScanner.CreateDefaultBuilder();
        LexicalToken[] definitions =
            [stringKeyword, periodDelimiter, semiColonDelimiter, identifier, assigner];

        foreach (LexicalToken definition in definitions)
        {
            builder.DefineToken(definition);
        }

        StringReader reader = new("""
            string value := origin;
            string abc := value.
            """);
        LexicalScanner scanner = builder.Build(reader);

        LexicalToken[] statement = [stringKeyword, identifier, assigner, identifier];
        Validate(scanner, [.. statement, semiColonDelimiter, .. statement, periodDelimiter]);

        Assert.False(scanner.TryRead(out _));
    }

    /// <summary>
    /// Reads one token per expected entry and compares it with that entry.
    /// </summary>
    private static void Validate(LexicalScanner scanner, IEnumerable<LexicalToken> expectedTokens)
    {
        foreach (LexicalToken expected in expectedTokens)
        {
            bool produced = scanner.TryRead(out LexicalToken? actual);

            Assert.True(produced);
            Assert.NotNull(actual);
            Assert.Equal(expected, actual);
        }
    }
}
|
Loading…
Reference in New Issue
Block a user