From 6e8e3885ac251fd7a0609f7b650729e0777c94b4 Mon Sep 17 00:00:00 2001 From: jackfiled Date: Sat, 4 May 2024 13:57:14 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=8C=BA=E5=88=86=E5=AD=97=E7=AC=A6?= =?UTF-8?q?=E5=92=8C=E5=AD=97=E7=AC=A6=E4=B8=B2=20(#74)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Huaps <1183155719@qq.com> Reviewed-on: https://git.rrricardo.top/PostGuard/Canon/pulls/74 --- Canon.Core/Enums/SemanticEnums.cs | 1 + Canon.Core/LexicalParser/LexemeFactory.cs | 7 +++ Canon.Core/LexicalParser/Lexer.cs | 16 +++++-- Canon.Core/LexicalParser/SemanticToken.cs | 26 +++++++++++ .../GrammarParserTests/PascalGrammarTests.cs | 2 +- .../LexicalParserTests/CharacterTypeTests.cs | 46 +++++++++---------- .../LexicalParserTests/LexicalFileTests.cs | 6 +-- 7 files changed, 74 insertions(+), 30 deletions(-) diff --git a/Canon.Core/Enums/SemanticEnums.cs b/Canon.Core/Enums/SemanticEnums.cs index 93d7cd4..5fef0b3 100644 --- a/Canon.Core/Enums/SemanticEnums.cs +++ b/Canon.Core/Enums/SemanticEnums.cs @@ -8,6 +8,7 @@ public enum SemanticTokenType Delimiter, Identifier, Character, + String, Empty, /// /// 语法分析中的栈底符号 diff --git a/Canon.Core/LexicalParser/LexemeFactory.cs b/Canon.Core/LexicalParser/LexemeFactory.cs index 2b7957f..85cc452 100644 --- a/Canon.Core/LexicalParser/LexemeFactory.cs +++ b/Canon.Core/LexicalParser/LexemeFactory.cs @@ -16,6 +16,13 @@ public static class LexemeFactory }; token = characterSemanticToken; break; + case SemanticTokenType.String: + StringSemanticToken stringSemanticToken = new() + { + LinePos = line, CharacterPos = chPos, LiteralValue = literal, + }; + token = stringSemanticToken; + break; case SemanticTokenType.Identifier: IdentifierSemanticToken identifierSemanticToken = new() { diff --git a/Canon.Core/LexicalParser/Lexer.cs b/Canon.Core/LexicalParser/Lexer.cs index c6bf38a..a779b9e 100644 --- a/Canon.Core/LexicalParser/Lexer.cs +++ b/Canon.Core/LexicalParser/Lexer.cs @@ -80,6 +80,7 @@ public class Lexer : ILexer } _tokens.Add(SemanticToken.End); + return _tokens; } @@ -447,7 +448,6 @@ public class Lexer : ILexer } break; case '\'': - case '\"': { // 重置_token,准备收集字符串内容 ResetTokenBuilder(); @@ -464,8 +464,18 @@ public class Lexer : ILexer } } - _semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Character, - GetCurrentTokenString(), _line, _chPos); + string currentString = GetCurrentTokenString(); + if (currentString.Length > 1) + { + _semanticToken = LexemeFactory.MakeToken(SemanticTokenType.String, + currentString, _line, _chPos); + } + else + { + _semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Character, + currentString, _line, _chPos); + } + ResetTokenBuilder(); diff --git a/Canon.Core/LexicalParser/SemanticToken.cs b/Canon.Core/LexicalParser/SemanticToken.cs index db3ba3d..fb88d96 100644 --- a/Canon.Core/LexicalParser/SemanticToken.cs +++ b/Canon.Core/LexicalParser/SemanticToken.cs @@ -104,6 +104,32 @@ public abstract class SemanticToken : IEquatable public class CharacterSemanticToken : SemanticToken { public override SemanticTokenType TokenType => SemanticTokenType.Character; + + /// + /// 获得令牌代表的字符 + /// + /// 字符 + public char ParseAsCharacter() + { + return char.Parse(LiteralValue); + } +} + +/// +/// 字符串类型记号 +/// +public class StringSemanticToken : SemanticToken +{ + public override SemanticTokenType TokenType => SemanticTokenType.String; + + /// + /// 获得令牌代表的字符串 + /// + /// 字符串 + public string ParseAsString() + { + return LiteralValue; + } } /// diff --git a/Canon.Tests/GrammarParserTests/PascalGrammarTests.cs b/Canon.Tests/GrammarParserTests/PascalGrammarTests.cs index b252722..119c5e8 100644 --- a/Canon.Tests/GrammarParserTests/PascalGrammarTests.cs +++ b/Canon.Tests/GrammarParserTests/PascalGrammarTests.cs @@ -39,7 +39,7 @@ public class PascalGrammarTests { const string program = """ program exFunction; - const str = 'result is : '; + const str = 'a'; var a, b : Integer; begin writeln( str, ret ); diff --git a/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs b/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs index d0a2179..f955eb1 100644 --- a/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs +++ b/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs @@ -7,33 +7,33 @@ using Canon.Tests.Utils; namespace Canon.Tests.LexicalParserTests { - public class CharacterTypeTests + public class CharacterTypeTests(ITestOutputHelper testOutputHelper) { - private readonly ITestOutputHelper _testOutputHelper; private readonly ILexer _lexer = new Lexer(); - public CharacterTypeTests(ITestOutputHelper testOutputHelper) - { - _testOutputHelper = testOutputHelper; - } [Theory] - [InlineData("'a'", "a")] - [InlineData("'Hello, World!'", "Hello, World!")] - - public void TestCharacterType(string input, string? expectedResult) + [InlineData("'a'", 'a')] + [InlineData("'+'", '+')] + public void TestCharacterType(string input, char expectedResult) { IEnumerable tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input)); List tokens = tokensEnumerable.ToList(); - if (expectedResult == null) - { - Assert.Throws(() => tokens); - } - else - { - _testOutputHelper.WriteLine(tokens[0].LiteralValue); - Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType); - Assert.Equal(expectedResult, tokens[0].LiteralValue); - } + + testOutputHelper.WriteLine(tokens[0].LiteralValue); + Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType); + Assert.Equal(expectedResult, tokens[0].Convert().ParseAsCharacter()); + } + + [Theory] + [InlineData("'Hello, world'", "Hello, world")] + [InlineData("'asdfasdf'", "asdfasdf")] + public void StringTypeTest(string input, string expect) + { + IEnumerable tokens = _lexer.Tokenize(new StringSourceReader(input)); + SemanticToken token = tokens.First(); + + Assert.Equal(SemanticTokenType.String, token.TokenType); + Assert.Equal(expect, token.Convert().ParseAsString()); } [Theory] @@ -42,11 +42,11 @@ namespace Canon.Tests.LexicalParserTests [InlineData("'This", 1, 5, LexemeErrorType.UnclosedStringLiteral)] [InlineData("x @", 1, 3, LexemeErrorType.UnknownCharacterOrString)] //[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)] - public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType) + public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, + LexemeErrorType expectedErrorType) { - var ex = Assert.Throws(() => _lexer.Tokenize(new StringSourceReader(input)).ToList()); - _testOutputHelper.WriteLine(ex.ToString()); + testOutputHelper.WriteLine(ex.ToString()); Assert.Equal(expectedErrorType, ex.ErrorType); Assert.Equal(expectedLine, ex.Line); Assert.Equal(expectedCharPosition, ex.CharPosition); diff --git a/Canon.Tests/LexicalParserTests/LexicalFileTests.cs b/Canon.Tests/LexicalParserTests/LexicalFileTests.cs index 8928030..da16d5d 100644 --- a/Canon.Tests/LexicalParserTests/LexicalFileTests.cs +++ b/Canon.Tests/LexicalParserTests/LexicalFileTests.cs @@ -17,9 +17,9 @@ public class LexicalFileTests(ITestOutputHelper testOutputHelper) string pascalProgram = """ program HelloWorld; var - message: string; + message: char; begin - message := 'hello, world!'; + message := 'h'; writeln(message); end. """; @@ -32,7 +32,7 @@ public class LexicalFileTests(ITestOutputHelper testOutputHelper) SemanticTokenType.Keyword, SemanticTokenType.Identifier, SemanticTokenType.Delimiter, - SemanticTokenType.Identifier, + SemanticTokenType.Keyword, SemanticTokenType.Delimiter, SemanticTokenType.Keyword, SemanticTokenType.Identifier,