Compare commits

..

3 Commits

Author SHA1 Message Date
1094158e37 feat: README.md & LICENSE
Some checks failed
Build Docker Image / Build-Canon (push) Has been cancelled
Integration Test / Open-Set-Test (push) Has been cancelled
Test canon project / Test-Canon (push) Has been cancelled
2024-07-03 16:29:42 +08:00
460a1e71e2 feat: add docs latex source files 2024-07-03 16:28:25 +08:00
580f2d505a fix:对于const重复定义检查错误 (#85)
Some checks failed
Integration Test / Open-Set-Test (push) Has been cancelled
Test canon project / Test-Canon (push) Has been cancelled
Reviewed-on: PostGuard/Canon#85
2024-05-14 14:10:42 +08:00
48 changed files with 4588 additions and 17 deletions

View File

@ -31,7 +31,7 @@ public class CodeGeneratorVisitor(ICompilerLogger? logger = null) : TypeCheckVis
public override void PostVisit(ConstDeclaration constDeclaration) public override void PostVisit(ConstDeclaration constDeclaration)
{ {
base.PreVisit(constDeclaration); base.PostVisit(constDeclaration);
(IdentifierSemanticToken token, ConstValue constValue) = constDeclaration.ConstValue; (IdentifierSemanticToken token, ConstValue constValue) = constDeclaration.ConstValue;
@ -791,7 +791,7 @@ public class CodeGeneratorVisitor(ICompilerLogger? logger = null) : TypeCheckVis
{ {
// GenerateWhileLabel(); // GenerateWhileLabel();
Builder.AddLine($""" Builder.AddLine($"""
if ({_whileConditionNames.Peek()} == false) if (!{_whileConditionNames.Peek()})
goto {_whileEndLabels.Peek()}; goto {_whileEndLabels.Peek()};
"""); """);
} }
@ -1080,7 +1080,7 @@ public class CodeGeneratorVisitor(ICompilerLogger? logger = null) : TypeCheckVis
private void GenerateWhileLabel() private void GenerateWhileLabel()
{ {
_whileBeginLabels.Push($"while_{_labelCount}"); _whileBeginLabels.Push($"while_{_labelCount}");
//_whileConditionNames.Push($"while_condition_{_labelCount}"); _whileConditionNames.Push($"while_condition_{_labelCount}");
_whileEndLabels.Push($"while_end_{_labelCount}"); _whileEndLabels.Push($"while_end_{_labelCount}");
_labelCount += 1; _labelCount += 1;

View File

@ -31,7 +31,7 @@ public abstract class PascalType : IEquatable<PascalType>
return false; return false;
} }
return IsReference == other.IsReference; return true;
} }
public T Convert<T>() where T : PascalType public T Convert<T>() where T : PascalType

View File

@ -44,7 +44,16 @@ public class SymbolTable
/// </summary> /// </summary>
/// <param name="symbol">欲添加的符号</param> /// <param name="symbol">欲添加的符号</param>
/// <returns>是否添加成功</returns> /// <returns>是否添加成功</returns>
public bool TryAddSymbol(Symbol symbol) => _symbols.TryAdd(symbol.SymbolName, symbol); public bool TryAddSymbol(Symbol symbol)
{
if (_symbols.ContainsKey(symbol.SymbolName))
{
return false;
}
_symbols.Add(symbol.SymbolName, symbol);
return true;
}
/// <summary> /// <summary>
/// 尝试从符号表及其父符号表查找符号 /// 尝试从符号表及其父符号表查找符号

View File

@ -22,7 +22,7 @@ public class TypeCheckVisitor(ICompilerLogger? logger = null) : SyntaxNodeVisito
public override void PreVisit(ConstDeclaration constDeclaration) public override void PreVisit(ConstDeclaration constDeclaration)
{ {
base.PostVisit(constDeclaration); base.PreVisit(constDeclaration);
(IdentifierSemanticToken token, ConstValue constValue) = constDeclaration.ConstValue; (IdentifierSemanticToken token, ConstValue constValue) = constDeclaration.ConstValue;

View File

@ -54,4 +54,14 @@ public class SymbolTableTests
Assert.True(table.TryGetSymbol("temperature", out Symbol? temp)); Assert.True(table.TryGetSymbol("temperature", out Symbol? temp));
Assert.Equal(PascalBasicType.Real, temp.SymbolType); Assert.Equal(PascalBasicType.Real, temp.SymbolType);
} }
/// <summary>
/// Verifies that adding a second symbol under an already-used name is rejected.
/// </summary>
[Fact]
public void DuplicatedTest()
{
SymbolTable table = new();
// The first symbol named "a" (an Integer with Const = true) is expected to be added successfully.
Assert.True(table.TryAddSymbol(
new Symbol{SymbolName = "a", SymbolType = PascalBasicType.Integer, Const = true}));
// A second symbol reusing the name "a" is expected to be refused, even though its type is
// Boolean and it does not set Const.
Assert.False(table.TryAddSymbol(new Symbol{SymbolName = "a", SymbolType = PascalBasicType.Boolean}));
}
} }

674
LICENSE Normal file
View File

@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.

View File

@ -1,21 +1,45 @@
# Canon # Canon
简单的`Pascal`编译器 北京邮电大学2021级编译原理与技术课程设计成果仓库
## 项目结构 ## 课程设计要求
主要由以下三个项目组成: 本次课程设计的要求为设计一个从Pascal-S语言到C语言的简单编译器Pascal-S语言为Pascal语言的一个子集需求中详细给出了该语言的语法定义。
- `Canon.Core` 核心的编译器库,负责词法分析、语法分析、语义分析和目标代码生成等等一系列工作; 课程设计的验收方式为头歌平台测试集验证与过程性评价两个部分组成。其中头歌平台类似于OJ平台提供了一系列指定的输入Pascal-S程序和输出结果验证编译器是否正确工作。测试集由70个公开测试集和25个隐藏测试集组成在隐藏测试集中可能存在部分测试点针对提供的Pascal-S语法进行了扩充例如要求实现字符串的语法。过程性评价包括一次中期进度汇报和最后的验收汇报最后提交的报告亦作为评分的依据之一。
- `Canon.Console`编译器的控制台应用程序,负责处理命令行参数和读写文件等等工作;
- `Canon.Tests`对`Canon.Core`进行测试的测试库。
## 开始使用 ## 本仓库中的实现
需要: 我们在此次课程设计中使用`dotnet`平台实现了一个名为`Canon`的Pascal-S编译器。
- `Dotnet SDK 8.0` > Canon,中文名卡农,意为“规律”,亦是一种音乐作曲技巧。
- `Visual Studio 2022`或者`Rider 2023.3.3`
才能运行和编辑该项目。 编译器全部为自行编程实现,没有使用类似于`flex`和`bison`之类的前端辅助工具。词法分析使用自行实现的自动机算法,语法分析使用`LR(1)`文法,在项目中实现了一个简单的`LR(1)`分析器生成工具。语义分析和代码生成使用类似于语法制导翻译的技术,详细设计见课程设计说明。
### 项目结构
项目中由程序、文档、公开测试集测试工具三部分组成。
项目中程序使用`Visual Studio`的解决方案工具进行管理。程序中由`C#`项目组成:
- `Canon.Core`项目中的核心程序实现,包括实现编译器的所有核心代码。
- `Canon.Generator`项目,编译器中的`LR(1)`分析器生成工具。
- `Canon.Console`项目为命令行版本的编译器项目,负责编译器中的输入输出管理等的功能,并能够通过`NativeAOT`功能输出为单个可执行文件。
- `Canon.Server`项目为服务器版本的编译器项目提供了交互式编译的功能并可以可视化的查看编译输出的语法树和C语言代码。
- `Canon.Tests`项目,单元测试项目。
项目中的最终报告文件使用`latex`撰写,报告的源代码在`docs`文件夹下,可以使用`latexmk`进行编译:
```shell
cd docs
latexmk main.tex
```
即可获得`CanonReport.pdf`文件。
项目中提供了一个公开测试集的自动测试工具,使用`python`编写,需要在系统中安装`fpc`编译器和`gcc`编译器。脚本会自动使用`fpc`编译器和自行实现的编译器`pascc`编译所有的公开测试集输入代码,并比对两个编译器输出文件的执行结果是否一致。使用方法可参见`CI`文件`.gitea/workflows/integration_test.yaml`。
## 支持
如果您在学习或者是抄袭的过程中发现了问题,我们十分欢迎您提出,您可以通过发起`issue`或者是发送电子邮件的方式联系我们。

7
docs/.latexmkrc Normal file
View File

@ -0,0 +1,7 @@
$pdf_mode = 1;
$pdflatex = "xelatex -file-line-error --shell-escape -src-specials -synctex=1 -interaction=nonstopmode %O %S;cp %D %R.pdf";
$recorder = 1;
$clean_ext = "synctex.gz acn acr alg aux bbl bcf blg brf fdb_latexmk glg glo gls idx ilg ind ist lof log lot out run.xml toc dvi";
$bibtex_use = 2;
$out_dir = "temp";
$jobname = "CanonReport";

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 202 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

BIN
docs/assets/测试/test.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 162 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 142 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

BIN
docs/contents/assets/1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 347 KiB

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,459 @@
\documentclass[../main.tex]{subfiles}
\begin{document}
\section{总体设计}
% 说明,包括:
% 1数据结构设计
% 2总体结构设计包括
% 功能模块的划分
% 模块功能
% 模块之间的关系
% 模块之间的接口
% 3用户接口设计
\subsection{数据流图}
\begin{figure}[h]
\centering
\includegraphics[width=0.9\linewidth]{assets/数据流图.png}
\caption{数据流图}
\label{fig:data_flow_diagram}
\end{figure}
\subsection{数据结构设计}
在整个编译器设计中,数据结构的设计是至关重要的一环。它不仅需要支持编译器的各个阶段,还需要保证数据的正确传递和高效处理。在本节中将对编译器中各个模块之间共有的部分数据结构进行说明。
\subsubsection{词法记号}
\texttt{SemanticToken} 是一个抽象基类,定义了所有词法记号的共有属性和方法。具体类型的词法记号(如关键字、标识符等)都继承自这个类。
每个Token至少有四个属性:记号类型 \texttt{SemanticTokenType TokenType},行号 \texttt{uint LinePos},字符位置 \texttt{uint CharacterPos},字面量 \texttt{string LiteralValue}。
\begin{lstlisting}[
style=csharp
]
public abstract class SemanticToken
{
public abstract SemanticTokenType TokenType { get; }
/// <summary>
/// 记号出现的行号
/// </summary>
public required uint LinePos { get; init; }
/// <summary>
/// 记号出现的列号
/// </summary>
public required uint CharacterPos { get; init; }
/// <summary>
/// 记号的字面值
/// </summary>
public required string LiteralValue { get; init; }
}
\end{lstlisting}
实际继承词法记号基类的词法记号类有:
\begin{itemize}
\item 字符类型记号 \texttt{CharacterSemanticToken}
\item 字符串类型记号 \texttt{StringSemanticToken}
\item 分隔符类型记号 \texttt{DelimiterSemanticToken}
\item 关键词类型记号 \texttt{KeywordSemanticToken}
\item 操作符类型记号 \texttt{OperatorSemanticToken}
\item 数值类型记号 \texttt{NumberSemanticToken}
\item 标识符类型记号 \texttt{IdentifierSemanticToken}
\end{itemize}
其中分隔符类型、关键词类型、操作符类型等记号提供一个属性获得该记号代表的分隔符、关键词、操作符,这些可以穷举的类型使用枚举标识,在表\ref{table:operator_and_delimiter}和表\ref{table:keyword_and_operator}中列举了所有的分隔符、关键词和操作符。而对于字符类型记号、字符串类型记号、数值类型记号,在代码中分别提供了将字面值识别为C\#中的字符、字符串和数值等类型的功能,方便在代码中对于这种固定值进行操作。在标识符类型中则是提供了一个返回标识符值的方法,在该方法中会自动将字面值小写,以此来提供Pascal代码中对于大小写不敏感的功能。
% \begin{table}[h]
% \centering
% \caption{基本类型和标识符}
% \begin{tabular}{|c|c|c|c|}
% \hline
% \textbf{描述} & \textbf{字面量记录} & \textbf{记号类型} & \textbf{详细类型} \\
% \hline
% 标识符 & 该标识符本身 & IDENTIFIER & \\
% 无符号整数 & 该整数本身(字符串表示) & NUMBER & 整数 \\
% 无符号浮点数 & 该浮点数本身(字符串表示) & & 实数 \\
% 十六进制数 & 该十六进制数本身(字符串表示) & & 十六进制 \\
% 字符常量 & 该字符常量本身(不包含两侧的单引号) & CHARACTER & \\
% \hline
% \end{tabular}
% \end{table}
\begin{longtable}{|c|c|c|c|}
\caption{运算符和分界符} \label{table:operator_and_delimiter} \\
\hline
% 跨页表的第一行
\textbf{描述} & \textbf{字面量记录} & \textbf{记号类型} & \textbf{详细类型} \\
\hline
\endhead
% 跨页表的最后一行
\hline
\multicolumn{4}{r@{}}{接下一页}
\endfoot
% 跨页表的最后一页的最后一行
\hline
\endlastfoot
关系运算符 & $\geq$ & Operator & 大于等于 \\
& $>$ & & 大于 \\
& $\leq$ & & 小于等于 \\
& $\neq$ & & 不等于 \\
& $<$ & & 小于 \\
关系运算符:相等 & $=$ & & 等于 \\
算术运算符:加法 & $+$ & &\\
算术运算符:减法 & $-$ & &\\
算术运算符:乘法 & $*$ & &\\
算术运算符:除法 & $/$ & &\\
赋值符号 & $:=$ & & 赋值 \\
范围连接符 & $..$ & Delimiter & 点点 \\
界符 & $($ & & 左括号 \\
& $)$ & & 右括号 \\
& $[$ & & 左方括号 \\
& $]$ & & 右方括号 \\
& $:$ & & 冒号 \\
& $,$ & & 逗号 \\
& $;$ & & 分号 \\
& $.$ & & 句号/点 \\
\hline
\end{longtable}
\begin{longtable}{|c|c|c|c|}
\caption{关键字和逻辑运算符} \label{table:keyword_and_operator} \\
\hline
\textbf{描述} & \textbf{字面量记录} & \textbf{记号类型} & \textbf{详细类型} \\
\hline
\endhead
% 跨页表的最后一行
\hline
\multicolumn{4}{r@{}}{接下一页}
\endfoot
% 跨页表的最后一页的最后一行
\hline
\endlastfoot
逻辑运算符:或 & or & Keyword &\\
算术运算符:取余 & mod & & 取余 \\
逻辑运算符:且 & and & &\\
逻辑运算符:非 & not & &\\
关键字 & program & & 程序 \\
关键字 & const & & 常量 \\
关键字 & var & & 变量 \\
关键字 & array & & 数组 \\
关键字 & of & & 属于 \\
关键字 & procedure & & 过程 \\
关键字 & function & & 函数 \\
关键字 & begin & & 开始 \\
关键字 & end & & 结束 \\
关键字 & if & & 如果 \\
关键字 & then & & 那么 \\
关键字 & for & & 对于 \\
关键字 & to & &\\
关键字 & do & & 执行 \\
关键字 & else & & 否则 \\
关键字 & repeat & & 重复 \\
关键字 & until & & 直到 \\
关键字 & while & &\\
关键字 & integer & & 整数 \\
关键字 & real & & 实数 \\
关键字 & char & & 字符 \\
关键字 & boolean & & 布尔 \\
\end{longtable}
\subsubsection{语法树}
语法树是编译器中用于表示源代码结构的树状数据结构。在语法分析阶段,编译器将源代码转换为语法树,以便后续阶段可以更高效地进行处理。因此,语法树中每个节点和语法中的每个符号一一对应,其中非终结符即对应树上的父节点,终结符对应了树上的叶子节点。
在终结节点上直接封装了访问对应的词法分析令牌的功能。
\begin{lstlisting}[style=csharp]
public class TerminatedSyntaxNode : SyntaxNodeBase
{
public override bool IsTerminated => true;
public required SemanticToken Token { get; init; }
// 其他代码有删节
}
\end{lstlisting}
在针对不同的非终结节点,首先在其的共同基类\texttt{NonTerminatedSyntaxNode}中封装了访问其子节点的功能,并针对该节点产生式的不同提供了不同的方式模型。
针对只有一个产生式的非终结节点,直接在该非终结节点上使用属性的方式将其有意义的子节点暴露出来,例如在\texttt{ProgramStruct}上就直接暴露了访问\texttt{ProgramHead}的属性。
\begin{lstlisting}[style=csharp]
public class ProgramStruct : NonTerminatedSyntaxNode
{
public override NonTerminatorType Type => NonTerminatorType.ProgramStruct;
/// <summary>
/// 程序头
/// </summary>
public ProgramHead Head => Children[0].Convert<ProgramHead>();
}
\end{lstlisting}
针对含有多个产生式的非终结节点,如果是有效的子节点只有一种的,则仍然使用属性的方式进行暴露,例如\texttt{ConstDeclaration},其就暴露了标识符名称和值两个属性。
\begin{lstlisting}[style=csharp]
public class ConstDeclaration : NonTerminatedSyntaxNode
{
public override NonTerminatorType Type => NonTerminatorType.ConstDeclaration;
/// <summary>
/// 是否递归的声明下一个ConstDeclaration
/// </summary>
public bool IsRecursive { get; private init; }
/// <summary>
/// 获得声明的常量
/// </summary>
public (IdentifierSemanticToken, ConstValue) ConstValue => GetConstValue();
public static ConstDeclaration Create(List<SyntaxNodeBase> children)
{
bool isRecursive;
if (children.Count == 3)
{
isRecursive = false;
}
else if (children.Count == 5)
{
isRecursive = true;
}
else
{
throw new InvalidOperationException();
}
return new ConstDeclaration { Children = children, IsRecursive = isRecursive };
}
private static IdentifierSemanticToken ConvertToIdentifierSemanticToken(SyntaxNodeBase node)
{
return (IdentifierSemanticToken)node.Convert<TerminatedSyntaxNode>().Token;
}
private (IdentifierSemanticToken, ConstValue) GetConstValue()
{
if (IsRecursive)
{
return (ConvertToIdentifierSemanticToken(Children[2]), Children[4].Convert<ConstValue>());
}
else
{
return (ConvertToIdentifierSemanticToken(Children[0]), Children[2].Convert<ConstValue>());
}
}
}
\end{lstlisting}
而对于使用的多个产生式且无法有效提取信息的非终结节点,则设计使用\textbf{事件}以提供相关信息的功能。访问者可以在需要使用对应产生式的信息时订阅对应的事件,并且语法树的实现保证对应的事件会在第一次访问和第二次访问时按照订阅的顺序进行调用。对应事件的事件参数也可提供产生式相关的信息。
\texttt{ConstValue}就是一个不错的例子,其提供了使用数值产生式和字符产生式的两个事件供订阅。
\begin{lstlisting}[style=csharp]
/// <summary>
/// 使用数值产生式事件的事件参数
/// </summary>
public class NumberConstValueEventArgs : EventArgs
{
/// <summary>
/// 是否含有负号
/// </summary>
public bool IsNegative { get; init; }
/// <summary>
/// 数值记号
/// </summary>
public required NumberSemanticToken Token { get; init; }
}
/// <summary>
/// 使用字符产生式事件的事件参数
/// </summary>
public class CharacterConstValueEventArgs : EventArgs
{
/// <summary>
/// 字符记号
/// </summary>
public required CharacterSemanticToken Token { get; init; }
}
public class ConstValue : NonTerminatedSyntaxNode
{
public override NonTerminatorType Type => NonTerminatorType.ConstValue;
/// <summary>
/// 使用数值产生式的事件
/// </summary>
public event EventHandler<NumberConstValueEventArgs>? OnNumberGenerator;
/// <summary>
/// 使用字符产生式的事件
/// </summary>
public event EventHandler<CharacterConstValueEventArgs>? OnCharacterGenerator;
}
\end{lstlisting}
\subsubsection{符号表}
符号表是在语义分析阶段使用的数据结构,用于存储变量、函数和过程的信息。符号表支持查询、插入和作用域管理操作。每个作用域都有自己的符号表,如果当前作用域中没有找到符号,则会递归查询父作用域。
符号表的设计如下:
\begin{itemize}
\item \textbf{符号表项SymbolTableItem}包含类型MegaType、名称、是否为变量、是否为函数和参数列表。
\item \textbf{类型MegaType}:包含指针类型和项类型。
\end{itemize}
符号表的物理结构采用哈希表实现,以支持高效的查询和插入操作。
\subsubsection{语法树上的旅行者}
在语法分析完成对于语法树的构建之后,我们需要在语法树的各个节点上进行一系列的操作,例如进行符号表的维护、类型检查和代码生成等任务。为了降低程序的复杂度,我们希望在程序中提供一个统一的语法树遍历和访问接口。因此,我们使用访问者设计模式设计了\texttt{SyntaxNodeVisitor}(语法节点访问者)和\texttt{SyntaxTreeTraveller}(语法树旅行者)。同时结合编译原理课程中语义分析和翻译方案相关的知识,我们设计了一种称为\textit{前后序遍历}的语法树访问模型。例如对于图\ref{fig:syntax_tree_example}中的一棵语法树,其遍历顺序为
\begin{align}\notag
& ProgramStruct \to ProgramHead \to program \to program \to main \to main \\ \notag
&\to ProgramHead \to ; \to ; \to ProgramBody \to ConstDeclarations \to \\ \notag
&ConstDeclarations \to VarDeclarations \to VarDeclarations \to \\ \notag
&SubprogramDeclarations \to SubprogramDeclarations \to CompoundStatement \\ \notag
&\to begin \to begin \to StatementList \to Statement \to Statement \to \\ \notag
&StatementList \to end \to end \to CompoundStatement \to ProgramBody \\ \notag
&\to . \to . \to ProgramStruct \notag
\end{align}
\begin{figure}[t]
\centering
\includegraphics[width=0.9\linewidth]{assets/示例语法树图.png}
\caption{示例的语法树图}
\label{fig:syntax_tree_example}
\end{figure}
在设计对于语法树的遍历之后,我们又设计了对于语法节点的访问者,访问者针对语法树上的每一个节点都提供了两个访问接口,分别会在第一次遍历到该节点和第二次遍历到该节点时调用,称为\texttt{PreVisit}和\texttt{PostVisit}。按照编译原理课程中的知识来说,\texttt{PreVisit}接口可以理解为对于该节点的L-属性计算,\texttt{PostVisit}接口可以理解为对该节点的S-属性计算。
为了使得各语义分析的工作可以方便的组合在一起运行,例如类型检查需要在代码生成之前运行,容易想到使用类型继承的方式进行抽象。例如类型检查类直接继承了语法节点访问者抽象基类\texttt{SyntaxNodeVisitor},而代码生成类则直接继承了类型检查类。需要注意的是,在重载访问语法节点的接口函数时,需要在执行任何操作之前调用基类的对应操作。
\begin{lstlisting}[
style=csharp,
caption={示例的代码生成类代码}
]
public class CodeGeneratorVisitor(ICompilerLogger? logger = null) : TypeCheckVisitor(logger)
{
public override void PreVisit(ProgramHead programHead)
{
// 调用基类的访问方法
base.PreVisit(programHead);
// 实际的代码生成逻辑...
}
}
\end{lstlisting}
\subsection{总体结构设计}
\textit{Canon}编译器的核心库按照编译的工作流程和相关工作划分为各个模块:
\begin{itemize}
\item 源代码读取模块
\item 词法分析模块
\item 语法分析模块
\item 语义分析模块
\item 日志输出模块
\end{itemize}
鉴于项目中主要使用依赖注入的设计模式进行开发,因此各个模块都提供了对应接口。下面首先介绍各个模块之间的接口,然后将分模块介绍各个模块的功能。
\subsubsection{模块提供的接口}
\paragraph{ISourceReader} 源代码读取模块提供的接口。该接口在提供文件读取函数的同时,还提供了读取的缓冲区功能,在获得当前读取字符及行号、列号的同时,可以前进读取一个字符,后退一个字符,最后尝试读取下一个字符。
\paragraph{ILexer} 词法分析器的接口。该接口提供了将源代码分析为一个词法记号流的功能。
\paragraph{IGrammarParser} 语法分析模块的接口。该接口提供了将一个词法记号流构建为一棵语法树的功能。
\paragraph{SyntaxNodeVisitor} 语法树节点访问抽象类。该接口提供了对于语法树上各个节点的访问方法。
\paragraph{ICompilerLogger} 编译日志输出接口。该接口提供了输出各个等级信息的能力。
\subsubsection{词法分析模块}
词法分析模块负责读入输入字符串,解析为词法记号流输出。
\subsubsection{语法分析模块}
语法分析模块主要负责从Pascal语法构建LR(1)分析表和对输入的词法记号流进行分析构建语法树的工作。
对于语法分析模块而言LR(1)分析表存在两种表现形式:(1) 内存形式直接通过Pascal-S语法分析并构建自动机进而得到的分析表(2)源代码形式鉴于每次都从Pascal-S语法进行分析并构建自动机消耗的时间和资源非常多而且语法在大多数时间都是不变的因此我们实现了将LR(1)分析表生成到C\#源代码形式的功能。
因此语法分析模块主要提供三个功能:从语法构建自动机并得到LR(1)分析表;将LR(1)分析表生成为C\#源代码形式;根据分析表分析输入的词法记号流并构建语法树。
\subsubsection{语义分析模块}
语义分析模块负责完成类型检查和代码生成两个功能。为了完成上述的工作在语义分析模块中实现了Pascal-S语言的类型系统和对于语法树的访问和遍历逻辑。
\subsection{用户接口设计}
\subsubsection{命令行版本}
命令行版本的接口设计旨在为用户提供一个简单、直接的方式来使用编译器。用户可以通过命令行工具 \texttt{Canon Pascal Compiler} 来转换 Pascal 源代码文件到 C 代码文件。
使用方法如下:
\begin{verbatim}
Canon.Console [options]
Options:
-i, --input <input> (REQUIRED) Pascal源代码文件地址
--version 显示版本信息
-?, -h, --help 显示帮助信息
\end{verbatim}
其中 \texttt{<input>} 是必须提供的 Pascal 源文件路径。命令行版本支持以下特性:
\begin{itemize}
\item \textbf{参数解析}:通过 \texttt{System.CommandLine} 库解析命令行参数,提供灵活的命令行选项。
\item \textbf{日志记录}:使用 \texttt{CompilerLogger} 类记录编译过程中的信息,帮助用户了解编译状态。
\end{itemize}
\subsubsection{Web在线版本}
\begin{figure}[h]
\centering
\includegraphics[width=0.9\linewidth]{assets/编译器Web在线版本.png}
\caption{编译器Web在线版本}
\label{fig:compiler_web_fig}
\end{figure}
Web在线版本提供了一个图形化界面,允许用户在网页上直接输入Pascal 源代码,并在线编译和查看生成的 C 代码。这为没有命令行使用经验的用户提供了便利(图\ref{fig:compiler_web_fig})。同时,图形化界面提供了Pascal源代码生成的语法树示意图(图\ref{fig:compiler_web_fig_tree}),可供用户查看并分析语法树结构。
\begin{figure}[h]
\centering
\includegraphics[width=0.9\linewidth]{assets/编译器Web在线版本_语法树.png}
\caption{语法树渲染}
\label{fig:compiler_web_fig_tree}
\end{figure}
Web版本的特点包括
\begin{itemize}
\item \textbf{代码编辑器}:集成代码编辑器,支持语法高亮,提供更好的代码编写体验。
\item \textbf{实时编译}:用户输入代码后,可以实时编译并显示输出结果。
\item \textbf{错误提示}:编译过程中的错误会在网页上直接显示,方便用户快速定位问题。
\item \textbf{语法树渲染}:编译过程中,会根据输入的代码,渲染出对应的语法树,并在语法树的节点上标注其对应的记号类型。
\item \textbf{历史记录}:编译器会保存成功编译的记录,并提供查看历史记录的功能。使用唯一id作为历史记录标识,实现了通过链接分享一个编译记录的功能(图\ref{fig:compiler_web_fig_history})。
\end{itemize}
\textit{注: 在实现语法树的可视化过程中,我们参考了论文\cite{goos_improving_2002}以在线性时间复杂度中绘制完整棵树。}
\begin{figure}[h]
\centering
\includegraphics[width=0.9\linewidth]{assets/编译器Web在线版本_历史记录.png}
\caption{历史记录}
\label{fig:compiler_web_fig_history}
\end{figure}
Web在线版本的实现依赖于前后端分离的架构前端使用React框架提供用户交互界面后端处理编译任务。通过 AJAX 请求与后端通信,实现代码的提交和结果的获取。
总体来说,用户接口设计考虑了不同用户群体的使用习惯和需求,提供了灵活、友好的使用方式,使得用户可以更加方便地使用。
\end{document}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,118 @@
\documentclass[../main.tex]{subfiles}
\begin{document}
\section{需求分析}
% 包括:数据流图、功能及数据说明等
% 开发环境
\subsection{开发环境}
在本次课程设计中,我们没有使用编译原理课程设计的传统工具flex和bison,而是决定自行手动实现词法分析和语法分析。因此我们在进行开发环境选型时就具有较高的灵活性,不必拘泥于C++语言。在综合了小组人员的开发经验和各个不同语言的优劣之后,我们决定选择.NET平台的C\#语言作为我们的开发语言。使用C\#语言作为开发语言所给我们带来的好处有:
\begin{itemize}
\item C\#是一门面向对象的新式类型安全语言,具有自动垃圾回收的功能。
\item .NET平台提供了多种不同的部署方式。可以直接AOT(Ahead of time)编译到单个可执行程序亦可以使用JIT(Just in time)编译的方式使用运行时进行运行。因此在共享同样的核心库时,我们可以提供编译到单个可执行文件的编译器程序,也可以基于.NET强大的Web开发能力提供在线编译器。
\item C\#在全平台上提供了统一的开发和运行体验,适用于我们小组中需要兼容多个平台开发的需求。
\end{itemize}
此外,为了提高开发效率和代码的可维护性,我们还选用了一些辅助工具和库:
\begin{itemize}
\item \textbf{Gitea}我们通过使用自行搭建的Gitea服务器进行版本控制这样可以确保团队成员之间的代码同步和变更记录。
\item \textbf{Gitea Actions}我们依托Gitea提供的丰富持续集成、自动部署的功能编写了一系列的自动化脚本在每次提交新代码和合并到主线代码时运行单元测试和集成测试。
\end{itemize}
为了撰写开发文档和实验报告我们利用了Overleaf和飞书的在线文档协作功能。这使得文档的共享和协作变得更加高效和便捷尤其是在团队分布在不同地点时。
\subsection{功能分析}
在需求文档中提供的Pascal-S语法基础上我们希望我们的编译器支持如下的Pascal语法和功能
\begin{enumerate}
\item 支持Pascal-S语法中的常见数据类型包括整数、浮点数、字符值和布尔值。
\item 支持Pascal-S语法中的常见流程控制语句包括分支语句循环语句(While循环和For循环)
\item 支持Pascal-S语法中的过程定义和函数定义
\item 支持Pascal-S标准库中的输入输出函数(write, writeln, read)
\end{enumerate}
基于上述语法和功能我们基于Pascal-S语法设计了如下的Pascal语法。
\subsubsection{支持的Pascal语法}\label{pascal_grammar}
\begin{lstlisting}[
style=grammar,
caption={Pascal-S语法},
]
ProgramStart -> ProgramStruct
ProgramStruct -> ProgramHead ; ProgramBody .
ProgramHead -> program id (IdList) | program id
ProgramBody -> ConstDeclarations
VarDeclarations
SubprogramDeclarations
CompoundStatement
IdList -> , id IdList | : Type
ConstDeclarations -> $\epsilon$ | const ConstDeclaration ;
ConstDeclaration -> id = ConstValue | ConstDeclaration ; id = ConstValue
ConstValue -> +num | -num | num | 'letter' | true | false
VarDeclarations -> $\epsilon$ | var VarDeclaration ;
VarDeclaration -> id IdList | VarDeclaration ; id IdList
Type -> BasicType | array [ Period ] of BasicType
BasicType -> integer | real | boolean | char
Period -> digits .. digits | Period , digits .. digits
SubprogramDeclarations -> $\epsilon$ | SubprogramDeclarations Subprogram ;
Subprogram -> SubprogramHead ; SubprogramBody
SubprogramHead -> procedure id FormalParameter
| function id FormalParameter : BasicType
FormalParameter -> $\epsilon$ | () | ( ParameterList )
ParameterList -> Parameter | ParameterList ; Parameter
Parameter -> VarParameter | ValueParameter
VarParameter -> var ValueParameter
ValueParameter -> id IdList
SubprogramBody -> ConstDeclarations
VarDeclarations
CompoundStatement
CompoundStatement -> begin StatementList end
StatementList -> Statement | StatementList ; Statement
Statement -> $\epsilon$
| Variable assignOp Expression
| ProcedureCall
| CompoundStatement
| if Expression then Statement ElsePart
| for id assignOp Expression to Expression do Statement
| while Expression do Statement
Variable -> id IdVarPart
IdVarPart -> $\epsilon$ | [ ExpressionList ]
ProcedureCall -> id | id () | id ( ExpressionList )
ElsePart -> $\epsilon$ | else Statement
ExpressionList -> Expression | ExpressionList , Expression
Expression -> SimpleExpression | SimpleExpression RelationOperator SimpleExpression
SimpleExpression -> Term | SimpleExpression AddOperator Term
Term -> Factor | Term MultiplyOperator Factor
Factor -> num
| true
| false
| Variable
| ( Expression )
| id ()
| id ( ExpressionList )
| not Factor
| - Factor
| + Factor
AddOperator -> + | - | or
MultiplyOperator -> * | / | div | mod | and
RelationOperator -> = | <> | < | <= | > | >=
\end{lstlisting}
\paragraph{对语法的调整} 相较于需求中给定的Pascal-S语法我们在开发和实践的过程中对于语法做出了如下的调整和扩充。
\begin{itemize}
\item 消除文法中存在的部分左递归例如VarDeclaration。消除左递归使得我们可以使用S-属性的翻译方案进行类型检查和代码生成。
\item 将ProcedureCall中添加空括号的产生式。支持在调用无参的过程或者是函数时添加一对空括号。
\item 删除Statement中产生funcid的产生式。因为Pascal中的函数返回语句只是一个合法的赋值语句,在实际上并不会终止函数的执行。因此删除该产生式并在类型检查和代码生成的阶段进行进一步的处理。
\item 添加Factor中对于加号的支持。支持在运算的过程中使用显式注明的整数$ 1 ++ 1$类型的表达式。
\item 调整对于Factor中对于ProcedureCall的定义为Id() | Id (ExpressionList)。支持调用没有参数的函数。
\item 在FormalParameter中添加一对空括号。支持在定义无参的过程和函数时添加一对空括号。
\item 增加while-do语句的支持。
\end{itemize}
\paragraph{冲突的处理} 在实现使用LR(1)分析技术的语法分析器时,我们发现在需求分析中给出的Pascal-S语法中存在着一处移进-归约冲突,即语法中的ElsePart非终结符在对含有多个嵌套的If语句进行处理时,ElsePart既可以直接从空产生式中归约出来,也可以继续移进。但是在语法层面上,Else语句应该和最近的一个If语句相结合。因此在语法分析器中做如下处理:(1) 在构建分析表处添加一个特殊判断,如果是检测到ElsePart的移进-归约冲突,则不报错继续处理;(2) 在按照分析表进行分析时,首先进行移进操作,然后再进行归约操作,这样就能保证ElsePart会优先和最近的If语句进行结合。
\end{document}

256
docs/contents/source.tex Normal file
View File

@ -0,0 +1,256 @@
\documentclass[../main.tex]{subfiles}
\begin{document}
\section{源程序清单}
为了使得项目开发更加清晰程序中由五个C\#项目组成:
\begin{itemize}
\item Canon.Core 编译器的核心库,含有编译器的所有核心功能。
\item Canon.Tests 编译器核心库的测试库,含有项目中编写的所有单元测试。
\item Canon.Console 编译器的命令行版本程序,在核心库的基础上以命令行的方式同编译器进行交互。
\item Canon.Server 编译器的服务器版本程序以Web的方式同编译器进行交互。
\item Canon.Generator 用于生成源代码形式的LR(1)分析表的工具。
\end{itemize}
代码中的总行数如表\ref{tab:code_lines}所示。
\begin{table}[htbp]
\centering
\begin{tabular}{|l|r|r|r|r|r|}
\hline
语言 & 文件数 & 行数 & 空白行数 & 注释数 & 代码行数 \\
\hline
C\# & 132 & 13263 & 1889 & 978 & 10396 \\
Pascal & 95 & 4989 & 368 & 34 & 4587 \\
TypeScript & 8 & 521 & 52 & 7 & 462 \\
MSBuild & 6 & 195 & 23 & 2 & 170 \\
TypeScript Typings & 2 & 149 & 7 & 13 & 129 \\
HTML & 1 & 12 & 0 & 0 & 12 \\
Python & 1 & 111 & 26 & 0 & 85 \\
\hline
\end{tabular}
\caption{代码行数统计}
\label{tab:code_lines}
\end{table}
\subsection{Canon.Core项目}
\begin{verbatim}
.
├── Abstractions
│ ├── ICompilerLogger.cs
│   ├── IGrammarParser.cs
│   ├── ILexer.cs
│   ├── ISourceReader.cs
│   ├── ITransformer.cs
│   └── SyntaxNodeVisitor.cs
├── Canon.Core.csproj
├── CodeGenerators
│   └── CCodeBuilder.cs
├── Enums
│   ├── BasicType.cs
│   ├── ErrorEnums.cs
│   ├── GrammarEnums.cs
│   └── SemanticEnums.cs
├── Exceptions
│   ├── GrammarException.cs
│   ├── LexemeException.cs
│   ├── ReduceAndShiftConflictException.cs
│   └── ReduceConflictException.cs
├── GrammarParser
│   ├── Expression.cs
│   ├── GeneratedParser.g.cs
│   ├── GrammarBuilder.cs
│   ├── Grammar.cs
│   ├── LrState.cs
│   ├── PascalGrammar.cs
│   └── Terminator.cs
├── LexicalParser
│   ├── LexemeFactory.cs
│   ├── Lexer.cs
│   ├── LexRules.cs
│   └── SemanticToken.cs
├── SemanticParser
│   ├── CodeGeneratorVisitor.cs
│   ├── PascalArrayType.cs
│   ├── PascalBasicType.cs
│   ├── PascalFunctionType.cs
│   ├── PascalParameterType.cs
│   ├── PascalType.cs
│   ├── Symbol.cs
│   ├── SymbolTable.cs
│   ├── SyntaxTreeTraveller.cs
│   ├── TypeCheckVisitor.cs
│   └── TypeTable.cs
└── SyntaxNodes
├── AddOperator.cs
├── BasicType.cs
├── CompoundStatement.cs
├── ConstDeclaration.cs
├── ConstDeclarations.cs
├── ConstValue.cs
├── ElsePart.cs
├── Expression.cs
├── ExpressionList.cs
├── Factor.cs
├── FormalParameter.cs
├── IdentifierList.cs
├── IdentifierVarPart.cs
├── MultiplyOperator.cs
├── NonTerminatedSyntaxNode.cs
├── Parameter.cs
├── ParameterList.cs
├── Period.cs
├── ProcedureCall.cs
├── ProgramBody.cs
├── ProgramHead.cs
├── ProgramStruct.cs
├── RelationOperator.cs
├── SimpleExpression.cs
├── Statement.cs
├── StatementList.cs
├── SubprogramBody.cs
├── Subprogram.cs
├── SubprogramDeclarations.cs
├── SubprogramHead.cs
├── SyntaxNodeBase.cs
├── Term.cs
├── TerminatedSyntaxNode.cs
├── TypeSyntaxNode.cs
├── ValueParameter.cs
├── VarDeclaration.cs
├── VarDeclarations.cs
├── Variable.cs
└── VarParameter.cs
\end{verbatim}
\subsection{Canon.Console项目}
\begin{verbatim}
.
├── Canon.Console.csproj
├── Extensions
│   └── RootCommandExtensions.cs
├── Models
│   └── CompilerOption.cs
├── Program.cs
└── Services
├── Compiler.cs
├── CompilerLogger.cs
└── StringSourceReader.cs
\end{verbatim}
\subsection{Canon.Server项目}
\begin{verbatim}
.
├── appsettings.json
├── Canon.Server.csproj
├── client-app
│   ├── index.html
│   ├── package.json
│   ├── pnpm-lock.yaml
│   ├── public
│   │   └── pic
│   │   └── uncompiled.png
│   ├── src
│   │   ├── App.tsx
│   │   ├── main.tsx
│   │   ├── openapi.d.ts
│   │   ├── Pages
│   │   │   ├── HistoryPage.tsx
│   │   │   ├── Index.tsx
│   │   │   ├── InputField.tsx
│   │   │   ├── Loader.tsx
│   │   │   └── OutputField.tsx
│   │   └── vite-env.d.ts
│   ├── tsconfig.json
│   ├── tsconfig.node.json
│   └── vite.config.ts
├── Controllers
│   ├── CompilerController.cs
│   └── FileController.cs
├── DataTransferObjects
│   ├── CompileResponse.cs
│   └── SourceCode.cs
├── Dockerfile
├── Entities
│   └── CompileResult.cs
├── Extensions
│   └── ServiceCollectionExtensions.cs
├── Models
│   ├── Brush.cs
│   ├── CodeReader.cs
│   ├── CompilerLogger.cs
│   └── PresentableTreeNode.cs
├── Program.cs
├── Properties
│   └── launchSettings.json
├── Services
│   ├── CompileDbContext.cs
│   ├── CompilerService.cs
│   ├── DatabaseSetupService.cs
│   ├── GridFsService.cs
│   └── SyntaxTreePresentationService.cs
└── wwwroot
\end{verbatim}
\subsection{Canon.Generator项目}
\begin{verbatim}
.
├── Canon.Generator.csproj
├── Extensions
│   └── RootCommandExtension.cs
├── GrammarGenerator
│   ├── GenerateCommand.cs
│   ├── GeneratedGrammarParser.cs
│   ├── GeneratedTransformer.cs
│   └── GrammarExtensions.cs
├── Program.cs
└── SyntaxVisitorGenerator
└── SyntaxVisitorGenerator.cs
\end{verbatim}
\subsection{Canon.Tests项目}
\begin{verbatim}
.
├── Canon.Tests.csproj
├── GeneratedParserTests
│   └── GenerateParserTests.cs
├── GlobalUsings.cs
├── GrammarParserTests
│   ├── PascalGrammarFailedTests.cs
│   ├── PascalGrammarTests.cs
│   ├── SimpleGrammarTests.cs
│   ├── SimpleGrammarWithEmptyTests.cs
│   └── TerminatorTests.cs
├── LexicalParserTests
│   ├── CharacterTypeTests.cs
│   ├── DelimiterTests.cs
│   ├── ErrorSingleTests.cs
│   ├── IndentifierTypeTests.cs
│   ├── KeywordTypeTests.cs
│   ├── LexicalFileTests.cs
│   ├── NumberTests.cs
│   └── OperatorTypeTests.cs
├── SemanticTests
│   ├── ConstValueTests.cs
│   ├── PascalTypeTests.cs
│   ├── SymbolTableTests.cs
│   ├── SyntaxTreeTravellerTests.cs
│   ├── Tests.cs
│   └── TypeCheckVisitorTests.cs
└── Utils
├── CompilerHelpers.cs
├── EnumerableExtensions.cs
├── SampleSyntaxTreeVisitor.cs
├── StringSourceReader.cs
├── StringSourceReaderTests.cs
└── TestLogger.cs
\end{verbatim}
\end{document}

110
docs/contents/summary.tex Normal file
View File

@ -0,0 +1,110 @@
\documentclass[../main.tex]{subfiles}
\begin{document}
\section{课程设计总结}
% 1) 体会/收获(每个成员完成的工作、收获等)
% 2) 设计过程中遇到或存在的主要问题及解决方案
% 3) 改进建议
\subsection{成员分工}
\begin{table}[htbp]
\centering
\begin{tabular}{|l|l|}
\hline
\textbf{姓名} & \textbf{分工} \\
\hline
任昌骏 & 组长,主持开发工作,负责编译器的总体设计 \\
\hline
张弈纶 & 负责语法分析和类型检查部分的开发,前端界面的搭建 \\
\hline
兰建国 & 类型系统、符号表和代码生成部分的开发 \\
\hline
肖可扬 & 词法令牌、词法分析器的设计与实现 \\
\hline
杜含韵 & 词法分析和语法分析单元测试的编写 \\
\hline
陈劲淞 & 撰写课程设计文档 \\
\hline
\end{tabular}
\caption{成员分工表}
\label{tab:my_label}
\end{table}
\subsection{体会与收获}
\subsubsection{张弈纶}
这次课设我主要负责语法的定义引入、语义分析中代码类型检查和前端可视化落地。上学期我仅仅是完成了课程要求的词法分析和语法分析实验对语义分析和其中的代码类型检查部分没有实际的了解。通过这次课程设计我充分理解了代码类型检查的必要性和其实际运作流程。并且我通过这次的编码了解了c\#访问者模式和事件机制的使用,对我来说是一次开阔眼界的过程,收获颇多。
编码过程中,我经历了多次代码重构和迭代更新,这让我充分认识到代码质量对编码效率的影响。同时,对于一些代码设计,也重复验证了很多次。这给我很深的体会,在今后的程序设计中要做好规划,提高代码的鲁棒性与可读性。
这次课设还让我培养了一定的文献调查能力。对于语法树的绘制,我参考了相关论文并进行了复现,这也让我锻炼了论文的调研能力。同时,这次课设也让我加深了团队合作的精神,培养了团结协作的能力。
\subsubsection{兰建国}
本次课程设计我主要负责代码生成部分在此过程中我学到了很多。首先我对代码生成的过程有了更深入的理解。在一开始我以为代码生成就是机械地将Pascal-S代码翻译到C语言代码但是在动手编码之后发现困难重重。在经过几次代码重构和迭代之后我反应过来发现是设计方面的缺陷。在开始的设计中我采取的是一种"遍历到哪,翻译到哪"的设计。但实际上应该在语法树上收集够了相应信息之后才进行代码的生成。其次,我体会到了三地址代码的便捷性。在引入三地址代码之前,很多语句的代码生成很难进行,在引入三地址代码之后,代码生成的过程也更加清晰,程序的可扩展性也大大增加。此外,我还感受到了在开发过程的不稳定性,在实际开发过程中,设计方案需要不断调整,以适应各种变化。最后,此次课设还让我感受到了团队交流的重要性,通过在发现问题时及时沟通,提高了我们的开发效率。
\subsubsection{任昌骏}
在这次的课程设计中我有幸担任组长,负责整个编译器的总体设计。站在现在的角度上看来,当时选择整个编译器的设计和实现不依赖于传统的工具而是完全手动实现是非常冒险的,并且在语言选型方面也非常的``激进''。所幸在全组同学的通力配合以及王老师和助教同学的大力支持下顺利完成了。
这次的课程设计也算是我个人能力上的一次突破。在以往的项目经历中,很少有像``设计一个编译器''这样一个在算法设计和软件设计上都非常具有挑战性的课题。也正是有了这个机会使我得以将过去几年学到的各种知识和技能融会贯通无论是按照编译课本上的描述实现词法分析和语法分析的相关算法还是考古论文实现树的可视化绘制都是对于我算法能力的考验亦或是使用访问者模式在语法树节点上扩展各种功能还是通过事件的机制抽象同一个语法树节点可能使用的多个不同的生成式都是对我软件工程能力的挑战。而且编译原理课程设计作为大学少数几门要求由5至7人协作完成的课程设计也进一步锻炼了我组织小组合作的能力。无论是复杂项目的\texttt{Git}管理还是使用\texttt{CI/CD}实现持续测试和快速部署,都是我在个人项目之中难以接触到的东西。
最后还是非常感谢老师和各位同学能给我这样一个锻炼自己和提高自己的机会,在未来我一定认真复盘这次课程设计中的得与失,进一步的提高自己的个人能力。
\subsubsection{肖可扬}
这次课设我主要负责词法分析器的编写在上学期实验的基础上此次课设的词法分析器更接近真实场景需要处理更多与Pascal语法特性相关的内容所以在前期调研方面我们首先形成了翻译表在这个过程中我感受到了明确需求的重要性。此外这是我第一次使用C\#在纯代码环境中开发软件,和小组同学学习到了许多工具的使用、软件的组织架构以及代码编写规范等内容;在重构自己的代码的过程中,我更好地理解和掌握了面向对象的相关语法以及设计模式。同时,和负责测试的同学进行交流也是宝贵的经验,我意识到了自己在编程方面的严谨性还有待提高,需要更系统地全面地考虑输入的各种情况。在编写程序过程中,我明白了团队交流的意义,在编译的不同流程之间确定接口以及对特殊问题的配合处理需要团队紧密讨论、通力协作,才能提高代码的效率。
\subsubsection{杜含韵}
本次课程设计我主要参与了单元测试的部分编写并在此过程中受益良多。首先是通过此次项目我熟悉了C\#语言特性与.NET框架纠正了先前对git的错误使用帮助拓宽了我的技术认知与技术组成。其次是对Xunit测试框架的熟练使用也使得我在本学期其他课程中操作实践。最后是作为对上学期编译原理理论课程延伸而出的课程设计帮助我学会如何将理论转化为实践以及如何克服实践的具体困难。同时我还认识到了测试的编写需要更为明确的对组件任务的了解和认知需要从宏观角度上思考测试的方向和方法组内同学的实践也让我了解到测试载体的多样化。而头歌平台测试集对边界情况的探索也帮助我认识到在思考的完善性上有着诸多不足。最后我最为感激的是组内同学的通力配合和辛苦付出他们在我遇到困难时的耐心解答与帮助使得我受益匪浅。此次学习实践经验也将助力我日后的学习生活行稳致远。
\subsubsection{陈劲淞}
在本次课程设计中,我主要负责撰写项目的文档。这不仅仅包括项目的设计文档,还有整个开发过程的文档记录和最终的报告。通过这个过程,我深刻理解到了文档在软件开发过程中的重要性。良好的文档不仅可以帮助团队成员理解和维护代码,还可以为未来的开发提供参考。
首先,我学习并实践了如何使用\LaTeX 来创建专业的文档。这包括了解其基本语法、文档结构组织、图表和代码的插入等。这些技能的获得,让我在未来的学术写作和报告制作中更加得心应手。
其次团队在开发过程中采用Docker容器化技术这极大地提高了开发环境的一致性和项目的可移植性。通过Docker我们能够确保每个团队成员都在相同的环境中开发和测试减少了环境差异带来的问题。我在文档中详细记录了如何使用Docker来配置和管理我们的开发环境这对于团队成员理解整个系统的部署和运行至关重要。
同时本项目中使用Git进行版本控制和团队协作我负责记录各个分支的合并和版本发布的详细过程确保所有团队成员都能迅速地获取最新的项目状态和历史修改记录。这不仅提高了团队的工作效率也增强了项目的可追溯性。
在编译原理中,词法分析、语法分析和语义分析是构建编译器的重要步骤。在本次课程设计中,我们团队的工作涉及到了这些方面,我作为文档撰写者也深刻地参与其中并从中受益匪浅。通过撰写文档的过程,我加深了对整个项目的理解,提高了与团队成员的交流合作能力,并锻炼了自己的表达和文字组织能力。
此外我参与了单元测试和集成测试的文档撰写记录了测试策略和测试结果。通过Xunit框架进行单元测试以及使用Jenkins进行持续集成我们能够及时发现并解决开发过程中出现的问题保证软件质量。这一过程不仅加深了我对测试理论的理解也提升了我在实际项目中应用测试的能力。
最后,通过这次经验,我认识到了持续学习和自我提升的重要性。未来,我希望能继续提高我的专业技能,尤其是在技术写作和项目管理方面,以便在未来的职业生涯中更好地服务于团队和项目。
\subsection{设计中的主要问题和解决方案}
\paragraph{生成LR(1)分析表耗时较长}
在编译的过程中从原始的语法生成对应的LR(1)分析表是一个时间复杂度较大的工作。经过实际测试生成本课程设计中需要支持的基础语法对应的分析表就需要大约7秒至10秒的时间。
\textbf{解决方案}: 将生成好的LR(1)分析表以C\#源代码的形式直接输出,再打包编译到程序中。在输入的Pascal语法没有变化的情况下,不用重复地生成相同的分析表。
\paragraph{语法树的访问者和类型检测访问者}
在编译过程中,管理和遍历语法树对于进行有效的类型检查和语义分析至关重要。传统的遍历方法可能导致代码重复,难以维护,且使用递归进行遍历还可能因为递归深度过深而造成栈空间耗尽的运行时错误。
\textbf{解决方案}: 采用访问者设计模式Visitor Pattern来分离数据结构和操作。这使得在不修改语法树结构的情况下添加新的操作变得简单提高了代码的可维护性和扩展性。对于类型检测定义一个专门的类型检测访问者该访问者遍历语法树并对每个节点进行类型验证。
\paragraph{语法中的左递归难以进行类型检查}
在原始需求文档中给定的Pascal语法中存在的大量左递归使得我们在进行语义分析时很难设计出S-属性的翻译方案。
\textbf{解决方案}: 改写文法,消除文法中的左递归,详见\ref{pascal_grammar}节中给出的对应语法和修改说明。
\paragraph{代码生成中涉及的各种困难}
在初始设计面向C语言的代码生成时在翻译循环语句和函数调用语句时遇到了很大的困难因为初始化仍然采用一对一的翻译思想试图将Pascal中的每一个语法结构都翻译到一个对应的语法结构。
\textbf{解决方案}: 借鉴三地址代码将翻译思想设计为翻译到一种使用C语言书写的三地址代码并且大量的使用\texttt{goto}语句和标签,成功地解决了上述问题。
\subsection{改进建议}
\paragraph{提供更为详尽的报错信息} 目前语法分析的报错系统仍然十分的不人性化,仅仅输出了编译器此时希望输入什么样的词法记号。
\paragraph{进行代码优化} 因为在进行代码生成时使用了类似于三地址代码的代码生成形式,因此在进行代码生成时会生成大量的冗余变量,造成程序的编译时间和运行占用的内存空间都非常大。
\end{document}

View File

@ -0,0 +1,27 @@
\documentclass[../main.tex]{subfiles}
\begin{document}
\section{课程设计的任务和目标}
课程设计的目标是设计一个针对Pascal-S语言的编译程序使用C语言作为编译器的目标语言。
课程设计的目标是设计并实现一个编译器该编译器能够将Pascal-S语言编写的源代码转换为C语言代码。Pascal-S是Pascal语言的一个子集专门用于教学目的它包含了Pascal语言的核心特性但去除了一些复杂的构造以简化学习和编译过程。
编译器的设计将分为几个主要部分:
\begin{enumerate}
\item \textbf{词法分析器(Lexical Analyzer)}: 该部分将读取源代码,并将其分解成一系列的标记(tokens),这些标记是编译过程中语法分析的基本单位。
\item \textbf{语法分析器(Syntax Analyzer)}: 语法分析器将使用词法分析器提供的标记来构建抽象语法树(AST)。AST是源代码的树状表示,反映了程序的结构。
\item \textbf{语义分析器(Semantic Analyzer)}: 语义分析器将检查AST,以确保源代码的逻辑是一致的,例如变量的声明与使用是否匹配、类型是否兼容等。
\item \textbf{中间代码生成器(Intermediate Code Generator)}: 该部分将AST转换为中间表示(IR),IR是一种更接近机器语言的代码形式,但仍然保持一定程度的抽象。
\item \textbf{代码优化器(Code Optimizer)}: 代码优化器将对IR进行分析和转换以提高生成的C代码的效率和性能。
\item \textbf{目标代码生成器(Target Code Generator)}: 最后目标代码生成器将把优化后的IR转换为C语言代码这是编译过程的最终产物。
\end{enumerate}
此外,编译器还将包括错误处理机制,以便在编译过程中捕捉并报告错误,帮助用户理解并修正源代码中的问题。
整个编译器的设计将遵循模块化原则每个部分都将有明确的接口和职责以便于测试和维护。我们还将使用C语言的特性如指针和结构体来高效地实现编译器的各个组成部分。
最终我们的目标是实现一个健壮的编译器它不仅能够正确地将Pascal-S代码转换为C代码而且还能够提供有用的错误信息帮助用户改进他们的源代码。
\end{document}

143
docs/main.tex Normal file
View File

@ -0,0 +1,143 @@
\documentclass[12pt, a4paper, oneside]{ctexart}
\usepackage{amsmath, amsthm, amssymb, appendix, bm, graphicx, hyperref, mathrsfs, geometry}
\usepackage{float}
\usepackage{subcaption}
\usepackage{listings}
\usepackage{longtable}
\usepackage[dvipsnames]{xcolor}
\usepackage{subfiles}
\usepackage{fontspec}
\usepackage{array}
\linespread{1.5}
\pagestyle{plain}
\geometry{a4paper, scale=0.8}
% 定义书写语法时的listings style
\lstdefinestyle{grammar}{
basicstyle=\ttfamily,
breaklines=true,
mathescape=true,
morekeywords={ProgramStart, ProgramStruct, ProgramHead, ProgramBody, IdList, ConstDeclarations, ConstDeclaration, ConstValue, VarDeclarations, VarDeclaration, Type, BasicType, BasicType, Period, SubprogramDeclarations, Subprogram, SubprogramHead, FormalParameter, ParameterList, Parameter, VarParameter, ValueParameter, SubprogramBody, CompoundStatement, StatementList, Statement, Variable, IdVarPart, ProcedureCall, ElsePart, ExpressionList, Expression, SimpleExpression, Term, Factor, AddOperator, MultiplyOperator, RelationOperator},
keywordstyle=\bfseries\color{NavyBlue},
emphstyle={\bfseries\color{Rhodamine}},
emph={program, id, num, true, false,var, array, of, integer, real, boolean, char, digits, procedure, function, begin, end, assignOp, if, then, for, to ,do ,while, else, not, or, div, mod, and, const, letter}
}
% 定义书写C#时的listings style
\lstdefinestyle{csharp}{
language=[sharp]c,
breaklines=true,
basicstyle=\ttfamily,
keywordstyle=\bfseries\color{violet},
emphstyle=\bfseries\color{blue},
morekeywords={required, get, set, init},
showstringspaces=false,
}
% 定义书写C时的listings style
\lstdefinestyle{c}{
language=c,
breaklines=true,
basicstyle=\ttfamily,
keywordstyle=\bfseries\color{blue},
showstringspaces=false,
}
% 定义书写Pascal时的listings style
\lstdefinestyle{pascal}{
language=Pascal,
breaklines=true,
basicstyle=\ttfamily,
keywordstyle=\bfseries\color{violet},
emphstyle=\bfseries\color{blue},
}
\begin{document}
\begin{titlepage}
% 标题
\begin{center}
\Huge{\textbf{\quad\quad\quad\quad\quad}}
\vspace{2em}
\Large{\textbf{《编译原理与技术课程设计》}}
\Large{\textbf{\qquad}}
\vspace{4em}
\large{指导教师: \underline{\makebox[8em][c]{王雅文}}}
\end{center}
\vspace{6em}
% 个人信息
\begin{table}[h]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
\textbf{姓名} & \textbf{班级} & \textbf{学号} & \textbf{备注} \\
\hline
张弈纶 & 2021211308 & 2021211177 & \\
\hline
兰建国 & 2021211308 & 2021211179 & \\
\hline
任昌骏 & 2021211308 & 2021211180 & 组长 \\
\hline
肖可扬 & 2021211308 & 2021211186 & \\
\hline
杜含韵 & 2021211308 & 2021211188 & \\
\hline
陈劲淞 & 2020219308 & 2018211608 & \\
\hline
\end{tabular}
\end{table}
% 封底
\vspace{8em}
\begin{center}
\Large{\textbf{计算机学院(国家示范性软件学院)}}
\Large{2024年5月}
\end{center}
\end{titlepage}
\clearpage
% 目录
% 目录的页码和正文的页码不一致
\pagenumbering{Roman}
\setcounter{page}{1}
\tableofcontents
\clearpage
\setcounter{page}{1}
\pagenumbering{arabic}
\begin{center}
\Large{\textbf{Pascal-S 语言编译程序的设计与实现}}
\end{center}
\subfile{contents/tasks-and-objectives}
\subfile{contents/requirements-analysis}
\subfile{contents/general-design}
\subfile{contents/detailed-design}
\subfile{contents/source}
\subfile{contents/program-test}
\subfile{contents/summary}
\clearpage
\bibliographystyle{unsrt}
\bibliography{ref}
\end{document}

20
docs/ref.bib Normal file
View File

@ -0,0 +1,20 @@
@incollection{goos_improving_2002,
location = {Berlin, Heidelberg},
title = {Improving Walker's Algorithm to Run in Linear Time},
volume = {2528},
isbn = {978-3-540-00158-4 978-3-540-36151-0},
url = {http://link.springer.com/10.1007/3-540-36151-0_32},
abstract = {The algorithm of Walker [5] is widely used for drawing trees of unbounded degree, and it is widely assumed to run in linear time, as the author claims in his article. But the presented algorithm clearly needs quadratic runtime. We explain the reasons for that and present a revised algorithm that creates the same layouts in linear time.},
pages = {344--353},
booktitle = {Graph Drawing},
publisher = {Springer Berlin Heidelberg},
author = {Buchheim, Christoph and Jünger, Michael and Leipert, Sebastian},
editor = {Goodrich, Michael T. and Kobourov, Stephen G.},
editorb = {Goos, Gerhard and Hartmanis, Juris and Van Leeuwen, Jan},
editorbtype = {redactor},
urldate = {2024-04-14},
date = {2002},
langid = {english},
doi = {10.1007/3-540-36151-0_32},
note = {Series Title: Lecture Notes in Computer Science},
}