From 11a1b2bee2416b3702eba1727db74806240a30b1 Mon Sep 17 00:00:00 2001 From: Arihiro Yoshida Date: Sun, 21 Apr 2024 22:21:30 +0900 Subject: [PATCH] Add a functionality to import external PEG files --- LICENSE | 2 +- README.md | 121 +++-- import/char/ascii_character_group.peg | 35 ++ import/char/unicode_general_category.peg | 54 +++ misc/README.md | 22 + misc/unicode_general_category.py | 136 ++++++ src/packcc.c | 412 +++++++++++++---- tests/import.d/.gitignore | 2 + tests/import.d/check_line_number.py | 76 ++++ tests/import.d/import.bats | 543 +++++++++++++++++++++++ tests/import.d/make_input.py | 48 ++ tests/import.d/reference.peg | 526 ++++++++++++++++++++++ tests/import.d/template.peg | 37 ++ 13 files changed, 1897 insertions(+), 117 deletions(-) create mode 100644 import/char/ascii_character_group.peg create mode 100644 import/char/unicode_general_category.peg create mode 100644 misc/README.md create mode 100644 misc/unicode_general_category.py create mode 100644 tests/import.d/.gitignore create mode 100644 tests/import.d/check_line_number.py create mode 100644 tests/import.d/import.bats create mode 100644 tests/import.d/make_input.py create mode 100644 tests/import.d/reference.peg create mode 100644 tests/import.d/template.peg diff --git a/LICENSE b/LICENSE index ddde94f..1222340 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ PackCC: a packrat parser generator for C. -Copyright (c) 2014, 2019-2022 Arihiro Yoshida. All rights reserved. +Copyright (c) 2014, 2019-2024 Arihiro Yoshida. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 7510d17..0103cec 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# PackCC # +# PackCC -## Overview ## +## Overview **PackCC** is a parser generator for C. 
Its main features are as follows: @@ -41,14 +41,14 @@ This feature is irrelevant to common users, but helpful for PackCC developers to PackCC itself is under MIT license, but you can distribute your generated code under any license you like. -## Installation ## +## Installation You can obtain the executable `packcc` by compiling [`src/packcc.c`](src/packcc.c) using your favorite C compiler. For convenience, the build environments using GCC, Clang, and Microsoft Visual Studio are prepared under [`build`](build) directory. -### Using GCC ### +### Using GCC -#### Other than MinGW #### +#### Other than MinGW `packcc` will be built in both directories `build/gcc/debug/bin` and `build/gcc/release/bin` using `gcc` by executing the following commands: @@ -60,7 +60,7 @@ make check # bats-core and uncrustify are required (see tests/README.md) `packcc` in the directory `build/gcc/release/bin` is suitable for practical use. -#### MinGW #### +#### MinGW `packcc` will be built in both directories `build/mingw-gcc/debug/bin` and `build/mingw-gcc/release/bin` using `gcc` by executing the following commands: @@ -72,9 +72,9 @@ make check # bats-core and uncrustify are required (see tests/README.md) `packcc` in the directory `build/mingw-gcc/release/bin` is suitable for practical use. -### Using Clang ### +### Using Clang -#### Other than MinGW #### +#### Other than MinGW `packcc` will be built in both directories `build/clang/debug/bin` and `build/clang/release/bin` using `clang` by executing the following commands: @@ -86,7 +86,7 @@ make check # bats-core and uncrustify are required (see tests/README.md) `packcc` in the directory `build/clang/release/bin` is suitable for practical use. 
-#### MinGW #### +#### MinGW `packcc` will be built in both directories `build/mingw-clang/debug/bin` and `build/mingw-clang/release/bin` using `clang` by executing the following commands: @@ -98,10 +98,11 @@ make check # bats-core and uncrustify are required (see tests/README.md) `packcc` in the directory `build/mingw-clang/release/bin` is suitable for practical use. -### Using Microsoft Visual Studio ### +### Using Microsoft Visual Studio You have to install Microsoft Visual Studio 2019 in advance. After that, you can build `packcc.exe` by the following instructions: + - Open the solution file `build\msvc\msvc.sln`, - Select a preferred solution configuration (*Debug* or *Release*) and a preferred solution platform (*x64* or *x86*), - Invoke the *Build Solution* menu item. @@ -110,12 +111,13 @@ After that, you can build `packcc.exe` by the following instructions: Here, `XXX` is `x64` or `x86`, and `YYY` is `Debug` or `Release`. `packcc.exe` in the directory `build\msvc\XXX\Release` is suitable for practical use. -## Usage ## +## Usage -### Command ### +### Command -You must prepare a PEG source file (see the following section). -Let the file name `example.peg` for example. +You must prepare a PEG source file in advance. +For details of the PEG source syntax, see the section "Syntax". +Here, assume that the file name is `example.peg`, for example. ``` packcc example.peg @@ -123,7 +125,7 @@ packcc example.peg By running this, the parser source `example.h` and `example.c` are generated. -If no PEG file name is specified, the PEG source is read from the standard input, and `-.h` and `-.c` are generated. +If no PEG file name is specified, the PEG source is read from the standard input, and `-.h` and `-.c` will be generated. The base name of the parser source files can be changed by `-o` option. @@ -132,6 +134,19 @@ packcc -o parser example.peg ``` By running this, the parser source `parser.h` and `parser.c` are generated. +This option can be specified only once. 
+ +A directory to search for import files can be added with the `-I` option (version 2.0.0 or later). +This option can be specified as many times as needed. +The directory specified first is searched first, the directory specified second is searched next, and so on. + +``` +packcc -I foo -I bar/baz example.peg +``` + +By running this, the directory `foo` is searched first, and the directory `bar/baz` is searched next. +The directories specified by this option have higher priority than those specified in the environment variable `PCC_IMPORT_PATH` and the default directories. +For more details on importing, see the explanation of `%import` in the section "Syntax". If you want to disable UTF-8 support, specify the command line option `-a` or `--ascii` (version 1.4.0 or later). @@ -144,7 +159,7 @@ If you want to confirm the version of the `packcc` command, execute the below. packcc -v ``` -### Syntax ### +### Syntax A grammar consists of a set of named rules. A rule definition can be split into multiple lines. @@ -317,37 +332,37 @@ All matched actions are guaranteed to be executed only once. In the action, the C source code can use the predefined variables below. -- **`$$`** +- **`$$`** : The output variable, to which the result of the rule is stored. The data type is the one specified by `%value`. The default data type is `int`. -- **`auxil`** +- **`auxil`** : The user-defined data that has been given via the API function `pcc_create()`. The data type is the one specified by `%auxil`. The default data type is `void *`. -- _variable_ +- _variable_ : The result of another rule that has already been evaluated. If the rule has not been evaluated, it is ensured that the value is zero-cleared (version 1.7.1 or later). The data type is the one specified by `%value`. The default data type is `int`. -- **`$`**_n_ +- **`$`**_n_ : The string of the captured text. The _n_ is the positive integer that corresponds to the order of capturing. 
The variable `$1` holds the string of the first captured text. -- **`$`**_n_**`s`** +- **`$`**_n_**`s`** : The start position in the input of the captured text, inclusive. The _n_ is the positive integer that corresponds to the order of capturing. The variable `$1s` holds the start position of the first captured text. -- **`$`**_n_**`e`** +- **`$`**_n_**`e`** : The end position in the input of the captured text, exclusive. The _n_ is the positive integer that corresponds to the order of capturing. The variable `$1e` holds the end position of the first captured text. -- **`$0`** +- **`$0`** : The string of the text between the start position in the input at which the rule pattern begins to match and the current position in the input at which the element immediately before the action ends to match. -- **`$0s`** +- **`$0s`** : The start position in the input at which the rule pattern begins to match. -- **`$0e`** +- **`$0e`** : The current position in the input at which the element immediately before the action ends to match. An example is shown below. @@ -390,17 +405,20 @@ rule2 <- (e1 e2 e3) ~{ error("one of e[123] has failed"); } The specified C source code is copied verbatim to the C header file before the generated parser API function declarations. Any braces in the C source code must be properly nested. Note that braces in directive lines and in comments (`/*`...`*/` and `//`...) are appropriately ignored. +When `%header` is used multiple times, the respective C source codes are copied in order of their appearance. **`%source` `{` _c source code_ `}`** The specified C source code is copied verbatim to the C source file before the generated parser implementation code. Any braces in the C source code must be properly nested. Note that braces in directive lines and in comments (`/*`...`*/` and `//`...) are appropriately ignored. +When `%source` is used multiple times, the respective C source codes are copied in order of their appearance. 
**`%common` `{` _c source code_ `}`** The specified C source code is copied verbatim to both of the C header file and the C source file before the generated parser API function declarations and the implementation code respectively. +This has the same effect as `%header {` _c source code_ `} %source {` _c source code_ `}`. Any braces in the C source code must be properly nested. Note that braces in directive lines and in comments (`/*`...`*/` and `//`...) are appropriately ignored. @@ -419,15 +437,42 @@ This can be useful for example when it is necessary to modify behavior of standa The type of output data, which is output as `$$` in each action and can be retrieved from the parser API function `pcc_parse()`, is changed to the specified one from the default `int`. +This can be used only once and cannot be used in imported files. **`%auxil` `"`_user-defined data type_`"`** The type of user-defined data, which is passed to the parser API function `pcc_create()`, is changed to the specified one from the default `void *`. +This can be used only once and cannot be used in imported files. **`%prefix` `"`_prefix_`"`** The prefix of the parser API functions is changed to the specified one from the default `pcc`. +This can be used only once and cannot be used in imported files. + +**`%import` `"`_import file name_`"`** + +The content of the specified import file is expanded at the text location of `%import` (version 2.0.0 or later). +This can be used multiple times anywhere and can be used also in imported files. +The _import file name_ can be a relative path to the current directory or an absolute path. +If it is a relative path, the directories listed below are searched for the import file in the listed order. + +1. the directory where the file that imports the import file is located +2. the directories specified with `-I` options + - They are prioritized in order of their appearance in the command line. +3. 
the directories specified by the environment variable `PCC_IMPORT_PATH` + - They are prioritized in order of their appearance in the value of this variable. + - The character used as a delimiter between directory names is the colon `':'` if PackCC is built for a Unix-like platform such as Linux, macOS, and MinGW. + The character is the semicolon `';'` if PackCC is built as a native Windows executable. + (This is exactly the same convention as that of the environment variable `PATH`.) +4. the per-user default directory + - This is the subdirectory `.packcc/import` in the home directory if PackCC is built for a Unix-like platform, + and in the user profile directory, "`C:\Users\`_username_" for example, if PackCC is built as a native Windows executable. +5. the system-wide default directory + - This is the directory `/usr/share/packcc/import` if PackCC is built for a Unix-like platform, + and is the subdirectory `packcc/import` in the common application data directory, "`C:\ProgramData`" for example, if PackCC is built as a native Windows executable. + +Note that a file that has already been imported is silently ignored when an attempt is made to import it again. **`#`_comment_** @@ -440,7 +485,16 @@ All text following `%%` is copied verbatim to the C source file after the genera (The specification is determined by referring to [peg/leg](http://piumarta.com/software/peg/) developed by Ian Piumarta.) -### Macros ### +### Import Files + +The following import files are currently bundled. + +- [`import/char/ascii_character_group.peg`](import/char/ascii_character_group.peg) : + This contains various rules to match an ASCII character belonging to a specific character group. +- [`import/char/unicode_general_category.peg`](import/char/unicode_general_category.peg) : + This contains various rules to match a Unicode character belonging to a specific [general category](https://unicode.org/reports/tr44/#General_Category_Values). + +### Macros Some macros are prepared to customize the parser. 
The macro definition should be in `%source` section in the PEG source. @@ -560,9 +614,10 @@ For other events, `buffer` and `length` indicate a part of the currently loaded The user-defined data passed to the API function `pcc_create()` can be retrieved from this argument. There are currently three supported events: - - `PCC_DBG_EVALUATE` (= 0) - called when the parser starts to evaluate `rule` - - `PCC_DBG_MATCH` (= 1) - called when `rule` is matched, at which point buffer holds entire matched string - - `PCC_DBG_NOMATCH` (= 2) - called when the parser determines that the input does not match currently evaluated `rule` + +- `PCC_DBG_EVALUATE` (= 0) - called when the parser starts to evaluate `rule` +- `PCC_DBG_MATCH` (= 1) - called when `rule` is matched, at which point buffer holds entire matched string +- `PCC_DBG_NOMATCH` (= 2) - called when the parser determines that the input does not match currently evaluated `rule` A very simple implementation could look like this: @@ -590,7 +645,7 @@ The initial size (the number of elements) of the internal arrays other than the The arrays are expanded as needed. The default is `2`. -### API ### +### API The parser API has only 3 simple functions below. @@ -653,9 +708,9 @@ while (pcc_parse(ctx, &ret)); pcc_destroy(ctx); ``` -## Examples ## +## Examples -### Desktop calculator ### +### Desktop calculator A simple example which provides interactive four arithmetic operations of integers is shown here. Note that **left-recursive** grammar rules are defined in this example. @@ -700,7 +755,7 @@ int main() { } ``` -### AST builder for Tiny-C ### +### AST builder for Tiny-C You can find the more practical example in the directory [`examples/ast-tinyc`](examples/ast-tinyc). 
It builds an AST (abstract syntax tree) from an input source file diff --git a/import/char/ascii_character_group.peg b/import/char/ascii_character_group.peg new file mode 100644 index 0000000..0347740 --- /dev/null +++ b/import/char/ascii_character_group.peg @@ -0,0 +1,35 @@ +# This file is hereby placed in the public domain. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AS IS AND ANY EXPRESS +# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ASCII_Printable_Character <- ASCII_Special_Character / ASCII_Number / ASCII_Letter +ASCII_Letter <- ASCII_Uppercase_Letter / ASCII_Lowercase_Letter + +ASCII_Control_Character <- [\x00-\x1f\x7f] +ASCII_Special_Character <- [\x20-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e] +ASCII_Number <- [0-9] +ASCII_Uppercase_Letter <- [A-Z] +ASCII_Lowercase_Letter <- [a-z] + +ASCII_C_alnum <- [0-9A-Za-z] +ASCII_C_alpha <- [A-Za-z] +ASCII_C_blank <- [ \t] +ASCII_C_cntrl <- [\x00-\x1f\x7f] +ASCII_C_digit <- [0-9] +ASCII_C_graph <- [\x21-\x7e] +ASCII_C_lower <- [a-z] +ASCII_C_print <- [\x20-\x7e] +ASCII_C_punct <- [\x21-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e] +ASCII_C_space <- [ \t\n\v\f\r] +ASCII_C_upper <- [A-Z] +ASCII_C_xdigit <- [0-9A-Fa-f] diff --git a/import/char/unicode_general_category.peg b/import/char/unicode_general_category.peg new file mode 100644 index 0000000..34461b7 --- /dev/null +++ b/import/char/unicode_general_category.peg @@ -0,0 +1,54 @@ +# This file was generated using the script 'unicode_general_category.py'. + +# This file is hereby placed in the public domain. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AS IS AND ANY EXPRESS +# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Unicode_Letter <- Unicode_Cased_Letter / Unicode_Modifier_Letter / Unicode_Other_Letter +Unicode_Cased_Letter <- Unicode_Uppercase_Letter / Unicode_Lowercase_Letter / Unicode_Titlecase_Letter +Unicode_Mark <- Unicode_Nonspacing_Mark / Unicode_Spacing_Mark / Unicode_Enclosing_Mark +Unicode_Number <- Unicode_Decimal_Number / Unicode_Letter_Number / Unicode_Other_Number +Unicode_Punctuation <- Unicode_Connector_Punctuation / Unicode_Dash_Punctuation / Unicode_Open_Punctuation / Unicode_Close_Punctuation / Unicode_Initial_Punctuation / Unicode_Final_Punctuation / Unicode_Other_Punctuation +Unicode_Symbol <- Unicode_Math_Symbol / Unicode_Currency_Symbol / Unicode_Modifier_Symbol / Unicode_Other_Symbol +Unicode_Separator <- Unicode_Space_Separator / Unicode_Line_Separator / Unicode_Paragraph_Separator +Unicode_Other <- Unicode_Control / Unicode_Format / Unicode_Surrogate / Unicode_Private_Use + +Unicode_Uppercase_Letter <- [\u0041-\u005A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A-\u023B\u023D-\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u0370\u0372\u0376\u037F\u0386\u0388-\u038A\u038C\u038E-\u038
F\u0391-\u03A1\u03A3-\u03AB\u03CF\u03D2-\u03D4\u03D8\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F4\u03F7\u03F9-\u03FA\u03FD-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048A\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C0-\u04C1\u04C3\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F6\u04F8\u04FA\u04FC\u04FE\u0500\u0502\u0504\u0506\u0508\u050A\u050C\u050E\u0510\u0512\u0514\u0516\u0518\u051A\u051C\u051E\u0520\u0522\u0524\u0526\u0528\u052A\u052C\u052E\u0531-\u0556\u10A0-\u10C5\u10C7\u10CD\u13A0-\u13F5\u1C90-\u1CBA\u1CBD-\u1CBF\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFE\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1FB8-\u1FBB\u1FC8-\u1FCB\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFB\u2102\u2107\u210B-\u210D\u2110-\u2112\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u2130-\u2133\u213E-\u213F\u2145\u2183\u2C00-\u2C2F\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E-\u2C80\u2C82\u2C84\u2C86\u2C88\u2C8A\u2C8C\u2C8
E\u2C90\u2C92\u2C94\u2C96\u2C98\u2C9A\u2C9C\u2C9E\u2CA0\u2CA2\u2CA4\u2CA6\u2CA8\u2CAA\u2CAC\u2CAE\u2CB0\u2CB2\u2CB4\u2CB6\u2CB8\u2CBA\u2CBC\u2CBE\u2CC0\u2CC2\u2CC4\u2CC6\u2CC8\u2CCA\u2CCC\u2CCE\u2CD0\u2CD2\u2CD4\u2CD6\u2CD8\u2CDA\u2CDC\u2CDE\u2CE0\u2CE2\u2CEB\u2CED\u2CF2\uA640\uA642\uA644\uA646\uA648\uA64A\uA64C\uA64E\uA650\uA652\uA654\uA656\uA658\uA65A\uA65C\uA65E\uA660\uA662\uA664\uA666\uA668\uA66A\uA66C\uA680\uA682\uA684\uA686\uA688\uA68A\uA68C\uA68E\uA690\uA692\uA694\uA696\uA698\uA69A\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D-\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\uA7BA\uA7BC\uA7BE\uA7C0\uA7C2\uA7C4-\uA7C7\uA7C9\uA7D0\uA7D6\uA7D8\uA7F5\uFF21-\uFF3A\u10400-\u10427\u104B0-\u104D3\u10570-\u1057A\u1057C-\u1058A\u1058C-\u10592\u10594-\u10595\u10C80-\u10CB2\u118A0-\u118BF\u16E40-\u16E5F\u1D400-\u1D419\u1D434-\u1D44D\u1D468-\u1D481\u1D49C\u1D49E-\u1D49F\u1D4A2\u1D4A5-\u1D4A6\u1D4A9-\u1D4AC\u1D4AE-\u1D4B5\u1D4D0-\u1D4E9\u1D504-\u1D505\u1D507-\u1D50A\u1D50D-\u1D514\u1D516-\u1D51C\u1D538-\u1D539\u1D53B-\u1D53E\u1D540-\u1D544\u1D546\u1D54A-\u1D550\u1D56C-\u1D585\u1D5A0-\u1D5B9\u1D5D4-\u1D5ED\u1D608-\u1D621\u1D63C-\u1D655\u1D670-\u1D689\u1D6A8-\u1D6C0\u1D6E2-\u1D6FA\u1D71C-\u1D734\u1D756-\u1D76E\u1D790-\u1D7A8\u1D7CA\u1E900-\u1E921] +Unicode_Lowercase_Letter <- 
[\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0560-\u0588\u10D0-\u10FA\u10FD-\u10FF\u13F8-\u13FD\u1C80-\u1C88\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E
23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5F\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA
7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7BB\uA7BD\uA7BF\uA7C1\uA7C3\uA7C8\uA7CA\uA7D1\uA7D3\uA7D5\uA7D7\uA7D9\uA7F6\uA7FA\uAB30-\uAB5A\uAB60-\uAB68\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A\u10428-\u1044F\u104D8-\u104FB\u10597-\u105A1\u105A3-\u105B1\u105B3-\u105B9\u105BB-\u105BC\u10CC0-\u10CF2\u118C0-\u118DF\u16E60-\u16E7F\u1D41A-\u1D433\u1D44E-\u1D454\u1D456-\u1D467\u1D482-\u1D49B\u1D4B6-\u1D4B9\u1D4BB\u1D4BD-\u1D4C3\u1D4C5-\u1D4CF\u1D4EA-\u1D503\u1D51E-\u1D537\u1D552-\u1D56B\u1D586-\u1D59F\u1D5BA-\u1D5D3\u1D5EE-\u1D607\u1D622-\u1D63B\u1D656-\u1D66F\u1D68A-\u1D6A5\u1D6C2-\u1D6DA\u1D6DC-\u1D6E1\u1D6FC-\u1D714\u1D716-\u1D71B\u1D736-\u1D74E\u1D750-\u1D755\u1D770-\u1D788\u1D78A-\u1D78F\u1D7AA-\u1D7C2\u1D7C4-\u1D7C9\u1D7CB\u1DF00-\u1DF09\u1DF0B-\u1DF1E\u1E922-\u1E943] +Unicode_Titlecase_Letter <- [\u01C5\u01C8\u01CB\u01F2\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FBC\u1FCC\u1FFC] +Unicode_Modifier_Letter <- [\u02B0-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0374\u037A\u0559\u0640\u06E5-\u06E6\u07F4-\u07F5\u07FA\u081A\u0824\u0828\u08C9\u0971\u0E46\u0EC6\u10FC\u17D7\u1843\u1AA7\u1C78-\u1C7D\u1D2C-\u1D6A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\u2D6F\u2E2F\u3005\u3031-\u3035\u303B\u309D-\u309E\u30FC-\u30FE\uA015\uA4F8-\uA4FD\uA60C\uA67F\uA69C-\uA69D\uA717-\uA71F\uA770\uA788\uA7F2-\uA7F4\uA7F8-\uA7F9\uA9CF\uA9E6\uAA70\uAADD\uAAF3-\uAAF4\uAB5C-\uAB5F\uAB69\uFF70\uFF9E-\uFF9F\u10780-\u10785\u10787-\u107B0\u107B2-\u107BA\u16B40-\u16B43\u16F93-\u16F9F\u16FE0-\u16FE1\u16FE3\u1AFF0-\u1AFF3\u1AFF5-\u1AFFB\u1AFFD-\u1AFFE\u1E137-\u1E13D\u1E94B] +Unicode_Other_Letter <- 
[\u00AA\u00BA\u01BB\u01C0-\u01C3\u0294\u05D0-\u05EA\u05EF-\u05F2\u0620-\u063F\u0641-\u064A\u066E-\u066F\u0671-\u06D3\u06D5\u06EE-\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u0800-\u0815\u0840-\u0858\u0860-\u086A\u0870-\u0887\u0889-\u088E\u08A0-\u08C8\u0904-\u0939\u093D\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u09FC\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0-\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C5D\u0C60-\u0C61\u0C80\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDD-\u0CDE\u0CE0-\u0CE1\u0CF1-\u0CF2\u0D04-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D54-\u0D56\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065-\u1066\u106E-\u1070\u1075-\u1081\u108E\u1100-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u1711\u171F-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17DC\u1820-\u1842\u1844-\u1878\u1880-\u1884\u1887-\u18A8\u18AA\u18B0-\u18
F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1B05-\u1B33\u1B45-\u1B4C\u1B83-\u1BA0\u1BAE-\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C77\u1CE9-\u1CEC\u1CEE-\u1CF3\u1CF5-\u1CF6\u1CFA\u2135-\u2138\u2D30-\u2D67\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u3006\u303C\u3041-\u3096\u309F\u30A1-\u30FA\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BF\u31F0-\u31FF\u3400\u4DBF\u4E00\u9FFF-\uA014\uA016-\uA48C\uA4D0-\uA4F7\uA500-\uA60B\uA610-\uA61F\uA62A-\uA62B\uA66E\uA6A0-\uA6E5\uA78F\uA7F7\uA7FB-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD-\uA8FE\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9E0-\uA9E4\uA9E7-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA6F\uAA71-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5-\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADC\uAAE0-\uAAEA\uAAF2\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF66-\uFF6F\uFF71-\uFF9D\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\u10000-\u1000B\u1000D-\u10026\u10028-\u1003A\u1003C-\u1003D\u1003F-\u1004D\u10050-\u1005D\u10080-\u100FA\u10280-\u1029C\u102A0-\u102D0\u10300-\u1031F\u1032D-\u10340\u10342-\u10349\u10350-\u10375\u10380-\u1039D\u103A0-\u103C3\u103C8-\u103CF\u10450-\u1049D\u10500-\u10527\u10530-\u10563\u10600-\u10736\u10740-\u10755\u10760-\u10767\u10800-\u10805\u10808\u1080A-\u10835\u10837-\u10838\u1083C\u1083F-\u10855\u10860-\u10876\u10880-\u1089E\u108E0-\u108F2\u108F4-\u108F5\u10900-\u10915\u10920-\u10939\u10980-\u109B7\u109BE-\u109BF\u10A00\u10A10-\u10A13\u10A15-\u10A17\u10A19-\u10A35\u10A60-\u10A7C\u10A80-\u10A9C\u10AC0-\u10AC7\u1
0AC9-\u10AE4\u10B00-\u10B35\u10B40-\u10B55\u10B60-\u10B72\u10B80-\u10B91\u10C00-\u10C48\u10D00-\u10D23\u10E80-\u10EA9\u10EB0-\u10EB1\u10F00-\u10F1C\u10F27\u10F30-\u10F45\u10F70-\u10F81\u10FB0-\u10FC4\u10FE0-\u10FF6\u11003-\u11037\u11071-\u11072\u11075\u11083-\u110AF\u110D0-\u110E8\u11103-\u11126\u11144\u11147\u11150-\u11172\u11176\u11183-\u111B2\u111C1-\u111C4\u111DA\u111DC\u11200-\u11211\u11213-\u1122B\u11280-\u11286\u11288\u1128A-\u1128D\u1128F-\u1129D\u1129F-\u112A8\u112B0-\u112DE\u11305-\u1130C\u1130F-\u11310\u11313-\u11328\u1132A-\u11330\u11332-\u11333\u11335-\u11339\u1133D\u11350\u1135D-\u11361\u11400-\u11434\u11447-\u1144A\u1145F-\u11461\u11480-\u114AF\u114C4-\u114C5\u114C7\u11580-\u115AE\u115D8-\u115DB\u11600-\u1162F\u11644\u11680-\u116AA\u116B8\u11700-\u1171A\u11740-\u11746\u11800-\u1182B\u118FF-\u11906\u11909\u1190C-\u11913\u11915-\u11916\u11918-\u1192F\u1193F\u11941\u119A0-\u119A7\u119AA-\u119D0\u119E1\u119E3\u11A00\u11A0B-\u11A32\u11A3A\u11A50\u11A5C-\u11A89\u11A9D\u11AB0-\u11AF8\u11C00-\u11C08\u11C0A-\u11C2E\u11C40\u11C72-\u11C8F\u11D00-\u11D06\u11D08-\u11D09\u11D0B-\u11D30\u11D46\u11D60-\u11D65\u11D67-\u11D68\u11D6A-\u11D89\u11D98\u11EE0-\u11EF2\u11FB0\u12000-\u12399\u12480-\u12543\u12F90-\u12FF0\u13000-\u1342E\u14400-\u14646\u16800-\u16A38\u16A40-\u16A5E\u16A70-\u16ABE\u16AD0-\u16AED\u16B00-\u16B2F\u16B63-\u16B77\u16B7D-\u16B8F\u16F00-\u16F4A\u16F50\u17000\u187F7\u18800-\u18CD5\u18D00\u18D08\u1B000-\u1B122\u1B150-\u1B152\u1B164-\u1B167\u1B170-\u1B2FB\u1BC00-\u1BC6A\u1BC70-\u1BC7C\u1BC80-\u1BC88\u1BC90-\u1BC99\u1DF0A\u1E100-\u1E12C\u1E14E\u1E290-\u1E2AD\u1E2C0-\u1E2EB\u1E7E0-\u1E7E6\u1E7E8-\u1E7EB\u1E7ED-\u1E7EE\u1E7F0-\u1E7FE\u1E800-\u1E8C4\u1EE00-\u1EE03\u1EE05-\u1EE1F\u1EE21-\u1EE22\u1EE24\u1EE27\u1EE29-\u1EE32\u1EE34-\u1EE37\u1EE39\u1EE3B\u1EE42\u1EE47\u1EE49\u1EE4B\u1EE4D-\u1EE4F\u1EE51-\u1EE52\u1EE54\u1EE57\u1EE59\u1EE5B\u1EE5D\u1EE5F\u1EE61-\u1EE62\u1EE64\u1EE67-\u1EE6A\u1EE6C-\u1EE72\u1EE74-\u1EE77\u1EE79-\u1EE7C\u1EE7E\u1EE80-\u1EE89\u1EE8B-\u
1EE9B\u1EEA1-\u1EEA3\u1EEA5-\u1EEA9\u1EEAB-\u1EEBB\u20000\u2A6DF\u2A700\u2B738\u2B740\u2B81D\u2B820\u2CEA1\u2CEB0\u2EBE0\u2F800-\u2FA1D\u30000\u3134A] +Unicode_Nonspacing_Mark <- [\u0300-\u036F\u0483-\u0487\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u0898-\u089F\u08CA-\u08E1\u08E3-\u0902\u093A\u093C\u0941-\u0948\u094D\u0951-\u0957\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u09FE\u0A01-\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A51\u0A70-\u0A71\u0A75\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0AE2-\u0AE3\u0AFA-\u0AFF\u0B01\u0B3C\u0B3F\u0B41-\u0B44\u0B4D\u0B55-\u0B56\u0B62-\u0B63\u0B82\u0BC0\u0BCD\u0C00\u0C04\u0C3C\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C62-\u0C63\u0C81\u0CBC\u0CBF\u0CC6\u0CCC-\u0CCD\u0CE2-\u0CE3\u0D00-\u0D01\u0D3B-\u0D3C\u0D41-\u0D44\u0D4D\u0D62-\u0D63\u0D81\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032-\u1037\u1039-\u103A\u103D-\u103E\u1058-\u1059\u105E-\u1060\u1071-\u1074\u1082\u1085-\u1086\u108D\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1733\u1752-\u1753\u1772-\u1773\u17B4-\u17B5\u17B7-\u17BD\u17C6\u17C9-\u17D3\u17DD\u180B-\u180D\u180F\u1885-\u1886\u18A9\u1920-\u1922\u1927-\u1928\u1932\u1939-\u193B\u1A17-\u1A18\u1A1B\u1A56\u1A58-\u1A5E\u1A60\u1A62\u1A65-\u1A6C\u1A73-\u1A7C\u1A7F\u1AB0-\u1ABD\u1ABF-\u1ACE\u1B00-\u1B03\u1B34\u1B36-\u1B3A\u1B3C\u1B42\u1B6B-\u1B73\u1B80-\u1B81\u1BA2-\u1BA5\u1BA8-\u1BA9\u1BAB-\u1BAD\u1BE6\u1BE8-\u1BE9\u1BED\u1BEF-\u1BF1\u1C2C-\u1C33\u1C36-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE0\u1CE2-\u1CE8\u1CED\u1CF4\u1CF8-\u1CF9\u1DC0-\u1DFF\u20D0-\u20DC\u20E1\u20E5-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302D\u3099-\u309
A\uA66F\uA674-\uA67D\uA69E-\uA69F\uA6F0-\uA6F1\uA802\uA806\uA80B\uA825-\uA826\uA82C\uA8C4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA951\uA980-\uA982\uA9B3\uA9B6-\uA9B9\uA9BC-\uA9BD\uA9E5\uAA29-\uAA2E\uAA31-\uAA32\uAA35-\uAA36\uAA43\uAA4C\uAA7C\uAAB0\uAAB2-\uAAB4\uAAB7-\uAAB8\uAABE-\uAABF\uAAC1\uAAEC-\uAAED\uAAF6\uABE5\uABE8\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\u101FD\u102E0\u10376-\u1037A\u10A01-\u10A03\u10A05-\u10A06\u10A0C-\u10A0F\u10A38-\u10A3A\u10A3F\u10AE5-\u10AE6\u10D24-\u10D27\u10EAB-\u10EAC\u10F46-\u10F50\u10F82-\u10F85\u11001\u11038-\u11046\u11070\u11073-\u11074\u1107F-\u11081\u110B3-\u110B6\u110B9-\u110BA\u110C2\u11100-\u11102\u11127-\u1112B\u1112D-\u11134\u11173\u11180-\u11181\u111B6-\u111BE\u111C9-\u111CC\u111CF\u1122F-\u11231\u11234\u11236-\u11237\u1123E\u112DF\u112E3-\u112EA\u11300-\u11301\u1133B-\u1133C\u11340\u11366-\u1136C\u11370-\u11374\u11438-\u1143F\u11442-\u11444\u11446\u1145E\u114B3-\u114B8\u114BA\u114BF-\u114C0\u114C2-\u114C3\u115B2-\u115B5\u115BC-\u115BD\u115BF-\u115C0\u115DC-\u115DD\u11633-\u1163A\u1163D\u1163F-\u11640\u116AB\u116AD\u116B0-\u116B5\u116B7\u1171D-\u1171F\u11722-\u11725\u11727-\u1172B\u1182F-\u11837\u11839-\u1183A\u1193B-\u1193C\u1193E\u11943\u119D4-\u119D7\u119DA-\u119DB\u119E0\u11A01-\u11A0A\u11A33-\u11A38\u11A3B-\u11A3E\u11A47\u11A51-\u11A56\u11A59-\u11A5B\u11A8A-\u11A96\u11A98-\u11A99\u11C30-\u11C36\u11C38-\u11C3D\u11C3F\u11C92-\u11CA7\u11CAA-\u11CB0\u11CB2-\u11CB3\u11CB5-\u11CB6\u11D31-\u11D36\u11D3A\u11D3C-\u11D3D\u11D3F-\u11D45\u11D47\u11D90-\u11D91\u11D95\u11D97\u11EF3-\u11EF4\u16AF0-\u16AF4\u16B30-\u16B36\u16F4F\u16F8F-\u16F92\u16FE4\u1BC9D-\u1BC9E\u1CF00-\u1CF2D\u1CF30-\u1CF46\u1D167-\u1D169\u1D17B-\u1D182\u1D185-\u1D18B\u1D1AA-\u1D1AD\u1D242-\u1D244\u1DA00-\u1DA36\u1DA3B-\u1DA6C\u1DA75\u1DA84\u1DA9B-\u1DA9F\u1DAA1-\u1DAAF\u1E000-\u1E006\u1E008-\u1E018\u1E01B-\u1E021\u1E023-\u1E024\u1E026-\u1E02A\u1E130-\u1E136\u1E2AE\u1E2EC-\u1E2EF\u1E8D0-\u1E8D6\u1E944-\u1E94A\uE0100-\uE01EF] +Unicode_Spacing_Mark <- 
[\u0903\u093B\u093E-\u0940\u0949-\u094C\u094E-\u094F\u0982-\u0983\u09BE-\u09C0\u09C7-\u09C8\u09CB-\u09CC\u09D7\u0A03\u0A3E-\u0A40\u0A83\u0ABE-\u0AC0\u0AC9\u0ACB-\u0ACC\u0B02-\u0B03\u0B3E\u0B40\u0B47-\u0B48\u0B4B-\u0B4C\u0B57\u0BBE-\u0BBF\u0BC1-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC\u0BD7\u0C01-\u0C03\u0C41-\u0C44\u0C82-\u0C83\u0CBE\u0CC0-\u0CC4\u0CC7-\u0CC8\u0CCA-\u0CCB\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D40\u0D46-\u0D48\u0D4A-\u0D4C\u0D57\u0D82-\u0D83\u0DCF-\u0DD1\u0DD8-\u0DDF\u0DF2-\u0DF3\u0F3E-\u0F3F\u0F7F\u102B-\u102C\u1031\u1038\u103B-\u103C\u1056-\u1057\u1062-\u1064\u1067-\u106D\u1083-\u1084\u1087-\u108C\u108F\u109A-\u109C\u1715\u1734\u17B6\u17BE-\u17C5\u17C7-\u17C8\u1923-\u1926\u1929-\u192B\u1930-\u1931\u1933-\u1938\u1A19-\u1A1A\u1A55\u1A57\u1A61\u1A63-\u1A64\u1A6D-\u1A72\u1B04\u1B35\u1B3B\u1B3D-\u1B41\u1B43-\u1B44\u1B82\u1BA1\u1BA6-\u1BA7\u1BAA\u1BE7\u1BEA-\u1BEC\u1BEE\u1BF2-\u1BF3\u1C24-\u1C2B\u1C34-\u1C35\u1CE1\u1CF7\u302E-\u302F\uA823-\uA824\uA827\uA880-\uA881\uA8B4-\uA8C3\uA952-\uA953\uA983\uA9B4-\uA9B5\uA9BA-\uA9BB\uA9BE-\uA9C0\uAA2F-\uAA30\uAA33-\uAA34\uAA4D\uAA7B\uAA7D\uAAEB\uAAEE-\uAAEF\uAAF5\uABE3-\uABE4\uABE6-\uABE7\uABE9-\uABEA\uABEC\u11000\u11002\u11082\u110B0-\u110B2\u110B7-\u110B8\u1112C\u11145-\u11146\u11182\u111B3-\u111B5\u111BF-\u111C0\u111CE\u1122C-\u1122E\u11232-\u11233\u11235\u112E0-\u112E2\u11302-\u11303\u1133E-\u1133F\u11341-\u11344\u11347-\u11348\u1134B-\u1134D\u11357\u11362-\u11363\u11435-\u11437\u11440-\u11441\u11445\u114B0-\u114B2\u114B9\u114BB-\u114BE\u114C1\u115AF-\u115B1\u115B8-\u115BB\u115BE\u11630-\u11632\u1163B-\u1163C\u1163E\u116AC\u116AE-\u116AF\u116B6\u11720-\u11721\u11726\u1182C-\u1182E\u11838\u11930-\u11935\u11937-\u11938\u1193D\u11940\u11942\u119D1-\u119D3\u119DC-\u119DF\u119E4\u11A39\u11A57-\u11A58\u11A97\u11C2F\u11C3E\u11CA9\u11CB1\u11CB4\u11D8A-\u11D8E\u11D93-\u11D94\u11D96\u11EF5-\u11EF6\u16F51-\u16F87\u16FF0-\u16FF1\u1D165-\u1D166\u1D16D-\u1D172] +Unicode_Enclosing_Mark <- 
[\u0488-\u0489\u1ABE\u20DD-\u20E0\u20E2-\u20E4\uA670-\uA672] +Unicode_Decimal_Number <- [\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19\u104A0-\u104A9\u10D30-\u10D39\u11066-\u1106F\u110F0-\u110F9\u11136-\u1113F\u111D0-\u111D9\u112F0-\u112F9\u11450-\u11459\u114D0-\u114D9\u11650-\u11659\u116C0-\u116C9\u11730-\u11739\u118E0-\u118E9\u11950-\u11959\u11C50-\u11C59\u11D50-\u11D59\u11DA0-\u11DA9\u16A60-\u16A69\u16AC0-\u16AC9\u16B50-\u16B59\u1D7CE-\u1D7FF\u1E140-\u1E149\u1E2F0-\u1E2F9\u1E950-\u1E959\u1FBF0-\u1FBF9] +Unicode_Letter_Number <- [\u16EE-\u16F0\u2160-\u2182\u2185-\u2188\u3007\u3021-\u3029\u3038-\u303A\uA6E6-\uA6EF\u10140-\u10174\u10341\u1034A\u103D1-\u103D5\u12400-\u1246E] +Unicode_Other_Number <- 
[\u00B2-\u00B3\u00B9\u00BC-\u00BE\u09F4-\u09F9\u0B72-\u0B77\u0BF0-\u0BF2\u0C78-\u0C7E\u0D58-\u0D5E\u0D70-\u0D78\u0F2A-\u0F33\u1369-\u137C\u17F0-\u17F9\u19DA\u2070\u2074-\u2079\u2080-\u2089\u2150-\u215F\u2189\u2460-\u249B\u24EA-\u24FF\u2776-\u2793\u2CFD\u3192-\u3195\u3220-\u3229\u3248-\u324F\u3251-\u325F\u3280-\u3289\u32B1-\u32BF\uA830-\uA835\u10107-\u10133\u10175-\u10178\u1018A-\u1018B\u102E1-\u102FB\u10320-\u10323\u10858-\u1085F\u10879-\u1087F\u108A7-\u108AF\u108FB-\u108FF\u10916-\u1091B\u109BC-\u109BD\u109C0-\u109CF\u109D2-\u109FF\u10A40-\u10A48\u10A7D-\u10A7E\u10A9D-\u10A9F\u10AEB-\u10AEF\u10B58-\u10B5F\u10B78-\u10B7F\u10BA9-\u10BAF\u10CFA-\u10CFF\u10E60-\u10E7E\u10F1D-\u10F26\u10F51-\u10F54\u10FC5-\u10FCB\u11052-\u11065\u111E1-\u111F4\u1173A-\u1173B\u118EA-\u118F2\u11C5A-\u11C6C\u11FC0-\u11FD4\u16B5B-\u16B61\u16E80-\u16E96\u1D2E0-\u1D2F3\u1D360-\u1D378\u1E8C7-\u1E8CF\u1EC71-\u1ECAB\u1ECAD-\u1ECAF\u1ECB1-\u1ECB4\u1ED01-\u1ED2D\u1ED2F-\u1ED3D\u1F100-\u1F10C] +Unicode_Connector_Punctuation <- [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F] +Unicode_Dash_Punctuation <- [\u002D\u058A\u05BE\u1400\u1806\u2010-\u2015\u2E17\u2E1A\u2E3A-\u2E3B\u2E40\u2E5D\u301C\u3030\u30A0\uFE31-\uFE32\uFE58\uFE63\uFF0D\u10EAD] +Unicode_Open_Punctuation <- [\u0028\u005B\u007B\u0F3A\u0F3C\u169B\u201A\u201E\u2045\u207D\u208D\u2308\u230A\u2329\u2768\u276A\u276C\u276E\u2770\u2772\u2774\u27C5\u27E6\u27E8\u27EA\u27EC\u27EE\u2983\u2985\u2987\u2989\u298B\u298D\u298F\u2991\u2993\u2995\u2997\u29D8\u29DA\u29FC\u2E22\u2E24\u2E26\u2E28\u2E42\u2E55\u2E57\u2E59\u2E5B\u3008\u300A\u300C\u300E\u3010\u3014\u3016\u3018\u301A\u301D\uFD3F\uFE17\uFE35\uFE37\uFE39\uFE3B\uFE3D\uFE3F\uFE41\uFE43\uFE47\uFE59\uFE5B\uFE5D\uFF08\uFF3B\uFF5B\uFF5F\uFF62] +Unicode_Close_Punctuation <- 
[\u0029\u005D\u007D\u0F3B\u0F3D\u169C\u2046\u207E\u208E\u2309\u230B\u232A\u2769\u276B\u276D\u276F\u2771\u2773\u2775\u27C6\u27E7\u27E9\u27EB\u27ED\u27EF\u2984\u2986\u2988\u298A\u298C\u298E\u2990\u2992\u2994\u2996\u2998\u29D9\u29DB\u29FD\u2E23\u2E25\u2E27\u2E29\u2E56\u2E58\u2E5A\u2E5C\u3009\u300B\u300D\u300F\u3011\u3015\u3017\u3019\u301B\u301E-\u301F\uFD3E\uFE18\uFE36\uFE38\uFE3A\uFE3C\uFE3E\uFE40\uFE42\uFE44\uFE48\uFE5A\uFE5C\uFE5E\uFF09\uFF3D\uFF5D\uFF60\uFF63] +Unicode_Initial_Punctuation <- [\u00AB\u2018\u201B-\u201C\u201F\u2039\u2E02\u2E04\u2E09\u2E0C\u2E1C\u2E20] +Unicode_Final_Punctuation <- [\u00BB\u2019\u201D\u203A\u2E03\u2E05\u2E0A\u2E0D\u2E1D\u2E21] +Unicode_Other_Punctuation <- [\u0021-\u0023\u0025-\u0027\u002A\u002C\u002E-\u002F\u003A-\u003B\u003F-\u0040\u005C\u00A1\u00A7\u00B6-\u00B7\u00BF\u037E\u0387\u055A-\u055F\u0589\u05C0\u05C3\u05C6\u05F3-\u05F4\u0609-\u060A\u060C-\u060D\u061B\u061D-\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964-\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A-\u0E5B\u0F04-\u0F12\u0F14\u0F85\u0FD0-\u0FD4\u0FD9-\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u166E\u16EB-\u16ED\u1735-\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u1805\u1807-\u180A\u1944-\u1945\u1A1E-\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1B7D-\u1B7E\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E-\u1C7F\u1CC0-\u1CC7\u1CD3\u2016-\u2017\u2020-\u2027\u2030-\u2038\u203B-\u203E\u2041-\u2043\u2047-\u2051\u2053\u2055-\u205E\u2CF9-\u2CFC\u2CFE-\u2CFF\u2D70\u2E00-\u2E01\u2E06-\u2E08\u2E0B\u2E0E-\u2E16\u2E18-\u2E19\u2E1B\u2E1E-\u2E1F\u2E2A-\u2E2E\u2E30-\u2E39\u2E3C-\u2E3F\u2E41\u2E43-\u2E4F\u2E52-\u2E54\u3001-\u3003\u303D\u30FB\uA4FE-\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE-\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E-\uA92F\uA95F\uA9C1-\uA9CD\uA9DE-\uA9DF\uAA5C-\uAA5F\uAADE-\uAADF\uAAF0-\uAAF1\uABEB\uFE10-\uFE16\uFE19\uFE30\uFE45-\uFE46\uFE49-\uFE4C\uFE50-\uFE52\uFE54-\uFE57\uFE5F-\uFE61\uFE68\uFE6A-\uFE6B\uFF01-\uFF03\uFF05-\uFF07\uFF0A\uFF0C\uFF0
E-\uFF0F\uFF1A-\uFF1B\uFF1F-\uFF20\uFF3C\uFF61\uFF64-\uFF65\u10100-\u10102\u1039F\u103D0\u1056F\u10857\u1091F\u1093F\u10A50-\u10A58\u10A7F\u10AF0-\u10AF6\u10B39-\u10B3F\u10B99-\u10B9C\u10F55-\u10F59\u10F86-\u10F89\u11047-\u1104D\u110BB-\u110BC\u110BE-\u110C1\u11140-\u11143\u11174-\u11175\u111C5-\u111C8\u111CD\u111DB\u111DD-\u111DF\u11238-\u1123D\u112A9\u1144B-\u1144F\u1145A-\u1145B\u1145D\u114C6\u115C1-\u115D7\u11641-\u11643\u11660-\u1166C\u116B9\u1173C-\u1173E\u1183B\u11944-\u11946\u119E2\u11A3F-\u11A46\u11A9A-\u11A9C\u11A9E-\u11AA2\u11C41-\u11C45\u11C70-\u11C71\u11EF7-\u11EF8\u11FFF\u12470-\u12474\u12FF1-\u12FF2\u16A6E-\u16A6F\u16AF5\u16B37-\u16B3B\u16B44\u16E97-\u16E9A\u16FE2\u1BC9F\u1DA87-\u1DA8B\u1E95E-\u1E95F] +Unicode_Math_Symbol <- [\u002B\u003C-\u003E\u007C\u007E\u00AC\u00B1\u00D7\u00F7\u03F6\u0606-\u0608\u2044\u2052\u207A-\u207C\u208A-\u208C\u2118\u2140-\u2144\u214B\u2190-\u2194\u219A-\u219B\u21A0\u21A3\u21A6\u21AE\u21CE-\u21CF\u21D2\u21D4\u21F4-\u22FF\u2320-\u2321\u237C\u239B-\u23B3\u23DC-\u23E1\u25B7\u25C1\u25F8-\u25FF\u266F\u27C0-\u27C4\u27C7-\u27E5\u27F0-\u27FF\u2900-\u2982\u2999-\u29D7\u29DC-\u29FB\u29FE-\u2AFF\u2B30-\u2B44\u2B47-\u2B4C\uFB29\uFE62\uFE64-\uFE66\uFF0B\uFF1C-\uFF1E\uFF5C\uFF5E\uFFE2\uFFE9-\uFFEC\u1D6C1\u1D6DB\u1D6FB\u1D715\u1D735\u1D74F\u1D76F\u1D789\u1D7A9\u1D7C3\u1EEF0-\u1EEF1] +Unicode_Currency_Symbol <- [\u0024\u00A2-\u00A5\u058F\u060B\u07FE-\u07FF\u09F2-\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u17DB\u20A0-\u20C0\uA838\uFDFC\uFE69\uFF04\uFFE0-\uFFE1\uFFE5-\uFFE6\u11FDD-\u11FE0\u1E2FF\u1ECB0] +Unicode_Modifier_Symbol <- [\u005E\u0060\u00A8\u00AF\u00B4\u00B8\u02C2-\u02C5\u02D2-\u02DF\u02E5-\u02EB\u02ED\u02EF-\u02FF\u0375\u0384-\u0385\u0888\u1FBD\u1FBF-\u1FC1\u1FCD-\u1FCF\u1FDD-\u1FDF\u1FED-\u1FEF\u1FFD-\u1FFE\u309B-\u309C\uA700-\uA716\uA720-\uA721\uA789-\uA78A\uAB5B\uAB6A-\uAB6B\uFBB2-\uFBC2\uFF3E\uFF40\uFFE3\u1F3FB-\u1F3FF] +Unicode_Other_Symbol <- 
[\u00A6\u00A9\u00AE\u00B0\u0482\u058D-\u058E\u060E-\u060F\u06DE\u06E9\u06FD-\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE-\u0FCF\u0FD5-\u0FD8\u109E-\u109F\u1390-\u1399\u166D\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100-\u2101\u2103-\u2106\u2108-\u2109\u2114\u2116-\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A-\u213B\u214A\u214C-\u214D\u214F\u218A-\u218B\u2195-\u2199\u219C-\u219F\u21A1-\u21A2\u21A4-\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0-\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45-\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B97-\u2BFF\u2CE5-\u2CEA\u2E50-\u2E51\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012-\u3013\u3020\u3036-\u3037\u303E-\u303F\u3190-\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836-\uA837\uA839\uAA77-\uAA79\uFD40-\uFD4F\uFDCF\uFDFD-\uFDFF\uFFE4\uFFE8\uFFED-\uFFEE\uFFFC-\uFFFD\u10137-\u1013F\u10179-\u10189\u1018C-\u1018E\u10190-\u1019C\u101A0\u101D0-\u101FC\u10877-\u10878\u10AC8\u1173F\u11FD5-\u11FDC\u11FE1-\u11FF1\u16B3C-\u16B3F\u16B45\u1BC9C\u1CF50-\u1CFC3\u1D000-\u1D0F5\u1D100-\u1D126\u1D129-\u1D164\u1D16A-\u1D16C\u1D183-\u1D184\u1D18C-\u1D1A9\u1D1AE-\u1D1EA\u1D200-\u1D241\u1D245\u1D300-\u1D356\u1D800-\u1D9FF\u1DA37-\u1DA3A\u1DA6D-\u1DA74\u1DA76-\u1DA83\u1DA85-\u1DA86\u1E14F\u1ECAC\u1ED2E\u1F000-\u1F02B\u1F030-\u1F093\u1F0A0-\u1F0AE\u1F0B1-\u1F0BF\u1F0C1-\u1F0CF\u1F0D1-\u1F0F5\u1F10D-\u1F1AD\u1F1E6-\u1F202\u1F210-\u1F23B\u1F240-\u1F248\u1F250-\u1F251\u1F260-\u1F265\u1F300-\u1F3FA\u1F400-\u1F6D7\u1F6DD-\u1F6EC\u1F6F0-\u1F6FC\u1F700-\u1F773\u1F780-\u1F7D8\u1F7E0-\u1F7EB\u1F7F0\u1F800-\u1F80B\u1F810-\u1F847\u1F850-\u1F859\u
1F860-\u1F887\u1F890-\u1F8AD\u1F8B0-\u1F8B1\u1F900-\u1FA53\u1FA60-\u1FA6D\u1FA70-\u1FA74\u1FA78-\u1FA7C\u1FA80-\u1FA86\u1FA90-\u1FAAC\u1FAB0-\u1FABA\u1FAC0-\u1FAC5\u1FAD0-\u1FAD9\u1FAE0-\u1FAE7\u1FAF0-\u1FAF6\u1FB00-\u1FB92\u1FB94-\u1FBCA] +Unicode_Space_Separator <- [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] +Unicode_Line_Separator <- [\u2028] +Unicode_Paragraph_Separator <- [\u2029] +Unicode_Control <- [\u0000-\u001F\u007F-\u009F] +Unicode_Format <- [\u00AD\u0600-\u0605\u061C\u06DD\u070F\u0890-\u0891\u08E2\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\uFEFF\uFFF9-\uFFFB\u110BD\u110CD\u13430-\u13438\u1BCA0-\u1BCA3\u1D173-\u1D17A\uE0001\uE0020-\uE007F] +Unicode_Surrogate <- [\uD800\uDB7F-\uDB80\uDBFF-\uDC00\uDFFF] +Unicode_Private_Use <- [\uE000\uF8FF\uF0000\uFFFFD\u100000\u10FFFD] diff --git a/misc/README.md b/misc/README.md new file mode 100644 index 0000000..79518ec --- /dev/null +++ b/misc/README.md @@ -0,0 +1,22 @@ +# Miscellaneous Tools + +## Overview + +In this directory, miscellaneous tools shown below are stored. + +## Tools + +### `unicode_general_category.py` + +#### Synopsis + +A Python script to generate a PEG file defining rules to categorize Unicode characters. +It needs internet access for fetching Unicode data from https://www.unicode.org/ . + +The Python module `requests` is required. + +#### Usage + +~~~sh +$ python unicode_general_category.py > ../import/char/unicode_general_category.peg +~~~ diff --git a/misc/unicode_general_category.py b/misc/unicode_general_category.py new file mode 100644 index 0000000..e9cd99b --- /dev/null +++ b/misc/unicode_general_category.py @@ -0,0 +1,136 @@ +#!/usr/bin/python3 + +# Copyright (c) 2024 Arihiro Yoshida. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import os +import requests +import re + +ucd_url = 'https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt' +ucd_gc_list = [ + 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Me', 'Nd', 'Nl', 'No', + 'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'Sm', 'Sc', 'Sk', 'So', + 'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Cs', 'Co', 'Cn' +] +ucd_gc_dict = { + 'Lu': 'Uppercase_Letter', + 'Ll': 'Lowercase_Letter', + 'Lt': 'Titlecase_Letter', + 'Lm': 'Modifier_Letter', + 'Lo': 'Other_Letter', + 'Mn': 'Nonspacing_Mark', + 'Mc': 'Spacing_Mark', + 'Me': 'Enclosing_Mark', + 'Nd': 'Decimal_Number', + 'Nl': 'Letter_Number', + 'No': 'Other_Number', + 'Pc': 'Connector_Punctuation', + 'Pd': 'Dash_Punctuation', + 'Ps': 'Open_Punctuation', + 'Pe': 'Close_Punctuation', + 'Pi': 'Initial_Punctuation', + 'Pf': 'Final_Punctuation', + 'Po': 'Other_Punctuation', + 'Sm': 'Math_Symbol', + 'Sc': 'Currency_Symbol', + 'Sk': 'Modifier_Symbol', + 'So': 'Other_Symbol', + 'Zs': 'Space_Separator', + 'Zl': 'Line_Separator', + 'Zp': 'Paragraph_Separator', + 'Cc': 'Control', + 'Cf': 'Format', + 'Cs': 'Surrogate', + 'Co': 'Private_Use', + 'Cn': 'Unassigned' +} + +def get_unicode_data(): + res = requests.get(ucd_url, stream=True) + res.raise_for_status() + txt = '' + for chunk in res.iter_content(chunk_size=1024*1024): + txt += chunk.decode() + return txt + +def generate_rules(dat): + str = ( + '# This file was generated using the script \'' + os.path.basename(__file__) + '\'.\n' + '\n' + '# This file is hereby placed in the public domain.\n' + '#\n' + '# THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS\n' + '# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n' + '# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n' + '# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE\n' + '# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n' + '# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n' + '# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\n' + '# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n' + '# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\n' + '# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\n' + '# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n' + '\n' + 'Unicode_Letter <- Unicode_Cased_Letter / Unicode_Modifier_Letter / Unicode_Other_Letter\n' + 'Unicode_Cased_Letter <- Unicode_Uppercase_Letter / Unicode_Lowercase_Letter / Unicode_Titlecase_Letter\n' + 'Unicode_Mark <- Unicode_Nonspacing_Mark / Unicode_Spacing_Mark / Unicode_Enclosing_Mark\n' + 'Unicode_Number <- Unicode_Decimal_Number / Unicode_Letter_Number / Unicode_Other_Number\n' + 'Unicode_Punctuation <- Unicode_Connector_Punctuation / Unicode_Dash_Punctuation / Unicode_Open_Punctuation / Unicode_Close_Punctuation / Unicode_Initial_Punctuation / Unicode_Final_Punctuation / Unicode_Other_Punctuation\n' + 'Unicode_Symbol <- Unicode_Math_Symbol / Unicode_Currency_Symbol / Unicode_Modifier_Symbol / Unicode_Other_Symbol\n' + 'Unicode_Separator <- Unicode_Space_Separator / Unicode_Line_Separator / Unicode_Paragraph_Separator\n' + 'Unicode_Other <- Unicode_Control / Unicode_Format / Unicode_Surrogate / Unicode_Private_Use\n' # The category 'Unassigned' is excluded because currently it has no character. 
+ '\n' + ) + for gc in ucd_gc_list: + if gc not in dat: + continue + cc = '' + cs = '' + cp = '' + for c in dat[gc]: + if cs == '': + cs = c + elif int(c, 16) - int(cp, 16) != 1: + cc += '\\u' + cs + if cs != cp: + cc += '-\\u' + cp + cs = c + cp = c + if cs != '': + cc += '\\u' + cs + if cs != cp: + cc += '-\\u' + cp + str += 'Unicode_' + ucd_gc_dict[gc] + ' <- [' + cc + ']\n' + return str + +def main(): + pat = re.compile(r'^([0-9a-fA-F]+);[^;]*;([0-9a-zA-Z_]+);') + dat = {} + for ent in get_unicode_data().splitlines(): + res = pat.search(ent) + cat = res.group(2) + if cat not in dat: + dat[cat] = [] + dat[cat] += [res.group(1)] + print(generate_rules(dat), end='') + +if __name__ == '__main__': + main() diff --git a/src/packcc.c b/src/packcc.c index 2ddcb02..6293978 100644 --- a/src/packcc.c +++ b/src/packcc.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #ifndef _MSC_VER @@ -68,10 +68,19 @@ static size_t strnlen_(const char *str, size_t maxlen) { #ifdef _WIN32 /* Windows including MSVC and MinGW */ #include /* _get_osfhandle() */ /* NOTE: The header "fileapi.h" causes a compiler error due to an illegal anonymous union. 
*/ +#define DECLSPEC_IMPORT __declspec(dllimport) #define WINAPI __stdcall +#define S_OK 0 +#define CSIDL_PROFILE 0x0028 +#define CSIDL_COMMON_APPDATA 0x0023 +#define SHGFP_TYPE_DEFAULT 1 +#define MAX_PATH 260 typedef int BOOL; typedef unsigned long DWORD; +typedef char *LPSTR; +typedef long HRESULT; typedef void *HANDLE; +typedef void *HWND; typedef struct _FILETIME { DWORD dwLowDateTime; DWORD dwHighDateTime; @@ -88,7 +97,8 @@ typedef struct _BY_HANDLE_FILE_INFORMATION { DWORD nFileIndexHigh; DWORD nFileIndexLow; } BY_HANDLE_FILE_INFORMATION, *LPBY_HANDLE_FILE_INFORMATION; -BOOL WINAPI GetFileInformationByHandle(HANDLE hFile, LPBY_HANDLE_FILE_INFORMATION lpFileInformation); +DECLSPEC_IMPORT BOOL WINAPI GetFileInformationByHandle(HANDLE hFile, LPBY_HANDLE_FILE_INFORMATION lpFileInformation); +DECLSPEC_IMPORT HRESULT WINAPI SHGetFolderPathA(HWND hwnd, int csidl, HANDLE hToken, DWORD dwFlags, LPSTR pszPath); #else /* !_WIN32 */ #include /* for fstat() */ #endif @@ -100,6 +110,24 @@ BOOL WINAPI GetFileInformationByHandle(HANDLE hFile, LPBY_HANDLE_FILE_INFORMATIO #undef TRUE /* to avoid macro definition conflicts with the system header file of IBM AIX */ #undef FALSE +#ifdef _MSC_VER +#define IMPORT_DIR_SYSTEM "packcc/import" /* should be a relative path */ +#else +#define IMPORT_DIR_SYSTEM "/usr/share/packcc/import" /* should be an absolute path */ +#endif + +#define IMPORT_DIR_USER ".packcc/import" + +#ifdef _WIN32 /* Windows including MSVC and MinGW (MinGW automatically converts paths to those in Windows style) */ +#define PATH_SEP ';' +#else +#define PATH_SEP ':' +#endif + +#define ENVVAR_IMPORT_PATH "PCC_IMPORT_PATH" + +#define WEBSITE "https://github.com/arithy/packcc" + #define VERSION "2.0.0" #ifndef BUFFER_MIN_SIZE @@ -157,6 +185,12 @@ typedef struct char_array_tag { size_t len; } char_array_t; +typedef struct string_array_tag { + char **buf; + size_t max; + size_t len; +} string_array_t; + typedef struct code_block_tag { char *text; size_t len; @@ -330,6 
+364,7 @@ typedef struct context_tag { char *vtype; /* the type name of the data output by the parsing API function (NULL means the default) */ char *atype; /* the type name of the user-defined data passed to the parser creation API function (NULL means the default) */ char *prefix; /* the prefix of the API function names (NULL means the default) */ + const string_array_t *dirs; /* the path names of directories to search for import files */ options_t opts; /* the options */ code_flag_t flags; /* the bitwise flags to control code generation; updated during PEG parsing */ size_t errnum; /* the current number of PEG parsing errors */ @@ -1157,6 +1192,36 @@ static void stream__write_footer(stream_t *stream, const char *ptr, size_t len, } } +static char *get_home_directory(void) { +#ifdef _MSC_VER + char s[MAX_PATH]; + return (SHGetFolderPathA(NULL, CSIDL_PROFILE, NULL, SHGFP_TYPE_DEFAULT, s) == S_OK) ? strdup_e(s) : NULL; +#else + const char *const s = getenv("HOME"); + return (s && s[0]) ? strdup_e(s) : NULL; +#endif +} + +#ifdef _MSC_VER + +static char *get_appdata_directory(void) { + char s[MAX_PATH]; + return (SHGetFolderPathA(NULL, CSIDL_COMMON_APPDATA, NULL, SHGFP_TYPE_DEFAULT, s) == S_OK) ? strdup_e(s) : NULL; +} + +#endif /* _MSC_VER */ + +static bool_t is_absolute_path(const char *path) { +#ifdef _WIN32 + return ( + path[0] == '\\' || + (((path[0] >= 'A' && path[0] <= 'Z') || (path[0] >= 'a' && path[0] <= 'z')) && path[1] == ':') + ) ? TRUE : FALSE; +#else + return (path[0] == '/') ? 
TRUE : FALSE; +#endif +} + static const char *extract_filename(const char *path) { size_t i = strlen(path); while (i > 0) { @@ -1170,6 +1235,31 @@ static const char *extract_filename(const char *path) { return path; } +static char *replace_filename(const char *path, const char *name) { + const char *const p = extract_filename(path); + const size_t m = p - path; + const size_t n = strlen(name); + char *const s = (char *)malloc_e(m + n + 1); + memcpy(s, path, m); + memcpy(s + m, name, n + 1); + return s; +} + +static char *add_filename(const char *path, const char *name) { + const size_t m = strlen(path); + const size_t n = strlen(name); +#ifdef _WIN32 + const size_t d = (m > 0 && strchr("/\\:", path[m - 1]) == NULL) ? 1 : 0; +#else + const size_t d = (m > 0 && path[m - 1] != '/') ? 1 : 0; +#endif + char *const s = (char *)malloc_e(m + d + n + 1); + memcpy(s, path, m); + if (d) s[m] = '/'; + memcpy(s + m + d, name, n + 1); + return s; +} + static const char *extract_fileext(const char *path) { const size_t n = strlen(path); size_t i = n; @@ -1253,7 +1343,7 @@ static bool_t file_id_array__add_if_not_yet(file_id_array_t *array, const file_i if (m == 0) m = BUFFER_MIN_SIZE; while (m < n && m != 0) m <<= 1; if (m == 0) m = n; /* in case of shift overflow */ - array->buf = (file_id_t *)realloc_e(array->buf, m); + array->buf = (file_id_t *)realloc_e(array->buf, sizeof(file_id_t) * m); array->max = m; } array->buf[array->len++] = *id; @@ -1287,6 +1377,31 @@ static void char_array__term(char_array_t *array) { free(array->buf); } +static void string_array__init(string_array_t *array) { + array->len = 0; + array->max = 0; + array->buf = NULL; +} + +static void string_array__add(string_array_t *array, const char *str, size_t len) { + if (array->max <= array->len) { + const size_t n = array->len + 1; + size_t m = array->max; + if (m == 0) m = BUFFER_MIN_SIZE; + while (m < n && m != 0) m <<= 1; + if (m == 0) m = n; /* in case of shift overflow */ + array->buf = (char 
**)realloc_e(array->buf, sizeof(char *) * m); + array->max = m; + } + array->buf[array->len++] = (len == VOID_VALUE) ? strdup_e(str) : strndup_e(str, len); +} + +static void string_array__term(string_array_t *array) { + size_t i; + for (i = 0; i < array->len; i++) free(array->buf[i]); + free(array->buf); +} + static void code_block__init(code_block_t *code) { code->text = NULL; code->len = 0; @@ -1422,14 +1537,15 @@ static bool_t is_in_imported_input(const input_state_t *input) { return input->parent ? TRUE : FALSE; } -static context_t *create_context(const char *ipath, const char *opath, const options_t *opts) { +static context_t *create_context(const char *ipath, const char *opath, const string_array_t *dirs, const options_t *opts) { context_t *const ctx = (context_t *)malloc_e(sizeof(context_t)); ctx->spath = (opath && opath[0]) ? add_fileext(opath, "c") : replace_fileext((ipath && ipath[0]) ? ipath : "-", "c"); ctx->hpath = (opath && opath[0]) ? add_fileext(opath, "h") : replace_fileext((ipath && ipath[0]) ? 
ipath : "-", "h"); - ctx->hid = strdup_e(ctx->hpath); make_header_identifier(ctx->hid); + ctx->hid = strdup_e(extract_filename(ctx->hpath)); make_header_identifier(ctx->hid); ctx->vtype = NULL; ctx->atype = NULL; ctx->prefix = NULL; + ctx->dirs = dirs; ctx->opts = *opts; ctx->flags = CODE_FLAG__NONE; ctx->errnum = 0; @@ -2893,13 +3009,79 @@ static void parse_file_(context_t *ctx) { bool_t b = TRUE; match_spaces(ctx->input); for (;;) { + char *s = NULL; size_t l, m, n, o; if (match_eof(ctx->input) || parse_footer_(ctx->input, &ctx->fsource)) break; l = ctx->input->linenum; m = column_number(ctx->input); n = ctx->input->charnum; o = ctx->input->linepos; - if ( + if (parse_directive_string_(ctx->input, "%import", &s, STRING_FLAG__NOTEMPTY)) { + if (s) { + if (is_absolute_path(s)) { + FILE *const file = fopen(s, "rb"); + if (file) { + ctx->input = create_input_state(s, file, ctx->input, &ctx->opts); + parse_file_(ctx); + ctx->input = destroy_input_state(ctx->input); + } + else { + if (errno != ENOENT) { + print_error( + "%s:" FMT_LU ":" FMT_LU ": Cannot open file to read: %s\n", + ctx->input->path, (ulong_t)(l + 1), (ulong_t)(m + 1), + s + ); + } + else { + print_error( + "%s:" FMT_LU ":" FMT_LU ": File not found: %s\n", + ctx->input->path, (ulong_t)(l + 1), (ulong_t)(m + 1), + s + ); + } + ctx->input->errnum++; + } + } + else { + size_t i = 0; + char *path = replace_filename(ctx->input->path, s); + FILE *file = fopen(path, "rb"); + while (file == NULL) { + if (errno != ENOENT) { + print_error( + "%s:" FMT_LU ":" FMT_LU ": Cannot open file to read: %s\n", + ctx->input->path, (ulong_t)(l + 1), (ulong_t)(m + 1), + path + ); + ctx->input->errnum++; + break; + } + if (i >= ctx->dirs->len) { + print_error( + "%s:" FMT_LU ":" FMT_LU ": File not found: %s\n", + ctx->input->path, (ulong_t)(l + 1), (ulong_t)(m + 1), + s + ); + ctx->input->errnum++; + break; + } + free(path); + path = add_filename(ctx->dirs->buf[i++], s); + file = fopen(path, "rb"); + } + if (file) { + 
ctx->input = create_input_state(path, file, ctx->input, &ctx->opts); + parse_file_(ctx); + ctx->input = destroy_input_state(ctx->input); + } + free(path); + } + free(s); + } + b = TRUE; + } + else if ( parse_directive_block_(ctx->input, "%earlysource", &ctx->esource, NULL) || parse_directive_block_(ctx->input, "%earlyheader", &ctx->eheader, NULL) || parse_directive_block_(ctx->input, "%earlycommon", &ctx->esource, &ctx->eheader) || @@ -5319,28 +5501,32 @@ static bool_t generate(context_t *ctx) { static void print_version(FILE *output) { fprintf(output, "%s version %s\n", g_cmdname, VERSION); - fprintf(output, "Copyright (c) 2014, 2019-2022 Arihiro Yoshida. All rights reserved.\n"); + fprintf(output, "Copyright (c) 2014, 2019-2024 Arihiro Yoshida. All rights reserved.\n"); } static void print_usage(FILE *output) { fprintf(output, "Usage: %s [OPTIONS] [FILE]\n", g_cmdname); fprintf(output, "Generates a packrat parser for C.\n"); fprintf(output, "\n"); - fprintf(output, " -o BASENAME specify a base name of output source and header files\n"); + fprintf(output, "Options:\n"); + fprintf(output, " -o BASENAME specify a base name of output source and header files;\n"); + fprintf(output, " can be used only once\n"); + fprintf(output, " -I DIRNAME specify a directory name to search for import files;\n"); + fprintf(output, " can be used as many times as needed to add directories\n"); fprintf(output, " -a, --ascii disable UTF-8 support\n"); fprintf(output, " -l, --lines add #line directives\n"); fprintf(output, " -d, --debug with debug information\n"); fprintf(output, " -h, --help print this help message and exit\n"); fprintf(output, " -v, --version print the version and exit\n"); + fprintf(output, "\n"); + fprintf(output, "Environment Variable:\n"); + fprintf(output, " %s\n", ENVVAR_IMPORT_PATH); + fprintf(output, " specify directory names to search for import files, delimited by '%c'\n", PATH_SEP); + fprintf(output, "\n"); + fprintf(output, "Full documentation at: <%s>\n", 
WEBSITE); } int main(int argc, char **argv) { - const char *ipath = NULL; - const char *opath = NULL; - options_t opts; - opts.ascii = FALSE; - opts.lines = FALSE; - opts.debug = FALSE; #ifdef _MSC_VER #ifdef _DEBUG _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); @@ -5350,88 +5536,148 @@ int main(int argc, char **argv) { #endif g_cmdname = extract_filename(argv[0]); { - const char *path = NULL; - const char *opt_o = NULL; - bool_t opt_a = FALSE; - bool_t opt_l = FALSE; - bool_t opt_d = FALSE; - bool_t opt_h = FALSE; - bool_t opt_v = FALSE; - int i; - for (i = 1; i < argc; i++) { - if (argv[i][0] != '-') { - break; - } - else if (strcmp(argv[i], "--") == 0) { - i++; break; - } - else if (argv[i][1] == 'o') { - const char *const o = (argv[i][2] != '\0') ? argv[i] + 2 : (++i < argc) ? argv[i] : NULL; - if (o == NULL) { - print_error("Output base name missing\n"); - fprintf(stderr, "\n"); - print_usage(stderr); - exit(1); + const char *ipath = NULL; + const char *opath = NULL; + options_t opts = { 0 }; + string_array_t dirs; + string_array__init(&dirs); + opts.ascii = FALSE; + opts.lines = FALSE; + opts.debug = FALSE; + { + const char *path = NULL; + const char *opt_o = NULL; + bool_t opt_a = FALSE; + bool_t opt_l = FALSE; + bool_t opt_d = FALSE; + bool_t opt_h = FALSE; + bool_t opt_v = FALSE; + int i; + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-') { + break; + } + else if (strcmp(argv[i], "--") == 0) { + i++; break; + } + else if (argv[i][1] == 'I') { + const char *const v = (argv[i][2] != '\0') ? argv[i] + 2 : (++i < argc) ? argv[i] : NULL; + if (v == NULL || v[0] == '\0') { + print_error("Import directory name missing\n"); + fprintf(stderr, "\n"); + print_usage(stderr); + exit(1); + } + string_array__add(&dirs, v, VOID_VALUE); + } + else if (argv[i][1] == 'o') { + const char *const v = (argv[i][2] != '\0') ? argv[i] + 2 : (++i < argc) ? 
argv[i] : NULL; + if (v == NULL || v[0] == '\0') { + print_error("Output base name missing\n"); + fprintf(stderr, "\n"); + print_usage(stderr); + exit(1); + } + if (opt_o != NULL) { + print_error("Extra output base name: '%s'\n", v); + fprintf(stderr, "\n"); + print_usage(stderr); + exit(1); + } + opt_o = v; + } + else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--ascii") == 0) { + opt_a = TRUE; + } + else if (strcmp(argv[i], "-l") == 0 || strcmp(argv[i], "--lines") == 0) { + opt_l = TRUE; + } + else if (strcmp(argv[i], "-d") == 0 || strcmp(argv[i], "--debug") == 0) { + opt_d = TRUE; + } + else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { + opt_h = TRUE; + } + else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { + opt_v = TRUE; } - if (opt_o != NULL) { - print_error("Extra output base name: '%s'\n", o); + else { + print_error("Invalid option: '%s'\n", argv[i]); fprintf(stderr, "\n"); print_usage(stderr); exit(1); } - opt_o = o; - } - else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--ascii") == 0) { - opt_a = TRUE; - } - else if (strcmp(argv[i], "-l") == 0 || strcmp(argv[i], "--lines") == 0) { - opt_l = TRUE; - } - else if (strcmp(argv[i], "-d") == 0 || strcmp(argv[i], "--debug") == 0) { - opt_d = TRUE; - } - else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { - opt_h = TRUE; } - else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { - opt_v = TRUE; - } - else { - print_error("Invalid option: '%s'\n", argv[i]); + switch (argc - i) { + case 0: + break; + case 1: + path = argv[i]; + break; + default: + print_error("Extra input file: '%s'\n", argv[i + 1]); fprintf(stderr, "\n"); print_usage(stderr); exit(1); } + if (opt_h || opt_v) { + if (opt_v) print_version(stdout); + if (opt_v && opt_h) fprintf(stdout, "\n"); + if (opt_h) print_usage(stdout); + exit(0); + } + ipath = (path && path[0]) ? path : NULL; + opath = (opt_o && opt_o[0]) ? 
opt_o : NULL; + opts.ascii = opt_a; + opts.lines = opt_l; + opts.debug = opt_d; } - switch (argc - i) { - case 0: - break; - case 1: - path = argv[i]; - break; - default: - print_error("Extra input file: '%s'\n", argv[i + 1]); - fprintf(stderr, "\n"); - print_usage(stderr); - exit(1); + { + const char *const v = getenv(ENVVAR_IMPORT_PATH); + if (v) { + size_t i = 0, h = 0; + for (;;) { + if (v[i] == '\0') { + if (i > h) string_array__add(&dirs, v + h, i - h); + break; + } + else if (v[i] == PATH_SEP) { + if (i > h) string_array__add(&dirs, v + h, i - h); + h = i + 1; + } + i++; + } + } } - if (opt_h || opt_v) { - if (opt_v) print_version(stdout); - if (opt_v && opt_h) fprintf(stdout, "\n"); - if (opt_h) print_usage(stdout); - exit(0); + { + char *const s = get_home_directory(); + if (s) { + char *const t = add_filename(s, IMPORT_DIR_USER); + string_array__add(&dirs, t, VOID_VALUE); + free(t); + free(s); + } } - ipath = (path && path[0]) ? path : NULL; - opath = (opt_o && opt_o[0]) ? opt_o : NULL; - opts.ascii = opt_a; - opts.lines = opt_l; - opts.debug = opt_d; - } - { - context_t *const ctx = create_context(ipath, opath, &opts); - const int b = parse(ctx) && generate(ctx); - destroy_context(ctx); - if (!b) exit(10); + { +#ifdef _MSC_VER + char *const s = get_appdata_directory(); + if (s) { + char *const t = add_filename(s, IMPORT_DIR_SYSTEM); + string_array__add(&dirs, t, VOID_VALUE); + free(t); + free(s); + } +#else + string_array__add(&dirs, IMPORT_DIR_SYSTEM, VOID_VALUE); +#endif + } + { + context_t *const ctx = create_context(ipath, opath, &dirs, &opts); + const int b = parse(ctx) && generate(ctx); + destroy_context(ctx); + if (!b) exit(10); + } + string_array__term(&dirs); } return 0; } diff --git a/tests/import.d/.gitignore b/tests/import.d/.gitignore new file mode 100644 index 0000000..b23b2a6 --- /dev/null +++ b/tests/import.d/.gitignore @@ -0,0 +1,2 @@ +t_*/ +t_*.* diff --git a/tests/import.d/check_line_number.py b/tests/import.d/check_line_number.py new 
file mode 100644 index 0000000..c63fb5b --- /dev/null +++ b/tests/import.d/check_line_number.py @@ -0,0 +1,76 @@ +#!/usr/bin/python3 + +# Copyright (c) 2024 Arihiro Yoshida. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
def _fail(message, code=2):
    """Print *message* to standard output and terminate with exit status *code*."""
    print(message)
    sys.exit(code)


def main():
    """Check the ``#line`` directive adjacent to the first line containing a keyword.

    Command line: ``[--only-pre] WORD PATH``.

    Without ``--only-pre`` the line right after the keyword line in PATH must
    be a ``#line N "FILE"`` directive where N is the 1-based number of the
    directive line itself and FILE is the absolute path of PATH.

    With ``--only-pre`` the line right before the keyword line must be a
    ``#line N "FILE"`` directive, and line N (1-based) of FILE must contain
    the keyword as well.

    Exits with status 1 on a usage error and 2 on any failed check; returns
    normally when the check passes.
    """
    # Work on a copy so that sys.argv is left intact for the caller.
    args = sys.argv[1:]
    only_pre = bool(args) and args[0] == '--only-pre'
    if only_pre:
        args = args[1:]
    # Validate the count *after* consuming the option; otherwise
    # "--only-pre WORD" without PATH would slip through and crash.
    if len(args) < 2:
        _fail('Too few arguments', 1)
    word, path = args[0], args[1]
    directive = re.compile(r'^#line ([0-9]+) "(.*)"$')
    with open(path, 'r') as file:
        text = file.read().split('\n')
    # Index of the first line containing the keyword, or None.  A plain
    # "for ... break" leaves the index on the last line when nothing matches,
    # which made the not-found case undetectable.
    i = next((k for k, s in enumerate(text) if word in s), None)
    if i is None:
        _fail('Keyword not found')
    if not only_pre:
        # The directive is expected one line below, so the keyword may be
        # neither on the first nor on the last line.
        if i == 0 or i == len(text) - 1:
            _fail('Keyword found in invalid line')
        m = directive.search(text[i + 1])
        if m is None:
            _fail('#line directive not found one line after keyword')
        if int(m.group(1)) - 1 != i + 1:
            _fail('#line directive with inconsistent line number')
        if m.group(2) != os.path.abspath(path):
            _fail('#line directive with inconsistent file name')
    else:
        if i == 0:
            _fail('Keyword found in invalid line')
        m = directive.search(text[i - 1])
        if m is None:
            _fail('#line directive not found one line before keyword')
        # Follow the directive into the file it names and look the keyword up there.
        with open(m.group(2), 'r') as file:
            text = file.read().split('\n')
        j = int(m.group(1)) - 1
        if j < 0 or j >= len(text):
            _fail('#line directive with invalid line number')
        if word not in text[j]:
            _fail('#line directive with inconsistent line number')
"" "" "" "t_imp_2.peg" + make_input t_dir_0/t_imp_2.peg "t_imp_3" "" "../t_dir_0/t_imp_3.peg" "" "t_imp_3.peg" + make_input t_dir_0/t_imp_3.peg "t_imp_4" "" "" "" "" + make_input t_dir_1/t_imp_4.peg "t_imp_5" "" "t_dir_1_0/../t_imp_5.peg" "" "t_imp_5.peg" + make_input t_dir_1/t_imp_5.peg "t_imp_6" "t_imp_6.peg" "../t_dir_1/t_dir_1_0/t_imp_7.peg" "" "" + make_input t_dir_1/t_dir_1_0/t_imp_6.peg "t_imp_7" "" "" "" "" + make_input t_dir_1/t_dir_1_0/t_imp_7.peg "t_imp_8" "t_imp_8.peg" "../t_imp_a.peg" "" "" + make_input t_dir_2/t_imp_8.peg "t_imp_9" "" "../t_dir_2/t_imp_9.peg" "" "t_imp_9.peg" + make_input t_dir_2/t_imp_9.peg "t_imp_a" "" "" "" "" + make_input t_dir_3/t_imp_a.peg "t_imp_b" "" "t_dir_3_0/../t_imp_b.peg" "" "t_imp_b.peg" + make_input t_dir_3/t_imp_b.peg "t_imp_c" "t_imp_c.peg" "../t_dir_3/t_dir_3_0/t_imp_d.peg" "" "" + make_input t_dir_3/t_dir_3_0/t_imp_c.peg "t_imp_d" "" "" "" "" + make_input t_dir_3/t_dir_3_0/t_imp_d.peg "" "" "" "t_imp_c.peg" "" +} + +@test "Testing import.d - generation" { + "$PACKCC" --debug -o "$BATS_TEST_DIRNAME/t_parser" "$BATS_TEST_DIRNAME/reference.peg" > "$BATS_TEST_DIRNAME/t_expected.txt" 2>&1 + make_all_inputs + export PCC_IMPORT_PATH=$BATS_TEST_DIRNAME/t_dir_2:$BATS_TEST_DIRNAME/t_dir_3/t_dir_3_0 + run "$PACKCC" -I "$BATS_TEST_DIRNAME/t_dir_0" -I "$BATS_TEST_DIRNAME/t_dir_1/t_dir_1_0" --debug --lines -o "$BATS_TEST_DIRNAME/parser" "$BATS_TEST_DIRNAME/t_input.peg" 2>&1 + check_output "$BATS_TEST_DIRNAME/t_expected.txt" +} + +@test "Testing import.d - header ordering" { + ! in_header ":t_input_EARLYSOURCE:" + ! in_header ":t_imp_0_EARLYSOURCE:" + ! in_header ":t_imp_1_EARLYSOURCE:" + ! in_header ":t_imp_2_EARLYSOURCE:" + ! in_header ":t_imp_3_EARLYSOURCE:" + ! in_header ":t_imp_4_EARLYSOURCE:" + ! in_header ":t_imp_5_EARLYSOURCE:" + ! in_header ":t_imp_6_EARLYSOURCE:" + ! in_header ":t_imp_7_EARLYSOURCE:" + ! in_header ":t_imp_8_EARLYSOURCE:" + ! in_header ":t_imp_9_EARLYSOURCE:" + ! in_header ":t_imp_a_EARLYSOURCE:" + ! 
in_header ":t_imp_b_EARLYSOURCE:" + ! in_header ":t_imp_c_EARLYSOURCE:" + ! in_header ":t_imp_d_EARLYSOURCE:" + ! in_header ":t_input_SOURCE:" + ! in_header ":t_imp_0_SOURCE:" + ! in_header ":t_imp_1_SOURCE:" + ! in_header ":t_imp_2_SOURCE:" + ! in_header ":t_imp_3_SOURCE:" + ! in_header ":t_imp_4_SOURCE:" + ! in_header ":t_imp_5_SOURCE:" + ! in_header ":t_imp_6_SOURCE:" + ! in_header ":t_imp_7_SOURCE:" + ! in_header ":t_imp_8_SOURCE:" + ! in_header ":t_imp_9_SOURCE:" + ! in_header ":t_imp_a_SOURCE:" + ! in_header ":t_imp_b_SOURCE:" + ! in_header ":t_imp_c_SOURCE:" + ! in_header ":t_imp_d_SOURCE:" + ! in_header ":t_input_CODE:" + ! in_header ":t_imp_0_CODE:" + ! in_header ":t_imp_1_CODE:" + ! in_header ":t_imp_2_CODE:" + ! in_header ":t_imp_3_CODE:" + ! in_header ":t_imp_4_CODE:" + ! in_header ":t_imp_5_CODE:" + ! in_header ":t_imp_6_CODE:" + ! in_header ":t_imp_7_CODE:" + ! in_header ":t_imp_8_CODE:" + ! in_header ":t_imp_9_CODE:" + ! in_header ":t_imp_a_CODE:" + ! in_header ":t_imp_b_CODE:" + ! in_header ":t_imp_c_CODE:" + ! 
in_header ":t_imp_d_CODE:" + L000=$(get_line ":t_input_EARLYHEADER:" parser.h) + L001=$(get_line ":t_input_EARLYCOMMON:" parser.h) + L002=$(get_line ":t_imp_0_EARLYHEADER:" parser.h) + L003=$(get_line ":t_imp_0_EARLYCOMMON:" parser.h) + L004=$(get_line ":t_imp_1_EARLYHEADER:" parser.h) + L005=$(get_line ":t_imp_1_EARLYCOMMON:" parser.h) + L006=$(get_line ":t_imp_2_EARLYHEADER:" parser.h) + L007=$(get_line ":t_imp_2_EARLYCOMMON:" parser.h) + L008=$(get_line ":t_imp_3_EARLYHEADER:" parser.h) + L009=$(get_line ":t_imp_3_EARLYCOMMON:" parser.h) + L010=$(get_line ":t_imp_4_EARLYHEADER:" parser.h) + L011=$(get_line ":t_imp_4_EARLYCOMMON:" parser.h) + L012=$(get_line ":t_imp_5_EARLYHEADER:" parser.h) + L013=$(get_line ":t_imp_5_EARLYCOMMON:" parser.h) + L014=$(get_line ":t_imp_6_EARLYHEADER:" parser.h) + L015=$(get_line ":t_imp_6_EARLYCOMMON:" parser.h) + L016=$(get_line ":t_imp_7_EARLYHEADER:" parser.h) + L017=$(get_line ":t_imp_7_EARLYCOMMON:" parser.h) + L018=$(get_line ":t_imp_8_EARLYHEADER:" parser.h) + L019=$(get_line ":t_imp_8_EARLYCOMMON:" parser.h) + L020=$(get_line ":t_imp_9_EARLYHEADER:" parser.h) + L021=$(get_line ":t_imp_9_EARLYCOMMON:" parser.h) + L022=$(get_line ":t_imp_a_EARLYHEADER:" parser.h) + L023=$(get_line ":t_imp_a_EARLYCOMMON:" parser.h) + L024=$(get_line ":t_imp_b_EARLYHEADER:" parser.h) + L025=$(get_line ":t_imp_b_EARLYCOMMON:" parser.h) + L026=$(get_line ":t_imp_c_EARLYHEADER:" parser.h) + L027=$(get_line ":t_imp_c_EARLYCOMMON:" parser.h) + L028=$(get_line ":t_imp_d_EARLYHEADER:" parser.h) + L029=$(get_line ":t_imp_d_EARLYCOMMON:" parser.h) + L099=$(get_line "#ifndef PCC_INCLUDED_PARSER_H" parser.h) + L100=$(get_line ":t_input_HEADER:" parser.h) + L101=$(get_line ":t_input_COMMON:" parser.h) + L102=$(get_line ":t_imp_0_HEADER:" parser.h) + L103=$(get_line ":t_imp_0_COMMON:" parser.h) + L104=$(get_line ":t_imp_1_HEADER:" parser.h) + L105=$(get_line ":t_imp_1_COMMON:" parser.h) + L106=$(get_line ":t_imp_2_HEADER:" parser.h) + L107=$(get_line 
":t_imp_2_COMMON:" parser.h) + L108=$(get_line ":t_imp_3_HEADER:" parser.h) + L109=$(get_line ":t_imp_3_COMMON:" parser.h) + L110=$(get_line ":t_imp_4_HEADER:" parser.h) + L111=$(get_line ":t_imp_4_COMMON:" parser.h) + L112=$(get_line ":t_imp_5_HEADER:" parser.h) + L113=$(get_line ":t_imp_5_COMMON:" parser.h) + L114=$(get_line ":t_imp_6_HEADER:" parser.h) + L115=$(get_line ":t_imp_6_COMMON:" parser.h) + L116=$(get_line ":t_imp_7_HEADER:" parser.h) + L117=$(get_line ":t_imp_7_COMMON:" parser.h) + L118=$(get_line ":t_imp_8_HEADER:" parser.h) + L119=$(get_line ":t_imp_8_COMMON:" parser.h) + L120=$(get_line ":t_imp_9_HEADER:" parser.h) + L121=$(get_line ":t_imp_9_COMMON:" parser.h) + L122=$(get_line ":t_imp_a_HEADER:" parser.h) + L123=$(get_line ":t_imp_a_COMMON:" parser.h) + L124=$(get_line ":t_imp_b_HEADER:" parser.h) + L125=$(get_line ":t_imp_b_COMMON:" parser.h) + L126=$(get_line ":t_imp_c_HEADER:" parser.h) + L127=$(get_line ":t_imp_c_COMMON:" parser.h) + L128=$(get_line ":t_imp_d_HEADER:" parser.h) + L129=$(get_line ":t_imp_d_COMMON:" parser.h) + L199=$(get_line "pcc_create" parser.h) + [ "$L000" -lt "$L001" ] + [ "$L001" -lt "$L002" ] + [ "$L002" -lt "$L003" ] + [ "$L003" -lt "$L004" ] + [ "$L004" -lt "$L005" ] + [ "$L005" -lt "$L006" ] + [ "$L006" -lt "$L007" ] + [ "$L007" -lt "$L008" ] + [ "$L008" -lt "$L009" ] + [ "$L009" -lt "$L010" ] + [ "$L010" -lt "$L011" ] + [ "$L011" -lt "$L012" ] + [ "$L012" -lt "$L013" ] + [ "$L013" -lt "$L014" ] + [ "$L014" -lt "$L015" ] + [ "$L015" -lt "$L016" ] + [ "$L016" -lt "$L017" ] + [ "$L017" -lt "$L018" ] + [ "$L018" -lt "$L019" ] + [ "$L019" -lt "$L020" ] + [ "$L020" -lt "$L021" ] + [ "$L021" -lt "$L022" ] + [ "$L022" -lt "$L023" ] + [ "$L023" -lt "$L024" ] + [ "$L024" -lt "$L025" ] + [ "$L025" -lt "$L026" ] + [ "$L026" -lt "$L027" ] + [ "$L027" -lt "$L028" ] + [ "$L028" -lt "$L029" ] + [ "$L029" -lt "$L099" ] + [ "$L099" -lt "$L100" ] + [ "$L100" -lt "$L101" ] + [ "$L101" -lt "$L102" ] + [ "$L102" -lt "$L103" ] + [ "$L103" 
-lt "$L104" ] + [ "$L104" -lt "$L105" ] + [ "$L105" -lt "$L106" ] + [ "$L106" -lt "$L107" ] + [ "$L107" -lt "$L108" ] + [ "$L108" -lt "$L109" ] + [ "$L109" -lt "$L110" ] + [ "$L110" -lt "$L111" ] + [ "$L111" -lt "$L112" ] + [ "$L112" -lt "$L113" ] + [ "$L113" -lt "$L114" ] + [ "$L114" -lt "$L115" ] + [ "$L115" -lt "$L116" ] + [ "$L116" -lt "$L117" ] + [ "$L117" -lt "$L118" ] + [ "$L118" -lt "$L119" ] + [ "$L119" -lt "$L120" ] + [ "$L120" -lt "$L121" ] + [ "$L121" -lt "$L122" ] + [ "$L122" -lt "$L123" ] + [ "$L123" -lt "$L124" ] + [ "$L124" -lt "$L125" ] + [ "$L125" -lt "$L126" ] + [ "$L126" -lt "$L127" ] + [ "$L127" -lt "$L128" ] + [ "$L128" -lt "$L129" ] + [ "$L129" -lt "$L199" ] +} + +@test "Testing import.d - source ordering" { + ! in_source ":t_input_EARLYHEADER:" + ! in_source ":t_imp_0_EARLYHEADER:" + ! in_source ":t_imp_1_EARLYHEADER:" + ! in_source ":t_imp_2_EARLYHEADER:" + ! in_source ":t_imp_3_EARLYHEADER:" + ! in_source ":t_imp_4_EARLYHEADER:" + ! in_source ":t_imp_5_EARLYHEADER:" + ! in_source ":t_imp_6_EARLYHEADER:" + ! in_source ":t_imp_7_EARLYHEADER:" + ! in_source ":t_imp_8_EARLYHEADER:" + ! in_source ":t_imp_9_EARLYHEADER:" + ! in_source ":t_imp_a_EARLYHEADER:" + ! in_source ":t_imp_b_EARLYHEADER:" + ! in_source ":t_imp_c_EARLYHEADER:" + ! in_source ":t_imp_d_EARLYHEADER:" + ! in_source ":t_input_HEADER:" + ! in_source ":t_imp_0_HEADER:" + ! in_source ":t_imp_1_HEADER:" + ! in_source ":t_imp_2_HEADER:" + ! in_source ":t_imp_3_HEADER:" + ! in_source ":t_imp_4_HEADER:" + ! in_source ":t_imp_5_HEADER:" + ! in_source ":t_imp_6_HEADER:" + ! in_source ":t_imp_7_HEADER:" + ! in_source ":t_imp_8_HEADER:" + ! in_source ":t_imp_9_HEADER:" + ! in_source ":t_imp_a_HEADER:" + ! in_source ":t_imp_b_HEADER:" + ! in_source ":t_imp_c_HEADER:" + ! 
in_source ":t_imp_d_HEADER:" + L000=$(get_line ":t_input_EARLYSOURCE:" parser.c) + L001=$(get_line ":t_input_EARLYCOMMON:" parser.c) + L002=$(get_line ":t_imp_0_EARLYSOURCE:" parser.c) + L003=$(get_line ":t_imp_0_EARLYCOMMON:" parser.c) + L004=$(get_line ":t_imp_1_EARLYSOURCE:" parser.c) + L005=$(get_line ":t_imp_1_EARLYCOMMON:" parser.c) + L006=$(get_line ":t_imp_2_EARLYSOURCE:" parser.c) + L007=$(get_line ":t_imp_2_EARLYCOMMON:" parser.c) + L008=$(get_line ":t_imp_3_EARLYSOURCE:" parser.c) + L009=$(get_line ":t_imp_3_EARLYCOMMON:" parser.c) + L010=$(get_line ":t_imp_4_EARLYSOURCE:" parser.c) + L011=$(get_line ":t_imp_4_EARLYCOMMON:" parser.c) + L012=$(get_line ":t_imp_5_EARLYSOURCE:" parser.c) + L013=$(get_line ":t_imp_5_EARLYCOMMON:" parser.c) + L014=$(get_line ":t_imp_6_EARLYSOURCE:" parser.c) + L015=$(get_line ":t_imp_6_EARLYCOMMON:" parser.c) + L016=$(get_line ":t_imp_7_EARLYSOURCE:" parser.c) + L017=$(get_line ":t_imp_7_EARLYCOMMON:" parser.c) + L018=$(get_line ":t_imp_8_EARLYSOURCE:" parser.c) + L019=$(get_line ":t_imp_8_EARLYCOMMON:" parser.c) + L020=$(get_line ":t_imp_9_EARLYSOURCE:" parser.c) + L021=$(get_line ":t_imp_9_EARLYCOMMON:" parser.c) + L022=$(get_line ":t_imp_a_EARLYSOURCE:" parser.c) + L023=$(get_line ":t_imp_a_EARLYCOMMON:" parser.c) + L024=$(get_line ":t_imp_b_EARLYSOURCE:" parser.c) + L025=$(get_line ":t_imp_b_EARLYCOMMON:" parser.c) + L026=$(get_line ":t_imp_c_EARLYSOURCE:" parser.c) + L027=$(get_line ":t_imp_c_EARLYCOMMON:" parser.c) + L028=$(get_line ":t_imp_d_EARLYSOURCE:" parser.c) + L029=$(get_line ":t_imp_d_EARLYCOMMON:" parser.c) + L098=$(get_line '#include ' parser.c) + L099=$(get_line '#include "parser.h"' parser.c) + L100=$(get_line ":t_input_SOURCE:" parser.c) + L101=$(get_line ":t_input_COMMON:" parser.c) + L102=$(get_line ":t_imp_0_SOURCE:" parser.c) + L103=$(get_line ":t_imp_0_COMMON:" parser.c) + L104=$(get_line ":t_imp_1_SOURCE:" parser.c) + L105=$(get_line ":t_imp_1_COMMON:" parser.c) + L106=$(get_line ":t_imp_2_SOURCE:" 
parser.c) + L107=$(get_line ":t_imp_2_COMMON:" parser.c) + L108=$(get_line ":t_imp_3_SOURCE:" parser.c) + L109=$(get_line ":t_imp_3_COMMON:" parser.c) + L110=$(get_line ":t_imp_4_SOURCE:" parser.c) + L111=$(get_line ":t_imp_4_COMMON:" parser.c) + L112=$(get_line ":t_imp_5_SOURCE:" parser.c) + L113=$(get_line ":t_imp_5_COMMON:" parser.c) + L114=$(get_line ":t_imp_6_SOURCE:" parser.c) + L115=$(get_line ":t_imp_6_COMMON:" parser.c) + L116=$(get_line ":t_imp_7_SOURCE:" parser.c) + L117=$(get_line ":t_imp_7_COMMON:" parser.c) + L118=$(get_line ":t_imp_8_SOURCE:" parser.c) + L119=$(get_line ":t_imp_8_COMMON:" parser.c) + L120=$(get_line ":t_imp_9_SOURCE:" parser.c) + L121=$(get_line ":t_imp_9_COMMON:" parser.c) + L122=$(get_line ":t_imp_a_SOURCE:" parser.c) + L123=$(get_line ":t_imp_a_COMMON:" parser.c) + L124=$(get_line ":t_imp_b_SOURCE:" parser.c) + L125=$(get_line ":t_imp_b_COMMON:" parser.c) + L126=$(get_line ":t_imp_c_SOURCE:" parser.c) + L127=$(get_line ":t_imp_c_COMMON:" parser.c) + L128=$(get_line ":t_imp_d_SOURCE:" parser.c) + L129=$(get_line ":t_imp_d_COMMON:" parser.c) + L198=$(get_line "#define PCC_BUFFER_MIN_SIZE" parser.c) + L199=$(get_line "pcc_destroy" parser.c) + L200=$(get_line ":t_imp_3_CODE:" parser.c) + L201=$(get_line ":t_imp_2_CODE:" parser.c) + L202=$(get_line ":t_imp_1_CODE:" parser.c) + L203=$(get_line ":t_imp_0_CODE:" parser.c) + L204=$(get_line ":t_imp_6_CODE:" parser.c) + L205=$(get_line ":t_imp_9_CODE:" parser.c) + L206=$(get_line ":t_imp_8_CODE:" parser.c) + L207=$(get_line ":t_imp_c_CODE:" parser.c) + L208=$(get_line ":t_imp_d_CODE:" parser.c) + L209=$(get_line ":t_imp_b_CODE:" parser.c) + L210=$(get_line ":t_imp_a_CODE:" parser.c) + L211=$(get_line ":t_imp_7_CODE:" parser.c) + L212=$(get_line ":t_imp_5_CODE:" parser.c) + L213=$(get_line ":t_imp_4_CODE:" parser.c) + L214=$(get_line ":t_input_CODE:" parser.c) + [ "$L000" -lt "$L001" ] + [ "$L001" -lt "$L002" ] + [ "$L002" -lt "$L003" ] + [ "$L003" -lt "$L004" ] + [ "$L004" -lt "$L005" ] + [ 
"$L005" -lt "$L006" ] + [ "$L006" -lt "$L007" ] + [ "$L007" -lt "$L008" ] + [ "$L008" -lt "$L009" ] + [ "$L009" -lt "$L010" ] + [ "$L010" -lt "$L011" ] + [ "$L011" -lt "$L012" ] + [ "$L012" -lt "$L013" ] + [ "$L013" -lt "$L014" ] + [ "$L014" -lt "$L015" ] + [ "$L015" -lt "$L016" ] + [ "$L016" -lt "$L017" ] + [ "$L017" -lt "$L018" ] + [ "$L018" -lt "$L019" ] + [ "$L019" -lt "$L020" ] + [ "$L020" -lt "$L021" ] + [ "$L021" -lt "$L022" ] + [ "$L022" -lt "$L023" ] + [ "$L023" -lt "$L024" ] + [ "$L024" -lt "$L025" ] + [ "$L025" -lt "$L026" ] + [ "$L026" -lt "$L027" ] + [ "$L027" -lt "$L028" ] + [ "$L028" -lt "$L029" ] + [ "$L029" -lt "$L098" ] + [ "$L098" -lt "$L099" ] + [ "$L099" -lt "$L100" ] + [ "$L100" -lt "$L101" ] + [ "$L101" -lt "$L102" ] + [ "$L102" -lt "$L103" ] + [ "$L103" -lt "$L104" ] + [ "$L104" -lt "$L105" ] + [ "$L105" -lt "$L106" ] + [ "$L106" -lt "$L107" ] + [ "$L107" -lt "$L108" ] + [ "$L108" -lt "$L109" ] + [ "$L109" -lt "$L110" ] + [ "$L110" -lt "$L111" ] + [ "$L111" -lt "$L112" ] + [ "$L112" -lt "$L113" ] + [ "$L113" -lt "$L114" ] + [ "$L114" -lt "$L115" ] + [ "$L115" -lt "$L116" ] + [ "$L116" -lt "$L117" ] + [ "$L117" -lt "$L118" ] + [ "$L118" -lt "$L119" ] + [ "$L119" -lt "$L120" ] + [ "$L120" -lt "$L121" ] + [ "$L121" -lt "$L122" ] + [ "$L122" -lt "$L123" ] + [ "$L123" -lt "$L124" ] + [ "$L124" -lt "$L125" ] + [ "$L125" -lt "$L126" ] + [ "$L126" -lt "$L127" ] + [ "$L127" -lt "$L128" ] + [ "$L128" -lt "$L129" ] + [ "$L129" -lt "$L198" ] + [ "$L198" -lt "$L199" ] + [ "$L199" -lt "$L200" ] + [ "$L200" -lt "$L201" ] + [ "$L201" -lt "$L202" ] + [ "$L202" -lt "$L203" ] + [ "$L203" -lt "$L204" ] + [ "$L204" -lt "$L205" ] + [ "$L205" -lt "$L206" ] + [ "$L206" -lt "$L207" ] + [ "$L207" -lt "$L208" ] + [ "$L208" -lt "$L209" ] + [ "$L209" -lt "$L210" ] + [ "$L210" -lt "$L211" ] + [ "$L211" -lt "$L212" ] + [ "$L212" -lt "$L213" ] + [ "$L213" -lt "$L214" ] +} + +check_line_number() { + "${PYTHON:-python3}" "$BATS_TEST_DIRNAME/check_line_number.py" "$1" 
"$BATS_TEST_DIRNAME/$2" +} + +check_line_number_pre() { + "${PYTHON:-python3}" "$BATS_TEST_DIRNAME/check_line_number.py" --only-pre "$1" "$BATS_TEST_DIRNAME/$2" +} + +@test "Testing import.d - header line numbers" { + check_line_number ":t_input_EARLYHEADER:" parser.h + check_line_number ":t_input_EARLYCOMMON:" parser.h + check_line_number ":t_imp_0_EARLYHEADER:" parser.h + check_line_number ":t_imp_0_EARLYCOMMON:" parser.h + check_line_number ":t_imp_1_EARLYHEADER:" parser.h + check_line_number ":t_imp_1_EARLYCOMMON:" parser.h + check_line_number ":t_imp_2_EARLYHEADER:" parser.h + check_line_number ":t_imp_2_EARLYCOMMON:" parser.h + check_line_number ":t_imp_3_EARLYHEADER:" parser.h + check_line_number ":t_imp_3_EARLYCOMMON:" parser.h + check_line_number ":t_imp_4_EARLYHEADER:" parser.h + check_line_number ":t_imp_4_EARLYCOMMON:" parser.h + check_line_number ":t_imp_5_EARLYHEADER:" parser.h + check_line_number ":t_imp_5_EARLYCOMMON:" parser.h + check_line_number ":t_imp_6_EARLYHEADER:" parser.h + check_line_number ":t_imp_6_EARLYCOMMON:" parser.h + check_line_number ":t_imp_7_EARLYHEADER:" parser.h + check_line_number ":t_imp_7_EARLYCOMMON:" parser.h + check_line_number ":t_imp_8_EARLYHEADER:" parser.h + check_line_number ":t_imp_8_EARLYCOMMON:" parser.h + check_line_number ":t_imp_9_EARLYHEADER:" parser.h + check_line_number ":t_imp_9_EARLYCOMMON:" parser.h + check_line_number ":t_imp_a_EARLYHEADER:" parser.h + check_line_number ":t_imp_a_EARLYCOMMON:" parser.h + check_line_number ":t_imp_b_EARLYHEADER:" parser.h + check_line_number ":t_imp_b_EARLYCOMMON:" parser.h + check_line_number ":t_imp_c_EARLYHEADER:" parser.h + check_line_number ":t_imp_c_EARLYCOMMON:" parser.h + check_line_number ":t_imp_d_EARLYHEADER:" parser.h + check_line_number ":t_imp_d_EARLYCOMMON:" parser.h + check_line_number ":t_input_HEADER:" parser.h + check_line_number ":t_input_COMMON:" parser.h + check_line_number ":t_imp_0_HEADER:" parser.h + check_line_number ":t_imp_0_COMMON:" parser.h + 
check_line_number ":t_imp_1_HEADER:" parser.h + check_line_number ":t_imp_1_COMMON:" parser.h + check_line_number ":t_imp_2_HEADER:" parser.h + check_line_number ":t_imp_2_COMMON:" parser.h + check_line_number ":t_imp_3_HEADER:" parser.h + check_line_number ":t_imp_3_COMMON:" parser.h + check_line_number ":t_imp_4_HEADER:" parser.h + check_line_number ":t_imp_4_COMMON:" parser.h + check_line_number ":t_imp_5_HEADER:" parser.h + check_line_number ":t_imp_5_COMMON:" parser.h + check_line_number ":t_imp_6_HEADER:" parser.h + check_line_number ":t_imp_6_COMMON:" parser.h + check_line_number ":t_imp_7_HEADER:" parser.h + check_line_number ":t_imp_7_COMMON:" parser.h + check_line_number ":t_imp_8_HEADER:" parser.h + check_line_number ":t_imp_8_COMMON:" parser.h + check_line_number ":t_imp_9_HEADER:" parser.h + check_line_number ":t_imp_9_COMMON:" parser.h + check_line_number ":t_imp_a_HEADER:" parser.h + check_line_number ":t_imp_a_COMMON:" parser.h + check_line_number ":t_imp_b_HEADER:" parser.h + check_line_number ":t_imp_b_COMMON:" parser.h + check_line_number ":t_imp_c_HEADER:" parser.h + check_line_number ":t_imp_c_COMMON:" parser.h + check_line_number ":t_imp_d_HEADER:" parser.h + check_line_number ":t_imp_d_COMMON:" parser.h +} + +@test "Testing import.d - source line numbers" { + check_line_number ":t_input_EARLYSOURCE:" parser.c + check_line_number ":t_input_EARLYCOMMON:" parser.c + check_line_number ":t_imp_0_EARLYSOURCE:" parser.c + check_line_number ":t_imp_0_EARLYCOMMON:" parser.c + check_line_number ":t_imp_1_EARLYSOURCE:" parser.c + check_line_number ":t_imp_1_EARLYCOMMON:" parser.c + check_line_number ":t_imp_2_EARLYSOURCE:" parser.c + check_line_number ":t_imp_2_EARLYCOMMON:" parser.c + check_line_number ":t_imp_3_EARLYSOURCE:" parser.c + check_line_number ":t_imp_3_EARLYCOMMON:" parser.c + check_line_number ":t_imp_4_EARLYSOURCE:" parser.c + check_line_number ":t_imp_4_EARLYCOMMON:" parser.c + check_line_number ":t_imp_5_EARLYSOURCE:" parser.c + 
check_line_number ":t_imp_5_EARLYCOMMON:" parser.c + check_line_number ":t_imp_6_EARLYSOURCE:" parser.c + check_line_number ":t_imp_6_EARLYCOMMON:" parser.c + check_line_number ":t_imp_7_EARLYSOURCE:" parser.c + check_line_number ":t_imp_7_EARLYCOMMON:" parser.c + check_line_number ":t_imp_8_EARLYSOURCE:" parser.c + check_line_number ":t_imp_8_EARLYCOMMON:" parser.c + check_line_number ":t_imp_9_EARLYSOURCE:" parser.c + check_line_number ":t_imp_9_EARLYCOMMON:" parser.c + check_line_number ":t_imp_a_EARLYSOURCE:" parser.c + check_line_number ":t_imp_a_EARLYCOMMON:" parser.c + check_line_number ":t_imp_b_EARLYSOURCE:" parser.c + check_line_number ":t_imp_b_EARLYCOMMON:" parser.c + check_line_number ":t_imp_c_EARLYSOURCE:" parser.c + check_line_number ":t_imp_c_EARLYCOMMON:" parser.c + check_line_number ":t_imp_d_EARLYSOURCE:" parser.c + check_line_number ":t_imp_d_EARLYCOMMON:" parser.c + check_line_number ":t_input_SOURCE:" parser.c + check_line_number ":t_input_COMMON:" parser.c + check_line_number ":t_imp_0_SOURCE:" parser.c + check_line_number ":t_imp_0_COMMON:" parser.c + check_line_number ":t_imp_1_SOURCE:" parser.c + check_line_number ":t_imp_1_COMMON:" parser.c + check_line_number ":t_imp_2_SOURCE:" parser.c + check_line_number ":t_imp_2_COMMON:" parser.c + check_line_number ":t_imp_3_SOURCE:" parser.c + check_line_number ":t_imp_3_COMMON:" parser.c + check_line_number ":t_imp_4_SOURCE:" parser.c + check_line_number ":t_imp_4_COMMON:" parser.c + check_line_number ":t_imp_5_SOURCE:" parser.c + check_line_number ":t_imp_5_COMMON:" parser.c + check_line_number ":t_imp_6_SOURCE:" parser.c + check_line_number ":t_imp_6_COMMON:" parser.c + check_line_number ":t_imp_7_SOURCE:" parser.c + check_line_number ":t_imp_7_COMMON:" parser.c + check_line_number ":t_imp_8_SOURCE:" parser.c + check_line_number ":t_imp_8_COMMON:" parser.c + check_line_number ":t_imp_9_SOURCE:" parser.c + check_line_number ":t_imp_9_COMMON:" parser.c + check_line_number ":t_imp_a_SOURCE:" 
parser.c + check_line_number ":t_imp_a_COMMON:" parser.c + check_line_number ":t_imp_b_SOURCE:" parser.c + check_line_number ":t_imp_b_COMMON:" parser.c + check_line_number ":t_imp_c_SOURCE:" parser.c + check_line_number ":t_imp_c_COMMON:" parser.c + check_line_number ":t_imp_d_SOURCE:" parser.c + check_line_number ":t_imp_d_COMMON:" parser.c + check_line_number_pre ":t_imp_3_CODE:" parser.c + check_line_number_pre ":t_imp_2_CODE:" parser.c + check_line_number_pre ":t_imp_1_CODE:" parser.c + check_line_number_pre ":t_imp_0_CODE:" parser.c + check_line_number_pre ":t_imp_6_CODE:" parser.c + check_line_number_pre ":t_imp_9_CODE:" parser.c + check_line_number_pre ":t_imp_8_CODE:" parser.c + check_line_number_pre ":t_imp_c_CODE:" parser.c + check_line_number_pre ":t_imp_d_CODE:" parser.c + check_line_number_pre ":t_imp_b_CODE:" parser.c + check_line_number_pre ":t_imp_a_CODE:" parser.c + check_line_number_pre ":t_imp_7_CODE:" parser.c + check_line_number_pre ":t_imp_5_CODE:" parser.c + check_line_number_pre ":t_imp_4_CODE:" parser.c + check_line_number_pre ":t_input_CODE:" parser.c +} diff --git a/tests/import.d/make_input.py b/tests/import.d/make_input.py new file mode 100644 index 0000000..69a65ce --- /dev/null +++ b/tests/import.d/make_input.py @@ -0,0 +1,48 @@ +#!/usr/bin/python3 + +# Copyright (c) 2024 Arihiro Yoshida. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
def _render_template(template, ident, more, imports):
    """Return *template* with its ``${...}`` placeholders substituted.

    ``${ID}`` becomes ``ident`` followed by an underscore.  ``${MORE}``
    becomes an extra ``/ <more>_FILE`` alternative, or nothing when *more*
    is empty.  ``${IMPORT_0}`` .. ``${IMPORT_3}`` become ``%import "<path>"``
    directives taken in order from *imports*; slots that have no entry (or an
    empty one) are blanked out.
    """
    text = template.replace('${ID}', ident + '_')
    # NOTE(review): this literal is copied from a patch view that collapses
    # runs of whitespace -- confirm the leading indent of the alternative
    # against the original file before merging.
    text = text.replace('${MORE}', ' / ' + more + '_FILE' if more else '')
    # template.peg provides exactly four import slots: IMPORT_0 .. IMPORT_3.
    for slot in range(4):
        target = imports[slot] if slot < len(imports) else ''
        text = text.replace('${IMPORT_' + str(slot) + '}',
                            '%import "' + target + '"' if target else '')
    return text


def main():
    """Generate one PEG test input file from ``template.peg``.

    Command line: ``make_input.py ROOT PATH MORE [IMPORT ...]``

    * ``ROOT``   -- directory that holds ``template.peg``; output is written
      below it.
    * ``PATH``   -- output file path relative to ``ROOT``; missing parent
      directories are created.  Its base name, minus a trailing ``.peg`` and
      with every character outside ``[_a-zA-Z0-9]`` turned into ``_``, is the
      identifier prefix substituted for ``${ID}``.
    * ``MORE``   -- prefix of a further ``<MORE>_FILE`` rule to chain to, or
      an empty string for none.
    * ``IMPORT`` -- up to four paths to emit as ``%import`` directives.

    Exits with status 1 when fewer than three arguments are supplied.
    """
    # Work on a copy so the process-wide sys.argv is left untouched.
    args = sys.argv[1:]
    if len(args) < 3:
        print('Too few arguments', file=sys.stderr)
        sys.exit(1)
    root, path, more, *imports = args
    target = root + '/' + path
    os.makedirs(os.path.dirname(target), exist_ok=True)
    ident = re.sub(r'[^_a-zA-Z0-9]', '_',
                   re.sub(r'\.peg$', '', os.path.basename(path)))
    # Explicit encoding keeps the result independent of the platform locale.
    with open(root + '/template.peg', 'r', encoding='utf-8') as file:
        template = file.read()
    # newline='\n' forces LF line endings on every platform.
    with open(target, 'w', encoding='utf-8', newline='\n') as file:
        file.write(_render_template(template, ident, more, imports))


if __name__ == '__main__':
    main()
/* :t_imp_0_SOURCE: */ +} + +%common { + /* :t_imp_0_COMMON: */ +} + +%earlyheader { /* :t_imp_0_EARLYHEADER: */ } + +%earlysource { /* :t_imp_0_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_0_EARLYCOMMON: */ +} + +t_imp_0_FILE + <- t_imp_0_RULE1 + { + /* :t_imp_0_RULE1: */ + } + / t_imp_0_RULE2 + { /* :t_imp_0_RULE2: */ } + / t_imp_0_RULE3 { /* :t_imp_0_RULE3: */ } + / t_imp_1_FILE + +t_imp_0_RULE1 <- '1' + +%header { /* :t_imp_1_HEADER: */ } + +%source { + /* :t_imp_1_SOURCE: */ +} + +%common { + /* :t_imp_1_COMMON: */ +} + +%earlyheader { /* :t_imp_1_EARLYHEADER: */ } + +%earlysource { /* :t_imp_1_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_1_EARLYCOMMON: */ +} + +t_imp_1_FILE + <- t_imp_1_RULE1 + { + /* :t_imp_1_RULE1: */ + } + / t_imp_1_RULE2 + { /* :t_imp_1_RULE2: */ } + / t_imp_1_RULE3 { /* :t_imp_1_RULE3: */ } + / t_imp_2_FILE + +t_imp_1_RULE1 <- '1' + +t_imp_1_RULE2 <- '2' + +t_imp_1_RULE3 <- '3' + +%header { /* :t_imp_2_HEADER: */ } + +%source { + /* :t_imp_2_SOURCE: */ +} + +%common { + /* :t_imp_2_COMMON: */ +} + +%earlyheader { /* :t_imp_2_EARLYHEADER: */ } + +%earlysource { /* :t_imp_2_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_2_EARLYCOMMON: */ +} + +t_imp_2_FILE + <- t_imp_2_RULE1 + { + /* :t_imp_2_RULE1: */ + } + / t_imp_2_RULE2 + { /* :t_imp_2_RULE2: */ } + / t_imp_2_RULE3 { /* :t_imp_2_RULE3: */ } + / t_imp_3_FILE + +t_imp_2_RULE1 <- '1' + +%header { /* :t_imp_3_HEADER: */ } + +%source { + /* :t_imp_3_SOURCE: */ +} + +%common { + /* :t_imp_3_COMMON: */ +} + +%earlyheader { /* :t_imp_3_EARLYHEADER: */ } + +%earlysource { /* :t_imp_3_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_3_EARLYCOMMON: */ +} + +t_imp_3_FILE + <- t_imp_3_RULE1 + { + /* :t_imp_3_RULE1: */ + } + / t_imp_3_RULE2 + { /* :t_imp_3_RULE2: */ } + / t_imp_3_RULE3 { /* :t_imp_3_RULE3: */ } + / t_imp_4_FILE + +t_imp_3_RULE1 <- '1' + +t_imp_3_RULE2 <- '2' + +t_imp_3_RULE3 <- '3' + +t_imp_2_RULE2 <- '2' + +t_imp_2_RULE3 <- '3' + +t_imp_0_RULE2 <- '2' + +t_imp_0_RULE3 <- '3' 
+ + +t_input_RULE2 <- '2' + +t_input_RULE3 <- '3' + +%header { /* :t_imp_4_HEADER: */ } + +%source { + /* :t_imp_4_SOURCE: */ +} + +%common { + /* :t_imp_4_COMMON: */ +} + +%earlyheader { /* :t_imp_4_EARLYHEADER: */ } + +%earlysource { /* :t_imp_4_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_4_EARLYCOMMON: */ +} + +t_imp_4_FILE + <- t_imp_4_RULE1 + { + /* :t_imp_4_RULE1: */ + } + / t_imp_4_RULE2 + { /* :t_imp_4_RULE2: */ } + / t_imp_4_RULE3 { /* :t_imp_4_RULE3: */ } + / t_imp_5_FILE + +t_imp_4_RULE1 <- '1' + +%header { /* :t_imp_5_HEADER: */ } + +%source { + /* :t_imp_5_SOURCE: */ +} + +%common { + /* :t_imp_5_COMMON: */ +} + +%earlyheader { /* :t_imp_5_EARLYHEADER: */ } + +%earlysource { /* :t_imp_5_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_5_EARLYCOMMON: */ +} + +t_imp_5_FILE + <- t_imp_5_RULE1 + { + /* :t_imp_5_RULE1: */ + } + / t_imp_5_RULE2 + { /* :t_imp_5_RULE2: */ } + / t_imp_5_RULE3 { /* :t_imp_5_RULE3: */ } + / t_imp_6_FILE + +%header { /* :t_imp_6_HEADER: */ } + +%source { + /* :t_imp_6_SOURCE: */ +} + +%common { + /* :t_imp_6_COMMON: */ +} + +%earlyheader { /* :t_imp_6_EARLYHEADER: */ } + +%earlysource { /* :t_imp_6_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_6_EARLYCOMMON: */ +} + +t_imp_6_FILE + <- t_imp_6_RULE1 + { + /* :t_imp_6_RULE1: */ + } + / t_imp_6_RULE2 + { /* :t_imp_6_RULE2: */ } + / t_imp_6_RULE3 { /* :t_imp_6_RULE3: */ } + / t_imp_7_FILE + +t_imp_6_RULE1 <- '1' + +t_imp_6_RULE2 <- '2' + +t_imp_6_RULE3 <- '3' + +t_imp_5_RULE1 <- '1' + +%header { /* :t_imp_7_HEADER: */ } + +%source { + /* :t_imp_7_SOURCE: */ +} + +%common { + /* :t_imp_7_COMMON: */ +} + +%earlyheader { /* :t_imp_7_EARLYHEADER: */ } + +%earlysource { /* :t_imp_7_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_7_EARLYCOMMON: */ +} + +t_imp_7_FILE + <- t_imp_7_RULE1 + { + /* :t_imp_7_RULE1: */ + } + / t_imp_7_RULE2 + { /* :t_imp_7_RULE2: */ } + / t_imp_7_RULE3 { /* :t_imp_7_RULE3: */ } + / t_imp_8_FILE + +%header { /* :t_imp_8_HEADER: */ } + +%source { + /* 
:t_imp_8_SOURCE: */ +} + +%common { + /* :t_imp_8_COMMON: */ +} + +%earlyheader { /* :t_imp_8_EARLYHEADER: */ } + +%earlysource { /* :t_imp_8_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_8_EARLYCOMMON: */ +} + +t_imp_8_FILE + <- t_imp_8_RULE1 + { + /* :t_imp_8_RULE1: */ + } + / t_imp_8_RULE2 + { /* :t_imp_8_RULE2: */ } + / t_imp_8_RULE3 { /* :t_imp_8_RULE3: */ } + / t_imp_9_FILE + +t_imp_8_RULE1 <- '1' + +%header { /* :t_imp_9_HEADER: */ } + +%source { + /* :t_imp_9_SOURCE: */ +} + +%common { + /* :t_imp_9_COMMON: */ +} + +%earlyheader { /* :t_imp_9_EARLYHEADER: */ } + +%earlysource { /* :t_imp_9_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_9_EARLYCOMMON: */ +} + +t_imp_9_FILE + <- t_imp_9_RULE1 + { + /* :t_imp_9_RULE1: */ + } + / t_imp_9_RULE2 + { /* :t_imp_9_RULE2: */ } + / t_imp_9_RULE3 { /* :t_imp_9_RULE3: */ } + / t_imp_a_FILE + +t_imp_9_RULE1 <- '1' + +t_imp_9_RULE2 <- '2' + +t_imp_9_RULE3 <- '3' + +t_imp_8_RULE2 <- '2' + +t_imp_8_RULE3 <- '3' + +t_imp_7_RULE1 <- '1' + +%header { /* :t_imp_a_HEADER: */ } + +%source { + /* :t_imp_a_SOURCE: */ +} + +%common { + /* :t_imp_a_COMMON: */ +} + +%earlyheader { /* :t_imp_a_EARLYHEADER: */ } + +%earlysource { /* :t_imp_a_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_a_EARLYCOMMON: */ +} + +t_imp_a_FILE + <- t_imp_a_RULE1 + { + /* :t_imp_a_RULE1: */ + } + / t_imp_a_RULE2 + { /* :t_imp_a_RULE2: */ } + / t_imp_a_RULE3 { /* :t_imp_a_RULE3: */ } + / t_imp_b_FILE + +t_imp_a_RULE1 <- '1' + +%header { /* :t_imp_b_HEADER: */ } + +%source { + /* :t_imp_b_SOURCE: */ +} + +%common { + /* :t_imp_b_COMMON: */ +} + +%earlyheader { /* :t_imp_b_EARLYHEADER: */ } + +%earlysource { /* :t_imp_b_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_b_EARLYCOMMON: */ +} + +t_imp_b_FILE + <- t_imp_b_RULE1 + { + /* :t_imp_b_RULE1: */ + } + / t_imp_b_RULE2 + { /* :t_imp_b_RULE2: */ } + / t_imp_b_RULE3 { /* :t_imp_b_RULE3: */ } + / t_imp_c_FILE + +%header { /* :t_imp_c_HEADER: */ } + +%source { + /* :t_imp_c_SOURCE: */ +} + +%common { + /* 
:t_imp_c_COMMON: */ +} + +%earlyheader { /* :t_imp_c_EARLYHEADER: */ } + +%earlysource { /* :t_imp_c_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_c_EARLYCOMMON: */ +} + +t_imp_c_FILE + <- t_imp_c_RULE1 + { + /* :t_imp_c_RULE1: */ + } + / t_imp_c_RULE2 + { /* :t_imp_c_RULE2: */ } + / t_imp_c_RULE3 { /* :t_imp_c_RULE3: */ } + / t_imp_d_FILE + +t_imp_c_RULE1 <- '1' + +t_imp_c_RULE2 <- '2' + +t_imp_c_RULE3 <- '3' + +t_imp_b_RULE1 <- '1' + +%header { /* :t_imp_d_HEADER: */ } + +%source { + /* :t_imp_d_SOURCE: */ +} + +%common { + /* :t_imp_d_COMMON: */ +} + +%earlyheader { /* :t_imp_d_EARLYHEADER: */ } + +%earlysource { /* :t_imp_d_EARLYSOURCE: */ } + +%earlycommon { + /* :t_imp_d_EARLYCOMMON: */ +} + +t_imp_d_FILE + <- t_imp_d_RULE1 + { + /* :t_imp_d_RULE1: */ + } + / t_imp_d_RULE2 + { /* :t_imp_d_RULE2: */ } + / t_imp_d_RULE3 { /* :t_imp_d_RULE3: */ } + +t_imp_d_RULE1 <- '1' + +t_imp_d_RULE2 <- '2' + +t_imp_d_RULE3 <- '3' + +t_imp_b_RULE2 <- '2' + +t_imp_b_RULE3 <- '3' + +t_imp_a_RULE2 <- '2' + +t_imp_a_RULE3 <- '3' + +t_imp_7_RULE2 <- '2' + +t_imp_7_RULE3 <- '3' + +t_imp_5_RULE2 <- '2' + +t_imp_5_RULE3 <- '3' + +t_imp_4_RULE2 <- '2' + +t_imp_4_RULE3 <- '3' + +%% +/* :t_imp_3_CODE: */ +/* :t_imp_2_CODE: */ +/* :t_imp_1_CODE: */ +/* :t_imp_0_CODE: */ +/* :t_imp_6_CODE: */ +/* :t_imp_9_CODE: */ +/* :t_imp_8_CODE: */ +/* :t_imp_c_CODE: */ +/* :t_imp_d_CODE: */ +/* :t_imp_b_CODE: */ +/* :t_imp_a_CODE: */ +/* :t_imp_7_CODE: */ +/* :t_imp_5_CODE: */ +/* :t_imp_4_CODE: */ +/* :t_input_CODE: */ diff --git a/tests/import.d/template.peg b/tests/import.d/template.peg new file mode 100644 index 0000000..9fc123d --- /dev/null +++ b/tests/import.d/template.peg @@ -0,0 +1,37 @@ +%header { /* :${ID}HEADER: */ } + +%source { + /* :${ID}SOURCE: */ +} + +%common { + /* :${ID}COMMON: */ +} + +%earlyheader { /* :${ID}EARLYHEADER: */ } + +%earlysource { /* :${ID}EARLYSOURCE: */ } + +%earlycommon { + /* :${ID}EARLYCOMMON: */ +} + +${ID}FILE + <- ${ID}RULE1 + { + /* :${ID}RULE1: */ + } + 
/ ${ID}RULE2 + { /* :${ID}RULE2: */ } + / ${ID}RULE3 { /* :${ID}RULE3: */ } +${MORE} +${IMPORT_0} +${ID}RULE1 <- '1' +${IMPORT_1} +${ID}RULE2 <- '2' +${IMPORT_2} +${ID}RULE3 <- '3' +${IMPORT_3} + +%% +/* :${ID}CODE: */