/**
   @file sequoyah.l

   @brief sequoyah - convert between Cherokee syllabary and transliterations

   @author Paul Hardy, August 2023 - February 2024

   @copyright 2023, 2024

   This program reads in ASCII syllables for Cherokee, as given in the
   Unicode Cherokee ranges, and produces UTF-8 output in Cherokee
   syllables.  Example: "Gadugi" or "GAdugi" produces "Ꭶꮪꭹ".

   This program will also read in Cherokee syllables and produce an
   ASCII transliterated output.  Example: "Ꭶꮪꭹ" produces "GAdugi".

   Ꭰꮎ ꮎꮝꭹꮎꭲ.
*/
/*
   LICENSE:

      This program is free software: you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published by
      the Free Software Foundation, either version 2 of the License, or
      (at your option) any later version.

      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      GNU General Public License for more details.

      You should have received a copy of the GNU General Public License
      along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

%{
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "config.h"

#define YY_NO_INPUT
#define YY_NO_UNPUT

/*
   The rest of this section contains several definitions for compatibility
   with the Unibetacode package, also by Paul Hardy.  However, the two
   packages are not currently merged in any way.
*/
/*
   Definitions for Beta Code encoding.
*/
#define LANG_GREEK  0x0000  ///< Define for Greek output (unused).
#define LANG_LATIN  0x1000  ///< Define for Latin output (unused).
#define LANG_COPTIC 0x2000  ///< Define for Coptic output (unused).
#define LANG_ARABIC 0x4000  /* Defined by Thesaurus Linguae Graecae but not in their corpus */
#define LANG_HEBREW 0x8000  ///< Define for Hebrew output (unused).

#define LANG_CHEROKEE	0x10000 ///< Define for Cherokee output (default).

int bom_out = 0;  /* =1 to begin UTF-8 output with a UTF-8 Byte Order Mark */
int lang_type = LANG_CHEROKEE;  /* for selecting additional languages */

int doubleq_style=6;  /* style for double quotation marks (Greek double quotes) */
int singleq_style=7;  /* style for single quotation marks (Greek single quotes) */
/*
   Open/close tracking for quotation styles 0 through 9, inclusive.
   (Beta Code itself only uses styles 1 through 8, inclusive.)

      0 = no quote of this style is currently open
      1 = a quote of this style is open, so the next encounter closes it

   Every style starts out closed: the single explicit 0 initializes
   element 0 and C zero-fills the remaining nine elements.
*/
int quote_state[10] = {0};

/*
   Unicode code point of the opening quotation mark for each
   quotation style, indexed by style number 0 through 9.
*/
int quote_open[10] = {
   [0] = 0x201C,  /* LANG_LATIN:  U+201C LEFT DOUBLE QUOTATION MARK */
   [1] = 0x201E,  /* LANG_HEBREW: U+201E DOUBLE LOW-9 QUOTATION MARK */
   [2] = 0x201E,  /*              U+201E DOUBLE LOW-9 QUOTATION MARK (not in TLG spec) */
   [3] = 0x2018,  /* LANG_LATIN:  U+2018 LEFT SINGLE QUOTATION MARK
                                  Alternative - U+02BB MODIFIER LETTER TURNED COMMA */
   [4] = 0x201A,  /* LANG_HEBREW: U+201A SINGLE LOW-9 QUOTATION MARK */
   [5] = 0x2018,  /*              U+2018 LEFT SINGLE QUOTATION MARK (not in TLG spec) */
   [6] = 0x00AB,  /* LANG_GREEK,
                     LANG_COPTIC: U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */
   [7] = 0x02BB,  /* LANG_GREEK,
                     LANG_COPTIC: U+02BB MODIFIER LETTER TURNED COMMA
                                  Alternative - U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
   [8] = 0x201C,  /*              U+201C LEFT DOUBLE QUOTATION MARK (not implemented) */
   [9] = 0x0022   /*              U+0022 QUOTATION MARK (not implemented) */
};

/*
   Unicode code point of the closing quotation mark for each
   quotation style, indexed by style number 0 through 9.
*/
int quote_close[10] = {
   [0] = 0x201D,  /* LANG_LATIN:  U+201D RIGHT DOUBLE QUOTATION MARK */
   [1] = 0x201E,  /* LANG_HEBREW: U+201E DOUBLE LOW-9 QUOTATION MARK */
   [2] = 0x201C,  /*              U+201C LEFT DOUBLE QUOTATION MARK (not paired in TLG spec) */
   [3] = 0x2019,  /* LANG_LATIN:  U+2019 RIGHT SINGLE QUOTATION MARK
                                  Alternative - U+02BC MODIFIER LETTER APOSTROPHE */
   [4] = 0x201A,  /* LANG_HEBREW: U+201A SINGLE LOW-9 QUOTATION MARK */
   [5] = 0x201B,  /*              U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK (not paired in TLG spec) */
   [6] = 0x00BB,  /* LANG_GREEK,
                     LANG_COPTIC: U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
   [7] = 0x02BC,  /* LANG_GREEK,
                     LANG_COPTIC: U+02BC MODIFIER LETTER APOSTROPHE
                                  Alternative - U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
   [8] = 0x201E,  /*              U+201E DOUBLE LOW-9 QUOTATION MARK (not implemented) */
   [9] = 0x0022   /*              U+0022 QUOTATION MARK (not implemented) */
};


/* Forward declarations of the output helper functions. */
void print_ascii   (char *);    /* Print a string that's inside a '{'...'}' pair   */
void print_unicode (char *);    /* Print a Unicode code point in the form "\uXXXX" */
void print_quote   (char *);    /* Print open or close quotation mark styles       */
void print_utf8    (uint32_t);  /* Print a Unicode code point in UTF-8             */
void print_capital (char *);    /* Print Greek, Hebrew, or Coptic capital letter   */
void print_small   (char *);    /* Print Greek, Hebrew, or Coptic small letter     */
void print_pattern (char *, uint32_t);  /* Print yytext in Latin mode or print a Unicode code point in UTF-8 */
/*
   Print a letter in one of four language modes.  The four
   arguments are the code points for:

        Latin, Greek, Coptic, and Hebrew, respectively.

   Currently this is only needed to handle 'S' and 's'
   because of Greek context-dependent middle and final sigma.
*/
void print_letter  (uint32_t, uint32_t, uint32_t, uint32_t);


/*
   Lookup tables of 128 entries each, all starting out as zero.
   NOTE(review): going by the names, each is indexed by a 7-bit ASCII
   code and holds the matching Greek, Coptic, or Hebrew code point;
   confirm where (and whether) they are filled in.

   The initializer is a plain {0}: C has no repeat-count syntax, so the
   former "128*0" was simply the constant 0 for element 0, and the
   language zero-fills every element without an explicit initializer.
*/
uint32_t ascii2greek_capital[128] = {0};
uint32_t ascii2greek_small[128] = {0};
uint32_t ascii2coptic[128] = {0};
uint32_t ascii2hebrew[128] = {0};

%}

/* flex option: do not call yywrap() at end of file; assume there is no further input to scan. */
%option noyywrap

/* ESCAPE: an opening brace, any run of characters that are not braces, and a closing brace. */
ESCAPE		\{[^\{\}]*\}
/* QUOTE and DECIMAL_DIGIT are not referenced by any rule in the rules section of this file. */
QUOTE		(\"|`|')
DECIMAL_DIGIT	[0-9]

%%
{ESCAPE}	{  /*
	              Brace-delimited escape: print the enclosed string
	              of ASCII and/or special Unicode symbols of the
	              form "\ux...x".  print_ascii () receives the body
	              with both enclosing braces removed.
	              yyleng is the match length flex already recorded,
	              so the closing brace is always the last matched
	              byte, even if the body contains a NUL byte (which
	              strlen () would have stopped at).
	           */
	           yytext [yyleng - 1] = '\0';  /* overwrite the closing brace */
	           print_ascii (&yytext[1]);    /* skip the opening brace      */
	        }

	/* Convert transliterated ASCII to UTF-8 Cherokee. */
	/* Capital syllables: all capitals, or a capital followed by lower case. */
A	print_utf8 (0x13A0);	/* U+13A0 CHEROKEE LETTER A   */
E	print_utf8 (0x13A1);	/* U+13A1 CHEROKEE LETTER E   */
I	print_utf8 (0x13A2);	/* U+13A2 CHEROKEE LETTER I   */
O	print_utf8 (0x13A3);	/* U+13A3 CHEROKEE LETTER O   */
U	print_utf8 (0x13A4);	/* U+13A4 CHEROKEE LETTER U   */
V	print_utf8 (0x13A5);	/* U+13A5 CHEROKEE LETTER V   */
GA|Ga	print_utf8 (0x13A6);	/* U+13A6 CHEROKEE LETTER GA  */
KA|Ka	print_utf8 (0x13A7);	/* U+13A7 CHEROKEE LETTER KA  */
GE|Ge	print_utf8 (0x13A8);	/* U+13A8 CHEROKEE LETTER GE  */
GI|Gi	print_utf8 (0x13A9);	/* U+13A9 CHEROKEE LETTER GI  */
GO|Go	print_utf8 (0x13AA);	/* U+13AA CHEROKEE LETTER GO  */
GU|Gu	print_utf8 (0x13AB);	/* U+13AB CHEROKEE LETTER GU  */
GV|Gv	print_utf8 (0x13AC);	/* U+13AC CHEROKEE LETTER GV  */
HA|Ha	print_utf8 (0x13AD);	/* U+13AD CHEROKEE LETTER HA  */
HE|He	print_utf8 (0x13AE);	/* U+13AE CHEROKEE LETTER HE  */
HI|Hi	print_utf8 (0x13AF);	/* U+13AF CHEROKEE LETTER HI  */
HO|Ho	print_utf8 (0x13B0);	/* U+13B0 CHEROKEE LETTER HO  */
HU|Hu	print_utf8 (0x13B1);	/* U+13B1 CHEROKEE LETTER HU  */
HV|Hv	print_utf8 (0x13B2);	/* U+13B2 CHEROKEE LETTER HV  */
LA|La	print_utf8 (0x13B3);	/* U+13B3 CHEROKEE LETTER LA  */
LE|Le	print_utf8 (0x13B4);	/* U+13B4 CHEROKEE LETTER LE  */
LI|Li	print_utf8 (0x13B5);	/* U+13B5 CHEROKEE LETTER LI  */
LO|Lo	print_utf8 (0x13B6);	/* U+13B6 CHEROKEE LETTER LO  */
LU|Lu	print_utf8 (0x13B7);	/* U+13B7 CHEROKEE LETTER LU  */
LV|Lv	print_utf8 (0x13B8);	/* U+13B8 CHEROKEE LETTER LV  */
MA|Ma	print_utf8 (0x13B9);	/* U+13B9 CHEROKEE LETTER MA  */
ME|Me	print_utf8 (0x13BA);	/* U+13BA CHEROKEE LETTER ME  */
MI|Mi	print_utf8 (0x13BB);	/* U+13BB CHEROKEE LETTER MI  */
MO|Mo	print_utf8 (0x13BC);	/* U+13BC CHEROKEE LETTER MO  */
MU|Mu	print_utf8 (0x13BD);	/* U+13BD CHEROKEE LETTER MU  */
NA|Na	print_utf8 (0x13BE);	/* U+13BE CHEROKEE LETTER NA  */
HNA|Hna	print_utf8 (0x13BF);	/* U+13BF CHEROKEE LETTER HNA */
NAH|Nah	print_utf8 (0x13C0);	/* U+13C0 CHEROKEE LETTER NAH */
NE|Ne	print_utf8 (0x13C1);	/* U+13C1 CHEROKEE LETTER NE  */
NI|Ni	print_utf8 (0x13C2);	/* U+13C2 CHEROKEE LETTER NI  */
NO|No	print_utf8 (0x13C3);	/* U+13C3 CHEROKEE LETTER NO  */
NU|Nu	print_utf8 (0x13C4);	/* U+13C4 CHEROKEE LETTER NU  */
NV|Nv	print_utf8 (0x13C5);	/* U+13C5 CHEROKEE LETTER NV  */
QUA|Qua	print_utf8 (0x13C6);	/* U+13C6 CHEROKEE LETTER QUA */
QUE|Que	print_utf8 (0x13C7);	/* U+13C7 CHEROKEE LETTER QUE */
QUI|Qui	print_utf8 (0x13C8);	/* U+13C8 CHEROKEE LETTER QUI */
QUO|Quo	print_utf8 (0x13C9);	/* U+13C9 CHEROKEE LETTER QUO */
QUU|Quu	print_utf8 (0x13CA);	/* U+13CA CHEROKEE LETTER QUU */
QUV|Quv	print_utf8 (0x13CB);	/* U+13CB CHEROKEE LETTER QUV */
SA|Sa	print_utf8 (0x13CC);	/* U+13CC CHEROKEE LETTER SA  */
S	print_utf8 (0x13CD);	/* U+13CD CHEROKEE LETTER S   */
SE|Se	print_utf8 (0x13CE);	/* U+13CE CHEROKEE LETTER SE  */
SI|Si	print_utf8 (0x13CF);	/* U+13CF CHEROKEE LETTER SI  */
SO|So	print_utf8 (0x13D0);	/* U+13D0 CHEROKEE LETTER SO  */
SU|Su	print_utf8 (0x13D1);	/* U+13D1 CHEROKEE LETTER SU  */
SV|Sv	print_utf8 (0x13D2);	/* U+13D2 CHEROKEE LETTER SV  */
DA|Da	print_utf8 (0x13D3);	/* U+13D3 CHEROKEE LETTER DA  */
TA|Ta	print_utf8 (0x13D4);	/* U+13D4 CHEROKEE LETTER TA  */
DE|De	print_utf8 (0x13D5);	/* U+13D5 CHEROKEE LETTER DE  */
TE|Te	print_utf8 (0x13D6);	/* U+13D6 CHEROKEE LETTER TE  */
DI|Di	print_utf8 (0x13D7);	/* U+13D7 CHEROKEE LETTER DI  */
TI|Ti	print_utf8 (0x13D8);	/* U+13D8 CHEROKEE LETTER TI  */
DO|Do	print_utf8 (0x13D9);	/* U+13D9 CHEROKEE LETTER DO  */
DU|Du	print_utf8 (0x13DA);	/* U+13DA CHEROKEE LETTER DU  */
DV|Dv	print_utf8 (0x13DB);	/* U+13DB CHEROKEE LETTER DV  */
DLA|Dla	print_utf8 (0x13DC);	/* U+13DC CHEROKEE LETTER DLA */
TLA|Tla	print_utf8 (0x13DD);	/* U+13DD CHEROKEE LETTER TLA */
TLE|Tle	print_utf8 (0x13DE);	/* U+13DE CHEROKEE LETTER TLE */
TLI|Tli	print_utf8 (0x13DF);	/* U+13DF CHEROKEE LETTER TLI */
TLO|Tlo	print_utf8 (0x13E0);	/* U+13E0 CHEROKEE LETTER TLO */
TLU|Tlu	print_utf8 (0x13E1);	/* U+13E1 CHEROKEE LETTER TLU */
TLV|Tlv	print_utf8 (0x13E2);	/* U+13E2 CHEROKEE LETTER TLV */
TSA|Tsa	print_utf8 (0x13E3);	/* U+13E3 CHEROKEE LETTER TSA */
TSE|Tse	print_utf8 (0x13E4);	/* U+13E4 CHEROKEE LETTER TSE */
TSI|Tsi	print_utf8 (0x13E5);	/* U+13E5 CHEROKEE LETTER TSI */
TSO|Tso	print_utf8 (0x13E6);	/* U+13E6 CHEROKEE LETTER TSO */
TSU|Tsu	print_utf8 (0x13E7);	/* U+13E7 CHEROKEE LETTER TSU */
TSV|Tsv	print_utf8 (0x13E8);	/* U+13E8 CHEROKEE LETTER TSV */
WA|Wa	print_utf8 (0x13E9);	/* U+13E9 CHEROKEE LETTER WA  */
WE|We	print_utf8 (0x13EA);	/* U+13EA CHEROKEE LETTER WE  */
WI|Wi	print_utf8 (0x13EB);	/* U+13EB CHEROKEE LETTER WI  */
WO|Wo	print_utf8 (0x13EC);	/* U+13EC CHEROKEE LETTER WO  */
WU|Wu	print_utf8 (0x13ED);	/* U+13ED CHEROKEE LETTER WU  */
WV|Wv	print_utf8 (0x13EE);	/* U+13EE CHEROKEE LETTER WV  */
YA|Ya	print_utf8 (0x13EF);	/* U+13EF CHEROKEE LETTER YA  */
YE|Ye	print_utf8 (0x13F0);	/* U+13F0 CHEROKEE LETTER YE  */
YI|Yi	print_utf8 (0x13F1);	/* U+13F1 CHEROKEE LETTER YI  */
YO|Yo	print_utf8 (0x13F2);	/* U+13F2 CHEROKEE LETTER YO  */
YU|Yu	print_utf8 (0x13F3);	/* U+13F3 CHEROKEE LETTER YU  */
YV|Yv	print_utf8 (0x13F4);	/* U+13F4 CHEROKEE LETTER YV  */
MV|Mv	print_utf8 (0x13F5);	/* U+13F5 CHEROKEE LETTER MV  */

	/* Small syllables, spelled entirely in lower case: U+13F8..U+13FD. */
ye	print_utf8 (0x13F8);	/* U+13F8 CHEROKEE SMALL LETTER YE  */
yi	print_utf8 (0x13F9);	/* U+13F9 CHEROKEE SMALL LETTER YI  */
yo	print_utf8 (0x13FA);	/* U+13FA CHEROKEE SMALL LETTER YO  */
yu	print_utf8 (0x13FB);	/* U+13FB CHEROKEE SMALL LETTER YU  */
yv	print_utf8 (0x13FC);	/* U+13FC CHEROKEE SMALL LETTER YV  */
mv	print_utf8 (0x13FD);	/* U+13FD CHEROKEE SMALL LETTER MV  */

	/* Small syllables, spelled entirely in lower case: U+AB70..U+ABBF. */
a	print_utf8 (0xAB70);	/* U+AB70 CHEROKEE SMALL LETTER A   */
e	print_utf8 (0xAB71);	/* U+AB71 CHEROKEE SMALL LETTER E   */
i	print_utf8 (0xAB72);	/* U+AB72 CHEROKEE SMALL LETTER I   */
o	print_utf8 (0xAB73);	/* U+AB73 CHEROKEE SMALL LETTER O   */
u	print_utf8 (0xAB74);	/* U+AB74 CHEROKEE SMALL LETTER U   */
v	print_utf8 (0xAB75);	/* U+AB75 CHEROKEE SMALL LETTER V   */
ga	print_utf8 (0xAB76);	/* U+AB76 CHEROKEE SMALL LETTER GA  */
ka	print_utf8 (0xAB77);	/* U+AB77 CHEROKEE SMALL LETTER KA  */
ge	print_utf8 (0xAB78);	/* U+AB78 CHEROKEE SMALL LETTER GE  */
gi	print_utf8 (0xAB79);	/* U+AB79 CHEROKEE SMALL LETTER GI  */
go	print_utf8 (0xAB7A);	/* U+AB7A CHEROKEE SMALL LETTER GO  */
gu	print_utf8 (0xAB7B);	/* U+AB7B CHEROKEE SMALL LETTER GU  */
gv	print_utf8 (0xAB7C);	/* U+AB7C CHEROKEE SMALL LETTER GV  */
ha	print_utf8 (0xAB7D);	/* U+AB7D CHEROKEE SMALL LETTER HA  */
he	print_utf8 (0xAB7E);	/* U+AB7E CHEROKEE SMALL LETTER HE  */
hi	print_utf8 (0xAB7F);	/* U+AB7F CHEROKEE SMALL LETTER HI  */
ho	print_utf8 (0xAB80);	/* U+AB80 CHEROKEE SMALL LETTER HO  */
hu	print_utf8 (0xAB81);	/* U+AB81 CHEROKEE SMALL LETTER HU  */
hv	print_utf8 (0xAB82);	/* U+AB82 CHEROKEE SMALL LETTER HV  */
la	print_utf8 (0xAB83);	/* U+AB83 CHEROKEE SMALL LETTER LA  */
le	print_utf8 (0xAB84);	/* U+AB84 CHEROKEE SMALL LETTER LE  */
li	print_utf8 (0xAB85);	/* U+AB85 CHEROKEE SMALL LETTER LI  */
lo	print_utf8 (0xAB86);	/* U+AB86 CHEROKEE SMALL LETTER LO  */
lu	print_utf8 (0xAB87);	/* U+AB87 CHEROKEE SMALL LETTER LU  */
lv	print_utf8 (0xAB88);	/* U+AB88 CHEROKEE SMALL LETTER LV  */
ma	print_utf8 (0xAB89);	/* U+AB89 CHEROKEE SMALL LETTER MA  */
me	print_utf8 (0xAB8A);	/* U+AB8A CHEROKEE SMALL LETTER ME  */
mi	print_utf8 (0xAB8B);	/* U+AB8B CHEROKEE SMALL LETTER MI  */
mo	print_utf8 (0xAB8C);	/* U+AB8C CHEROKEE SMALL LETTER MO  */
mu	print_utf8 (0xAB8D);	/* U+AB8D CHEROKEE SMALL LETTER MU  */
na	print_utf8 (0xAB8E);	/* U+AB8E CHEROKEE SMALL LETTER NA  */
hna	print_utf8 (0xAB8F);	/* U+AB8F CHEROKEE SMALL LETTER HNA */
nah	print_utf8 (0xAB90);	/* U+AB90 CHEROKEE SMALL LETTER NAH */
ne	print_utf8 (0xAB91);	/* U+AB91 CHEROKEE SMALL LETTER NE  */
ni	print_utf8 (0xAB92);	/* U+AB92 CHEROKEE SMALL LETTER NI  */
no	print_utf8 (0xAB93);	/* U+AB93 CHEROKEE SMALL LETTER NO  */
nu	print_utf8 (0xAB94);	/* U+AB94 CHEROKEE SMALL LETTER NU  */
nv	print_utf8 (0xAB95);	/* U+AB95 CHEROKEE SMALL LETTER NV  */
qua	print_utf8 (0xAB96);	/* U+AB96 CHEROKEE SMALL LETTER QUA */
que	print_utf8 (0xAB97);	/* U+AB97 CHEROKEE SMALL LETTER QUE */
qui	print_utf8 (0xAB98);	/* U+AB98 CHEROKEE SMALL LETTER QUI */
quo	print_utf8 (0xAB99);	/* U+AB99 CHEROKEE SMALL LETTER QUO */
quu	print_utf8 (0xAB9A);	/* U+AB9A CHEROKEE SMALL LETTER QUU */
quv	print_utf8 (0xAB9B);	/* U+AB9B CHEROKEE SMALL LETTER QUV */
sa	print_utf8 (0xAB9C);	/* U+AB9C CHEROKEE SMALL LETTER SA  */
s	print_utf8 (0xAB9D);	/* U+AB9D CHEROKEE SMALL LETTER S   */
se	print_utf8 (0xAB9E);	/* U+AB9E CHEROKEE SMALL LETTER SE  */
si	print_utf8 (0xAB9F);	/* U+AB9F CHEROKEE SMALL LETTER SI  */
so	print_utf8 (0xABA0);	/* U+ABA0 CHEROKEE SMALL LETTER SO  */
su	print_utf8 (0xABA1);	/* U+ABA1 CHEROKEE SMALL LETTER SU  */
sv	print_utf8 (0xABA2);	/* U+ABA2 CHEROKEE SMALL LETTER SV  */
da	print_utf8 (0xABA3);	/* U+ABA3 CHEROKEE SMALL LETTER DA  */
ta	print_utf8 (0xABA4);	/* U+ABA4 CHEROKEE SMALL LETTER TA  */
de	print_utf8 (0xABA5);	/* U+ABA5 CHEROKEE SMALL LETTER DE  */
te	print_utf8 (0xABA6);	/* U+ABA6 CHEROKEE SMALL LETTER TE  */
di	print_utf8 (0xABA7);	/* U+ABA7 CHEROKEE SMALL LETTER DI  */
ti	print_utf8 (0xABA8);	/* U+ABA8 CHEROKEE SMALL LETTER TI  */
do	print_utf8 (0xABA9);	/* U+ABA9 CHEROKEE SMALL LETTER DO  */
du	print_utf8 (0xABAA);	/* U+ABAA CHEROKEE SMALL LETTER DU  */
dv	print_utf8 (0xABAB);	/* U+ABAB CHEROKEE SMALL LETTER DV  */
dla	print_utf8 (0xABAC);	/* U+ABAC CHEROKEE SMALL LETTER DLA */
tla	print_utf8 (0xABAD);	/* U+ABAD CHEROKEE SMALL LETTER TLA */
tle	print_utf8 (0xABAE);	/* U+ABAE CHEROKEE SMALL LETTER TLE */
tli	print_utf8 (0xABAF);	/* U+ABAF CHEROKEE SMALL LETTER TLI */
tlo	print_utf8 (0xABB0);	/* U+ABB0 CHEROKEE SMALL LETTER TLO */
tlu	print_utf8 (0xABB1);	/* U+ABB1 CHEROKEE SMALL LETTER TLU */
tlv	print_utf8 (0xABB2);	/* U+ABB2 CHEROKEE SMALL LETTER TLV */
tsa	print_utf8 (0xABB3);	/* U+ABB3 CHEROKEE SMALL LETTER TSA */
tse	print_utf8 (0xABB4);	/* U+ABB4 CHEROKEE SMALL LETTER TSE */
tsi	print_utf8 (0xABB5);	/* U+ABB5 CHEROKEE SMALL LETTER TSI */
tso	print_utf8 (0xABB6);	/* U+ABB6 CHEROKEE SMALL LETTER TSO */
tsu	print_utf8 (0xABB7);	/* U+ABB7 CHEROKEE SMALL LETTER TSU */
tsv	print_utf8 (0xABB8);	/* U+ABB8 CHEROKEE SMALL LETTER TSV */
wa	print_utf8 (0xABB9);	/* U+ABB9 CHEROKEE SMALL LETTER WA  */
we	print_utf8 (0xABBA);	/* U+ABBA CHEROKEE SMALL LETTER WE  */
wi	print_utf8 (0xABBB);	/* U+ABBB CHEROKEE SMALL LETTER WI  */
wo	print_utf8 (0xABBC);	/* U+ABBC CHEROKEE SMALL LETTER WO  */
wu	print_utf8 (0xABBD);	/* U+ABBD CHEROKEE SMALL LETTER WU  */
wv	print_utf8 (0xABBE);	/* U+ABBE CHEROKEE SMALL LETTER WV  */
ya	print_utf8 (0xABBF);	/* U+ABBF CHEROKEE SMALL LETTER YA  */

	/* Convert UTF-8 Cherokee to transliterated ASCII. */
	/* Each pattern is the three-byte UTF-8 encoding of one syllable, in octal. */
\341\216\240	fputs ("A", yyout);	/* U+13A0 CHEROKEE LETTER A   */
\341\216\241	fputs ("E", yyout);	/* U+13A1 CHEROKEE LETTER E   */
\341\216\242	fputs ("I", yyout);	/* U+13A2 CHEROKEE LETTER I   */
\341\216\243	fputs ("O", yyout);	/* U+13A3 CHEROKEE LETTER O   */
\341\216\244	fputs ("U", yyout);	/* U+13A4 CHEROKEE LETTER U   */
\341\216\245	fputs ("V", yyout);	/* U+13A5 CHEROKEE LETTER V   */
\341\216\246	fputs ("Ga", yyout);	/* U+13A6 CHEROKEE LETTER GA  */
\341\216\247	fputs ("Ka", yyout);	/* U+13A7 CHEROKEE LETTER KA  */
\341\216\250	fputs ("Ge", yyout);	/* U+13A8 CHEROKEE LETTER GE  */
\341\216\251	fputs ("Gi", yyout);	/* U+13A9 CHEROKEE LETTER GI  */
\341\216\252	fputs ("Go", yyout);	/* U+13AA CHEROKEE LETTER GO  */
\341\216\253	fputs ("Gu", yyout);	/* U+13AB CHEROKEE LETTER GU  */
\341\216\254	fputs ("Gv", yyout);	/* U+13AC CHEROKEE LETTER GV  */
\341\216\255	fputs ("Ha", yyout);	/* U+13AD CHEROKEE LETTER HA  */
\341\216\256	fputs ("He", yyout);	/* U+13AE CHEROKEE LETTER HE  */
\341\216\257	fputs ("Hi", yyout);	/* U+13AF CHEROKEE LETTER HI  */
\341\216\260	fputs ("Ho", yyout);	/* U+13B0 CHEROKEE LETTER HO  */
\341\216\261	fputs ("Hu", yyout);	/* U+13B1 CHEROKEE LETTER HU  */
\341\216\262	fputs ("Hv", yyout);	/* U+13B2 CHEROKEE LETTER HV  */
\341\216\263	fputs ("La", yyout);	/* U+13B3 CHEROKEE LETTER LA  */
\341\216\264	fputs ("Le", yyout);	/* U+13B4 CHEROKEE LETTER LE  */
\341\216\265	fputs ("Li", yyout);	/* U+13B5 CHEROKEE LETTER LI  */
\341\216\266	fputs ("Lo", yyout);	/* U+13B6 CHEROKEE LETTER LO  */
\341\216\267	fputs ("Lu", yyout);	/* U+13B7 CHEROKEE LETTER LU  */
\341\216\270	fputs ("Lv", yyout);	/* U+13B8 CHEROKEE LETTER LV  */
\341\216\271	fputs ("Ma", yyout);	/* U+13B9 CHEROKEE LETTER MA  */
\341\216\272	fputs ("Me", yyout);	/* U+13BA CHEROKEE LETTER ME  */
\341\216\273	fputs ("Mi", yyout);	/* U+13BB CHEROKEE LETTER MI  */
\341\216\274	fputs ("Mo", yyout);	/* U+13BC CHEROKEE LETTER MO  */
\341\216\275	fputs ("Mu", yyout);	/* U+13BD CHEROKEE LETTER MU  */
\341\216\276	fputs ("Na", yyout);	/* U+13BE CHEROKEE LETTER NA  */
\341\216\277	fputs ("Hna", yyout);	/* U+13BF CHEROKEE LETTER HNA */
\341\217\200	fputs ("Nah", yyout);	/* U+13C0 CHEROKEE LETTER NAH */
\341\217\201	fputs ("Ne", yyout);	/* U+13C1 CHEROKEE LETTER NE  */
\341\217\202	fputs ("Ni", yyout);	/* U+13C2 CHEROKEE LETTER NI  */
\341\217\203	fputs ("No", yyout);	/* U+13C3 CHEROKEE LETTER NO  */
\341\217\204	fputs ("Nu", yyout);	/* U+13C4 CHEROKEE LETTER NU  */
\341\217\205	fputs ("Nv", yyout);	/* U+13C5 CHEROKEE LETTER NV  */
\341\217\206	fputs ("Qua", yyout);	/* U+13C6 CHEROKEE LETTER QUA */
\341\217\207	fputs ("Que", yyout);	/* U+13C7 CHEROKEE LETTER QUE */
\341\217\210	fputs ("Qui", yyout);	/* U+13C8 CHEROKEE LETTER QUI */
\341\217\211	fputs ("Quo", yyout);	/* U+13C9 CHEROKEE LETTER QUO */
\341\217\212	fputs ("Quu", yyout);	/* U+13CA CHEROKEE LETTER QUU */
\341\217\213	fputs ("Quv", yyout);	/* U+13CB CHEROKEE LETTER QUV */
\341\217\214	fputs ("Sa", yyout);	/* U+13CC CHEROKEE LETTER SA  */
\341\217\215	fputs ("S", yyout);	/* U+13CD CHEROKEE LETTER S   */
\341\217\216	fputs ("Se", yyout);	/* U+13CE CHEROKEE LETTER SE  */
\341\217\217	fputs ("Si", yyout);	/* U+13CF CHEROKEE LETTER SI  */
\341\217\220	fputs ("So", yyout);	/* U+13D0 CHEROKEE LETTER SO  */
\341\217\221	fputs ("Su", yyout);	/* U+13D1 CHEROKEE LETTER SU  */
\341\217\222	fputs ("Sv", yyout);	/* U+13D2 CHEROKEE LETTER SV  */
\341\217\223	fputs ("Da", yyout);	/* U+13D3 CHEROKEE LETTER DA  */
\341\217\224	fputs ("Ta", yyout);	/* U+13D4 CHEROKEE LETTER TA  */
\341\217\225	fputs ("De", yyout);	/* U+13D5 CHEROKEE LETTER DE  */
\341\217\226	fputs ("Te", yyout);	/* U+13D6 CHEROKEE LETTER TE  */
\341\217\227	fputs ("Di", yyout);	/* U+13D7 CHEROKEE LETTER DI  */
\341\217\230	fputs ("Ti", yyout);	/* U+13D8 CHEROKEE LETTER TI  */
\341\217\231	fputs ("Do", yyout);	/* U+13D9 CHEROKEE LETTER DO  */
\341\217\232	fputs ("Du", yyout);	/* U+13DA CHEROKEE LETTER DU  */
\341\217\233	fputs ("Dv", yyout);	/* U+13DB CHEROKEE LETTER DV  */
\341\217\234	fputs ("Dla", yyout);	/* U+13DC CHEROKEE LETTER DLA */
\341\217\235	fputs ("Tla", yyout);	/* U+13DD CHEROKEE LETTER TLA */
\341\217\236	fputs ("Tle", yyout);	/* U+13DE CHEROKEE LETTER TLE */
\341\217\237	fputs ("Tli", yyout);	/* U+13DF CHEROKEE LETTER TLI */
\341\217\240	fputs ("Tlo", yyout);	/* U+13E0 CHEROKEE LETTER TLO */
\341\217\241	fputs ("Tlu", yyout);	/* U+13E1 CHEROKEE LETTER TLU */
\341\217\242	fputs ("Tlv", yyout);	/* U+13E2 CHEROKEE LETTER TLV */
\341\217\243	fputs ("Tsa", yyout);	/* U+13E3 CHEROKEE LETTER TSA */
\341\217\244	fputs ("Tse", yyout);	/* U+13E4 CHEROKEE LETTER TSE */
\341\217\245	fputs ("Tsi", yyout);	/* U+13E5 CHEROKEE LETTER TSI */
\341\217\246	fputs ("Tso", yyout);	/* U+13E6 CHEROKEE LETTER TSO */
\341\217\247	fputs ("Tsu", yyout);	/* U+13E7 CHEROKEE LETTER TSU */
\341\217\250	fputs ("Tsv", yyout);	/* U+13E8 CHEROKEE LETTER TSV */
\341\217\251	fputs ("Wa", yyout);	/* U+13E9 CHEROKEE LETTER WA  */
\341\217\252	fputs ("We", yyout);	/* U+13EA CHEROKEE LETTER WE  */
\341\217\253	fputs ("Wi", yyout);	/* U+13EB CHEROKEE LETTER WI  */
\341\217\254	fputs ("Wo", yyout);	/* U+13EC CHEROKEE LETTER WO  */
\341\217\255	fputs ("Wu", yyout);	/* U+13ED CHEROKEE LETTER WU  */
\341\217\256	fputs ("Wv", yyout);	/* U+13EE CHEROKEE LETTER WV  */
\341\217\257	fputs ("Ya", yyout);	/* U+13EF CHEROKEE LETTER YA  */
\341\217\260	fputs ("Ye", yyout);	/* U+13F0 CHEROKEE LETTER YE  */
\341\217\261	fputs ("Yi", yyout);	/* U+13F1 CHEROKEE LETTER YI  */
\341\217\262	fputs ("Yo", yyout);	/* U+13F2 CHEROKEE LETTER YO  */
\341\217\263	fputs ("Yu", yyout);	/* U+13F3 CHEROKEE LETTER YU  */
\341\217\264	fputs ("Yv", yyout);	/* U+13F4 CHEROKEE LETTER YV  */
\341\217\265	fputs ("Mv", yyout);	/* U+13F5 CHEROKEE LETTER MV  */
\341\217\270	fputs ("ye", yyout);	/* U+13F8 CHEROKEE SMALL LETTER YE  */
\341\217\271	fputs ("yi", yyout);	/* U+13F9 CHEROKEE SMALL LETTER YI  */
\341\217\272	fputs ("yo", yyout);	/* U+13FA CHEROKEE SMALL LETTER YO  */
\341\217\273	fputs ("yu", yyout);	/* U+13FB CHEROKEE SMALL LETTER YU  */
\341\217\274	fputs ("yv", yyout);	/* U+13FC CHEROKEE SMALL LETTER YV  */
\341\217\275	fputs ("mv", yyout);	/* U+13FD CHEROKEE SMALL LETTER MV  */
\352\255\260	fputs ("a", yyout);	/* U+AB70 CHEROKEE SMALL LETTER A   */
\352\255\261	fputs ("e", yyout);	/* U+AB71 CHEROKEE SMALL LETTER E   */
\352\255\262	fputs ("i", yyout);	/* U+AB72 CHEROKEE SMALL LETTER I   */
\352\255\263	fputs ("o", yyout);	/* U+AB73 CHEROKEE SMALL LETTER O   */
\352\255\264	fputs ("u", yyout);	/* U+AB74 CHEROKEE SMALL LETTER U   */
\352\255\265	fputs ("v", yyout);	/* U+AB75 CHEROKEE SMALL LETTER V   */
\352\255\266	fputs ("ga", yyout);	/* U+AB76 CHEROKEE SMALL LETTER GA  */
\352\255\267	fputs ("ka", yyout);	/* U+AB77 CHEROKEE SMALL LETTER KA  */
\352\255\270	fputs ("ge", yyout);	/* U+AB78 CHEROKEE SMALL LETTER GE  */
\352\255\271	fputs ("gi", yyout);	/* U+AB79 CHEROKEE SMALL LETTER GI  */
\352\255\272	fputs ("go", yyout);	/* U+AB7A CHEROKEE SMALL LETTER GO  */
\352\255\273	fputs ("gu", yyout);	/* U+AB7B CHEROKEE SMALL LETTER GU  */
\352\255\274	fputs ("gv", yyout);	/* U+AB7C CHEROKEE SMALL LETTER GV  */
\352\255\275	fputs ("ha", yyout);	/* U+AB7D CHEROKEE SMALL LETTER HA  */
\352\255\276	fputs ("he", yyout);	/* U+AB7E CHEROKEE SMALL LETTER HE  */
\352\255\277	fputs ("hi", yyout);	/* U+AB7F CHEROKEE SMALL LETTER HI  */
\352\256\200	fputs ("ho", yyout);	/* U+AB80 CHEROKEE SMALL LETTER HO  */
\352\256\201	fputs ("hu", yyout);	/* U+AB81 CHEROKEE SMALL LETTER HU  */
\352\256\202	fputs ("hv", yyout);	/* U+AB82 CHEROKEE SMALL LETTER HV  */
\352\256\203	fputs ("la", yyout);	/* U+AB83 CHEROKEE SMALL LETTER LA  */
\352\256\204	fputs ("le", yyout);	/* U+AB84 CHEROKEE SMALL LETTER LE  */
\352\256\205	fputs ("li", yyout);	/* U+AB85 CHEROKEE SMALL LETTER LI  */
\352\256\206	fputs ("lo", yyout);	/* U+AB86 CHEROKEE SMALL LETTER LO  */
\352\256\207	fputs ("lu", yyout);	/* U+AB87 CHEROKEE SMALL LETTER LU  */
\352\256\210	fputs ("lv", yyout);	/* U+AB88 CHEROKEE SMALL LETTER LV  */
\352\256\211	fputs ("ma", yyout);	/* U+AB89 CHEROKEE SMALL LETTER MA  */
\352\256\212	fputs ("me", yyout);	/* U+AB8A CHEROKEE SMALL LETTER ME  */
\352\256\213	fputs ("mi", yyout);	/* U+AB8B CHEROKEE SMALL LETTER MI  */
\352\256\214	fputs ("mo", yyout);	/* U+AB8C CHEROKEE SMALL LETTER MO  */
\352\256\215	fputs ("mu", yyout);	/* U+AB8D CHEROKEE SMALL LETTER MU  */
\352\256\216	fputs ("na", yyout);	/* U+AB8E CHEROKEE SMALL LETTER NA  */
\352\256\217	fputs ("hna", yyout);	/* U+AB8F CHEROKEE SMALL LETTER HNA */
\352\256\220	fputs ("nah", yyout);	/* U+AB90 CHEROKEE SMALL LETTER NAH */
\352\256\221	fputs ("ne", yyout);	/* U+AB91 CHEROKEE SMALL LETTER NE  */
\352\256\222	fputs ("ni", yyout);	/* U+AB92 CHEROKEE SMALL LETTER NI  */
\352\256\223	fputs ("no", yyout);	/* U+AB93 CHEROKEE SMALL LETTER NO  */
\352\256\224	fputs ("nu", yyout);	/* U+AB94 CHEROKEE SMALL LETTER NU  */
\352\256\225	fputs ("nv", yyout);	/* U+AB95 CHEROKEE SMALL LETTER NV  */
\352\256\226	fputs ("qua", yyout);	/* U+AB96 CHEROKEE SMALL LETTER QUA */
\352\256\227	fputs ("que", yyout);	/* U+AB97 CHEROKEE SMALL LETTER QUE */
\352\256\230	fputs ("qui", yyout);	/* U+AB98 CHEROKEE SMALL LETTER QUI */
\352\256\231	fputs ("quo", yyout);	/* U+AB99 CHEROKEE SMALL LETTER QUO */
\352\256\232	fputs ("quu", yyout);	/* U+AB9A CHEROKEE SMALL LETTER QUU */
\352\256\233	fputs ("quv", yyout);	/* U+AB9B CHEROKEE SMALL LETTER QUV */
\352\256\234	fputs ("sa", yyout);	/* U+AB9C CHEROKEE SMALL LETTER SA  */
\352\256\235	fputs ("s", yyout);	/* U+AB9D CHEROKEE SMALL LETTER S   */
\352\256\236	fputs ("se", yyout);	/* U+AB9E CHEROKEE SMALL LETTER SE  */
\352\256\237	fputs ("si", yyout);	/* U+AB9F CHEROKEE SMALL LETTER SI  */
\352\256\240	fputs ("so", yyout);	/* U+ABA0 CHEROKEE SMALL LETTER SO  */
\352\256\241	fputs ("su", yyout);	/* U+ABA1 CHEROKEE SMALL LETTER SU  */
\352\256\242	fputs ("sv", yyout);	/* U+ABA2 CHEROKEE SMALL LETTER SV  */
\352\256\243	fputs ("da", yyout);	/* U+ABA3 CHEROKEE SMALL LETTER DA  */
\352\256\244	fputs ("ta", yyout);	/* U+ABA4 CHEROKEE SMALL LETTER TA  */
\352\256\245	fputs ("de", yyout);	/* U+ABA5 CHEROKEE SMALL LETTER DE  */
\352\256\246	fputs ("te", yyout);	/* U+ABA6 CHEROKEE SMALL LETTER TE  */
\352\256\247	fputs ("di", yyout);	/* U+ABA7 CHEROKEE SMALL LETTER DI  */
\352\256\250	fputs ("ti", yyout);	/* U+ABA8 CHEROKEE SMALL LETTER TI  */
\352\256\251	fputs ("do", yyout);	/* U+ABA9 CHEROKEE SMALL LETTER DO  */
\352\256\252	fputs ("du", yyout);	/* U+ABAA CHEROKEE SMALL LETTER DU  */
\352\256\253	fputs ("dv", yyout);	/* U+ABAB CHEROKEE SMALL LETTER DV  */
\352\256\254	fputs ("dla", yyout);	/* U+ABAC CHEROKEE SMALL LETTER DLA */
\352\256\255	fputs ("tla", yyout);	/* U+ABAD CHEROKEE SMALL LETTER TLA */
\352\256\256	fputs ("tle", yyout);	/* U+ABAE CHEROKEE SMALL LETTER TLE */
\352\256\257	fputs ("tli", yyout);	/* U+ABAF CHEROKEE SMALL LETTER TLI */
\352\256\260	fputs ("tlo", yyout);	/* U+ABB0 CHEROKEE SMALL LETTER TLO */
\352\256\261	fputs ("tlu", yyout);	/* U+ABB1 CHEROKEE SMALL LETTER TLU */
\352\256\262	fputs ("tlv", yyout);	/* U+ABB2 CHEROKEE SMALL LETTER TLV */
\352\256\263	fputs ("tsa", yyout);	/* U+ABB3 CHEROKEE SMALL LETTER TSA */
\352\256\264	fputs ("tse", yyout);	/* U+ABB4 CHEROKEE SMALL LETTER TSE */
\352\256\265	fputs ("tsi", yyout);	/* U+ABB5 CHEROKEE SMALL LETTER TSI */
\352\256\266	fputs ("tso", yyout);	/* U+ABB6 CHEROKEE SMALL LETTER TSO */
\352\256\267	fputs ("tsu", yyout);	/* U+ABB7 CHEROKEE SMALL LETTER TSU */
\352\256\270	fputs ("tsv", yyout);	/* U+ABB8 CHEROKEE SMALL LETTER TSV */
\352\256\271	fputs ("wa", yyout);	/* U+ABB9 CHEROKEE SMALL LETTER WA  */
\352\256\272	fputs ("we", yyout);	/* U+ABBA CHEROKEE SMALL LETTER WE  */
\352\256\273	fputs ("wi", yyout);	/* U+ABBB CHEROKEE SMALL LETTER WI  */
\352\256\274	fputs ("wo", yyout);	/* U+ABBC CHEROKEE SMALL LETTER WO  */
\352\256\275	fputs ("wu", yyout);	/* U+ABBD CHEROKEE SMALL LETTER WU  */
\352\256\276	fputs ("wv", yyout);	/* U+ABBE CHEROKEE SMALL LETTER WV  */
\352\256\277	fputs ("ya", yyout);	/* U+ABBF CHEROKEE SMALL LETTER YA  */
%%

/*
   Program entry point: parse the command line, select the input and
   output streams, optionally begin the output with a UTF-8 Byte Order
   Mark, and run the scanner.

   Options:
        -b              begin output with the Byte Order Mark (U+FEFF)
        -i input_file   read from input_file instead of stdin
        -o output_file  write to output_file instead of stdout
        -v, --version   print the program version and exit

   Any unrecognized argument prints the help text.  If any argument
   was invalid, or a file could not be opened, no input is scanned and
   the program exits with EXIT_FAILURE.
*/
int
main (int argc, char *argv[])
{
   int i;            /* loop variable       */
   int exit_status;  /* program exit status */

   void print_help (char *);
   void print_utf8 (uint32_t);

   exit_status = EXIT_SUCCESS;
   yyin  = stdin;
   yyout = stdout;

   for (i = 1; i < argc; i++) {
      /*
         Everything on the command line must be an option; a bare
         word is an error.
      */
      if (argv[i][0] != '-') {
         print_help (argv[0]);
         exit_status = EXIT_FAILURE;
         continue;
      }

      switch (argv[i][1]) {
                   /* Long options: only "--version" is recognized */
         case '-': if (strncmp (argv[i], "--version", 9) == 0) {
                      printf ("sequoyah Version %s\n", VERSION);
                      printf ("Copyright (C) 2023, 2024 Paul Hardy\n");
                      exit (EXIT_SUCCESS);
                   }
                   /*
                      Any other long option is unknown.  Do not fall
                      through into the "-b" case below.
                   */
                   print_help (argv[0]);
                   exit_status = EXIT_FAILURE;
                   break;
                   /* Begin output with Byte Order Mark, U+FEFF */
         case 'b': bom_out = 1;
                   break;
                   /*
                      input file; the file name follows in the
                      next parameter, so increment i
                   */
         case 'i': if (i + 1 >= argc) {
                      fprintf (stderr, "%s: -i requires a file name\n",
                               argv[0]);
                      exit_status = EXIT_FAILURE;
                      break;
                   }
                   yyin = fopen (argv[++i], "r");
                   if (yyin == NULL) {
                      perror (argv[i]);
                      exit_status = EXIT_FAILURE;
                   }
                   break;
                   /*
                      output file; the file name follows in the
                      next parameter, so increment i
                   */
         case 'o': if (i + 1 >= argc) {
                      fprintf (stderr, "%s: -o requires a file name\n",
                               argv[0]);
                      exit_status = EXIT_FAILURE;
                      break;
                   }
                   yyout = fopen (argv[++i], "w");
                   if (yyout == NULL) {
                      perror (argv[i]);
                      exit_status = EXIT_FAILURE;
                   }
                   break;
                   /* Check for "-v" */
         case 'v': printf ("sequoyah Version %s\n", VERSION);
                   exit (EXIT_SUCCESS);
                   /* Anything else is an unknown option */
         default:  print_help (argv[0]);
                   exit_status = EXIT_FAILURE;
                   break;
      }
   }

   if (exit_status == EXIT_SUCCESS) {
      if (bom_out != 0) {
         print_utf8 (0xFEFF); /* Unicode Byte Order Mark */
      }

      yylex ();
   }

   exit (exit_status);
}


/*
   Write the usage summary to stderr.

        progname  the name this program was invoked with
                  (argv[0]); it is echoed in the syntax line.
*/
void
print_help (char * progname)
{

   fputs ("\nUnknown command line parameter.\n\n", stderr);

   /* The syntax line is the only one that needs formatting. */
   fprintf (stderr, "Syntax: %s [-b] [-i input_file] [-o output_file]\n\n",
            progname);

   /* One line per option, each followed by a blank line. */
   fputs ("    -b: begin output with UTF-8 Byte Order Mark\n\n", stderr);
   fputs ("    -i: specify input file name\n\n", stderr);
   fputs ("    -o: specify output file name\n\n", stderr);

   return;
}


/*
   Emit one matched pattern.

        intext  the text exactly as it was read
        codept  the Unicode code point that the text stands for

   In Latin mode the text is passed to print_ascii(); in every other
   language mode the code point is written as UTF-8 instead.
*/
void
print_pattern (char *intext, uint32_t codept)
{

   void print_ascii (char *);
   void print_utf8  (uint32_t);

   /* Every mode except Latin outputs the code point */
   if (lang_type != LANG_LATIN) {
      print_utf8 (codept);
      return;
   }

   print_ascii (intext);

   return;
}


/*
   Print an ASCII sequence that appeared inside braces, '{'...'}'.

        intext  null-terminated string to print; it is not modified.

   The text is copied to yyout unchanged except for the one
   recognized escape sequence, "\uX...X", where "X...X" is a string
   of one to six hexadecimal digits naming a Unicode code point; that
   sequence is replaced by the code point, via print_unicode().

   A backslash that does not begin such a sequence (including "\u"
   with no hexadecimal digit after it) is printed literally.
*/
void
print_ascii (char *intext)
{
   char *segment;    /* start of the text not yet written           */
   char *backslash;  /* next backslash, or the end of the string    */
   char unicode_string[7]; /* up to six hexadecimal digits, null-terminated */
   int ndigits;      /* number of hexadecimal digits collected      */

   void print_unicode (char *);

   segment = intext;

   while (*segment != '\0') {
      /*
         Scan for a backslash, looking for an escape sequence.
      */
      for (backslash = segment;
           *backslash != '\0' && *backslash != '\\';
           backslash++);

      /* Print the plain text up to but not including the backslash */
      if (backslash > segment)
         fwrite (segment, 1, (size_t)(backslash - segment), yyout);

      if (*backslash == '\0')  /* this is probably the most frequent case */
         break;

      /*
         Found a backslash.  If it is not followed by 'u', this is
         not a recognized sequence: print the backslash and resume
         scanning with the character after it.
      */
      if (backslash[1] != 'u') {
         fputc ('\\', yyout);
         segment = backslash + 1;
         continue;
      }

      /*
         Collect up to six hexadecimal digits after the "\u".  The
         cast is required because passing a negative char value to
         a <ctype.h> function is undefined behavior.
      */
      segment = backslash + 2;
      for (ndigits = 0;
           ndigits < 6 && isxdigit ((unsigned char) segment[ndigits]);
           ndigits++) {
         unicode_string[ndigits] = segment[ndigits];
      }
      unicode_string[ndigits] = '\0';

      if (ndigits > 0)
         print_unicode (unicode_string);
      else
         fputs ("\\u", yyout);  /* no digits: not an escape, print as-is */

      /* segment now points to the remainder of the input string */
      segment += ndigits;
   }

   return;
}


/*
   Convert a string of hexadecimal digits to a number and print that
   number as a Unicode code point, in UTF-8.

        intext  null-terminated string of the form "X...X".  The
                digits are not validated here; the caller is
                expected to pass only hexadecimal digits.
*/
void
print_unicode (char *intext)
{
   void print_utf8 (uint32_t);

   uint32_t value;   /* code point accumulated so far                */
   uint32_t nibble;  /* numeric value of the digit being converted   */
   char *cursor;     /* walks along the digit string                 */

   value = 0;
   for (cursor = intext; *cursor != '\0'; cursor++) {
      nibble = *cursor;

      /*
         Lower-case letters sort above upper-case letters, which
         sort above the decimal digits, so test in that order.
      */
      if (nibble >= 'a')
         nibble = nibble - 'a' + 10;
      else if (nibble >= 'A')
         nibble = nibble - 'A' + 10;
      else
         nibble -= '0';

      /* make room for the new digit in the low four bits */
      value = (value << 4) | nibble;
   }

   print_utf8 (value);

   return;
}


/*
   Print an opening or closing quotation mark.

        intext  character string starting with '"'
                or "`" or "'".

   A double quote alternates between the opening and closing mark of
   the current double quote style, tracked in quote_state[].  A "`"
   always prints an opening single quote and anything else prints a
   closing single quote; the marks used depend on singleq_style.
*/
void
print_quote (char *intext)
{

   void print_utf8 (uint32_t);

   switch (intext[0]) {
      case '"':  /* double quote, the most common case */
           if (quote_state[doubleq_style] != 0) {
              /* already inside a quotation, so close it */
              print_utf8 (quote_close[doubleq_style]);
              quote_state[doubleq_style] = 0;
           }
           else {
              /* not inside a quotation, so open one */
              print_utf8 (quote_open[doubleq_style]);
              quote_state[doubleq_style] = 1;
           }
           break;
      case '`':  /* opening single quote */
           /* style 4 is Hebrew; every other style shares one mark */
           print_utf8 ((singleq_style == 4) ? 0x201A : 0x02BB);
           break;
      default:   /* closing single quote, "'" */
           /* style 4 is Hebrew; every other style shares one mark */
           print_utf8 ((singleq_style == 4) ? 0x2018 : 0x02BC);
           break;
   }

   return;
}


/*
   Print Greek, Latin, Coptic, or Hebrew capital letter.

        intext  the matched text.  This is passed yytext, so
                intext[0] is a '*'; the letter itself is intext[1],
                and intext[2] is either a backslash (grave accent)
                or the end of the string.

   Output depends on the global lang_type:

        Greek   the capital letter, or the precomposed capital
                vowel with varia if a backslash follows
        Coptic  the capital letter, followed by a combining grave
                accent (jinkim) if a backslash follows
        Latin   the input text, unchanged
        Hebrew  nothing; Hebrew Beta Code does not use '*'

   Nothing is printed for any other language mode.
*/
void
print_capital (char *intext)
{
   int test_char;  /* the letter, limited to 7-bit ASCII for table lookup */
   int has_grave;  /* nonzero if a backslash follows the letter           */

   void print_utf8 (uint32_t);

   test_char = intext[1] & 0x7F;
   /* Do not look past the end of the string if the letter is missing */
   has_grave = (intext[1] != '\0' && intext[2] == '\\');

   switch (lang_type) {
      case LANG_GREEK:
           /* First check for Greek varia (grave accent) on vowel */
           if (has_grave) {
              switch (tolower (test_char)) {
                 case 'a':
                      print_utf8 (0x1FBA);  /* GREEK CAPITAL LETTER ALPHA WITH VARIA   */
                      break;
                 case 'e':
                      print_utf8 (0x1FC8);  /* GREEK CAPITAL LETTER EPSILON WITH VARIA */
                      break;
                 case 'h':
                      print_utf8 (0x1FCA);  /* GREEK CAPITAL LETTER ETA WITH VARIA     */
                      break;
                 case 'i':
                      print_utf8 (0x1FDA);  /* GREEK CAPITAL LETTER IOTA WITH VARIA    */
                      break;
                 case 'o':
                      print_utf8 (0x1FF8);  /* GREEK CAPITAL LETTER OMICRON WITH VARIA */
                      break;
                 case 'u':
                      print_utf8 (0x1FEA);  /* GREEK CAPITAL LETTER UPSILON WITH VARIA */
                      break;
                 case 'w':
                      print_utf8 (0x1FFA);  /* GREEK CAPITAL LETTER OMEGA WITH VARIA   */
                      break;
                 default:
                      fprintf (yyout, "%s", intext);  /* unexpected combination */
                      break;
              }
           }
           else {
              /*
                 ascii2greek_capital contains Unicode encodings
                 for capital Greek letters.
              */
              print_utf8 (ascii2greek_capital[test_char]);
           }
           break;
      case LANG_COPTIC:
           print_utf8 (ascii2coptic[test_char]);
           /* Now check for Coptic jinkim (grave accent) on letter */
           if (has_grave)
              print_utf8 (0x0300);  /* COMBINING GRAVE ACCENT */
           break;
      case LANG_HEBREW: /* Hebrew Beta Code doesn't use '*'; we should not reach this point */
           break;
      case LANG_LATIN:
           fprintf (yyout, "%s", intext);
           break;
      default:
           break;
   }

   return;
}


/*
   Print Greek, Latin, Coptic, or Hebrew small letter.

        intext  the matched text.  intext[0] is the letter;
                intext[1], if present, is a modifier: a backslash
                (grave accent) for Greek and Coptic, or the digit
                '2' (final form) for the Hebrew letters k, m, n, p,
                and T.  Any other character after one of those
                Hebrew letters, including '1', selects the middle
                form.

   Output depends on the global lang_type; nothing is printed for a
   language mode that is not handled here.
*/
void
print_small (char *intext)
{
   int test_char;  /* the letter, limited to 7-bit ASCII for table lookup */
   int modifier;   /* the character after the letter, or '\0' if none     */
   int is_final;   /* nonzero to print the final form of a Hebrew letter  */

   void print_utf8 (uint32_t);

   test_char = intext[0] & 0x7F;
   /* Do not look past the end of the string if it is empty */
   modifier  = (intext[0] != '\0') ? intext[1] : '\0';

   switch (lang_type) {
      case LANG_GREEK:
           /* First check for varia (grave accent) on vowel */
           if (modifier == '\\') {
              switch (tolower (test_char)) {
                 case 'a':
                      print_utf8 (0x1F70);  /* GREEK SMALL LETTER ALPHA WITH VARIA   */
                      break;
                 case 'e':
                      print_utf8 (0x1F72);  /* GREEK SMALL LETTER EPSILON WITH VARIA */
                      break;
                 case 'h':
                      print_utf8 (0x1F74);  /* GREEK SMALL LETTER ETA WITH VARIA     */
                      break;
                 case 'i':
                      print_utf8 (0x1F76);  /* GREEK SMALL LETTER IOTA WITH VARIA    */
                      break;
                 case 'o':
                      print_utf8 (0x1F78);  /* GREEK SMALL LETTER OMICRON WITH VARIA */
                      break;
                 case 'u':
                      print_utf8 (0x1F7A);  /* GREEK SMALL LETTER UPSILON WITH VARIA */
                      break;
                 case 'w':
                      print_utf8 (0x1F7C);  /* GREEK SMALL LETTER OMEGA WITH VARIA   */
                      break;
                 default:
                      fprintf (yyout, "%s", intext);  /* unexpected combination */
                      break;
              }
           }
           else {
              print_utf8 (ascii2greek_small[test_char]);
           }
           break;
      case LANG_COPTIC:
           /*
              Small Coptic letters are one code point above
              the corresponding capital letter contained in
              the ascii2coptic array, so add one for print_utf8.
           */
           print_utf8 (ascii2coptic[test_char] + 1);
           if (modifier == '\\')
              print_utf8 (0x0300);  /* COMBINING GRAVE ACCENT */
           break;
      case LANG_HEBREW:
           /*
              Five letters have a middle and a final form.  A '2'
              after the letter selects the final form; otherwise
              print the middle form, even if it was not given
              correctly with a '1' appended.

              test_char stays masked to 7 bits here so that it can
              never be a negative index into ascii2hebrew.
           */
           is_final = (modifier == '2');
           switch (test_char) {
              case 'k':  /* HEBREW LETTER FINAL KAF   or KAF   */
                   print_utf8 (is_final ? 0x5DA : 0x5DB);
                   break;
              case 'm':  /* HEBREW LETTER FINAL MEM   or MEM   */
                   print_utf8 (is_final ? 0x5DD : 0x5DE);
                   break;
              case 'n':  /* HEBREW LETTER FINAL NUN   or NUN   */
                   print_utf8 (is_final ? 0x5DF : 0x5E0);
                   break;
              case 'p':  /* HEBREW LETTER FINAL PE    or PE    */
                   print_utf8 (is_final ? 0x5E3 : 0x5E4);
                   break;
              case 'T':  /* HEBREW LETTER FINAL TSADI or TSADI */
                   print_utf8 (is_final ? 0x5E5 : 0x5E6);
                   break;
              default:   /* it's a Hebrew letter that only has one form */
                   print_utf8 (ascii2hebrew[test_char]);
                   break;
           }
           break;
      case LANG_LATIN:
           fprintf (yyout, "%s", intext);
           break;
      default:
           break;
   }

   return;
}


/*
   Print whichever of four code points belongs to the current
   language mode.

        latin   code point to print in Latin mode
        greek   code point to print in Greek mode, and in any
                mode not listed here
        coptic  code point to print in Coptic mode
        hebrew  code point to print in Hebrew mode
*/
void
print_letter (uint32_t latin,  uint32_t greek,
              uint32_t coptic, uint32_t hebrew)
{
   uint32_t chosen;  /* the code point selected by lang_type */

   if (lang_type == LANG_LATIN)
      chosen = latin;
   else if (lang_type == LANG_COPTIC)
      chosen = coptic;
   else if (lang_type == LANG_HEBREW)
      chosen = hebrew;
   else
      chosen = greek;  /* Greek itself, and the fallback */

   print_utf8 (chosen);

   return;
}


/*
   Convert a UTF-32 code point to UTF-8 and write it to yyout.

        codept  the Unicode code point to write

   Only Unicode scalar values are written: U+0000..U+10FFFF,
   excluding the surrogate code points U+D800..U+DFFF, which have no
   valid UTF-8 form.  For any other value nothing is written to
   yyout and a message is printed on stderr.
*/
void
print_utf8 (uint32_t codept)
{
   unsigned char utf8_bytes[4]; /* UTF-8 output bytes; unsigned, as values exceed 0x7F */
   size_t byte_length;          /* number of bytes of UTF-8 */

   if (codept > 0x10FFFF || (codept >= 0xD800 && codept <= 0xDFFF)) {
      fprintf (stderr,
               "print_utf8() called with illegal Unicode code point U+%06X\n",
               (unsigned int) codept);
      return;
   }

   if (codept < 0x80) {          /* U+0000..U+007F: 7 bits, 1 byte */
      byte_length = 1;
      utf8_bytes [0] = (unsigned char) codept;
   }
   else if (codept < 0x800) {    /* U+0080..U+07FF: 11 bits, 2 bytes */
      byte_length = 2;
      utf8_bytes [0] = (unsigned char) (0xC0 | ((codept >>  6) & 0x1F));
      utf8_bytes [1] = (unsigned char) (0x80 | ( codept        & 0x3F));
   }
   else if (codept < 0x10000) {  /* U+0800..U+FFFF: 16 bits, 3 bytes */
      byte_length = 3;
      utf8_bytes [0] = (unsigned char) (0xE0 | ((codept >> 12) & 0x0F));
      utf8_bytes [1] = (unsigned char) (0x80 | ((codept >>  6) & 0x3F));
      utf8_bytes [2] = (unsigned char) (0x80 | ( codept        & 0x3F));
   }
   else {                        /* U+010000..U+10FFFF: 21 bits, 4 bytes */
      byte_length = 4;
      utf8_bytes [0] = (unsigned char) (0xF0 | ((codept >> 18) & 0x07));
      utf8_bytes [1] = (unsigned char) (0x80 | ((codept >> 12) & 0x3F));
      utf8_bytes [2] = (unsigned char) (0x80 | ((codept >>  6) & 0x3F));
      utf8_bytes [3] = (unsigned char) (0x80 | ( codept        & 0x3F));
   }

   fwrite (utf8_bytes, 1, byte_length, yyout);

   return;
}


/*
   Count the significant binary digits in an unsigned number: the
   position of its highest set bit, counting from one.

        itest  the number to measure

   Returns 0 for an input of zero, and at most 32.
*/
int
bin_digits (uint32_t itest)
{
   int width;  /* number of bits counted so far */

   /* Discard low-order bits until nothing is left */
   for (width = 0; itest != 0; itest >>= 1)
      width++;

   return width;
}

