GNU Unifont 15.1.04
Pan-Unicode font with complete Unicode Plane 0 coverage and partial coverage of higher planes
unigenwidth.c File Reference

unigenwidth - IEEE 1003.1-2008 setup to calculate wchar_t string widths More...

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Include dependency graph for unigenwidth.c:

Go to the source code of this file.

Macros

#define MAXSTRING   256
 Size of the input line buffer, in bytes.
 
#define PIKTO_START   0x0F0E70
 Start of Pikto code point range.
 
#define PIKTO_END   0x0F11EF
 End of Pikto code point range.
 
#define PIKTO_SIZE   (PIKTO_END - PIKTO_START + 1)
 Number of code points in Pikto range.
 

Functions

int main (int argc, char **argv)
 The main function.
 

Detailed Description

unigenwidth - IEEE 1003.1-2008 setup to calculate wchar_t string widths

Author
Paul Hardy.

All glyphs are treated as 16 pixels high, and can be 8, 16, 24, or 32 pixels wide (resulting in widths of 1, 2, 3, or 4, respectively).

Definition in file unigenwidth.c.

Macro Definition Documentation

◆ MAXSTRING

#define MAXSTRING   256

Size of the input line buffer, in bytes.

Definition at line 46 of file unigenwidth.c.

◆ PIKTO_END

#define PIKTO_END   0x0F11EF

End of Pikto code point range.

Definition at line 50 of file unigenwidth.c.

◆ PIKTO_SIZE

#define PIKTO_SIZE   (PIKTO_END - PIKTO_START + 1)

Number of code points in Pikto range.

Definition at line 52 of file unigenwidth.c.

◆ PIKTO_START

#define PIKTO_START   0x0F0E70

Start of Pikto code point range.

Definition at line 49 of file unigenwidth.c.

Function Documentation

◆ main()

int main ( int  argc,
char **  argv 
)

The main function.

Parameters
[in] argc: The count of command line arguments.
[in] argv: Pointer to array of command line arguments.
Returns
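This program exits with status EXIT_SUCCESS on completion, or with EXIT_FAILURE if the argument count is wrong or an input file cannot be opened.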

Definition at line 63 of file unigenwidth.c.

64{
65 /* Reads glyph widths from the hex file (argv[1]) and zero-width code points from the combining file (argv[2]), then prints C source for wcwidth/wcswidth on standard output. */
66 int i; /* loop variable */
67
68 char teststring[MAXSTRING]; /* buffer holding one input line */
69 int loc; /* code point parsed from the start of the current line */
70 char *gstart; /* points just past the ':' in the current line */
71
72 char glyph_width[0x20000]; /* widths for U+0000..U+1FFFF; -1 means no glyph */
73 char pikto_width[PIKTO_SIZE]; /* widths for PIKTO_START..PIKTO_END; -1 means no glyph */
74
75 FILE *infilefp; /* input file currently being read */
76
77 if (argc != 3) {
78 fprintf (stderr, "\n\nUsage: %s <unifont.hex> <combining.txt>\n\n", argv[0]);
79 exit (EXIT_FAILURE);
80 }
81
82 /*
83 Read the collection of hex glyphs. Each line is parsed as a
 hexadecimal code point, a ':', and then the glyph's hex digits.
84 */
85 if ((infilefp = fopen (argv[1],"r")) == NULL) {
86 fprintf (stderr,"ERROR - hex input file %s not found.\n\n", argv[1]);
87 exit (EXIT_FAILURE);
88 }
89
90 /* Flag glyph as non-existent until found. */
91 memset (glyph_width, -1, 0x20000 * sizeof (char));
92 memset (pikto_width, -1, (PIKTO_SIZE) * sizeof (char));
93
94 teststring[MAXSTRING-1] = '\0';
95 while (fgets (teststring, MAXSTRING-1, infilefp) != NULL) { /* NOTE(review): size argument is MAXSTRING-1, so at most MAXSTRING-2 characters are read per call */
96 sscanf (teststring, "%X:%*s", &loc); /* NOTE(review): return value unchecked, so loc keeps its previous (initially uninitialized) value on a parse failure; %X expects unsigned int * but loc is int */
97 if (loc < 0x20000) {
98 gstart = strchr (teststring,':') + 1; /* NOTE(review): assumes the line contains ':'; the strchr result is not checked for NULL */
99 /*
100 16 rows per glyph, 2 ASCII hexadecimal digits per byte,
101 so divide number of digits by 32 (shift right 5 bits).
 The "- 1" presumably discounts the newline that fgets keeps -- confirm.
102 */
103 glyph_width[loc] = (strlen (gstart) - 1) >> 5;
104 }
105 else if ((loc >= PIKTO_START) && (loc <= PIKTO_END)) {
106 gstart = strchr (teststring,':') + 1;
107 pikto_width[loc - PIKTO_START] = strlen (gstart) <= 34 ? 1 : 2; /* 34 or fewer characters after ':' gives width 1, anything longer gives width 2 */
108 }
109 }
110
111 fclose (infilefp);
112
113 /*
114 Now read the combining character code points. These have width of 0.
115 */
116 if ((infilefp = fopen (argv[2],"r")) == NULL) {
117 fprintf (stderr,"ERROR - combining characters file %s not found.\n\n", argv[2]);
118 exit (EXIT_FAILURE);
119 }
120
121 while (fgets (teststring, MAXSTRING-1, infilefp) != NULL) {
122 sscanf (teststring, "%X:%*s", &loc);
123 if (loc < 0x20000) glyph_width[loc] = 0; /* code points at or above 0x20000 are ignored here */
124 }
125
126 fclose (infilefp);
127
128 /*
129 Code Points with Unusual Properties (Unicode Standard, Chapter 4).
130
131 As of Unifont 10.0.04, use the widths in the "*-nonprinting.hex"
132 files. If an application is smart enough to know how to handle
133 these special cases, it will not render the "nonprinting" glyph
134 and will treat the code point as being zero-width.
135 */
136// glyph_width[0]=0; /* NULL character */
137// for (i = 0x0001; i <= 0x001F; i++) glyph_width[i]=-1; /* Control Characters */
138// for (i = 0x007F; i <= 0x009F; i++) glyph_width[i]=-1; /* Control Characters */
139
140// glyph_width[0x034F]=0; /* combining grapheme joiner */
141// glyph_width[0x180B]=0; /* Mongolian free variation selector one */
142// glyph_width[0x180C]=0; /* Mongolian free variation selector two */
143// glyph_width[0x180D]=0; /* Mongolian free variation selector three */
144// glyph_width[0x180E]=0; /* Mongolian vowel separator */
145// glyph_width[0x200B]=0; /* zero width space */
146// glyph_width[0x200C]=0; /* zero width non-joiner */
147// glyph_width[0x200D]=0; /* zero width joiner */
148// glyph_width[0x200E]=0; /* left-to-right mark */
149// glyph_width[0x200F]=0; /* right-to-left mark */
150// glyph_width[0x202A]=0; /* left-to-right embedding */
151// glyph_width[0x202B]=0; /* right-to-left embedding */
152// glyph_width[0x202C]=0; /* pop directional formatting */
153// glyph_width[0x202D]=0; /* left-to-right override */
154// glyph_width[0x202E]=0; /* right-to-left override */
155// glyph_width[0x2060]=0; /* word joiner */
156// glyph_width[0x2061]=0; /* function application */
157// glyph_width[0x2062]=0; /* invisible times */
158// glyph_width[0x2063]=0; /* invisible separator */
159// glyph_width[0x2064]=0; /* invisible plus */
160// glyph_width[0x206A]=0; /* inhibit symmetric swapping */
161// glyph_width[0x206B]=0; /* activate symmetric swapping */
162// glyph_width[0x206C]=0; /* inhibit arabic form shaping */
163// glyph_width[0x206D]=0; /* activate arabic form shaping */
164// glyph_width[0x206E]=0; /* national digit shapes */
165// glyph_width[0x206F]=0; /* nominal digit shapes */
166
167// /* Variation Selector-1 to Variation Selector-16 */
168// for (i = 0xFE00; i <= 0xFE0F; i++) glyph_width[i] = 0;
169
170// glyph_width[0xFEFF]=0; /* zero width no-break space */
171// glyph_width[0xFFF9]=0; /* interlinear annotation anchor */
172// glyph_width[0xFFFA]=0; /* interlinear annotation separator */
173// glyph_width[0xFFFB]=0; /* interlinear annotation terminator */
174 /*
175 Let glyph widths represent 0xFFFC (object replacement character)
176 and 0xFFFD (replacement character).
177 */
178
179 /*
180 Hangul Jamo:
181
182 Leading Consonant (Choseong): leave spacing as is.
183
184 Hangul Choseong Filler (U+115F): set width to 2.
185
186 Hangul Jungseong Filler, Hangul Vowel (Jungseong), and
187 Final Consonant (Jongseong): set width to 0, because these
188 combine with the leading consonant as one composite syllabic
189 glyph. As of Unicode 5.2, the Hangul Jamo block (U+1100..U+11FF)
190 is completely filled.
191 */
192 // for (i = 0x1160; i <= 0x11FF; i++) glyph_width[i]=0; /* Vowels & Final Consonants */
193
194 /*
195 Private Use Area -- the width is undefined, but likely
196 to be 2 charcells wide either from a graphic glyph or
197 from a four-digit hexadecimal glyph representing the
198 code point. Therefore if any PUA glyph does not have
199 a non-zero width yet, assign it a default width of 2.
200 The Unicode Standard allows giving PUA characters
201 default property values; see for example The Unicode
202 Standard Version 5.0, p. 91. This same default is
203 used for higher plane PUA code points below.
204 */
205 // for (i = 0xE000; i <= 0xF8FF; i++) {
206 // if (glyph_width[i] == 0) glyph_width[i]=2;
207 // }
208
209 /*
210 Noncharacters: U+FDD0..U+FDEF, U+FFFE, and U+FFFF get width -1.
211 */
212 for (i = 0xFDD0; i <= 0xFDEF; i++) glyph_width[i] = -1;
213 glyph_width[0xFFFE] = -1; /* noncharacter (byte-swapped image of the U+FEFF byte order mark) */
214 glyph_width[0xFFFF] = -1; /* noncharacter */
215
216 /* Surrogate Code Points */
217 for (i = 0xD800; i <= 0xDFFF; i++) glyph_width[i]=-1;
218
219 /* CJK Code Points: default to width 2 wherever the width is still negative */
220 for (i = 0x4E00; i <= 0x9FFF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
221 for (i = 0x3400; i <= 0x4DBF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
222 for (i = 0xF900; i <= 0xFAFF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
223
224 /*
225 Now generate the output, which is written to standard output.
226 */
227 printf ("/*\n");
228 printf (" wcwidth and wcswidth functions, as per IEEE 1003.1-2008\n");
229 printf (" System Interfaces, pp. 2241 and 2251.\n\n");
230 printf (" Author: Paul Hardy, 2013\n\n");
231 printf (" Copyright (c) 2013 Paul Hardy\n\n");
232 printf (" LICENSE:\n");
233 printf ("\n");
234 printf (" This program is free software: you can redistribute it and/or modify\n");
235 printf (" it under the terms of the GNU General Public License as published by\n");
236 printf (" the Free Software Foundation, either version 2 of the License, or\n");
237 printf (" (at your option) any later version.\n");
238 printf ("\n");
239 printf (" This program is distributed in the hope that it will be useful,\n");
240 printf (" but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
241 printf (" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n");
242 printf (" GNU General Public License for more details.\n");
243 printf ("\n");
244 printf (" You should have received a copy of the GNU General Public License\n");
245 printf (" along with this program. If not, see <http://www.gnu.org/licenses/>.\n");
246 printf ("*/\n\n");
247
248 printf ("#include <wchar.h>\n\n");
249 printf ("/* Definitions for Pikto CSUR Private Use Area glyphs */\n");
250 printf ("#define PIKTO_START\t0x%06X\n", PIKTO_START);
251 printf ("#define PIKTO_END\t0x%06X\n", PIKTO_END);
252 printf ("#define PIKTO_SIZE\t(PIKTO_END - PIKTO_START + 1)\n");
253 printf ("\n\n");
254 printf ("/* wcwidth -- return charcell positions of one code point */\n");
255 printf ("inline int\nwcwidth (wchar_t wc)\n{\n");
256 printf (" return (wcswidth (&wc, 1));\n");
257 printf ("}\n");
258 printf ("\n\n");
259 printf ("int\nwcswidth (const wchar_t *pwcs, size_t n)\n{\n\n");
260 printf (" int i; /* loop variable */\n");
261 printf (" unsigned codept; /* Unicode code point of current character */\n");
262 printf (" unsigned plane; /* Unicode plane, 0x00..0x10 */\n");
263 printf (" unsigned lower17; /* lower 17 bits of Unicode code point */\n");
264 printf (" unsigned lower16; /* lower 16 bits of Unicode code point */\n");
265 printf (" int lowpt, midpt, highpt; /* for binary searching in plane1zeroes[] */\n");
266 printf (" int found; /* for binary searching in plane1zeroes[] */\n");
267 printf (" int totalwidth; /* total width of string, in charcells (1 or 2/glyph) */\n");
268 printf (" int illegalchar; /* Whether or not this code point is illegal */\n");
269 putchar ('\n');
270
271 /*
272 Print the glyph_width[] array for glyph widths in Planes 0 and 1
273 (U+0000..U+1FFFF). The last entry is written without a trailing comma.
274 */
275 printf (" char glyph_width[0x20000] = {");
276 for (i = 0; i < 0x10000; i++) {
277 if ((i & 0x1F) == 0)
278 printf ("\n /* U+%04X */ ", i);
279 printf ("%d,", glyph_width[i]);
280 }
281 for (i = 0x10000; i < 0x20000; i++) {
282 if ((i & 0x1F) == 0)
283 printf ("\n /* U+%06X */ ", i);
284 printf ("%d", glyph_width[i]);
285 if (i < 0x1FFFF) putchar (',');
286 }
287 printf ("\n };\n\n");
288
289 /*
290 Print the pikto_width[] array for Pikto glyph widths.
291 */
292 printf (" char pikto_width[PIKTO_SIZE] = {");
293 for (i = 0; i < PIKTO_SIZE; i++) {
294 if ((i & 0x1F) == 0)
295 printf ("\n /* U+%06X */ ", PIKTO_START + i);
296 printf ("%d", pikto_width[i]);
297 if ((PIKTO_START + i) < PIKTO_END) putchar (',');
298 }
299 printf ("\n };\n\n");
300
301 /*
302 Execution part of wcswidth.
303 */
304 printf ("\n");
305 printf (" illegalchar = totalwidth = 0;\n");
306 printf (" for (i = 0; !illegalchar && i < n; i++) {\n");
307 printf (" codept = pwcs[i];\n");
308 printf (" plane = codept >> 16;\n");
309 printf (" lower17 = codept & 0x1FFFF;\n");
310 printf (" lower16 = codept & 0xFFFF;\n");
311 printf (" if (plane < 2) { /* the most common case */\n");
312 printf (" if (glyph_width[lower17] < 0) illegalchar = 1;\n");
313 printf (" else totalwidth += glyph_width[lower17];\n");
314 printf (" }\n");
315 printf (" else { /* a higher plane or beyond Unicode range */\n");
316 printf (" if ((lower16 == 0xFFFE) || (lower16 == 0xFFFF)) {\n");
317 printf (" illegalchar = 1;\n");
318 printf (" }\n");
319 printf (" else if (plane < 4) { /* Ideographic Plane */\n");
320 printf (" totalwidth += 2; /* Default ideographic width */\n");
321 printf (" }\n");
322 printf (" else if (plane == 0x0F) { /* CSUR Private Use Area */\n");
323 printf (" if (lower16 <= 0x0E6F) { /* Kinya */\n");
324 printf (" totalwidth++; /* all Kinya syllables have width 1 */\n");
325 printf (" }\n");
326 printf (" else if (lower16 <= (PIKTO_END & 0xFFFF)) { /* Pikto */\n");
327 printf (" if (pikto_width[lower16 - (PIKTO_START & 0xFFFF)] < 0) illegalchar = 1;\n");
328 printf (" else totalwidth += pikto_width[lower16 - (PIKTO_START & 0xFFFF)];\n");
329 printf (" }\n"); /* NOTE(review): the generated code has no branch for plane 0x0F code points above PIKTO_END, so they add no width and are not flagged illegal */
330 printf (" }\n");
331 printf (" else if (plane > 0x10) {\n");
332 printf (" illegalchar = 1;\n");
333 printf (" }\n");
334 printf (" /* Other non-printing in higher planes; return -1 as per IEEE 1003.1-2008. */\n");
335 printf (" else if (/* language tags */\n");
336 printf (" codept == 0x0E0001 || (codept >= 0x0E0020 && codept <= 0x0E007F) ||\n");
337 printf (" /* variation selectors, 0x0E0100..0x0E01EF */\n");
338 printf (" (codept >= 0x0E0100 && codept <= 0x0E01EF)) {\n");
339 printf (" illegalchar = 1;\n");
340 printf (" }\n");
341 printf (" /*\n");
342 printf (" Unicode plane 0x02..0x10 printing character\n");
343 printf (" */\n");
344 printf (" else {\n");
345 printf (" illegalchar = 1; /* code is not in font */\n");
346 printf (" }\n");
347 printf ("\n");
348 printf (" }\n");
349 printf (" }\n");
350 printf (" if (illegalchar) totalwidth = -1;\n");
351 printf ("\n");
352 printf (" return (totalwidth);\n");
353 printf ("\n");
354 printf ("}\n");
355
356 exit (EXIT_SUCCESS);
357}
#define MAXSTRING
Size of the input line buffer, in bytes.
Definition: unigenwidth.c:46
#define PIKTO_START
Start of Pikto code point range.
Definition: unigenwidth.c:49
#define PIKTO_SIZE
Number of code points in Pikto range.
Definition: unigenwidth.c:52
#define PIKTO_END
End of Pikto code point range.
Definition: unigenwidth.c:50