AJA NTV2 SDK  17.0.1.1246
NTV2 SDK 17.0.1.1246
ntv2utf8.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: MIT */
8 #include <string.h>
9 #include <stdio.h>
10 #include "ntv2utf8.h"
11 
12 #if defined (NTV2_NUB_CLIENT_SUPPORT)
13 
/*
 * Number of bytes that follow a given lead byte in a UTF-8 sequence,
 * indexed by the value of the lead byte.
 */
static const char trailingBytesForUTF8[256] = {
    /* 0x00 - 0xBF: no trailing bytes */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 - 0xDF: one trailing byte */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0 - 0xEF: two trailing bytes */
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xF0 - 0xF7: three, 0xF8 - 0xFB: four, 0xFC - 0xFF: five */
    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/*
 * Return the total length in bytes (lead byte included) of the UTF-8
 * sequence that starts at s, judged solely from its first byte.
 */
static int u8_seqlen(const char *s)
{
    /* Go through unsigned char so bytes >= 0x80 never index negatively. */
    const unsigned char lead = (unsigned char)*s;

    return 1 + trailingBytesForUTF8[lead];
}
30 
31 
/**
 * Copy a NUL-terminated UTF-8 string into a fixed-size buffer, truncating
 * only on UTF-8 sequence boundaries.
 *
 * The whole destination buffer is zero-filled first, so the result is always
 * NUL-terminated and any unused tail is zero-padded. Copying stops at the
 * first sequence that does not fit in the remaining space, or at a sequence
 * that is cut short by the end of src (the incomplete sequence is dropped).
 *
 * @param dest           Destination buffer of at least dest_buf_size bytes.
 * @param src            NUL-terminated source string; NULL is treated as "".
 * @param dest_buf_size  Size of dest in bytes, terminator included. Nothing
 *                       is written when this is zero or negative.
 */
void strncpyasutf8(char *dest, const char *src, int dest_buf_size)
{
    /* A negative size must never reach memset(): it would be converted to a
       huge size_t and overrun the destination. */
    if (dest == NULL || dest_buf_size <= 0)
        return;

    /* Zero-fill up front; this also provides the terminating NUL. */
    memset(dest, 0, (size_t)dest_buf_size);

    if (src == NULL)
        return;

    int bufleft = dest_buf_size - 1;    /* keep one byte for the terminator */

    while (bufleft > 0 && *src != '\0')
    {
        const int u8_len = u8_seqlen(src);

        if (bufleft < u8_len)
            break;

        /* The lead byte may announce more bytes than src actually holds.
           Stop rather than read (and copy) past the terminator. */
        int present = 1;
        while (present < u8_len && src[present] != '\0')
            present++;
        if (present < u8_len)
            break;

        memcpy(dest, src, (size_t)u8_len);
        dest    += u8_len;
        src     += u8_len;
        bufleft -= u8_len;
    }
}
63 
/* Look up the CP437 byte for a two-byte UTF-8 sequence; 0 when unmapped. */
static unsigned char cp437_from_two_bytes(unsigned char first, unsigned char second)
{
    switch (first)
    {
        case 0xC2:
            switch (second)
            {
                case 0xA2: return 0x9B;     // CENT SIGN
                case 0xA3: return 0x9C;     // POUND SIGN
                case 0xA5: return 0x9D;     // YEN SIGN

                case 0xAA: return 0xA6;     // FEMININE ORDINAL INDICATOR
                case 0xBA: return 0xA7;     // MASCULINE ORDINAL INDICATOR
                case 0xBF: return 0xA8;     // INVERTED QUESTION MARK

                case 0xAC: return 0xAA;     // NOT SIGN
                case 0xBD: return 0xAB;     // VULGAR FRACTION ONE HALF
                case 0xBC: return 0xAC;     // VULGAR FRACTION ONE QUARTER
                case 0xA1: return 0xAD;     // INVERTED EXCLAMATION MARK
                case 0xAB: return 0xAE;     // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
                case 0xBB: return 0xAF;     // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK

                case 0xB5: return 0xE6;     // MICRO SIGN
                case 0xB1: return 0xF1;     // PLUS-MINUS SIGN
                case 0xB0: return 0xF8;     // DEGREE SIGN
                case 0xB7: return 0xFA;     // MIDDLE DOT
                case 0xB2: return 0xFD;     // SUPERSCRIPT TWO
                default:   return 0;
            }

        case 0xC3:
            switch (second)
            {
                case 0x87: return 0x80;     // LATIN CAPITAL LETTER C WITH CEDILLA
                case 0xBC: return 0x81;     // LATIN SMALL LETTER U WITH DIAERESIS
                case 0xA9: return 0x82;     // LATIN SMALL LETTER E WITH ACUTE
                case 0xA2: return 0x83;     // LATIN SMALL LETTER A WITH CIRCUMFLEX
                case 0xA4: return 0x84;     // LATIN SMALL LETTER A WITH DIAERESIS
                case 0xA0: return 0x85;     // LATIN SMALL LETTER A WITH GRAVE
                case 0xA5: return 0x86;     // LATIN SMALL LETTER A WITH RING ABOVE
                case 0xA7: return 0x87;     // LATIN SMALL LETTER C WITH CEDILLA
                case 0xAA: return 0x88;     // LATIN SMALL LETTER E WITH CIRCUMFLEX
                case 0xAB: return 0x89;     // LATIN SMALL LETTER E WITH DIAERESIS
                case 0xA8: return 0x8A;     // LATIN SMALL LETTER E WITH GRAVE
                case 0xAF: return 0x8B;     // LATIN SMALL LETTER I WITH DIAERESIS
                case 0xAE: return 0x8C;     // LATIN SMALL LETTER I WITH CIRCUMFLEX
                case 0xAC: return 0x8D;     // LATIN SMALL LETTER I WITH GRAVE
                case 0x84: return 0x8E;     // LATIN CAPITAL LETTER A WITH DIAERESIS
                case 0x85: return 0x8F;     // LATIN CAPITAL LETTER A WITH RING ABOVE
                case 0x89: return 0x90;     // LATIN CAPITAL LETTER E WITH ACUTE
                case 0xA6: return 0x91;     // LATIN SMALL LETTER AE
                case 0x86: return 0x92;     // LATIN CAPITAL LETTER AE
                case 0xB4: return 0x93;     // LATIN SMALL LETTER O WITH CIRCUMFLEX
                case 0xB6: return 0x94;     // LATIN SMALL LETTER O WITH DIAERESIS
                case 0xB2: return 0x95;     // LATIN SMALL LETTER O WITH GRAVE
                case 0xBB: return 0x96;     // LATIN SMALL LETTER U WITH CIRCUMFLEX
                case 0xB9: return 0x97;     // LATIN SMALL LETTER U WITH GRAVE
                case 0xBF: return 0x98;     // LATIN SMALL LETTER Y WITH DIAERESIS
                case 0x96: return 0x99;     // LATIN CAPITAL LETTER O WITH DIAERESIS
                case 0x9C: return 0x9A;     // LATIN CAPITAL LETTER U WITH DIAERESIS

                case 0xA1: return 0xA0;     // LATIN SMALL LETTER A WITH ACUTE
                case 0xAD: return 0xA1;     // LATIN SMALL LETTER I WITH ACUTE
                case 0xB3: return 0xA2;     // LATIN SMALL LETTER O WITH ACUTE
                case 0xBA: return 0xA3;     // LATIN SMALL LETTER U WITH ACUTE
                case 0xB1: return 0xA4;     // LATIN SMALL LETTER N WITH TILDE
                case 0x91: return 0xA5;     // LATIN CAPITAL LETTER N WITH TILDE

                case 0x9F: return 0xE1;     // LATIN SMALL LETTER SHARP S
                case 0xB7: return 0xF6;     // DIVISION SIGN
                default:   return 0;
            }

        case 0xC6:
            return (second == 0x92) ? 0x9F : 0;     // LATIN SMALL LETTER F WITH HOOK

        case 0xCE:
            switch (second)
            {
                case 0xB1: return 0xE0;     // GREEK SMALL LETTER ALPHA
                case 0x93: return 0xE2;     // GREEK CAPITAL LETTER GAMMA
                case 0xA3: return 0xE4;     // GREEK CAPITAL LETTER SIGMA
                case 0xA6: return 0xE8;     // GREEK CAPITAL LETTER PHI
                case 0x98: return 0xE9;     // GREEK CAPITAL LETTER THETA
                case 0xA9: return 0xEA;     // GREEK CAPITAL LETTER OMEGA
                case 0xB4: return 0xEB;     // GREEK SMALL LETTER DELTA
                case 0xB5: return 0xEE;     // GREEK SMALL LETTER EPSILON
                default:   return 0;
            }

        case 0xCF:
            switch (second)
            {
                case 0x80: return 0xE3;     // GREEK SMALL LETTER PI
                case 0x83: return 0xE5;     // GREEK SMALL LETTER SIGMA
                case 0x84: return 0xE7;     // GREEK SMALL LETTER TAU
                case 0x86: return 0xED;     // GREEK SMALL LETTER PHI
                default:   return 0;
            }

        default:
            return 0;
    }
}

/* Look up the CP437 byte for a three-byte UTF-8 sequence whose lead byte is
   0xE2, given its second and third bytes; 0 when unmapped. */
static unsigned char cp437_from_e2_sequence(unsigned char second, unsigned char third)
{
    switch (second)
    {
        case 0x81:
            return (third == 0xBF) ? 0xFC : 0;      // SUPERSCRIPT LATIN SMALL LETTER N

        case 0x82:
            return (third == 0xA7) ? 0x9E : 0;      // PESETA SIGN

        case 0x8C:
            switch (third)
            {
                case 0x90: return 0xA9;     // REVERSED NOT SIGN
                case 0xA0: return 0xF4;     // TOP HALF INTEGRAL
                case 0xA1: return 0xF5;     // BOTTOM HALF INTEGRAL
                default:   return 0;
            }

        case 0x88:
            switch (third)
            {
                case 0x9E: return 0xEC;     // INFINITY
                case 0xA9: return 0xEF;     // INTERSECTION
                case 0x99: return 0xF9;     // BULLET OPERATOR
                case 0x9A: return 0xFB;     // SQUARE ROOT
                default:   return 0;
            }

        case 0x89:
            switch (third)
            {
                case 0xA1: return 0xF0;     // IDENTICAL TO
                case 0xA5: return 0xF2;     // GREATER-THAN OR EQUAL TO
                case 0xA4: return 0xF3;     // LESS-THAN OR EQUAL TO
                case 0x88: return 0xF7;     // ALMOST EQUAL TO
                default:   return 0;
            }

        case 0x94:
            switch (third)
            {
                case 0x82: return 0xB3;     // BOX DRAWINGS LIGHT VERTICAL
                case 0xA4: return 0xB4;     // BOX DRAWINGS LIGHT VERTICAL AND LEFT

                case 0x90: return 0xBF;     // BOX DRAWINGS LIGHT DOWN AND LEFT
                case 0x94: return 0xC0;     // BOX DRAWINGS LIGHT UP AND RIGHT
                case 0xB4: return 0xC1;     // BOX DRAWINGS LIGHT UP AND HORIZONTAL
                case 0xAC: return 0xC2;     // BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
                case 0x9C: return 0xC3;     // BOX DRAWINGS LIGHT VERTICAL AND RIGHT
                case 0x80: return 0xC4;     // BOX DRAWINGS LIGHT HORIZONTAL
                case 0xBC: return 0xC5;     // BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL

                case 0x98: return 0xD9;     // BOX DRAWINGS LIGHT UP AND LEFT
                case 0x8C: return 0xDA;     // BOX DRAWINGS LIGHT DOWN AND RIGHT
                default:   return 0;
            }

        case 0x95:
            switch (third)
            {
                case 0xA1: return 0xB5;     // BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
                case 0xA2: return 0xB6;     // BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
                case 0x96: return 0xB7;     // BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
                case 0x95: return 0xB8;     // BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
                case 0xA3: return 0xB9;     // BOX DRAWINGS DOUBLE VERTICAL AND LEFT
                case 0x91: return 0xBA;     // BOX DRAWINGS DOUBLE VERTICAL
                case 0x97: return 0xBB;     // BOX DRAWINGS DOUBLE DOWN AND LEFT
                case 0x9D: return 0xBC;     // BOX DRAWINGS DOUBLE UP AND LEFT
                case 0x9C: return 0xBD;     // BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
                case 0x9B: return 0xBE;     // BOX DRAWINGS UP SINGLE AND LEFT DOUBLE

                case 0x9E: return 0xC6;     // BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
                case 0x9F: return 0xC7;     // BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
                case 0x9A: return 0xC8;     // BOX DRAWINGS DOUBLE UP AND RIGHT
                case 0x94: return 0xC9;     // BOX DRAWINGS DOUBLE DOWN AND RIGHT
                case 0xA9: return 0xCA;     // BOX DRAWINGS DOUBLE UP AND HORIZONTAL
                case 0xA6: return 0xCB;     // BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
                case 0xA0: return 0xCC;     // BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
                case 0x90: return 0xCD;     // BOX DRAWINGS DOUBLE HORIZONTAL
                case 0xAC: return 0xCE;     // BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
                case 0xA7: return 0xCF;     // BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
                case 0xA8: return 0xD0;     // BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
                case 0xA4: return 0xD1;     // BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
                case 0xA5: return 0xD2;     // BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
                case 0x99: return 0xD3;     // BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
                case 0x98: return 0xD4;     // BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
                case 0x92: return 0xD5;     // BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
                case 0x93: return 0xD6;     // BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
                case 0xAB: return 0xD7;     // BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
                case 0xAA: return 0xD8;     // BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
                default:   return 0;
            }

        case 0x96:
            switch (third)
            {
                case 0x91: return 0xB0;     // LIGHT SHADE
                case 0x92: return 0xB1;     // MEDIUM SHADE
                case 0x93: return 0xB2;     // DARK SHADE

                case 0x88: return 0xDB;     // FULL BLOCK
                case 0x84: return 0xDC;     // LOWER HALF BLOCK
                case 0x8C: return 0xDD;     // LEFT HALF BLOCK
                case 0x90: return 0xDE;     // RIGHT HALF BLOCK
                case 0x80: return 0xDF;     // UPPER HALF BLOCK

                case 0xA0: return 0xFE;     // BLACK SQUARE
                default:   return 0;
            }

        default:
            return 0;
    }
}

/**
 * Convert one UTF-8 encoded character to its Code Page 437 equivalent.
 *
 * Only two-byte sequences, and three-byte sequences whose lead byte is 0xE2,
 * are looked up; every other input is reported as unmapped. No byte beyond
 * the first u8_len bytes of src is read.
 *
 * @param src         Points at the first byte of the UTF-8 sequence.
 * @param u8_len      Length in bytes of the sequence at src.
 * @param cp437equiv  Receives the CP437 byte, or 0 when there is no mapping.
 * @return true when a mapping was found, false otherwise (including when
 *         src or cp437equiv is NULL).
 */
bool map_utf8_to_codepage437(const char *src, int u8_len, unsigned char *cp437equiv)
{
    if (cp437equiv == NULL)
        return false;

    *cp437equiv = 0;

    if (src == NULL)
        return false;

    /* Bytes are fetched only inside the branch that has established how many
       are available, so a one-byte sequence never touches src[1]. */
    if (u8_len == 2)
    {
        *cp437equiv = cp437_from_two_bytes((unsigned char)src[0],
                                           (unsigned char)src[1]);
    }
    else if (u8_len == 3 && (unsigned char)src[0] == 0xE2)
    {
        *cp437equiv = cp437_from_e2_sequence((unsigned char)src[1],
                                             (unsigned char)src[2]);
    }

    return *cp437equiv != 0;
}
317 
318 
/**
 * Copy a NUL-terminated UTF-8 string into a fixed-size buffer, replacing
 * every character that has a Code Page 437 equivalent with that single byte.
 * Used to display some common UTF-8 characters on embedded displays with
 * CP437 support. - STC
 *
 * Characters without a CP437 equivalent are copied through unchanged, as
 * whole UTF-8 sequences. Copying stops at the end of src, at a sequence that
 * is cut short by the end of src, or at the first character whose output
 * does not fit. The result is always NUL-terminated; the unused tail of dest
 * is not cleared.
 *
 * @param dest           Destination buffer of at least dest_buf_size bytes.
 * @param src            NUL-terminated source string; NULL is treated as "".
 * @param dest_buf_size  Size of dest in bytes, terminator included. Nothing
 *                       is written when this is zero or negative.
 */
void strncpyasutf8_map_cp437(char *dest, const char *src, int dest_buf_size)
{
    if (dest == NULL || dest_buf_size <= 0)
        return;

    int bufleft = dest_buf_size - 1;    /* keep one byte for the terminator */

    /* Stop at the source terminator: without this test the loop would keep
       reading beyond the end of src until the destination was full. */
    while (src != NULL && bufleft > 0 && *src != '\0')
    {
        const int u8_len = u8_seqlen(src);

        /* The lead byte may announce more bytes than src actually holds.
           Stop rather than read past the terminator. */
        int present = 1;
        while (present < u8_len && src[present] != '\0')
            present++;
        if (present < u8_len)
            break;

        unsigned char cp437equiv = 0;
        if (map_utf8_to_codepage437(src, u8_len, &cp437equiv))
        {
            /* A mapped character needs exactly one output byte, however long
               its UTF-8 form was, and bufleft > 0 is already known. */
            *dest++ = (char)cp437equiv;
            src += u8_len;
            bufleft--;
        }
        else
        {
            /* Unmapped: the raw sequence is copied, so all of it must fit. */
            if (bufleft < u8_len)
                break;

            memcpy(dest, src, (size_t)u8_len);
            dest    += u8_len;
            src     += u8_len;
            bufleft -= u8_len;
        }
    }

    *dest = '\0';
}
355 
356 #endif // defined (NTV2_NUB_CLIENT_SUPPORT)
trailingBytesForUTF8
static const char trailingBytesForUTF8[256]
Definition: ntv2utf8.cpp:14
map_utf8_to_codepage437
bool map_utf8_to_codepage437(const char *src, int u8_len, unsigned char *cp437equiv)
Definition: ntv2utf8.cpp:65
ntv2utf8.h
Declares the bare-bones UTF8 support functions (for the nub).
u8_seqlen
static int u8_seqlen(const char *s)
Definition: ntv2utf8.cpp:26
strncpyasutf8
void strncpyasutf8(char *dest, const char *src, int dest_buf_size)
Definition: ntv2utf8.cpp:33
strncpyasutf8_map_cp437
void strncpyasutf8_map_cp437(char *dest, const char *src, int dest_buf_size)
Definition: ntv2utf8.cpp:321
true
#define true
Definition: ntv2devicefeatures.h:26