// Table of 142 code point intervals, each given as {first, last} with both
// ends included. IsCombining() looks a code point up in this table through
// Bisearch().
// The intervals are listed in ascending order; Bisearch() relies on that
// ordering (it compares against table[0].first, table[max].last and a
// midpoint entry).
// NOTE(review): the decimal number at the start of each line (27, 28, ...)
// looks like a listing line number fused into the text, and the closing `};`
// of the initializer is not present in this excerpt.
27const std::array<Interval, 142> g_combining_characters = {
28 Interval{0x0300, 0x036F}, Interval{0x0483, 0x0486},
29 Interval{0x0488, 0x0489}, Interval{0x0591, 0x05BD},
30 Interval{0x05BF, 0x05BF}, Interval{0x05C1, 0x05C2},
31 Interval{0x05C4, 0x05C5}, Interval{0x05C7, 0x05C7},
32 Interval{0x0600, 0x0603}, Interval{0x0610, 0x0615},
33 Interval{0x064B, 0x065E}, Interval{0x0670, 0x0670},
34 Interval{0x06D6, 0x06E4}, Interval{0x06E7, 0x06E8},
35 Interval{0x06EA, 0x06ED}, Interval{0x070F, 0x070F},
36 Interval{0x0711, 0x0711}, Interval{0x0730, 0x074A},
37 Interval{0x07A6, 0x07B0}, Interval{0x07EB, 0x07F3},
38 Interval{0x0901, 0x0902}, Interval{0x093C, 0x093C},
39 Interval{0x0941, 0x0948}, Interval{0x094D, 0x094D},
40 Interval{0x0951, 0x0954}, Interval{0x0962, 0x0963},
41 Interval{0x0981, 0x0981}, Interval{0x09BC, 0x09BC},
42 Interval{0x09C1, 0x09C4}, Interval{0x09CD, 0x09CD},
43 Interval{0x09E2, 0x09E3}, Interval{0x0A01, 0x0A02},
44 Interval{0x0A3C, 0x0A3C}, Interval{0x0A41, 0x0A42},
45 Interval{0x0A47, 0x0A48}, Interval{0x0A4B, 0x0A4D},
46 Interval{0x0A70, 0x0A71}, Interval{0x0A81, 0x0A82},
47 Interval{0x0ABC, 0x0ABC}, Interval{0x0AC1, 0x0AC5},
48 Interval{0x0AC7, 0x0AC8}, Interval{0x0ACD, 0x0ACD},
49 Interval{0x0AE2, 0x0AE3}, Interval{0x0B01, 0x0B01},
50 Interval{0x0B3C, 0x0B3C}, Interval{0x0B3F, 0x0B3F},
51 Interval{0x0B41, 0x0B43}, Interval{0x0B4D, 0x0B4D},
52 Interval{0x0B56, 0x0B56}, Interval{0x0B82, 0x0B82},
53 Interval{0x0BC0, 0x0BC0}, Interval{0x0BCD, 0x0BCD},
54 Interval{0x0C3E, 0x0C40}, Interval{0x0C46, 0x0C48},
55 Interval{0x0C4A, 0x0C4D}, Interval{0x0C55, 0x0C56},
56 Interval{0x0CBC, 0x0CBC}, Interval{0x0CBF, 0x0CBF},
57 Interval{0x0CC6, 0x0CC6}, Interval{0x0CCC, 0x0CCD},
58 Interval{0x0CE2, 0x0CE3}, Interval{0x0D41, 0x0D43},
59 Interval{0x0D4D, 0x0D4D}, Interval{0x0DCA, 0x0DCA},
60 Interval{0x0DD2, 0x0DD4}, Interval{0x0DD6, 0x0DD6},
61 Interval{0x0E31, 0x0E31}, Interval{0x0E34, 0x0E3A},
62 Interval{0x0E47, 0x0E4E}, Interval{0x0EB1, 0x0EB1},
63 Interval{0x0EB4, 0x0EB9}, Interval{0x0EBB, 0x0EBC},
64 Interval{0x0EC8, 0x0ECD}, Interval{0x0F18, 0x0F19},
65 Interval{0x0F35, 0x0F35}, Interval{0x0F37, 0x0F37},
66 Interval{0x0F39, 0x0F39}, Interval{0x0F71, 0x0F7E},
67 Interval{0x0F80, 0x0F84}, Interval{0x0F86, 0x0F87},
68 Interval{0x0F90, 0x0F97}, Interval{0x0F99, 0x0FBC},
69 Interval{0x0FC6, 0x0FC6}, Interval{0x102D, 0x1030},
70 Interval{0x1032, 0x1032}, Interval{0x1036, 0x1037},
71 Interval{0x1039, 0x1039}, Interval{0x1058, 0x1059},
72 Interval{0x1160, 0x11FF}, Interval{0x135F, 0x135F},
73 Interval{0x1712, 0x1714}, Interval{0x1732, 0x1734},
74 Interval{0x1752, 0x1753}, Interval{0x1772, 0x1773},
75 Interval{0x17B4, 0x17B5}, Interval{0x17B7, 0x17BD},
76 Interval{0x17C6, 0x17C6}, Interval{0x17C9, 0x17D3},
77 Interval{0x17DD, 0x17DD}, Interval{0x180B, 0x180D},
78 Interval{0x18A9, 0x18A9}, Interval{0x1920, 0x1922},
79 Interval{0x1927, 0x1928}, Interval{0x1932, 0x1932},
80 Interval{0x1939, 0x193B}, Interval{0x1A17, 0x1A18},
81 Interval{0x1B00, 0x1B03}, Interval{0x1B34, 0x1B34},
82 Interval{0x1B36, 0x1B3A}, Interval{0x1B3C, 0x1B3C},
83 Interval{0x1B42, 0x1B42}, Interval{0x1B6B, 0x1B73},
84 Interval{0x1DC0, 0x1DCA}, Interval{0x1DFE, 0x1DFF},
85 Interval{0x200B, 0x200F}, Interval{0x202A, 0x202E},
86 Interval{0x2060, 0x2063}, Interval{0x206A, 0x206F},
87 Interval{0x20D0, 0x20EF}, Interval{0x302A, 0x302F},
88 Interval{0x3099, 0x309A}, Interval{0xA806, 0xA806},
89 Interval{0xA80B, 0xA80B}, Interval{0xA825, 0xA826},
90 Interval{0xFB1E, 0xFB1E}, Interval{0xFE00, 0xFE0F},
91 Interval{0xFE20, 0xFE23}, Interval{0xFEFF, 0xFEFF},
92 Interval{0xFFF9, 0xFFFB}, Interval{0x10A01, 0x10A03},
93 Interval{0x10A05, 0x10A06}, Interval{0x10A0C, 0x10A0F},
94 Interval{0x10A38, 0x10A3A}, Interval{0x10A3F, 0x10A3F},
95 Interval{0x1D167, 0x1D169}, Interval{0x1D173, 0x1D182},
96 Interval{0x1D185, 0x1D18B}, Interval{0x1D1AA, 0x1D1AD},
97 Interval{0x1D242, 0x1D244}, Interval{0xE0001, 0xE0001},
98 Interval{0xE0020, 0xE007F}, Interval{0xE0100, 0xE01EF},
// Table of 13 code point intervals, each given as {first, last} with both
// ends included and listed in ascending order. IsFullWidth() looks a code
// point up in this table through Bisearch().
// NOTE(review): the closing `};` of the initializer is not present in this
// excerpt.
101const std::array<Interval, 13> g_full_width_characters = {
102 Interval{0x1100, 0x115f}, Interval{0x2329, 0x2329},
103 Interval{0x232a, 0x232a}, Interval{0x2e80, 0x303e},
104 Interval{0x3040, 0xa4cf}, Interval{0xac00, 0xd7a3},
105 Interval{0xf900, 0xfaff}, Interval{0xfe10, 0xfe19},
106 Interval{0xfe30, 0xfe6f}, Interval{0xff00, 0xff60},
107 Interval{0xffe0, 0xffe6}, Interval{0x20000, 0x2fffd},
108 Interval{0x30000, 0x3fffd},
// Searches |table| for an interval that contains |ucs|.
// |table| points at the first Interval of an array ordered by code point and
// |max| is the index of its last entry (callers pass size() - 1), not the
// number of entries.
// NOTE(review): the declaration of |min|, the enclosing loop and every return
// statement are missing from this excerpt; presumably the function returns
// true when an interval contains |ucs| - confirm against the full source.
112bool Bisearch(uint32_t ucs,
const Interval* table,
int max) {
// |ucs| lies outside the range spanned by the whole table.
113 if (ucs < table[0].first || ucs > table[max].last) {
// Probe the entry halfway between the current |min| and |max| indices.
119 int mid = (min + max) / 2;
// |ucs| is above the probed interval.
120 if (ucs > table[mid].last) {
122 }
// |ucs| is below the probed interval.
else if (ucs < table[mid].first) {
// Returns the result of searching g_combining_characters for |ucs|.
// Bisearch() expects the index of the last table entry, hence size() - 1.
132bool IsCombining(uint32_t ucs) {
133 return Bisearch(ucs, g_combining_characters.data(),
134 g_combining_characters.size() - 1);
// Returns the result of searching g_full_width_characters for |ucs|.
// Bisearch() expects the index of the last table entry, hence size() - 1.
// NOTE(review): the embedded listing numbers jump from 137 to 141, so any
// statements that precede the return are not visible in this excerpt.
137bool IsFullWidth(uint32_t ucs) {
141 return Bisearch(ucs, g_full_width_characters.data(),
142 g_full_width_characters.size() - 1);
// Tests whether |ucs| is a control character.
// NOTE(review): only one of the checks is visible in this excerpt; the other
// statements and the returned values are omitted.
145bool IsControl(uint32_t ucs) {
// 0x7f (DEL) up to and including 0x9f, i.e. DEL plus the C1 control range.
152 if (ucs >= 0x7f && ucs < 0xa0) {
// Derives a width for the code point |ucs| by testing, in this order, whether
// it is a control, a combining or a full-width code point.
// NOTE(review): the value returned for each case is not visible in this
// excerpt.
158int codepoint_width(uint32_t ucs) {
159 if (IsControl(ucs)) {
163 if (IsCombining(ucs)) {
167 if (IsFullWidth(ucs)) {
// Decodes one UTF-8 sequence of |input| beginning at byte offset |start| and
// stores the resulting code point through |ucs|. The lead byte selects the
// 1-, 2-, 3- or 4-byte form; a multi-byte form is only taken when enough
// bytes remain in |input|.
// Call sites in this file pass (input, start, &end, &codepoint) and test the
// bool result.
// NOTE(review): this excerpt omits lines of the definition (the remaining
// parameters, the statements between the `+=` steps and every return), so the
// outcome of each branch cannot be confirmed from here. No check that the
// following bytes have the 10xxxxxx form is visible either.
178bool EatCodePoint(
const std::string& input,
// |start| is at or past the end of |input|: there is no byte to decode.
182 if (start >= input.size()) {
186 uint8_t byte_1 = input[start];
// Lead byte 0xxxxxxx: single-byte form, the low 7 bits are the code point.
189 if ((byte_1 & 0b1000'0000) == 0b0000'0000) {
190 *ucs = byte_1 & 0b0111'1111;
// Lead byte 110xxxxx: two-byte form, requires one more byte after |start|.
196 if ((byte_1 & 0b1110'0000) == 0b1100'0000 &&
197 start + 1 < input.size()) {
198 uint8_t byte_2 = input[start + 1];
// 5 payload bits from the lead byte, then 6 from the second byte.
200 *ucs += byte_1 & 0b0001'1111;
202 *ucs += byte_2 & 0b0011'1111;
// Lead byte 1110xxxx: three-byte form, requires two more bytes after |start|.
208 if ((byte_1 & 0b1111'0000) == 0b1110'0000 &&
209 start + 2 < input.size()) {
210 uint8_t byte_2 = input[start + 1];
211 uint8_t byte_3 = input[start + 2];
// 4 payload bits from the lead byte, then 6 from each following byte.
213 *ucs += byte_1 & 0b0000'1111;
215 *ucs += byte_2 & 0b0011'1111;
217 *ucs += byte_3 & 0b0011'1111;
// Lead byte 11110xxx: four-byte form, requires three more bytes after |start|.
223 if ((byte_1 & 0b1111'1000) == 0b1111'0000 &&
224 start + 3 < input.size()) {
225 uint8_t byte_2 = input[start + 1];
226 uint8_t byte_3 = input[start + 2];
227 uint8_t byte_4 = input[start + 3];
// 3 payload bits from the lead byte, then 6 from each following byte.
229 *ucs += byte_1 & 0b0000'0111;
231 *ucs += byte_2 & 0b0011'1111;
233 *ucs += byte_3 & 0b0011'1111;
235 *ucs += byte_4 & 0b0011'1111;
// Forwards to codepoint_width() after converting |ucs| to uint32_t.
// NOTE(review): the header of the enclosing function is not visible in this
// excerpt - presumably a wchar_t overload; confirm against the full source.
248 return codepoint_width(uint32_t(ucs));
// Visits every wchar_t of |text| by const reference.
// NOTE(review): the enclosing header and the loop body are not visible in
// this excerpt - presumably part of wstring_width(const std::wstring&);
// confirm against the full source.
254 for (
const wchar_t& it :
text) {
// Walks |input| one code point at a time and classifies each code point as
// control, combining or full-width.
// NOTE(review): the enclosing header and the body of every branch are not
// visible in this excerpt - presumably part of string_width(const
// std::string&); confirm against the full source.
267 while (start < input.size()) {
268 uint32_t codepoint = 0;
// |start| is passed both as the read offset and as the `end` output argument.
269 if (!EatCodePoint(input, start, &start, &codepoint)) {
273 if (IsControl(codepoint)) {
277 if (IsCombining(codepoint)) {
281 if (IsFullWidth(codepoint)) {
// Builds a list of strings from the code points of |input|.
// NOTE(review): the enclosing header, the `continue`/advance statements and
// the return are not visible in this excerpt - the std::vector<std::string>
// result suggests Utf8ToGlyphs(const std::string&); confirm against the full
// source.
292 std::vector<std::string> out;
// Reserves one entry per byte of |input|.
294 out.reserve(input.size());
297 while (start < input.size()) {
298 uint32_t codepoint = 0;
299 if (!EatCodePoint(input, start, &end, &codepoint)) {
// The bytes [start, end) that were consumed for this code point.
304 std::string append = input.substr(start, end - start);
308 if (IsControl(codepoint)) {
// A combining code point is appended to the previous entry instead of
// getting an entry of its own.
313 if (IsCombining(codepoint)) {
315 out.back() += append;
// A full-width code point is stored followed by an empty string, so it takes
// two entries of |out|.
322 if (IsFullWidth(codepoint)) {
323 out.push_back(append);
324 out.emplace_back(
"");
// Any other code point takes a single entry.
329 out.push_back(append);
// Maps |glyph_index| to a byte offset in |input|, scanning code points from
// byte offset |start|.
// Returns the current |start| offset when |glyph_index| is 0 at that point of
// the scan, and input.size() when the end of |input| is reached first.
// NOTE(review): the declaration of |end|, the bodies of the branches and the
// statements that update |glyph_index| and |start| are not visible in this
// excerpt.
334int GlyphPosition(
const std::string& input,
size_t glyph_index,
size_t start) {
// |glyph_index| is unsigned, so `<= 0` holds only when it is exactly 0.
335 if (glyph_index <= 0) {
339 while (start < input.size()) {
340 uint32_t codepoint = 0;
341 bool eaten = EatCodePoint(input, start, &end, &codepoint);
// Undecodable bytes, control characters and combining characters take a
// separate path from ordinary glyphs.
344 if (!eaten || IsControl(codepoint) || IsCombining(codepoint)) {
351 if (glyph_index == 0) {
352 return static_cast<int>(start);
// The scan ran off the end of |input|.
359 return static_cast<int>(input.size());
// Builds a list of ints while scanning the code points of |input|.
// NOTE(review): the enclosing header, the statements that fill |out| and the
// return are not visible in this excerpt - the std::vector<int> result
// suggests CellToGlyphIndex(const std::string&); confirm against the full
// source.
364 std::vector<int> out;
// Reserves one entry per byte of |input|.
365 out.reserve(input.size());
368 while (start < input.size()) {
369 uint32_t codepoint = 0;
370 bool eaten = EatCodePoint(input, start, &end, &codepoint);
// Undecodable bytes and control characters share one branch.
374 if (!eaten || IsControl(codepoint)) {
379 if (IsCombining(codepoint)) {
389 if (IsFullWidth(codepoint)) {
// Scans |input| one code point at a time, treating undecodable bytes and
// control characters in one branch and combining characters in another.
// NOTE(review): the enclosing header, the branch bodies and the return are
// not visible in this excerpt - presumably part of GlyphCount(const
// std::string&); confirm against the full source.
407 while (start < input.size()) {
408 uint32_t codepoint = 0;
409 bool eaten = EatCodePoint(input, start, &end, &codepoint);
413 if (!eaten || IsControl(codepoint)) {
419 if (IsCombining(codepoint)) {
// Disables MSVC warning C4996 (use of a deprecated declaration). The code
// that follows uses std::wstring_convert and std::codecvt_utf8_utf16, which
// are deprecated since C++17.
// NOTE(review): any surrounding compiler guard or matching push/pop is not
// visible in this excerpt.
433#pragma warning(disable : 4996)
// Converts the wide string |s| into a UTF-8 encoded byte string with
// std::wstring_convert::to_bytes.
// NOTE(review): the enclosing header is not visible in this excerpt -
// presumably to_string(const std::wstring&); confirm against the full source.
438 std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
439 return converter.to_bytes(s);
// Converts the UTF-8 encoded byte string |s| into a wide string with
// std::wstring_convert::from_bytes.
// NOTE(review): the enclosing header is not visible in this excerpt -
// presumably to_wstring(const std::string&); confirm against the full source.
444 std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
445 return converter.from_bytes(s);
std::vector< std::string > Utf8ToGlyphs(const std::string &input)
int string_width(const std::string &)
std::wstring to_wstring(const std::string &s)
Convert a UTF8 std::string into a std::wstring.
std::string to_string(const std::wstring &s)
Convert a std::wstring into a UTF8 std::string.
Element text(std::wstring text)
Display a piece of unicode text.
int GlyphPosition(const std::string &input, size_t glyph_index, size_t start=0)
std::vector< int > CellToGlyphIndex(const std::string &input)
int GlyphCount(const std::string &input)
Decorator size(Direction, Constraint, int value)
Apply a constraint on the size of an element.
int wstring_width(const std::wstring &)