utf8_new.c (525B)
/* is c the start of a utf8 sequence? (true unless c is a 10xxxxxx continuation byte) */
#define isutf(c) (((c)&0xC0)!=0x80)

/*
 * Correction subtracted from the raw accumulated value, indexed by
 * (sequence length - 1).  The decoder below adds whole bytes into the
 * accumulator, so the lead-byte marker bits and the 0x80 marker of every
 * continuation byte end up in the sum; each entry is exactly that excess,
 * e.g. for two bytes: (0xC0 << 6) + 0x80 == 0x3080.
 */
static const uint32_t offsetsFromUTF8[6] = {
    0x00000000UL, 0x00003080UL, 0x000E2080UL,
    0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

/*
 * Next character without NUL character terminator.
 *
 * Decodes the sequence starting at s[*i] and advances *i past the bytes
 * consumed.  Unlike a string-based decoder, a 0x00 byte is not treated as
 * an end marker: it is an ordinary one-byte character.
 *
 * s: buffer holding UTF-8 data.
 * i: in/out byte index into s; on return it indexes the first byte that
 *    was not consumed.
 *
 * Returns the decoded value.  No validation is performed: for malformed
 * input the result is whatever the shift/add/subtract arithmetic yields
 * (modulo 2^32).
 *
 * At most 6 bytes (the number of entries in offsetsFromUTF8) are consumed
 * per call, so an over-long run of continuation bytes can no longer index
 * past the end of the offset table; the remainder of such a run is picked
 * up by the following call(s).
 *
 * NOTE: when fewer than 6 bytes have been consumed, the byte following the
 * sequence is examined to detect the sequence end, so s[*i] must be
 * readable on return; the caller has to guarantee that.
 */
uint32_t u8_nextmemchar(const char *s, size_t *i)
{
    const size_t max_len = sizeof offsetsFromUTF8 / sizeof offsetsFromUTF8[0];
    uint32_t ch = 0;
    size_t sz = 0;

    do {
        ch <<= 6;
        ch += (unsigned char)s[(*i)++];
        sz++;
        /* sz is checked first so s[*i] is not read once the cap is hit */
    } while (sz < max_len && !isutf(s[*i]));
    ch -= offsetsFromUTF8[sz - 1];

    return ch;
}