ltkx

GUI toolkit for X11 (WIP)
git clone git://lumidify.org/ltkx.git
Log | Files | Refs | README | LICENSE

utf8.h (3892B)


      1 #ifndef UTF8_H
      2 #define UTF8_H
      3 
      4 extern int locale_is_utf8; /* declaration only; the definition is not in this header. NOTE(review): presumably nonzero when the current locale is UTF-8 - confirm */
      5 
      6 /* is c the start of a utf8 sequence? true unless the top two bits of c are 10, i.e. unless c is a continuation byte */
      7 #define isutf(c) (((c)&0xC0)!=0x80)
      8 
      9 #define UEOF ((uint32_t)-1) /* all-ones uint32_t value; uint32_t must be in scope wherever this is expanded. NOTE(review): presumably an end-of-input sentinel - confirm */
     10 
     11 /* convert UTF-8 data in src to wide characters (uint32_t) in dest. NOTE(review): sz and srcsz are presumably the sizes of dest and src, and the return value a count of converted characters - confirm in the implementation */
     12 size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
     13 
     14 /* the opposite conversion: wide characters (uint32_t) in src to UTF-8 in dest. NOTE(review): sz and srcsz are presumably the sizes of dest and src; units and return value are not stated here - confirm */
     15 size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz);
     16 
     17 /* single character (ch) to UTF-8 in dest, returns # bytes written. NOTE(review): required size of dest and whether a NUL terminator is appended are not stated here - confirm */
     18 size_t u8_wc_toutf8(char *dest, uint32_t ch);
     19 
     20 /* character number (charnum) to byte offset within str. NOTE(review): behaviour when charnum is past the end of str is not stated here - confirm */
     21 size_t u8_offset(const char *str, size_t charnum);
     22 
     23 /* byte offset (offset) to character number within s; the reverse mapping of u8_offset */
     24 size_t u8_charnum(const char *s, size_t offset);
     25 
     26 /* return next character of s, updating the index variable *i. NOTE(review): *i is presumably a byte index into s, and NUL presumably acts as a terminator (contrast u8_nextmemchar) - confirm */
     27 uint32_t u8_nextchar(const char *s, size_t *i);
     28 
     29 /* next character without NUL character terminator; takes the same arguments as u8_nextchar (s and the index variable *i) */
     30 uint32_t u8_nextmemchar(const char *s, size_t *i);
     31 
     32 /* move to next character: updates the index *i within s; nothing is returned */
     33 void u8_inc(const char *s, size_t *i);
     34 
     35 /* move to previous character: updates the index *i within s; nothing is returned. NOTE(review): behaviour when *i is already 0 is not stated here - confirm */
     36 void u8_dec(const char *s, size_t *i);
     37 
     38 /* returns length of next utf-8 sequence starting at s. NOTE(review): presumably a byte count - confirm */
     39 size_t u8_seqlen(const char *s);
     40 
     41 /* returns the # of bytes needed to encode the character ch */
     42 size_t u8_charlen(uint32_t ch);
     43 
     44 /* computes the # of bytes needed to encode a WC string as UTF-8. note: wcstr is not const-qualified in this prototype. NOTE(review): n is presumably the number of characters in wcstr - confirm */
     45 size_t u8_codingsize(uint32_t *wcstr, size_t n);
     46 
     47 char read_escape_control_char(char c); /* NOTE(review): undocumented here; from the name, presumably maps the letter following a backslash to the control character it denotes - confirm */
     48 
     49 /* assuming src points to the character after a backslash, read an
     50    escape sequence, storing the result in dest and returning the number of
     51    input characters processed. NOTE(review): ssz is presumably the number of bytes available at src - confirm */
     52 size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
     53 
     54 /* given a wide character, convert it to an ASCII escape sequence stored in
     55    buf, where buf is "sz" bytes. returns the number of characters output.
     56    sz must be at least 3. note: the return type is int although sz is size_t. */
     57 int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
     58 
     59 /* convert a string "src" containing escape sequences to UTF-8, written to buf. NOTE(review): sz is presumably the size of buf in bytes and the return value the number of bytes produced - confirm */
     60 size_t u8_unescape(char *buf, size_t sz, const char *src);
     61 
     62 /* convert UTF-8 "src" to escape sequences, written to buf.
     63 
     64    sz is buf size in bytes. must be at least 12.
     65 
     66    if escape_quotes is nonzero, quote characters will be escaped.
     67 
     68    if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives.
     69 
     70    starts at src[*pi], updates *pi to point to the first unprocessed
     71    byte of the input (so *pi is a byte index into src).
     72 
     73    end is one more than the last allowable value of *pi.
     74 
     75    returns number of bytes placed in buf, including a NUL terminator.
     76 */
     77 size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
     78                  int escape_quotes, int ascii);
     79 
     80 /* utility predicates used by the above. NOTE(review): presumably return nonzero when c is an octal / hexadecimal digit respectively - confirm */
     81 int octal_digit(char c);
     82 int hex_digit(char c);
     83 
     84 /* return a pointer to the first occurrence of ch in s, or NULL if not
     85    found. character index of found character returned in *charn. note: s is const char * but the returned pointer is plain char *. */
     86 char *u8_strchr(const char *s, uint32_t ch, size_t *charn);
     87 
     88 /* same as u8_strchr, but searches a buffer of a given size (sz) instead of
     89    a NUL-terminated string. NOTE(review): sz is presumably in bytes - confirm */
     90 char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn);
     91 
     92 char *u8_memrchr(const char *s, uint32_t ch, size_t sz); /* NOTE(review): undocumented here; from the name, presumably finds the last occurrence of ch in the buffer s of size sz - confirm */
     93 
     94 /* count the number of characters (not bytes) in a UTF-8 string. NOTE(review): s is presumably NUL-terminated, since no length is passed - confirm */
     95 size_t u8_strlen(const char *s);
     96 
     97 /* number of columns occupied by a string. NOTE(review): presumably display columns in the wcwidth() sense - confirm */
     98 size_t u8_strwidth(const char *s);
     99 
    100 int u8_is_locale_utf8(const char *locale); /* NOTE(review): undocumented here; presumably returns nonzero when the locale name passed in denotes a UTF-8 locale - confirm */
    101 
    102 /* printf where the format string and arguments may be in UTF-8.
    103    you can avoid this function and just use ordinary printf() if the current
    104    locale is UTF-8. u8_vprintf takes a va_list, u8_printf takes variadic arguments; va_list must be declared before this header is included (no #include is visible here). */
    105 size_t u8_vprintf(const char *fmt, va_list ap);
    106 size_t u8_printf(const char *fmt, ...);
    107 
    108 /* determine whether a sequence of bytes is valid UTF-8. length is in bytes. NOTE(review): the exact return values are not stated here; nonzero presumably means valid - confirm */
    109 int u8_isvalid(const char *str, size_t length);
    110 
    111 /* reverse a UTF-8 string. len is length in bytes. dest and src must both
    112    be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise. note: src is declared char *, not const char *. */
    113 int u8_reverse(char *dest, char *src, size_t len);
    114 
    115 #endif