diff --git a/Makefile b/Makefile index 28dea3d..8bf63e2 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,10 @@ CFLAGS = -g -O2 -Wall LD = gcc LDFLAGS = +# Feature flags: +# define TLF_FONTS to use TOIlet TLF fonts +XCFLAGS = -DTLF_FONTS + # Where the executables should be put BINDIR = /usr/local/bin @@ -47,15 +51,15 @@ DEFAULTFONTFILE = standard.flf VERSION = 2.2.3 DIST = figlet-$(VERSION) -OBJS = figlet.o zipio.o crc.o inflate.o +OBJS = figlet.o zipio.o crc.o inflate.o utf8.o BINS = figlet chkfont figlist showfigfonts MANUAL = figlet.6 chkfont.6 figlist.6 showfigfonts.6 DFILES = Makefile Makefile.tc $(MANUAL) $(OBJS:.o=.c) chkfont.c \ figlist showfigfonts CHANGES FAQ README LICENSE figfont.txt \ - crc.h inflate.h zipio.h + crc.h inflate.h zipio.h utf8.h .c.o: - $(CC) -c $(CFLAGS) -DDEFAULTFONTDIR=\"$(DEFAULTFONTDIR)\" \ + $(CC) -c $(CFLAGS) $(XCFLAGS) -DDEFAULTFONTDIR=\"$(DEFAULTFONTDIR)\" \ -DDEFAULTFONTFILE=\"$(DEFAULTFONTFILE)\" -o $*.o $< all: $(BINS) diff --git a/figlet.c b/figlet.c index e26d42b..19742e0 100644 --- a/figlet.c +++ b/figlet.c @@ -68,6 +68,12 @@ #include <sys/ioctl.h> /* Needed for get_columns */ #endif +#ifdef TLF_FONTS +#include <wchar.h> +#include <wctype.h> +#include "utf8.h" +#endif + #include "zipio.h" /* Package for reading compressed files */ #define MYSTRLEN(x) ((int)strlen(x)) /* Eliminate ANSI problem */ @@ -84,6 +90,16 @@ Note: '/' also used in filename in get_columns(). */ #define CONTROLFILEMAGICNUMBER "flc2" /* no longer used in 2.2 */ #define CSUFFIXLEN MYSTRLEN(CONTROLFILESUFFIX) #define DEFAULTCOLUMNS 80 +#define MAXLEN 255 /* Maximum character width */ + +/* Add support for Sam Hocevar's TOIlet fonts */ +#ifdef TLF_FONTS +#define TOILETFILESUFFIX ".tlf" +#define TOILETFILEMAGICNUMBER "tlf2" +#define TSUFFIXLEN MYSTRLEN(TOILETFILESUFFIX) + +int toiletfont; /* true if font is a TOIlet TLF font */ +#endif /**************************************************************************** @@ -123,17 +139,31 @@ char **Myargv; ****************************************************************************/ +#ifdef TLF_FONTS +typedef wchar_t outchr; /* "char" written to stdout */ +#define STRLEN(x) wcslen(x) +#define STRCPY(x,y) wcscpy((x),(y)) +#define STRCAT(x,y) wcscat((x),(y)) +#define ISSPACE(x) iswspace(x) +#else +typedef char outchr; /* "char" written to stdout */ +#define STRLEN(x) MYSTRLEN(x) +#define STRCPY(x,y) strcpy((x),(y)) +#define STRCAT(x,y) strcat((x),(y)) +#define ISSPACE(x) isspace(x) +#endif + typedef struct fc { inchr ord; - char **thechar; /* Alloc'd char thechar[charheight][]; */ + outchr **thechar; /* Alloc'd char thechar[charheight][]; */ struct fc *next; } fcharnode; fcharnode *fcharlist; -char **currchar; +outchr **currchar; int currcharwidth; int previouscharwidth; -char **outputline; /* Alloc'd char outputline[charheight][outlinelenlimit+1]; */ +outchr **outputline; /* Alloc'd char outputline[charheight][outlinelenlimit+1]; */ int outlinelen; @@ -364,6 +394,7 @@ ZFILE *fp; return (c==EOF) ? NULL : line; } + /**************************************************************************** usageerr @@ -398,8 +429,8 @@ int infonum; { switch (infonum) { case 0: /* Copyright message */ - printf("FIGlet Copyright 1991-2002 Glenn Chappell, Ian Chai, "); - printf("John Cowan, Christiaan Keet\n"); + printf("FIGlet Copyright (C) 1991-2011 Glenn Chappell, Ian Chai, "); + printf("John Cowan,\nChristiaan Keet and Claudio Matsuoka\n"); printf("Internet: <info@figlet.org> "); printf("Version: %s, date: %s\n\n",VERSION,DATE); printf("FIGlet, along with the various FIGlet fonts"); @@ -866,6 +897,12 @@ void getparams() if (suffixcmp(fontname,FONTFILESUFFIX)) { fontname[MYSTRLEN(fontname)-FSUFFIXLEN]='\0'; } +#ifdef TLF_FONTS + else if (suffixcmp(fontname,TOILETFILESUFFIX)) { + fontname[MYSTRLEN(fontname)-TSUFFIXLEN]='\0'; + } +#endif + cfilelist = NULL; cfilelistend = &cfilelist; commandlist = NULL; @@ -985,6 +1022,11 @@ void getparams() if (suffixcmp(fontname,FONTFILESUFFIX)) { fontname[MYSTRLEN(fontname)-FSUFFIXLEN] = '\0'; } +#ifdef TLF_FONTS + else if (suffixcmp(fontname,TOILETFILESUFFIX)) { + fontname[MYSTRLEN(fontname)-TSUFFIXLEN] = '\0'; + } +#endif break; case 'C': controlname = optarg; @@ -1065,35 +1107,42 @@ void clearline() void readfontchar(file,theord,line,maxlen) ZFILE *file; inchr theord; -char *line; +outchr *line; /* FIXME: line isn't used */ int maxlen; { int row,k; - char endchar; + char templine[MAXLEN+1]; + outchr endchar, outline[MAXLEN+1]; fcharnode *fclsave; fclsave = fcharlist; fcharlist = (fcharnode*)myalloc(sizeof(fcharnode)); fcharlist->ord = theord; - fcharlist->thechar = (char**)myalloc(sizeof(char*)*charheight); + fcharlist->thechar = (outchr**)myalloc(sizeof(outchr*)*charheight); fcharlist->next = fclsave; + for (row=0;row<charheight;row++) { - if (myfgets(line,maxlen+1,file)==NULL) { - line[0] = '\0'; + if (myfgets(templine,maxlen+1,file)==NULL) { + templine[0] = '\0'; } - k = MYSTRLEN(line)-1; - while (k>=0 && isspace(line[k])) { +#ifdef TLF_FONTS + utf8_to_wchar(templine,MAXLEN,outline,MAXLEN,0); +#else + strcpy(outline,templine); +#endif + k = STRLEN(outline)-1; + while (k>=0 && ISSPACE(outline[k])) { /* remove trailing spaces */ k--; } if (k>=0) { - endchar = line[k]; - while (k>=0 ? line[k]==endchar : 0) { + endchar = outline[k]; /* remove endmarks */ + while (k>=0 && outline[k]==endchar) { k--; } } - line[k+1] = '\0'; - fcharlist->thechar[row] = (char*)myalloc(sizeof(char)*(k+2)); - strcpy(fcharlist->thechar[row],line); + outline[k+1] = '\0'; + fcharlist->thechar[row] = (outchr*)myalloc(sizeof(outchr)*(STRLEN(outline)+1)); + STRCPY(fcharlist->thechar[row],outline); } } @@ -1109,12 +1158,11 @@ int maxlen; void readfont() { -#define MAXFIRSTLINELEN 1000 int i,row,numsread; inchr theord; int maxlen,cmtlines,ffright2left; int smush,smush2; - char *fontpath,*fileline,magicnum[5]; + char *fontpath,fileline[MAXLEN+1],magicnum[5]; ZFILE *fontfile; int namelen; @@ -1134,15 +1182,36 @@ void readfont() strcpy(fontpath,fontname); strcat(fontpath,FONTFILESUFFIX); fontfile = Zopen(fontpath,"rb"); - if (fontfile==NULL) { - fprintf(stderr,"%s: %s: Unable to open font file\n",myname,fontpath); - exit(1); + } + +#ifdef TLF_FONTS + if (fontfile==NULL) { + if (!hasdirsep(fontname)) { + strcpy(fontpath,fontdirname); + fontpath[namelen] = DIRSEP; + fontpath[namelen+1] = '\0'; + strcat(fontpath,fontname); + strcat(fontpath,TOILETFILESUFFIX); + fontfile = Zopen(fontpath,"rb"); } + if (fontfile==NULL) { + strcpy(fontpath,fontname); + strcat(fontpath,TOILETFILESUFFIX); + fontfile = Zopen(fontpath,"rb"); + } + if (fontfile!=NULL) { + toiletfont = 1; + } + } +#endif + + if (fontfile==NULL) { + fprintf(stderr,"%s: %s: Unable to open font file\n",myname,fontpath); + exit(1); } readmagic(fontfile,magicnum); - fileline = (char*)myalloc(sizeof(char)*(MAXFIRSTLINELEN+1)); - if (myfgets(fileline,MAXFIRSTLINELEN+1,fontfile)==NULL) { + if (myfgets(fileline,MAXLEN,fontfile)==NULL) { fileline[0] = '\0'; } if (MYSTRLEN(fileline)>0 ? fileline[MYSTRLEN(fileline)-1]!='\n' : 0) { @@ -1151,8 +1220,17 @@ void readfont() numsread = sscanf(fileline,"%*c%c %d %*d %d %d %d %d %d", &hardblank,&charheight,&maxlen,&smush,&cmtlines, &ffright2left,&smush2); - free(fileline); + + if (maxlen > MAXLEN) { + fprintf(stderr,"%s: %s: character is too wide\n",myname,fontpath); + exit(1); + } +#ifdef TLF_FONTS + if ((!toiletfont && strcmp(magicnum,FONTFILEMAGICNUMBER)) || + (toiletfont && strcmp(magicnum,TOILETFILEMAGICNUMBER)) || numsread<5) { +#else if (strcmp(magicnum,FONTFILEMAGICNUMBER) || numsread<5) { +#endif fprintf(stderr,"%s: %s: Not a FIGlet 2 font file\n",myname,fontpath); exit(1); } @@ -1194,28 +1272,26 @@ void readfont() justification = 2*right2left; } - fileline = (char*)myalloc(sizeof(char)*(maxlen+1)); /* Allocate "missing" character */ fcharlist = (fcharnode*)myalloc(sizeof(fcharnode)); fcharlist->ord = 0; - fcharlist->thechar = (char**)myalloc(sizeof(char*)*charheight); + fcharlist->thechar = (outchr**)myalloc(sizeof(outchr*)*charheight); fcharlist->next = NULL; for (row=0;row<charheight;row++) { - fcharlist->thechar[row] = (char*)myalloc(sizeof(char)); + fcharlist->thechar[row] = (outchr*)myalloc(sizeof(outchr)); fcharlist->thechar[row][0] = '\0'; } for (theord=' ';theord<='~';theord++) { - readfontchar(fontfile,theord,fileline,maxlen); + readfontchar(fontfile,theord); } for (theord=0;theord<=6;theord++) { - readfontchar(fontfile,deutsch[theord],fileline,maxlen); + readfontchar(fontfile,deutsch[theord]); } while (myfgets(fileline,maxlen+1,fontfile)==NULL?0: sscanf(fileline,"%li",&theord)==1) { - readfontchar(fontfile,theord,fileline,maxlen); + readfontchar(fontfile,theord); } Zclose(fontfile); - free(fileline); } @@ -1232,9 +1308,9 @@ void linealloc() { int row; - outputline = (char**)myalloc(sizeof(char*)*charheight); + outputline = (outchr**)myalloc(sizeof(outchr*)*charheight); for (row=0;row<charheight;row++) { - outputline[row] = (char*)myalloc(sizeof(char)*(outlinelenlimit+1)); + outputline[row] = (outchr*)myalloc(sizeof(outchr)*(outlinelenlimit+1)); } inchrlinelenlimit = outputwidth*4+100; inchrline = (inchr*)myalloc(sizeof(inchr)*(inchrlinelenlimit+1)); @@ -1267,7 +1343,7 @@ inchr c; currchar = charptr->thechar; } previouscharwidth = currcharwidth; - currcharwidth = MYSTRLEN(currchar[0]); + currcharwidth = STRLEN(currchar[0]); } @@ -1387,13 +1463,13 @@ int smushamt() maxsmush = currcharwidth; for (row=0;row<charheight;row++) { if (right2left) { - for (charbd=MYSTRLEN(currchar[row]); + for (charbd=STRLEN(currchar[row]); ch1=currchar[row][charbd],(charbd>0&&(!ch1||ch1==' '));charbd--) ; for (linebd=0;ch2=outputline[row][linebd],ch2==' ';linebd++) ; amt = linebd+currcharwidth-1-charbd; } else { - for (linebd=MYSTRLEN(outputline[row]); + for (linebd=STRLEN(outputline[row]); ch1 = outputline[row][linebd],(linebd>0&&(!ch1||ch1==' '));linebd--) ; for (charbd=0;ch2=currchar[row][charbd],ch2==' ';charbd++) ; amt = charbd+outlinelen-1-linebd; @@ -1427,7 +1503,7 @@ int addchar(c) inchr c; { int smushamount,row,k,column,offset; - char *templine; + outchr *templine; getletter(c); smushamount = smushamt(); @@ -1437,16 +1513,16 @@ inchr c; } offset = 0; - templine = (char*)myalloc(sizeof(char)*(outlinelenlimit+1)); + templine = (outchr*)myalloc(sizeof(outchr)*(outlinelenlimit+1)); for (row=0;row<charheight;row++) { if (right2left) { - strcpy(templine,currchar[row]); + STRCPY(templine,currchar[row]); for (k=0;k<smushamount;k++) { templine[currcharwidth-smushamount+k] = smushem(templine[currcharwidth-smushamount+k],outputline[row][k]); } - strcat(templine,outputline[row]+smushamount); - strcpy(outputline[row],templine); + STRCAT(templine,outputline[row]+smushamount); + STRCPY(outputline[row],templine); } else { for (k=0;k<smushamount;k++) { @@ -1458,11 +1534,11 @@ inchr c; outputline[row][column] = smushem(outputline[row][column],currchar[row][k + offset]); } - strcat(outputline[row],currchar[row]+smushamount); + STRCAT(outputline[row],currchar[row]+smushamount); } } free(templine); - outlinelen = MYSTRLEN(outputline[0]); + outlinelen = STRLEN(outputline[0]); inchrline[inchrlinelen++] = c; return 1; } @@ -1482,11 +1558,16 @@ inchr c; ****************************************************************************/ void putstring(string) -char *string; +outchr *string; { int i,len; + char c[10]; +#ifdef TLF_FONTS + size_t size; + wchar_t wc[2]; +#endif - len = MYSTRLEN(string); + len = STRLEN(string); if (outputwidth>1) { if (len>outputwidth-1) { len = outputwidth-1; @@ -1498,7 +1579,20 @@ char *string; } } for (i=0;i<len;i++) { +#ifdef TLF_FONTS + wc[0] = string[i]; + wc[1] = 0; + size = wchar_to_utf8(wc,1,c,10,0); + if(size==1) { + if(c[0]==hardblank) { + c[0] = ' '; + } + } + c[size] = 0; + printf("%s",c); +#else putchar(string[i]==hardblank?' ':string[i]); +#endif } putchar('\n'); } @@ -1942,6 +2036,10 @@ char *argv[]; wordbreakmode = 0; last_was_eol_flag = 0; +#ifdef TLF_FONTS + toiletfont = 0; +#endif + while ((c = getinchr())!=EOF) { if (c=='\n'&¶graphflag&&!last_was_eol_flag) { @@ -2003,7 +2101,7 @@ char *argv[]; else if (outlinelen==0) { for (i=0;i<charheight;i++) { if (right2left && outputwidth>1) { - putstring(currchar[i]+MYSTRLEN(currchar[i])-outlinelenlimit); + putstring(currchar[i]+STRLEN(currchar[i])-outlinelenlimit); } else { putstring(currchar[i]); diff --git a/utf8.c b/utf8.c new file mode 100644 index 0000000..b4d12af --- /dev/null +++ b/utf8.c @@ -0,0 +1,323 @@ +#ifdef TLF_FONTS +/* + * Copyright (c) 2007 Alexey Vatchenko <av@bsdua.org> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <wchar.h> +#include <arpa/inet.h> /* for htonl() */ + +#include "utf8.h" + +#define _NXT 0x80 +#define _SEQ2 0xc0 +#define _SEQ3 0xe0 +#define _SEQ4 0xf0 +#define _SEQ5 0xf8 +#define _SEQ6 0xfc + +#define _BOM 0xfeff + +static int __wchar_forbitten(wchar_t sym); +static int __utf8_forbitten(u_char octet); + +static int +__wchar_forbitten(wchar_t sym) +{ + + /* Surrogate pairs */ + if (sym >= 0xd800 && sym <= 0xdfff) + return (-1); + + return (0); +} + +static int +__utf8_forbitten(u_char octet) +{ + + switch (octet) { + case 0xc0: + case 0xc1: + case 0xf5: + case 0xff: + return (-1); + } + + return (0); +} + +/* + * DESCRIPTION + * This function translates UTF-8 string into UCS-4 string (all symbols + * will be in local machine byte order). + * + * It takes the following arguments: + * in - input UTF-8 string. It can be null-terminated. + * insize - size of input string in bytes. + * out - result buffer for UCS-4 string. If out is NULL, + * function returns size of result buffer. + * outsize - size of out buffer in wide characters. + * + * RETURN VALUES + * The function returns size of result buffer (in wide characters). + * Zero is returned in case of error. + * + * CAVEATS + * 1. If UTF-8 string contains zero symbols, they will be translated + * as regular symbols. + * 2. If UTF8_IGNORE_ERROR or UTF8_SKIP_BOM flag is set, sizes may vary + * when `out' is NULL and not NULL. It's because of special UTF-8 + * sequences which may result in forbitten (by RFC3629) UNICODE + * characters. So, the caller must check return value every time and + * not prepare buffer in advance (\0 terminate) but after calling this + * function. + */ +size_t +utf8_to_wchar(const char *in, size_t insize, wchar_t *out, size_t outsize, + int flags) +{ + u_char *p, *lim; + wchar_t *wlim, high; + size_t n, total, i, n_bits; + + if (in == NULL || insize == 0 || (outsize == 0 && out != NULL)) + return (0); + + total = 0; + p = (u_char *)in; + lim = p + insize; + wlim = out + outsize; + + for (; p < lim; p += n) { + if (__utf8_forbitten(*p) != 0 && + (flags & UTF8_IGNORE_ERROR) == 0) + return (0); + + /* + * Get number of bytes for one wide character. + */ + n = 1; /* default: 1 byte. Used when skipping bytes. */ + if ((*p & 0x80) == 0) + high = (wchar_t)*p; + else if ((*p & 0xe0) == _SEQ2) { + n = 2; + high = (wchar_t)(*p & 0x1f); + } else if ((*p & 0xf0) == _SEQ3) { + n = 3; + high = (wchar_t)(*p & 0x0f); + } else if ((*p & 0xf8) == _SEQ4) { + n = 4; + high = (wchar_t)(*p & 0x07); + } else if ((*p & 0xfc) == _SEQ5) { + n = 5; + high = (wchar_t)(*p & 0x03); + } else if ((*p & 0xfe) == _SEQ6) { + n = 6; + high = (wchar_t)(*p & 0x01); + } else { + if ((flags & UTF8_IGNORE_ERROR) == 0) + return (0); + continue; + } + + /* does the sequence header tell us truth about length? */ + if (lim - p <= n - 1) { + if ((flags & UTF8_IGNORE_ERROR) == 0) + return (0); + n = 1; + continue; /* skip */ + } + + /* + * Validate sequence. + * All symbols must have higher bits set to 10xxxxxx + */ + if (n > 1) { + for (i = 1; i < n; i++) { + if ((p[i] & 0xc0) != _NXT) + break; + } + if (i != n) { + if ((flags & UTF8_IGNORE_ERROR) == 0) + return (0); + n = 1; + continue; /* skip */ + } + } + + total++; + + if (out == NULL) + continue; + + if (out >= wlim) + return (0); /* no space left */ + + *out = 0; + n_bits = 0; + for (i = 1; i < n; i++) { + *out |= (wchar_t)(p[n - i] & 0x3f) << n_bits; + n_bits += 6; /* 6 low bits in every byte */ + } + *out |= high << n_bits; + + if (__wchar_forbitten(*out) != 0) { + if ((flags & UTF8_IGNORE_ERROR) == 0) + return (0); /* forbitten character */ + else { + total--; + out--; + } + } else if (*out == _BOM && (flags & UTF8_SKIP_BOM) != 0) { + total--; + out--; + } + + out++; + } + + return (total); +} + +/* + * DESCRIPTION + * This function translates UCS-4 symbols (given in local machine + * byte order) into UTF-8 string. + * + * It takes the following arguments: + * in - input unicode string. It can be null-terminated. + * insize - size of input string in wide characters. + * out - result buffer for utf8 string. If out is NULL, + * function returns size of result buffer. + * outsize - size of result buffer. + * + * RETURN VALUES + * The function returns size of result buffer (in bytes). Zero is returned + * in case of error. + * + * CAVEATS + * If UCS-4 string contains zero symbols, they will be translated + * as regular symbols. + */ +size_t +wchar_to_utf8(const wchar_t *in, size_t insize, char *out, size_t outsize, + int flags) +{ + wchar_t *w, *wlim, ch; + u_char *p, *lim, *oc; + size_t total, n; + + if (in == NULL || insize == 0 || (outsize == 0 && out != NULL)) + return (0); + + w = (wchar_t *)in; + wlim = w + insize; + p = (u_char *)out; + lim = p + outsize; + total = 0; + for (; w < wlim; w++) { + if (__wchar_forbitten(*w) != 0) { + if ((flags & UTF8_IGNORE_ERROR) == 0) + return (0); + else + continue; + } + + if (*w == _BOM && (flags & UTF8_SKIP_BOM) != 0) + continue; + + if (*w < 0) { + if ((flags & UTF8_IGNORE_ERROR) == 0) + return (0); + continue; + } else if (*w <= 0x0000007f) + n = 1; + else if (*w <= 0x000007ff) + n = 2; + else if (*w <= 0x0000ffff) + n = 3; + else if (*w <= 0x001fffff) + n = 4; + else if (*w <= 0x03ffffff) + n = 5; + else /* if (*w <= 0x7fffffff) */ + n = 6; + + total += n; + + if (out == NULL) + continue; + + if (lim - p <= n - 1) + return (0); /* no space left */ + + /* make it work under different endians */ + ch = htonl(*w); + oc = (u_char *)&ch; + switch (n) { + case 1: + *p = oc[3]; + break; + + case 2: + p[1] = _NXT | (oc[3] & 0x3f); + p[0] = _SEQ2 | (oc[3] >> 6) | ((oc[2] & 0x07) << 2); + break; + + case 3: + p[2] = _NXT | (oc[3] & 0x3f); + p[1] = _NXT | (oc[3] >> 6) | ((oc[2] & 0x0f) << 2); + p[0] = _SEQ3 | ((oc[2] & 0xf0) >> 4); + break; + + case 4: + p[3] = _NXT | (oc[3] & 0x3f); + p[2] = _NXT | (oc[3] >> 6) | ((oc[2] & 0x0f) << 2); + p[1] = _NXT | ((oc[2] & 0xf0) >> 4) | + ((oc[1] & 0x03) << 4); + p[0] = _SEQ4 | ((oc[1] & 0x1f) >> 2); + break; + + case 5: + p[4] = _NXT | (oc[3] & 0x3f); + p[3] = _NXT | (oc[3] >> 6) | ((oc[2] & 0x0f) << 2); + p[2] = _NXT | ((oc[2] & 0xf0) >> 4) | + ((oc[1] & 0x03) << 4); + p[1] = _NXT | (oc[1] >> 2); + p[0] = _SEQ5 | (oc[0] & 0x03); + break; + + case 6: + p[5] = _NXT | (oc[3] & 0x3f); + p[4] = _NXT | (oc[3] >> 6) | ((oc[2] & 0x0f) << 2); + p[3] = _NXT | (oc[2] >> 4) | ((oc[1] & 0x03) << 4); + p[2] = _NXT | (oc[1] >> 2); + p[1] = _NXT | (oc[0] & 0x3f); + p[0] = _SEQ6 | ((oc[0] & 0x40) >> 6); + break; + } + + /* + * NOTE: do not check here for forbitten UTF-8 characters. + * They cannot appear here because we do proper convertion. + */ + + p += n; + } + + return (total); +} +#endif /* TLF_FONTS */ diff --git a/utf8.h b/utf8.h new file mode 100644 index 0000000..0631b8a --- /dev/null +++ b/utf8.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2007 Alexey Vatchenko <av@bsdua.org> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * utf8: implementation of UTF-8 charset encoding (RFC3629). + */ +#ifndef _UTF8_H_ +#define _UTF8_H_ + +#include <sys/types.h> + +#include <wchar.h> + +#define UTF8_IGNORE_ERROR 0x01 +#define UTF8_SKIP_BOM 0x02 + +__BEGIN_DECLS + +size_t utf8_to_wchar(const char *in, size_t insize, wchar_t *out, + size_t outsize, int flags); +size_t wchar_to_utf8(const wchar_t *in, size_t insize, char *out, + size_t outsize, int flags); + +__END_DECLS + +#endif /* !_UTF8_H_ */