- Subject: [slang-users] system wchar functions support?
- From: Miroslav Lichvar <mlichvar@xxxxxxxxxx>
- Date: Wed, 23 Apr 2008 19:09:18 +0200
Hi,
I was wondering whether there is any interest in using the libc functions for
handling wide characters. The main benefit would be support for multibyte
encodings other than UTF-8. It could be controlled by a configure
option.
The attached patch demonstrates what I mean. It is not
complete, but it allowed me to successfully run some S-Lang
applications in a non-UTF-8 environment.
Comments?
--
Miroslav Lichvar
Index: slang.h
===================================================================
--- slang.h (revision 233)
+++ slang.h (working copy)
@@ -47,6 +47,7 @@
# ifndef __unix__
# define __unix__ 1
# endif
+# define SYSTEM_WCHAR 1
#endif
#if defined(__APPLE__)
@@ -282,6 +283,7 @@
/*{{{ UTF-8 and Wide Char support */
+#ifndef SYSTEM_WCHAR
#if SLANG_SIZEOF_INT == 4
typedef unsigned int SLwchar_Type;
# define SLANG_WCHAR_TYPE SLANG_UINT_TYPE
@@ -293,10 +295,18 @@
# define SLang_push_wchar SLang_push_ulong
# define SLang_pop_wchar SLang_pop_ulong
#endif
+#else
+# define SLANG_WCHAR_TYPE SLANG_UINT_TYPE
+typedef wchar_t SLwchar_Type;
+#endif
typedef unsigned char SLuchar_Type;
/* Maximum multi-byte len for a unicode wchar */
-#define SLUTF8_MAX_MBLEN 6
+#ifndef SYSTEM_WCHAR
+# define SLUTF8_MAX_MBLEN 6
+#else
+# define SLUTF8_MAX_MBLEN MB_CUR_MAX
+#endif
/* If argument is 1, force UTF-8 mode on. If argument is 0, force mode off.
* If -1, determine mode from the locale.
Index: slinclud.h
===================================================================
--- slinclud.h (revision 233)
+++ slinclud.h (working copy)
@@ -30,4 +30,8 @@
# include <memory.h>
#endif
+#ifdef SYSTEM_WCHAR
+# include <wchar.h>
+#endif
+
#endif /* _SLANG_INCLUDE_H_ */
Index: slutf8.c
===================================================================
--- slutf8.c (revision 233)
+++ slutf8.c (working copy)
@@ -24,6 +24,8 @@
#include "slang.h"
#include "_slang.h"
+#ifndef SYSTEM_WCHAR
+
static unsigned char Len_Map[256] =
{
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* - 31 */
@@ -115,9 +117,11 @@
}
return w;
}
+#endif
unsigned char *SLutf8_skip_char (unsigned char *s, unsigned char *smax)
{
+#ifndef SYSTEM_WCHAR
unsigned int len;
if (s >= smax)
@@ -134,6 +138,18 @@
return s + 1;
return s + len;
+#else
+ size_t len;
+
+ if (s >= smax)
+ return s;
+
+ len = mbrlen(s, smax - s, NULL);
+ if (len <= 1)
+ return s + 1;
+
+ return s + len;
+#endif
}
SLuchar_Type *SLutf8_skip_chars (SLuchar_Type *s, SLuchar_Type *smax,
@@ -145,6 +161,7 @@
n = 0;
while ((n < num) && (s < smax))
{
+#ifndef SYSTEM_WCHAR
unsigned int len = Len_Map[*s];
if (len <= 1)
@@ -167,10 +184,19 @@
n++;
continue;
}
-
+#else
+ unsigned int len;
+
+ len = s - SLutf8_skip_char(s, smax);
+#endif
if (ignore_combining)
{
+#ifndef SYSTEM_WCHAR
SLwchar_Type w = fast_utf8_decode (s, len);
+#else
+ SLwchar_Type w;
+ SLutf8_decode (s, smax, &w, NULL);
+#endif
if (0 != SLwchar_wcwidth (w))
n++;
s += len;
@@ -213,6 +239,7 @@
n = 0;
while ((n < num) && (s > smin))
{
+#ifndef SYSTEM_WCHAR
unsigned char ch;
unsigned int dn;
@@ -266,6 +293,18 @@
smax = s;
}
+#else
+ SLwchar_Type w;
+
+ s--;
+ if (NULL != SLutf8_decode(s, smax, &w, NULL))
+ {
+ if ((ignore_combining == 0)
+ || (0 != SLwchar_wcwidth (w)))
+ n++;
+ smax = s;
+ }
+#endif
}
if (dnum != NULL)
@@ -313,6 +352,7 @@
unsigned char *SLutf8_decode (unsigned char *u, unsigned char *umax,
SLwchar_Type *wp, unsigned int *nconsumedp)
{
+#ifndef SYSTEM_WCHAR
unsigned int len;
unsigned char ch;
SLwchar_Type w;
@@ -362,6 +402,31 @@
return NULL;
return u + len;
+#else
+ size_t len;
+
+ if (u >= umax)
+ {
+ *wp = 0;
+ if (nconsumedp != NULL)
+ *nconsumedp = 0;
+ return NULL;
+ }
+
+ len = mbrtowc(wp, u, umax - u, NULL);
+ if (len <= 0)
+ {
+ *wp = 0;
+ if (nconsumedp != NULL)
+ *nconsumedp = 1;
+ return NULL;
+ }
+
+ if (nconsumedp != NULL)
+ *nconsumedp = len;
+
+ return u + len;
+#endif
}
@@ -373,6 +438,7 @@
*/
SLuchar_Type *SLutf8_encode (SLwchar_Type w, SLuchar_Type *u, unsigned int ulen)
{
+#ifndef SYSTEM_WCHAR
SLuchar_Type *umax = u + ulen;
/* U-00000000 - U-0000007F: 0xxxxxxx */
@@ -451,6 +517,18 @@
*u++ = (w & 0x3F)|0x80;
return u;
+#else
+ int len;
+
+ if (ulen < MB_CUR_MAX)
+ return NULL;
+
+ len = wctomb(u, w);
+ if (len < 0)
+ return NULL;
+
+ return u + len;
+#endif
}
/* Like SLutf8_encode, but null terminates the result.
Index: slcommon.c
===================================================================
--- slcommon.c (revision 233)
+++ slcommon.c (working copy)
@@ -90,6 +90,7 @@
#ifdef HAVE_NL_LANGINFO_CODESET
locale = nl_langinfo (CODESET);
if ((locale != NULL) && (*locale))
+# ifndef SYSTEM_WCHAR
{
if ((0 == strcmp (locale, "UTF-8"))
|| (0 == strcmp (locale, "utf-8"))
@@ -99,6 +100,9 @@
return 0;
}
+# else
+ return 1;
+# endif
#endif
locale = setlocale (LC_ALL, "");
Index: slsmg.c
===================================================================
--- slsmg.c (revision 233)
+++ slsmg.c (working copy)
@@ -26,6 +26,10 @@
#include "slang.h"
#include "_slang.h"
+#ifdef HAVE_LANGINFO_H
+# include <langinfo.h>
+#endif
+
typedef struct
{
int n; /* number of chars written last time */
@@ -177,6 +181,21 @@
tt_Screen_Rows = &SLtt_Screen_Rows;
tt_Screen_Cols = &SLtt_Screen_Cols;
tt_unicode_ok = &_pSLtt_UTF8_Mode;
+#ifdef HAVE_NL_LANGINFO_CODESET
+#ifdef SYSTEM_WCHAR
+ if ((tt_unicode_ok != NULL) && (*tt_unicode_ok > 0))
+ {
+ char *locale;
+ locale = nl_langinfo (CODESET);
+ if ((locale != NULL) && (*locale) &&
+ (!((0 == strcmp (locale, "UTF-8"))
+ || (0 == strcmp (locale, "utf-8"))
+ || (0 == strcmp (locale, "utf8"))
+ || (0 == strcmp (locale, "UTF8")))))
+ tt_unicode_ok = NULL;
+ }
+#endif
+#endif
tt_normal_video = SLtt_normal_video;
tt_goto_rc = SLtt_goto_rc;
Index: slwclut.c
===================================================================
--- slwclut.c (revision 233)
+++ slwclut.c (working copy)
@@ -125,6 +125,8 @@
return 0;
}
+static int is_of_class (int char_class, SLwchar_Type w);
+
static void add_char_class (SLwchar_Lut_Type *r, unsigned char char_class)
{
unsigned int i;
@@ -135,7 +137,11 @@
for (i = 0; i < 256; i++)
{
+#ifndef SYSTEM_WCHAR
if (SL_CLASSIFICATION_LOOKUP(i) & char_class)
+#else
+ if (is_of_class(char_class, i))
+#endif
lut[i] = 1;
}
}
@@ -149,7 +155,11 @@
return r->lut[wch];
if (r->char_class
+#ifndef SYSTEM_WCHAR
&& (SL_CLASSIFICATION_LOOKUP(wch) & r->char_class))
+#else
+ && (is_of_class(r->char_class, wch)))
+#endif
return 1;
/* FIXME. I should use a binary search for this... */
Index: slischar.c
===================================================================
--- slischar.c (revision 233)
+++ slischar.c (working copy)
@@ -26,7 +26,9 @@
#include "slang.h"
#include "_slang.h"
-#define DEFINE_PSLWC_CLASSIFICATION_TABLE
+#ifndef SYSTEM_WCHAR
+# define DEFINE_PSLWC_CLASSIFICATION_TABLE
+#endif
#include "slischar.h"
#define MODE_VARIABLE _pSLinterp_UTF8_Mode
@@ -34,7 +36,11 @@
int SLwchar_islower (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_LOWER;
+#else
+ return iswlower(ch);
+#endif
if (ch < 256)
return islower ((unsigned char) ch);
@@ -45,7 +51,11 @@
int SLwchar_isupper (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_UPPER;
+#else
+ return iswupper(ch);
+#endif
if (ch < 256)
return isupper ((unsigned char) ch);
@@ -56,7 +66,11 @@
int SLwchar_isalpha (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_ALPHA;
+#else
+ return iswalpha(ch);
+#endif
if (ch < 256)
return isalpha ((unsigned char) ch);
@@ -67,7 +81,11 @@
int SLwchar_isxdigit (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_XDIGIT;
+#else
+ return iswxdigit(ch);
+#endif
if (ch < 256)
return isxdigit ((unsigned char) ch);
@@ -78,7 +96,11 @@
int SLwchar_isspace (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_SPACE;
+#else
+ return iswspace(ch);
+#endif
if (ch < 256)
return isspace ((unsigned char) ch);
@@ -89,7 +111,11 @@
int SLwchar_isblank (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_BLANK;
+#else
+ return iswblank(ch);
+#endif
return (ch == ' ') || (ch == '\t');
}
@@ -97,7 +123,11 @@
int SLwchar_iscntrl (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_CNTRL;
+#else
+ return iswcntrl(ch);
+#endif
if (ch < 256)
return iscntrl ((unsigned char) ch);
@@ -108,7 +138,11 @@
int SLwchar_isprint (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_PRINT;
+#else
+ return iswprint(ch);
+#endif
if (ch < 256)
return isprint ((unsigned char) ch);
@@ -124,11 +158,15 @@
int SLwchar_isdigit (SLwchar_Type ch)
{
+#ifndef SYSTEM_WCHAR
if (MODE_VARIABLE)
{
unsigned char t = SL_CLASSIFICATION_LOOKUP(ch);
return DIGITCLASS(t);
}
+#else
+ return iswdigit(ch);
+#endif
if ((unsigned)ch < 256)
return isdigit ((unsigned char) ch);
@@ -138,11 +176,15 @@
int SLwchar_isgraph (SLwchar_Type ch)
{
+#ifndef SYSTEM_WCHAR
if (MODE_VARIABLE)
{
unsigned char t = SL_CLASSIFICATION_LOOKUP(ch);
return GRAPHCLASS(t);
}
+#else
+ return iswgraph(ch);
+#endif
if ((unsigned)ch < 256)
return isgraph ((unsigned char) ch);
@@ -153,10 +195,14 @@
int SLwchar_isalnum (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
{
unsigned char t = SL_CLASSIFICATION_LOOKUP(ch);
return ALNUMCLASS(t);
}
+#else
+ return iswalnum(ch);
+#endif
if ((unsigned)ch < 256)
return isalnum ((unsigned char) ch);
@@ -168,10 +214,14 @@
int SLwchar_ispunct (SLwchar_Type ch)
{
if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
{
unsigned char t = SL_CLASSIFICATION_LOOKUP(ch);
return PUNCTCLASS(t);
}
+#else
+ return iswpunct(ch);
+#endif
if ((unsigned)ch < 256)
return ispunct ((unsigned char) ch);
Index: slwcwidth.c
===================================================================
--- slwcwidth.c (revision 233)
+++ slwcwidth.c (working copy)
@@ -24,12 +24,15 @@
#include "slang.h"
#include "_slang.h"
-#define DEFINE_PSLWC_WIDTH_TABLE
-#include "slwcwidth.h"
+#ifndef SYSTEM_WCHAR
+# define DEFINE_PSLWC_WIDTH_TABLE
+# include "slwcwidth.h"
+#endif
static int Width_Flags = 0;
int SLwchar_wcwidth (SLwchar_Type ch)
{
+#ifndef SYSTEM_WCHAR
int w;
SL_WIDTH_ALOOKUP(w,ch);
@@ -48,6 +51,12 @@
w = 1;
}
return w;
+#else
+ if (Width_Flags & SLWCWIDTH_SINGLE_WIDTH)
+ return 1;
+
+ return wcwidth(ch);
+#endif
}
int SLwchar_set_wcwidth_flags (int flags)
[2008 date index]
[2008 thread index]
[Thread Prev] [Thread Next]
[Date Prev] [Date Next]