slang-users mailing list

[2008 Date Index] [2008 Thread Index] [Other years]
[Thread Prev] [Thread Next]      [Date Prev] [Date Next]

[slang-users] system wchar functions support?


Hi,

I was wondering if there is any interest in using libc functions for
handling wide characters? The main benefit would be support for
multibyte encodings other than UTF-8. It could be controlled by a
configure option.

The attached patch should demonstrate what I mean. It's not
complete, but it allowed me to successfully run some S-Lang
applications in a non-UTF-8 environment.

Comments?

-- 
Miroslav Lichvar
Index: slang.h
===================================================================
--- slang.h	(revision 233)
+++ slang.h	(working copy)
@@ -47,6 +47,7 @@
 # ifndef __unix__
 #  define __unix__ 1
 # endif
+# define SYSTEM_WCHAR 1
 #endif
 
 #if defined(__APPLE__)
@@ -282,6 +283,7 @@
 
 /*{{{ UTF-8 and Wide Char support */
 
+#ifndef SYSTEM_WCHAR
 #if SLANG_SIZEOF_INT == 4
 typedef unsigned int SLwchar_Type;
 # define SLANG_WCHAR_TYPE SLANG_UINT_TYPE
@@ -293,10 +295,18 @@
 # define SLang_push_wchar SLang_push_ulong
 # define SLang_pop_wchar SLang_pop_ulong
 #endif
+#else
+# define SLANG_WCHAR_TYPE SLANG_UINT_TYPE
+typedef wchar_t SLwchar_Type;
+#endif
 typedef unsigned char SLuchar_Type;
 
 /* Maximum multi-byte len for a unicode wchar */
-#define SLUTF8_MAX_MBLEN	6
+#ifndef SYSTEM_WCHAR
+# define SLUTF8_MAX_MBLEN	6
+#else
+# define SLUTF8_MAX_MBLEN	MB_CUR_MAX
+#endif
 
 /* If argument is 1, force UTF-8 mode on.  If argument is 0, force mode off.
  * If -1, determine mode from the locale.
Index: slinclud.h
===================================================================
--- slinclud.h	(revision 233)
+++ slinclud.h	(working copy)
@@ -30,4 +30,8 @@
 # include <memory.h>
 #endif
 
+#ifdef SYSTEM_WCHAR
+# include <wchar.h>
+#endif
+
 #endif				       /* _SLANG_INCLUDE_H_ */
Index: slutf8.c
===================================================================
--- slutf8.c	(revision 233)
+++ slutf8.c	(working copy)
@@ -24,6 +24,8 @@
 #include "slang.h"
 #include "_slang.h"
 
+#ifndef SYSTEM_WCHAR
+
 static unsigned char Len_Map[256] =
 {
   0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  /* - 31 */
@@ -115,9 +117,11 @@
      }
    return w;
 }
+#endif
 
 unsigned char *SLutf8_skip_char (unsigned char *s, unsigned char *smax)
 {
+#ifndef SYSTEM_WCHAR
    unsigned int len;
    
    if (s >= smax)
@@ -134,6 +138,18 @@
      return s + 1;
    
    return s + len;
+#else
+   size_t len;
+
+   if (s >= smax)
+     return s;
+
+   len = mbrlen(s, smax - s, NULL);
+   if (len <= 1)
+     return s + 1;
+
+   return s + len;
+#endif
 }
 
 SLuchar_Type *SLutf8_skip_chars (SLuchar_Type *s, SLuchar_Type *smax,
@@ -145,6 +161,7 @@
    n = 0;
    while ((n < num) && (s < smax))
      {
+#ifndef SYSTEM_WCHAR
 	unsigned int len = Len_Map[*s];
 
 	if (len <= 1)
@@ -167,10 +184,19 @@
 	     n++;
 	     continue;
 	  }
-	
+#else
+	unsigned int len;
+
+	len = s - SLutf8_skip_char(s, smax);
+#endif
 	if (ignore_combining)
 	  {
+#ifndef SYSTEM_WCHAR
 	     SLwchar_Type w = fast_utf8_decode (s, len);
+#else
+	     SLwchar_Type w;
+	     SLutf8_decode (s, smax, &w, NULL);
+#endif
 	     if (0 != SLwchar_wcwidth (w))
 	       n++;
 	     s += len;
@@ -213,6 +239,7 @@
    n = 0;
    while ((n < num) && (s > smin))
      {
+#ifndef SYSTEM_WCHAR
 	unsigned char ch;
 	unsigned int dn;
 
@@ -266,6 +293,18 @@
 
 	     smax = s;
 	  }
+#else
+	SLwchar_Type w;
+
+	s--;
+	if (NULL != SLutf8_decode(s, smax, &w, NULL))
+	  {
+	     if ((ignore_combining == 0) 
+		 || (0 != SLwchar_wcwidth (w)))
+	       n++;
+	     smax = s;
+	  }
+#endif
      }
 
    if (dnum != NULL)
@@ -313,6 +352,7 @@
 unsigned char *SLutf8_decode (unsigned char *u, unsigned char *umax,
 			      SLwchar_Type *wp, unsigned int *nconsumedp)
 {
+#ifndef SYSTEM_WCHAR
    unsigned int len;
    unsigned char ch;
    SLwchar_Type w;
@@ -362,6 +402,31 @@
      return NULL;
 
    return u + len;
+#else
+   size_t len;
+
+   if (u >= umax)
+     {
+       *wp = 0;
+	if (nconsumedp != NULL)
+	  *nconsumedp = 0;
+	return NULL;
+     }
+
+   len = mbrtowc(wp, u, umax - u, NULL);
+   if (len <= 0)
+     {
+       *wp = 0;
+       if (nconsumedp != NULL)
+	 *nconsumedp = 1;
+       return NULL;
+     }
+
+   if (nconsumedp != NULL)
+     *nconsumedp = len;
+
+   return u + len;
+#endif
 }
 
 
@@ -373,6 +438,7 @@
  */
 SLuchar_Type *SLutf8_encode (SLwchar_Type w, SLuchar_Type *u, unsigned int ulen)
 {
+#ifndef SYSTEM_WCHAR
    SLuchar_Type *umax = u + ulen;
    
    /*   U-00000000 - U-0000007F: 0xxxxxxx */
@@ -451,6 +517,18 @@
              *u++ = (w & 0x3F)|0x80;
    
    return u;
+#else
+   int len;
+
+   if (ulen < MB_CUR_MAX)
+     return NULL;
+
+   len = wctomb(u, w);
+   if (len < 0)
+     return NULL;
+
+   return u + len;
+#endif
 }
 
 /* Like SLutf8_encode, but null terminates the result.  
Index: slcommon.c
===================================================================
--- slcommon.c	(revision 233)
+++ slcommon.c	(working copy)
@@ -90,6 +90,7 @@
 #ifdef HAVE_NL_LANGINFO_CODESET
    locale = nl_langinfo (CODESET);
    if ((locale != NULL) && (*locale))
+# ifndef SYSTEM_WCHAR
      {
 	if ((0 == strcmp (locale, "UTF-8"))
 	    || (0 == strcmp (locale, "utf-8"))
@@ -99,6 +100,9 @@
 	
 	return 0;
      }
+# else
+     return 1;
+# endif
 #endif
 
    locale = setlocale (LC_ALL, "");
Index: slsmg.c
===================================================================
--- slsmg.c	(revision 233)
+++ slsmg.c	(working copy)
@@ -26,6 +26,10 @@
 #include "slang.h"
 #include "_slang.h"
 
+#ifdef HAVE_LANGINFO_H
+# include <langinfo.h>
+#endif
+
 typedef struct
   {
      int n;                    /* number of chars written last time */
@@ -177,6 +181,21 @@
    tt_Screen_Rows = &SLtt_Screen_Rows;
    tt_Screen_Cols = &SLtt_Screen_Cols;
    tt_unicode_ok = &_pSLtt_UTF8_Mode;
+#ifdef HAVE_NL_LANGINFO_CODESET
+#ifdef SYSTEM_WCHAR
+   if ((tt_unicode_ok != NULL) && (*tt_unicode_ok > 0))
+   {
+      char *locale;
+      locale = nl_langinfo (CODESET);
+      if ((locale != NULL) && (*locale) &&
+	 (!((0 == strcmp (locale, "UTF-8"))
+	    || (0 == strcmp (locale, "utf-8"))
+	    || (0 == strcmp (locale, "utf8"))
+	    || (0 == strcmp (locale, "UTF8")))))
+	 tt_unicode_ok = NULL;
+   }
+#endif
+#endif
    
    tt_normal_video = SLtt_normal_video;
    tt_goto_rc = SLtt_goto_rc;
Index: slwclut.c
===================================================================
--- slwclut.c	(revision 233)
+++ slwclut.c	(working copy)
@@ -125,6 +125,8 @@
    return 0;
 }
 
+static int is_of_class (int char_class, SLwchar_Type w);
+
 static void add_char_class (SLwchar_Lut_Type *r, unsigned char char_class)
 {
    unsigned int i;
@@ -135,7 +137,11 @@
 
    for (i = 0; i < 256; i++)
      {
+#ifndef SYSTEM_WCHAR
 	if (SL_CLASSIFICATION_LOOKUP(i) & char_class)
+#else
+	if (is_of_class(char_class, i))
+#endif
 	  lut[i] = 1;
      }
 }
@@ -149,7 +155,11 @@
      return r->lut[wch];
    
    if (r->char_class
+#ifndef SYSTEM_WCHAR
        && (SL_CLASSIFICATION_LOOKUP(wch) & r->char_class))
+#else
+       && (is_of_class(r->char_class, wch)))
+#endif
      return 1;
 
    /* FIXME.  I should use a binary search for this... */
Index: slischar.c
===================================================================
--- slischar.c	(revision 233)
+++ slischar.c	(working copy)
@@ -26,7 +26,9 @@
 #include "slang.h"
 #include "_slang.h"
 
-#define DEFINE_PSLWC_CLASSIFICATION_TABLE
+#ifndef SYSTEM_WCHAR
+# define DEFINE_PSLWC_CLASSIFICATION_TABLE
+#endif
 #include "slischar.h"
 
 #define MODE_VARIABLE _pSLinterp_UTF8_Mode
@@ -34,7 +36,11 @@
 int SLwchar_islower (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_LOWER;
+#else
+     return iswlower(ch);
+#endif
    
    if (ch < 256)
      return islower ((unsigned char) ch);
@@ -45,7 +51,11 @@
 int SLwchar_isupper (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_UPPER;
+#else
+     return iswupper(ch);
+#endif
    
    if (ch < 256)
      return isupper ((unsigned char) ch);
@@ -56,7 +66,11 @@
 int SLwchar_isalpha (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_ALPHA;
+#else
+     return iswalpha(ch);
+#endif
    
    if (ch < 256)
      return isalpha ((unsigned char) ch);
@@ -67,7 +81,11 @@
 int SLwchar_isxdigit (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_XDIGIT;
+#else
+     return iswxdigit(ch);
+#endif
    
    if (ch < 256)
      return isxdigit ((unsigned char) ch);
@@ -78,7 +96,11 @@
 int SLwchar_isspace (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_SPACE;
+#else
+     return iswspace(ch);
+#endif
    
    if (ch < 256)
      return isspace ((unsigned char) ch);
@@ -89,7 +111,11 @@
 int SLwchar_isblank (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_BLANK;
+#else
+     return iswblank(ch);
+#endif
    
    return (ch == ' ') || (ch == '\t');
 }
@@ -97,7 +123,11 @@
 int SLwchar_iscntrl (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_CNTRL;
+#else
+     return iswcntrl(ch);
+#endif
    
    if (ch < 256)
      return iscntrl ((unsigned char) ch);
@@ -108,7 +138,11 @@
 int SLwchar_isprint (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      return SL_CLASSIFICATION_LOOKUP(ch) & SLCHARCLASS_PRINT;
+#else
+     return iswprint(ch);
+#endif
    
    if (ch < 256)
      return isprint ((unsigned char) ch);
@@ -124,11 +158,15 @@
 
 int SLwchar_isdigit (SLwchar_Type ch)
 {
+#ifndef SYSTEM_WCHAR
    if (MODE_VARIABLE)
      {
 	unsigned char t = SL_CLASSIFICATION_LOOKUP(ch);
 	return DIGITCLASS(t);
      }
+#else
+     return iswdigit(ch);
+#endif
 
    if ((unsigned)ch < 256)
      return isdigit ((unsigned char) ch);
@@ -138,11 +176,15 @@
 
 int SLwchar_isgraph (SLwchar_Type ch)
 {
+#ifndef SYSTEM_WCHAR
    if (MODE_VARIABLE)
      {
 	unsigned char t = SL_CLASSIFICATION_LOOKUP(ch);
 	return GRAPHCLASS(t);
      }
+#else
+     return iswgraph(ch);
+#endif
 
    if ((unsigned)ch < 256)
      return isgraph ((unsigned char) ch);
@@ -153,10 +195,14 @@
 int SLwchar_isalnum (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      {
 	unsigned char t = SL_CLASSIFICATION_LOOKUP(ch);
 	return ALNUMCLASS(t);
      }
+#else
+     return iswalnum(ch);
+#endif
 
    if ((unsigned)ch < 256)
      return isalnum ((unsigned char) ch);
@@ -168,10 +214,14 @@
 int SLwchar_ispunct (SLwchar_Type ch)
 {
    if (MODE_VARIABLE)
+#ifndef SYSTEM_WCHAR
      {
 	unsigned char t = SL_CLASSIFICATION_LOOKUP(ch);
 	return PUNCTCLASS(t);
      }
+#else
+     return iswpunct(ch);
+#endif
 
    if ((unsigned)ch < 256)
      return ispunct ((unsigned char) ch);
Index: slwcwidth.c
===================================================================
--- slwcwidth.c	(revision 233)
+++ slwcwidth.c	(working copy)
@@ -24,12 +24,15 @@
 #include "slang.h"
 #include "_slang.h"
 
-#define DEFINE_PSLWC_WIDTH_TABLE
-#include "slwcwidth.h"
+#ifndef SYSTEM_WCHAR
+# define DEFINE_PSLWC_WIDTH_TABLE
+# include "slwcwidth.h"
+#endif
 
 static int Width_Flags = 0;
 int SLwchar_wcwidth (SLwchar_Type ch)
 {
+#ifndef SYSTEM_WCHAR
    int w;
    
    SL_WIDTH_ALOOKUP(w,ch);
@@ -48,6 +51,12 @@
 	  w = 1;
      }
    return w;
+#else
+   if (Width_Flags & SLWCWIDTH_SINGLE_WIDTH)
+     return 1;
+
+   return wcwidth(ch);
+#endif
 }
 
 int SLwchar_set_wcwidth_flags (int flags)





[2008 date index] [2008 thread index]
[Thread Prev] [Thread Next]      [Date Prev] [Date Next]