1/* Determine the number of screen columns needed for a string.
2   Copyright (C) 2000-2005 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software Foundation,
16   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17
18/* Written by Bruno Haible <haible@clisp.cons.org>.  */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24/* Specification.  */
25#include "mbswidth.h"
26
/* Get MB_CUR_MAX.  */
#include <stdlib.h>

/* Get INT_MAX.  */
#include <limits.h>

#include <string.h>

/* Get isprint().  */
#include <ctype.h>
34
35/* Get mbstate_t, mbrtowc(), mbsinit(), wcwidth().  */
36#if HAVE_WCHAR_H
37/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
38   <wchar.h>.
39   BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
40   <wchar.h>.  */
41# include <stdio.h>
42# include <time.h>
43# include <wchar.h>
44#endif
45
46/* Get iswprint(), iswcntrl().  */
47#if HAVE_WCTYPE_H
48# include <wctype.h>
49#endif
50#if !defined iswprint && !HAVE_ISWPRINT
51# define iswprint(wc) 1
52#endif
53#if !defined iswcntrl && !HAVE_ISWCNTRL
54# define iswcntrl(wc) 0
55#endif
56
57#ifndef mbsinit
58# if !HAVE_MBSINIT
59#  define mbsinit(ps) 1
60# endif
61#endif
62
63#ifndef HAVE_DECL_WCWIDTH
64"this configure-time declaration test was not run"
65#endif
66#if !HAVE_DECL_WCWIDTH
67int wcwidth ();
68#endif
69
70#ifndef wcwidth
71# if !HAVE_WCWIDTH
72/* wcwidth doesn't exist, so assume all printable characters have
73   width 1.  */
74#  define wcwidth(wc) ((wc) == 0 ? 0 : iswprint (wc) ? 1 : -1)
75# endif
76#endif
77
/* Get ISPRINT and ISCNTRL.  */
79#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
80# define IN_CTYPE_DOMAIN(c) 1
81#else
82# define IN_CTYPE_DOMAIN(c) isascii(c)
83#endif
84/* Undefine to protect against the definition in wctype.h of Solaris 2.6.   */
85#undef ISPRINT
86#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
87#undef ISCNTRL
88#define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
89
/* Return the number of screen columns occupied by the NUL-terminated
   multibyte string STRING.

   The work is delegated to mbsnwidth, which is handed the strlen (STRING)
   bytes of STRING together with FLAGS unchanged; the result, including
   -1 for rejected input (for instance a non-printable character when
   MBSW_REJECT_UNPRINTABLE is specified), is whatever mbsnwidth returns.
   With flags = MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE, this is
   the multibyte analogue of the wcswidth function.
   Extremely long strings are subject to the same limits as mbsnwidth.  */
int
mbswidth (const char *string, int flags)
{
  size_t nbytes = strlen (string);

  return mbsnwidth (string, nbytes, flags);
}
101
102/* Returns the number of columns needed to represent the multibyte
103   character string pointed to by STRING of length NBYTES.  If a
104   non-printable character occurs, and MBSW_REJECT_UNPRINTABLE is
105   specified, -1 is returned.
106   If NBYTES is not < INT_MAX / 2, integer overflow can occur.  */
107int
108mbsnwidth (const char *string, size_t nbytes, int flags)
109{
110  const char *p = string;
111  const char *plimit = p + nbytes;
112  int width;
113
114  width = 0;
115#if HAVE_MBRTOWC
116  if (MB_CUR_MAX > 1)
117    {
118      while (p < plimit)
119	switch (*p)
120	  {
121	    case ' ': case '!': case '"': case '#': case '%':
122	    case '&': case '\'': case '(': case ')': case '*':
123	    case '+': case ',': case '-': case '.': case '/':
124	    case '0': case '1': case '2': case '3': case '4':
125	    case '5': case '6': case '7': case '8': case '9':
126	    case ':': case ';': case '<': case '=': case '>':
127	    case '?':
128	    case 'A': case 'B': case 'C': case 'D': case 'E':
129	    case 'F': case 'G': case 'H': case 'I': case 'J':
130	    case 'K': case 'L': case 'M': case 'N': case 'O':
131	    case 'P': case 'Q': case 'R': case 'S': case 'T':
132	    case 'U': case 'V': case 'W': case 'X': case 'Y':
133	    case 'Z':
134	    case '[': case '\\': case ']': case '^': case '_':
135	    case 'a': case 'b': case 'c': case 'd': case 'e':
136	    case 'f': case 'g': case 'h': case 'i': case 'j':
137	    case 'k': case 'l': case 'm': case 'n': case 'o':
138	    case 'p': case 'q': case 'r': case 's': case 't':
139	    case 'u': case 'v': case 'w': case 'x': case 'y':
140	    case 'z': case '{': case '|': case '}': case '~':
141	      /* These characters are printable ASCII characters.  */
142	      p++;
143	      width++;
144	      break;
145	    default:
146	      /* If we have a multibyte sequence, scan it up to its end.  */
147	      {
148		mbstate_t mbstate;
149		memset (&mbstate, 0, sizeof mbstate);
150		do
151		  {
152		    wchar_t wc;
153		    size_t bytes;
154		    int w;
155
156		    bytes = mbrtowc (&wc, p, plimit - p, &mbstate);
157
158		    if (bytes == (size_t) -1)
159		      /* An invalid multibyte sequence was encountered.  */
160		      {
161			if (!(flags & MBSW_REJECT_INVALID))
162			  {
163			    p++;
164			    width++;
165			    break;
166			  }
167			else
168			  return -1;
169		      }
170
171		    if (bytes == (size_t) -2)
172		      /* An incomplete multibyte character at the end.  */
173		      {
174			if (!(flags & MBSW_REJECT_INVALID))
175			  {
176			    p = plimit;
177			    width++;
178			    break;
179			  }
180			else
181			  return -1;
182		      }
183
184		    if (bytes == 0)
185		      /* A null wide character was encountered.  */
186		      bytes = 1;
187
188		    w = wcwidth (wc);
189		    if (w >= 0)
190		      /* A printable multibyte character.  */
191		      width += w;
192		    else
193		      /* An unprintable multibyte character.  */
194		      if (!(flags & MBSW_REJECT_UNPRINTABLE))
195			width += (iswcntrl (wc) ? 0 : 1);
196		      else
197			return -1;
198
199		    p += bytes;
200		  }
201		while (! mbsinit (&mbstate));
202	      }
203	      break;
204	  }
205      return width;
206    }
207#endif
208
209  while (p < plimit)
210    {
211      unsigned char c = (unsigned char) *p++;
212
213      if (ISPRINT (c))
214	width++;
215      else if (!(flags & MBSW_REJECT_UNPRINTABLE))
216	width += (ISCNTRL (c) ? 0 : 1);
217      else
218	return -1;
219    }
220  return width;
221}
222