Location	Tool	Test ID	Function	Issue
e-unicode.c:96:7	clang-analyzer			Dereference of null pointer (loaded from variable 'ob')
e-unicode.c:96:7	clang-analyzer			Dereference of null pointer (loaded from variable 'ob')
  1 /*
  2  * e-unicode.c - utf-8 support functions for gal
  3  *
  4  * This program is free software; you can redistribute it and/or
  5  * modify it under the terms of the GNU Lesser General Public
  6  * License as published by the Free Software Foundation; either
  7  * version 2 of the License, or (at your option) version 3.
  8  *
  9  * This program is distributed in the hope that it will be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 12  * Lesser General Public License for more details.
 13  *
 14  * You should have received a copy of the GNU Lesser General Public
 15  * License along with the program; if not, see <http://www.gnu.org/licenses/>
 16  *
 17  *
 18  * Authors:
 19  *		Lauris Kaplinski <lauris@ximian.com>
 20  *
 21  * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
 22  *
 23  */
 24 
 25 #ifdef HAVE_CONFIG_H
 26 #include <config.h>
 27 #endif
 28 
 29 #include <stdlib.h>
 30 #include <string.h>
 31 #include <stdio.h>
 32 #include <ctype.h>
 33 #include <iconv.h>
 34 
 35 #include <gdk/gdkkeysyms.h>
 36 #include <gtk/gtk.h>
 37 #include <libxml/xmlmemory.h>
 38 
 39 #include <camel/camel.h>
 40 
 41 #include <glib/gi18n.h>
 42 #include "e-unicode.h"
 43 
 44 #define d(x)
 45 
 46 #define FONT_TESTING
 47 #define MAX_DECOMP 8
 48 
 49 /* FIXME: this has not been ported fully yet - non ASCII people beware. */
 50 
 51 gchar *
 52 e_utf8_from_gtk_event_key (GtkWidget *widget,
 53                            guint keyval,
 54                            const gchar *string)
 55 {
 56 	gint unival;
 57 	gchar *utf;
 58 	gint unilen;
 59 
 60 	if (keyval == GDK_KEY_VoidSymbol) {
 61 		utf = e_utf8_from_locale_string_sized (string, strlen (string));
 62 	} else {
 63 		unival = gdk_keyval_to_unicode (keyval);
 64 
 65 		if (unival < ' ') return NULL;
 66 
 67 		utf = g_new (gchar, 7);
 68 
 69 		unilen = e_unichar_to_utf8 (unival, utf);
 70 
 71 		utf[unilen] = '\0';
 72 	}
 73 
 74 	return utf;
 75 }
 76 
 77 gchar *
 78 e_utf8_from_iconv_string_sized (iconv_t ic,
 79                                 const gchar *string,
 80                                 gint bytes)
 81 {
 82 	gchar *new, *ob;
 83 	const gchar *ib;
 84 	gsize ibl, obl;
 85 
 86 	if (!string) return NULL;
 87 
 88 	if (ic == (iconv_t) -1) {
 89 		gint i;
 90 		/* iso-8859-1 */
 91 		ib = (gchar *) string;
 92 		new = ob = (gchar *) g_new (guchar, bytes * 2 + 1);
 93 		for (i = 0; i < (bytes); i++) {
 94 			ob += e_unichar_to_utf8 (ib[i], ob);
 95 		}
 96 		*ob = '\0';
   Dereference of null pointer (loaded from variable 'ob')
   (emitted by clang-analyzer)
TODO: a detailed trace is available in the data model (not yet rendered in this report)
   Dereference of null pointer (loaded from variable 'ob')
   (emitted by clang-analyzer)
TODO: a detailed trace is available in the data model (not yet rendered in this report) 97 		return new;
 98 	}
 99 
100 	ib = string;
101 	ibl = bytes;
102 	new = ob = g_new (gchar, ibl * 6 + 1);
103 	obl = ibl * 6;
104 
105 	while (ibl > 0) {
106 		camel_iconv (ic, &ib, &ibl, &ob, &obl);
107 		if (ibl > 0) {
108 			gint len;
109 			if ((*ib & 0x80) == 0x00) len = 1;
110 			else if ((*ib &0xe0) == 0xc0) len = 2;
111 			else if ((*ib &0xf0) == 0xe0) len = 3;
112 			else if ((*ib &0xf8) == 0xf0) len = 4;
113 			else {
114 				g_warning ("Invalid UTF-8 sequence");
115 				break;
116 			}
117 			ib += len;
118 			ibl = bytes - (ib - string);
119 			if (ibl > bytes) ibl = 0;
120 			*ob++ = '_';
121 			obl--;
122 		}
123 	}
124 
125 	*ob = '\0';
126 
127 	return new;
128 }
129 
130 gchar *
131 e_utf8_to_iconv_string_sized (iconv_t ic,
132                               const gchar *string,
133                               gint bytes)
134 {
135 	gchar *new, *ob;
136 	const gchar *ib;
137 	gsize ibl, obl;
138 
139 	if (!string) return NULL;
140 
141 	if (ic == (iconv_t) -1) {
142 		gint len;
143 		const gchar *u;
144 		gunichar uc;
145 
146 		new = (gchar *) g_new (guchar, bytes * 4 + 1);
147 		u = string;
148 		len = 0;
149 
150 		while ((u) && (u - string < bytes)) {
151 			u = e_unicode_get_utf8 (u, &uc);
152 			new[len++] = uc & 0xff;
153 		}
154 		new[len] = '\0';
155 		return new;
156 	}
157 
158 	ib = string;
159 	ibl = bytes;
160 	new = ob = g_new (char, ibl * 4 + 4);
161 	obl = ibl * 4;
162 
163 	while (ibl > 0) {
164 		camel_iconv (ic, &ib, &ibl, &ob, &obl);
165 		if (ibl > 0) {
166 			gint len;
167 			if ((*ib & 0x80) == 0x00) len = 1;
168 			else if ((*ib &0xe0) == 0xc0) len = 2;
169 			else if ((*ib &0xf0) == 0xe0) len = 3;
170 			else if ((*ib &0xf8) == 0xf0) len = 4;
171 			else {
172 				g_warning ("Invalid UTF-8 sequence");
173 				break;
174 			}
175 			ib += len;
176 			ibl = bytes - (ib - string);
177 			if (ibl > bytes) ibl = 0;
178 
179 			/* FIXME This is wrong.  What if the destination
180 			 *       charset is 16 or 32 bit? */
181 			*ob++ = '_';
182 			obl--;
183 		}
184 	}
185 
186 	/* Make sure to terminate with plenty of padding */
187 	memset (ob, 0, 4);
188 
189 	return new;
190 }
191 
192 gchar *
193 e_utf8_to_charset_string_sized (const gchar *charset,
194                                 const gchar *string,
195                                 gint bytes)
196 {
197 	iconv_t ic;
198 	gchar *ret;
199 
200 	if (!string) return NULL;
201 
202 	ic = camel_iconv_open (charset, "utf-8");
203 	ret = e_utf8_to_iconv_string_sized (ic, string, bytes);
204 	camel_iconv_close (ic);
205 
206 	return ret;
207 }
208 
209 gchar *
210 e_utf8_from_locale_string_sized (const gchar *string,
211                                  gint bytes)
212 {
213 	iconv_t ic;
214 	gchar *ret;
215 
216 	if (!string) return NULL;
217 
218 	ic = camel_iconv_open ("utf-8", camel_iconv_locale_charset ());
219 	ret = e_utf8_from_iconv_string_sized (ic, string, bytes);
220 	camel_iconv_close (ic);
221 
222 	return ret;
223 }
224 
225 /**
226  * e_utf8_ensure_valid:
227  * @string: string to make valid UTF-8
228  *
229  * Ensures the returned string will be valid UTF-8 string, thus GTK+
230  * functions expecting only valid UTF-8 text will not crash.
231  *
232  * Returned pointer should be freed with g_free().
233  *
234  * Returns: a newly-allocated UTF-8 string
235  **/
236 gchar *
237 e_utf8_ensure_valid (const gchar *string)
238 {
239 	gchar *res = g_strdup (string), *p;
240 
241 	if (!res)
242 		return res;
243 
244 	p = res;
245 	while (!g_utf8_validate (p, -1, (const gchar **) &p)) {
246 		/* make all invalid characters appear as question marks */
247 		*p = '?';
248 	}
249 
250 	return res;
251 }
252 
253 /**
254  * e_unichar_to_utf8:
255  * @c: a ISO10646 character code
256  * @outbuf: output buffer, must have at least 6 bytes of space.
257  *          If %NULL, the length will be computed and returned
258  *          and nothing will be written to @out.
259  *
260  * Convert a single character to utf8
261  *
262  * Return value: number of bytes written
263  **/
264 
265 gint
266 e_unichar_to_utf8 (gint c,
267                    gchar *outbuf)
268 {
269   gsize len = 0;
270   gint first;
271   gint i;
272 
273   if (c < 0x80)
274     {
275       first = 0;
276       len = 1;
277     }
278   else if (c < 0x800)
279     {
280       first = 0xc0;
281       len = 2;
282     }
283   else if (c < 0x10000)
284     {
285       first = 0xe0;
286       len = 3;
287     }
288    else if (c < 0x200000)
289     {
290       first = 0xf0;
291       len = 4;
292     }
293   else if (c < 0x4000000)
294     {
295       first = 0xf8;
296       len = 5;
297     }
298   else
299     {
300       first = 0xfc;
301       len = 6;
302     }
303 
304   if (outbuf)
305     {
306       for (i = len - 1; i > 0; --i)
307 	{
308 	  outbuf[i] = (c & 0x3f) | 0x80;
309 	  c >>= 6;
310 	}
311       outbuf[0] = c | first;
312     }
313 
314   return len;
315 }
316 
317 gchar *
318 e_unicode_get_utf8 (const gchar *text,
319                     gunichar *out)
320 {
321 	*out = g_utf8_get_char (text);
322 	return (*out == (gunichar) - 1) ? NULL : g_utf8_next_char (text);
323 }
324 
325 gchar *
326 e_xml_get_translated_utf8_string_prop_by_name (const xmlNode *parent,
327                                                const xmlChar *prop_name)
328 {
329 	xmlChar *prop;
330 	gchar *ret_val = NULL;
331 	gchar *combined_name;
332 
333 	g_return_val_if_fail (parent != NULL, NULL);
334 	g_return_val_if_fail (prop_name != NULL, NULL);
335 
336 	prop = xmlGetProp ((xmlNode *) parent, prop_name);
337 	if (prop != NULL) {
338 		ret_val = g_strdup ((gchar *) prop);
339 		xmlFree (prop);
340 		return ret_val;
341 	}
342 
343 	combined_name = g_strdup_printf ("_%s", prop_name);
344 	prop = xmlGetProp ((xmlNode *) parent, (guchar *) combined_name);
345 	if (prop != NULL) {
346 		ret_val = g_strdup (gettext ((gchar *) prop));
347 		xmlFree (prop);
348 	}
349 	g_free (combined_name);
350 
351 	return ret_val;
352 }
evolution-3.6.4/e-util/e-unicode.c