No issues found
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
2 *
3 * Copyright (C) 2010 Uri Sivan
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
9 *
10 * The Rhythmbox authors hereby grant permission for non-GPL compatible
11 * GStreamer plugins to be used and distributed together with GStreamer
12 * and Rhythmbox. This permission is above and beyond the permissions granted
13 * by the GPL license by which Rhythmbox is covered. If you modify this code
14 * you may extend this exception to your version of the code, but you are not
15 * obligated to do so. If you do not wish to do so, delete this exception
16 * statement from your version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
26 *
27 */
28
29 #include "config.h"
30
31 #include <rb-text-helpers.h>
32
33 /**
34 * SECTION:rb-text-helpers
35 * @short_description: text direction (LTR/RTL) functions
36 *
37 * Provides some helper functions for constructing strings that
38 * may include both left-to-right and right-to-left text.
39 */
40
/* Unicode direction markup characters.
 * See http://unicode.org/reports/tr9/, in particular sections 2.1-2.4.
 *
 * LRM = Left-to-Right Mark      = invisible character with LTR direction
 * RLM = Right-to-Left Mark      = invisible character with RTL direction
 * LRE = Left-to-Right Embedding = start of LTR "island" in RTL text
 * RLE = Right-to-Left Embedding = start of RTL "island" in LTR text
 * PDF = Pop Directional Format  = close last LRE or RLE section
 *
 * The following constants are in UTF-8 encoding. Both the bytes and the
 * pointers themselves are const: the markers can neither be modified nor
 * made to point at a different string.
 */
static const char * const UNICODE_LRM = "\xE2\x80\x8E";
static const char * const UNICODE_RLM = "\xE2\x80\x8F";
static const char * const UNICODE_LRE = "\xE2\x80\xAA";
static const char * const UNICODE_RLE = "\xE2\x80\xAB";
static const char * const UNICODE_PDF = "\xE2\x80\xAC";
57
58 static void
59 append_and_free (GString *str, char *text)
60 {
61 g_string_append (str, text);
62 g_free (text);
63 }
64
65 /**
66 * rb_text_direction_conflict:
67 * @dir1: direction A
68 * @dir2: direction B
69 *
70 * Direction conflict here means the two directions are defined (non-neutral)
71 * and they are different.
72 *
73 * Return value: %TRUE if the two directions conflict.
74 */
75 gboolean
76 rb_text_direction_conflict (PangoDirection dir1, PangoDirection dir2)
77 {
78 return (dir1 != dir2) &&
79 (dir1 != PANGO_DIRECTION_NEUTRAL) &&
80 (dir2 != PANGO_DIRECTION_NEUTRAL);
81 }
82
83 /**
84 * rb_text_common_direction:
85 * @first: first string
86 * @...: rest of strings, terminated with %NULL
87 *
88 * This functions checks the direction of all given strings and:
89 *
90 * 1. If all strings are direction neutral, returns %PANGO_DIRECTION_NEUTRAL;
91 *
92 * 2. If all strings are either LTR or neutral, returns %PANGO_DIRECTION_LTR;
93 *
94 * 3. If all strings are either RTL or neutral, returns %PANGO_DIRECTION_RTL;
95 *
96 * 4. If at least one is RTL and one LTR, returns %PANGO_DIRECTION_NEUTRAL.
97 *
98 * Note: neutral (1) and mixed (4) are two very different situations,
99 * they share a return code here only because they're the same for our
100 * specific use.
101 *
102 * Return value: common direction of all strings, as defined above.
103 */
104 PangoDirection
105 rb_text_common_direction (const char *first, ...)
106 {
107 PangoDirection common_dir = PANGO_DIRECTION_NEUTRAL;
108 PangoDirection text_dir;
109 const char *text;
110 va_list args;
111
112 va_start (args, first);
113
114 for (text = first; text; text = va_arg(args, const char *)) {
115 if (!text[0])
116 continue;
117
118 text_dir = pango_find_base_dir (text, -1);
119
120 if (rb_text_direction_conflict (text_dir, common_dir)) {
121 /* mixed direction */
122 common_dir = PANGO_DIRECTION_NEUTRAL;
123 break;
124 }
125
126 common_dir = text_dir;
127 }
128
129 va_end (args);
130
131 return common_dir;
132 }
133
134 /**
135 * rb_text_cat:
136 * @base_dir: direction of the result string.
137 * @...: pairs of strings (content, format) terminated with %NULL.
138 *
139 * This function concatenates strings to a single string, preserving
140 * each part's original direction (LTR or RTL) using unicode markup,
141 * as detailed here: http://unicode.org/reports/tr9/.
142 *
143 * It is called like this:
144 *
145 * s = rb_text_cat(base_dir, str1, format1, ..., strN, formatN, %NULL)
146 *
147 * Format is a printf format with exactly one \%s. "\%s" or "" will
148 * insert the string as is.
149 *
150 * Any string that is empty ("") will be skipped, its format must still be
151 * passed.
152 *
153 * A space is inserted between strings.
154 *
155 * The algorithm:
156 *
157 * 1. Caller supplies the base direction of the result in base_dir.
158 *
159 * 2. Insert either LRM or RLM at the beginning of the string to set
160 * its base direction, according to base_dir.
161 *
162 * 3. Find the direction of each string using pango.
163 *
164 * 4. For strings that have the same direction as the base direction,
165 * just insert them in.
166 *
167 * 5. For strings that have the opposite direction than the base one,
168 * insert them surrounded with embedding codes RLE/LRE .. PDF.
169 *
170 * Return value: a new string containing the result.
171 */
172 char *
173 rb_text_cat (PangoDirection base_dir, ...)
174 {
175 PangoDirection text_dir;
176 va_list args;
177 const char *embed_start;
178 const char *embed_stop = UNICODE_PDF;
179 GString *result;
180
181 va_start (args, base_dir);
182
183 result = g_string_sized_new (100);
184
185 if (base_dir == PANGO_DIRECTION_LTR) {
186 /* base direction LTR, embedded parts are RTL */
187 g_string_append (result, UNICODE_LRM);
188 embed_start = UNICODE_RLE;
189 } else {
190 /* base direction RTL, embedded parts are LTR */
191 g_string_append (result, UNICODE_RLM);
192 embed_start = UNICODE_LRE;
193 }
194
195 while (1) {
196 const char *text = va_arg (args, const char *);
197 const char *format;
198
199 if (!text)
200 break;
201
202 format = va_arg (args, const char *);
203 if (!text[0])
204 continue;
205 if (!format[0])
206 format = "%s";
207
208 if (result->len > 0) {
209 g_string_append (result, " ");
210 }
211
212 text_dir = pango_find_base_dir (text, -1);
213
214 if (rb_text_direction_conflict (text_dir, base_dir)) {
215 /* surround text with embed codes */
216 g_string_append (result, embed_start);
217 append_and_free (result, g_markup_printf_escaped (format, text));
218 g_string_append (result, embed_stop);
219 } else {
220 append_and_free (result, g_markup_printf_escaped (format, text));
221 }
222 }
223
224 va_end (args);
225
226 return g_string_free (result, FALSE);
227 }