No issues found
1 /*
2 * Copyright (C) 2009, Nokia <ivan.frade@nokia.com>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 #include "config.h"
21
22 #define _XOPEN_SOURCE
23 #define _XOPEN_SOURCE_EXTENDED 1 /* strptime is XPG4v2 */
24
25 #ifndef _GNU_SOURCE
26 #define _GNU_SOURCE
27 #endif
28
29 #include <time.h>
30 #include <string.h>
31 #include <stdio.h>
32
33 #include <libtracker-common/tracker-utils.h>
34 #include <libtracker-common/tracker-date-time.h>
35
36 #include "tracker-utils.h"
37
38 #ifndef HAVE_GETLINE
39
40 #include <stddef.h>
41 #include <stdlib.h>
42 #include <limits.h>
43 #include <errno.h>
44
45 #undef getdelim
46 #undef getline
47
48 #define GROW_BY 80
49
50 #endif /* HAVE_GETLINE */
51
52 #define DATE_FORMAT_ISO8601 "%Y-%m-%dT%H:%M:%S%z"
53
54 /**
55 * SECTION:tracker-utils
56 * @title: Data utilities
57 * @short_description: Functions for coalescing, merging, date
58 * handling and normalizing
59 * @stability: Stable
60 * @include: libtracker-extract/tracker-extract.h
61 *
62 * This API is provided to facilitate common more general functions
63 * which extractors may find useful. These functions are also used by
64 * the in-house extractors quite frequently.
65 **/
66
/* English three-letter month abbreviations, January first. The position of
 * a name in this table is its zero-based month index. */
static const char *months[] = {
	"Jan", "Feb", "Mar",
	"Apr", "May", "Jun",
	"Jul", "Aug", "Sep",
	"Oct", "Nov", "Dec"
};

/* Units digit of the one-based month number, indexed by the zero-based
 * month: index 0 (January) holds '1', index 9 (October) holds '0' and
 * index 11 (December) holds '2'. The tens digit must be supplied by the
 * user of this table. */
static const char imonths[] = {
	'1', '2', '3', '4', '5', '6', '7', '8', '9', /* Jan .. Sep */
	'0', '1', '2'                                /* Oct .. Dec */
};
76
77
78 /**
79 * tracker_coalesce_strip:
80 * @n_values: the number of @... supplied
81 * @...: the string pointers to coalesce
82 *
83 * This function iterates through a series of string pointers passed
84 * using @... and returns the first which is not %NULL, not empty
85 * (i.e. "") and not comprised of one or more spaces (i.e. " ").
86 *
87 * The returned value is stripped using g_strstrip(). It is MOST
88 * important NOT to pass constant string pointers to this function!
89 *
90 * Returns: the first string pointer from those provided which
91 * matches, otherwise %NULL.
92 *
93 * Since: 0.10
94 **/
95 const gchar *
96 tracker_coalesce_strip (gint n_values,
97 ...)
98 {
99 va_list args;
100 gint i;
101 const gchar *result = NULL;
102
103 va_start (args, n_values);
104
105 for (i = 0; i < n_values; i++) {
106 gchar *value;
107
108 value = va_arg (args, gchar *);
109 if (!result && !tracker_is_blank_string (value)) {
110 result = (const gchar *) g_strstrip (value);
111 break;
112 }
113 }
114
115 va_end (args);
116
117 return result;
118 }
119
120 // LCOV_EXCL_START
121
122 /**
123 * tracker_coalesce:
124 * @n_values: the number of @Varargs supplied
125 * @...: the string pointers to coalesce
126 *
127 * This function iterates through a series of string pointers passed
128 * using @... and returns the first which is not %NULL, not empty
129 * (i.e. "") and not comprised of one or more spaces (i.e. " ").
130 *
131 * The returned value is stripped using g_strstrip(). All other values
132 * supplied are freed. It is MOST important NOT to pass constant
133 * string pointers to this function!
134 *
135 * Returns: the first string pointer from those provided which
136 * matches, otherwise %NULL.
137 *
138 * Since: 0.8
139 *
140 * Deprecated: 0.10: Use tracker_coalesce_strip() instead.
141 *
142 **/
143 gchar *
144 tracker_coalesce (gint n_values,
145 ...)
146 {
147 va_list args;
148 gint i;
149 gchar *result = NULL;
150
151 va_start (args, n_values);
152
153 for (i = 0; i < n_values; i++) {
154 gchar *value;
155
156 value = va_arg (args, gchar *);
157 if (!result && !tracker_is_blank_string (value)) {
158 result = g_strstrip (value);
159 } else {
160 g_free (value);
161 }
162 }
163
164 va_end (args);
165
166 return result;
167 }
168 // LCOV_EXCL_STOP
169
170 /**
171 * tracker_merge_const:
172 * @delimiter: the delimiter to use when merging
173 * @n_values: the number of @... supplied
174 * @...: the string pointers to merge
175 *
176 * This function iterates through a series of string pointers passed
177 * using @... and returns a newly allocated string of the merged
178 * strings.
179 *
180 * The @delimiter can be %NULL. If specified, it will be used in
181 * between each merged string in the result.
182 *
183 * Returns: a newly-allocated string holding the result which should
184 * be freed with g_free() when finished with, otherwise %NULL.
185 *
186 * Since: 0.10
187 **/
188 gchar *
189 tracker_merge_const (const gchar *delimiter,
190 gint n_values,
191 ...)
192 {
193 va_list args;
194 gint i;
195 GString *str = NULL;
196
197 va_start (args, n_values);
198
199 for (i = 0; i < n_values; i++) {
200 gchar *value;
201
202 value = va_arg (args, gchar *);
203 if (value) {
204 if (!str) {
205 str = g_string_new (value);
206 } else {
207 if (delimiter) {
208 g_string_append (str, delimiter);
209 }
210 g_string_append (str, value);
211 }
212 }
213 }
214
215 va_end (args);
216
217 if (!str) {
218 return NULL;
219 }
220
221 return g_string_free (str, FALSE);
222 }
223
224 // LCOV_EXCL_START
225
226 /**
227 * tracker_merge:
228 * @delimiter: the delimiter to use when merging
229 * @n_values: the number of @... supplied
230 * @...: the string pointers to merge
231 *
232 * This function iterates through a series of string pointers passed
233 * using @... and returns a newly allocated string of the merged
234 * strings. All passed strings are freed (don't pass const values)/
235 *
236 * The @delimiter can be %NULL. If specified, it will be used in
237 * between each merged string in the result.
238 *
239 * Returns: a newly-allocated string holding the result which should
240 * be freed with g_free() when finished with, otherwise %NULL.
241 *
242 * Since: 0.8
243 *
244 * Deprecated: 0.10: Use tracker_merge_const() instead.
245 **/
246 gchar *
247 tracker_merge (const gchar *delimiter,
248 gint n_values,
249 ...)
250 {
251 va_list args;
252 gint i;
253 GString *str = NULL;
254
255 va_start (args, n_values);
256
257 for (i = 0; i < n_values; i++) {
258 gchar *value;
259
260 value = va_arg (args, gchar *);
261 if (value) {
262 if (!str) {
263 str = g_string_new (value);
264 } else {
265 if (delimiter) {
266 g_string_append (str, delimiter);
267 }
268 g_string_append (str, value);
269 }
270 g_free (value);
271 }
272 }
273
274 va_end (args);
275
276 if (!str) {
277 return NULL;
278 }
279
280 return g_string_free (str, FALSE);
281 }
282
283 /**
284 * tracker_text_normalize:
285 * @text: the text to normalize
286 * @max_words: the maximum words of @text to normalize
287 * @n_words: the number of words actually normalized
288 *
289 * This function iterates through @text checking for UTF-8 validity
290 * using g_utf8_get_char_validated(). For each character found, the
291 * %GUnicodeType is checked to make sure it is one fo the following
292 * values:
293 * <itemizedlist>
294 * <listitem><para>%G_UNICODE_LOWERCASE_LETTER</para></listitem>
295 * <listitem><para>%G_UNICODE_MODIFIER_LETTER</para></listitem>
296 * <listitem><para>%G_UNICODE_OTHER_LETTER</para></listitem>
297 * <listitem><para>%G_UNICODE_TITLECASE_LETTER</para></listitem>
298 * <listitem><para>%G_UNICODE_UPPERCASE_LETTER</para></listitem>
299 * </itemizedlist>
300 *
301 * All other symbols, punctuation, marks, numbers and separators are
302 * stripped. A regular space (i.e. " ") is used to separate the words
303 * in the returned string.
304 *
305 * The @n_words can be %NULL. If specified, it will be populated with
306 * the number of words that were normalized in the result.
307 *
308 * Returns: a newly-allocated string holding the result which should
309 * be freed with g_free() when finished with, otherwise %NULL.
310 *
311 * Since: 0.8
312 *
313 * Deprecated: 0.10: Use tracker_text_validate_utf8() instead.
314 **/
315 gchar *
316 tracker_text_normalize (const gchar *text,
317 guint max_words,
318 guint *n_words)
319 {
320 GString *string;
321 gboolean in_break = TRUE;
322 gunichar ch;
323 gint words = 0;
324
325 string = g_string_new (NULL);
326
327 while ((ch = g_utf8_get_char_validated (text, -1)) > 0) {
328 GUnicodeType type;
329
330 type = g_unichar_type (ch);
331
332 if (type == G_UNICODE_LOWERCASE_LETTER ||
333 type == G_UNICODE_MODIFIER_LETTER ||
334 type == G_UNICODE_OTHER_LETTER ||
335 type == G_UNICODE_TITLECASE_LETTER ||
336 type == G_UNICODE_UPPERCASE_LETTER) {
337 /* Append regular chars */
338 g_string_append_unichar (string, ch);
339 in_break = FALSE;
340 } else if (!in_break) {
341 /* Non-regular char found, treat as word break */
342 g_string_append_c (string, ' ');
343 in_break = TRUE;
344 words++;
345
346 if (words > max_words) {
347 break;
348 }
349 }
350
351 text = g_utf8_find_next_char (text, NULL);
352 }
353
354 if (n_words) {
355 if (!in_break) {
356 /* Count the last word */
357 words += 1;
358 }
359 *n_words = words;
360 }
361
362 return g_string_free (string, FALSE);
363 }
364
365 // LCOV_EXCL_STOP
366
367 /**
368 * tracker_text_validate_utf8:
369 * @text: the text to validate
370 * @text_len: length of @text, or -1 if NUL-terminated
371 * @str: the string where to place the validated UTF-8 characters, or %NULL if
372 * not needed.
373 * @valid_len: Output number of valid UTF-8 bytes found, or %NULL if not needed
374 *
375 * This function iterates through @text checking for UTF-8 validity
376 * using g_utf8_validate(), appends the first chunk of valid characters
377 * to @str, and gives the number of valid UTF-8 bytes in @valid_len.
378 *
379 * Returns: %TRUE if some bytes were found to be valid, %FALSE otherwise.
380 *
381 * Since: 0.10
382 **/
383 gboolean
384 tracker_text_validate_utf8 (const gchar *text,
385 gssize text_len,
386 GString **str,
387 gsize *valid_len)
388 {
389 gsize len_to_validate;
390
391 g_return_val_if_fail (text, FALSE);
392
393 len_to_validate = text_len >= 0 ? text_len : strlen (text);
394
395 if (len_to_validate > 0) {
396 const gchar *end = text;
397
398 /* Validate string, getting the pointer to first non-valid character
399 * (if any) or to the end of the string. */
400 g_utf8_validate (text, len_to_validate, &end);
401 if (end > text) {
402 /* If str output required... */
403 if (str) {
404 /* Create string to output if not already as input */
405 *str = (*str == NULL ?
406 g_string_new_len (text, end - text) :
407 g_string_append_len (*str, text, end - text));
408 }
409
410 /* If utf8 len output required... */
411 if (valid_len) {
412 *valid_len = end - text;
413 }
414
415 return TRUE;
416 }
417 }
418
419 return FALSE;
420 }
421
422 /**
423 * tracker_date_format_to_iso8601:
424 * @date_string: the date in a string pointer
425 * @format: the format of the @date_string
426 *
427 * This function uses strptime() to create a time tm structure using
428 * @date_string and @format.
429 *
430 * Returns: a newly-allocated string with the time represented in
431 * ISO8601 date format which should be freed with g_free() when
432 * finished with, otherwise %NULL.
433 *
434 * Since: 0.8
435 **/
436 gchar *
437 tracker_date_format_to_iso8601 (const gchar *date_string,
438 const gchar *format)
439 {
440 gchar *result;
441 struct tm date_tm = { 0 };
442
443 g_return_val_if_fail (date_string != NULL, NULL);
444 g_return_val_if_fail (format != NULL, NULL);
445
446 if (strptime (date_string, format, &date_tm) == 0) {
447 return NULL;
448 }
449
450 /* If the input format string doesn't parse timezone information with
451 * either %z or %Z, strptime() won't set the tm_gmtoff member in the
452 * broken-down time, and the value during initialization (0) will be
453 * left. This effectively means that every broken-down time obtained
454 * with strptime() without parsing timezone information will be based
455 * on UTC, instead of being treated as localtime. In order to fix this
456 * and set the correct value for the offset w.r.t gmt, we can just
457 * use mktime() to fill in the daylight saving flag as well as the
458 * gmt offset value. */
459 if (!strstr (format, "%z") && !strstr (format, "%Z")) {
460 /* tm_isdst not set by strptime(), we set -1 on it in order to ask
461 * mktime to 'normalize' its contents and fill in the gmt offset
462 * and daylight saving time information */
463 date_tm.tm_isdst = -1;
464
465 /* Note: no real problem if mktime() fails. In this case, tm_isdst
466 * will be -1, and therefore strftime() will not write the timezone
467 * information, which is equally right to represent localtime. */
468 mktime (&date_tm);
469 }
470
471 result = g_malloc (sizeof (char) * 25);
472 strftime (result, 25, DATE_FORMAT_ISO8601 , &date_tm);
473 return result;
474 }
475
476 static gboolean
477 is_int (const gchar *str)
478 {
479 gint i, len;
480
481 if (!str || str[0] == '\0') {
482 return FALSE;
483 }
484
485 len = strlen (str);
486
487 for (i = 0; i < len; i++) {
488 if (!g_ascii_isdigit (str[i])) {
489 return FALSE;
490 }
491 }
492
493 return TRUE ;
494 }
495
496 static gint
497 parse_month (const gchar *month)
498 {
499 gint i;
500
501 for (i = 0; i < 12; i++) {
502 if (!strncmp (month, months[i], 3)) {
503 return i;
504 }
505 }
506
507 return -1;
508 }
509
510 /* Determine date format and convert to ISO 8601 format */
511 /* FIXME We should handle all the fractions here (see ISO 8601), as well as YYYY:DDD etc */
512
513 /**
514 * tracker_date_guess:
515 * @date_string: the date in a string pointer
516 *
517 * This function uses a number of methods to try and guess the date
518 * held in @date_string. The @date_string must be at least 5
519 * characters in length or longer for any guessing to be attempted.
520 * Some of the string formats guessed include:
521 *
522 * <itemizedlist>
523 * <listitem><para>"YYYY-MM-DD" (Simple format)</para></listitem>
524 * <listitem><para>"20050315113224-08'00'" (PDF format)</para></listitem>
525 * <listitem><para>"20050216111533Z" (PDF format)</para></listitem>
526 * <listitem><para>"Mon Feb 9 10:10:00 2004" (Microsoft Office format)</para></listitem>
527 * <listitem><para>"2005:04:29 14:56:54" (Exif format)</para></listitem>
528 * <listitem><para>"YYYY-MM-DDThh:mm:ss.ff+zz:zz</para></listitem>
529 * </itemizedlist>
530 *
531 * Returns: a newly-allocated string with the time represented in
532 * ISO8601 date format which should be freed with g_free() when
533 * finished with, otherwise %NULL.
534 *
535 * Since: 0.8
536 **/
537 gchar *
538 tracker_date_guess (const gchar *date_string)
539 {
540 gchar buf[30];
541 gint len;
542 GError *error = NULL;
543
544 if (!date_string) {
545 return NULL;
546 }
547
548 len = strlen (date_string);
549
550 /* We cannot format a date without at least a four digit
551 * year.
552 */
553 if (len < 4) {
554 return NULL;
555 }
556
557 /* Check for year only dates (EG ID3 music tags might have
558 * Audio.ReleaseDate as 4 digit year)
559 */
560 if (len == 4) {
561 if (is_int (date_string)) {
562 buf[0] = date_string[0];
563 buf[1] = date_string[1];
564 buf[2] = date_string[2];
565 buf[3] = date_string[3];
566 buf[4] = '-';
567 buf[5] = '0';
568 buf[6] = '1';
569 buf[7] = '-';
570 buf[8] = '0';
571 buf[9] = '1';
572 buf[10] = 'T';
573 buf[11] = '0';
574 buf[12] = '0';
575 buf[13] = ':';
576 buf[14] = '0';
577 buf[15] = '0';
578 buf[16] = ':';
579 buf[17] = '0';
580 buf[18] = '0';
581 buf[19] = 'Z';
582 buf[20] = '\0';
583
584 tracker_string_to_date (buf, NULL, &error);
585
586 if (error != NULL) {
587 g_error_free (error);
588 return NULL;
589 }
590
591 return g_strdup (buf);
592 } else {
593 return NULL;
594 }
595 } else if (len == 10) {
596 /* Check for date part only YYYY-MM-DD */
597 buf[0] = date_string[0];
598 buf[1] = date_string[1];
599 buf[2] = date_string[2];
600 buf[3] = date_string[3];
601 buf[4] = '-';
602 buf[5] = date_string[5];
603 buf[6] = date_string[6];
604 buf[7] = '-';
605 buf[8] = date_string[8];
606 buf[9] = date_string[9];
607 buf[10] = 'T';
608 buf[11] = '0';
609 buf[12] = '0';
610 buf[13] = ':';
611 buf[14] = '0';
612 buf[15] = '0';
613 buf[16] = ':';
614 buf[17] = '0';
615 buf[18] = '0';
616 buf[19] = '\0';
617
618 tracker_string_to_date (buf, NULL, &error);
619
620 if (error != NULL) {
621 g_error_free (error);
622 return NULL;
623 }
624
625 return g_strdup (buf);
626 } else if (len == 14) {
627 /* Check for pdf format EG 20050315113224-08'00' or
628 * 20050216111533Z
629 */
630 buf[0] = date_string[0];
631 buf[1] = date_string[1];
632 buf[2] = date_string[2];
633 buf[3] = date_string[3];
634 buf[4] = '-';
635 buf[5] = date_string[4];
636 buf[6] = date_string[5];
637 buf[7] = '-';
638 buf[8] = date_string[6];
639 buf[9] = date_string[7];
640 buf[10] = 'T';
641 buf[11] = date_string[8];
642 buf[12] = date_string[9];
643 buf[13] = ':';
644 buf[14] = date_string[10];
645 buf[15] = date_string[11];
646 buf[16] = ':';
647 buf[17] = date_string[12];
648 buf[18] = date_string[13];
649 buf[19] = '\0';
650
651 tracker_string_to_date (buf, NULL, &error);
652
653 if (error != NULL) {
654 g_error_free (error);
655 return NULL;
656 }
657
658 return g_strdup (buf);
659 } else if (len == 15 && date_string[14] == 'Z') {
660 buf[0] = date_string[0];
661 buf[1] = date_string[1];
662 buf[2] = date_string[2];
663 buf[3] = date_string[3];
664 buf[4] = '-';
665 buf[5] = date_string[4];
666 buf[6] = date_string[5];
667 buf[7] = '-';
668 buf[8] = date_string[6];
669 buf[9] = date_string[7];
670 buf[10] = 'T';
671 buf[11] = date_string[8];
672 buf[12] = date_string[9];
673 buf[13] = ':';
674 buf[14] = date_string[10];
675 buf[15] = date_string[11];
676 buf[16] = ':';
677 buf[17] = date_string[12];
678 buf[18] = date_string[13];
679 buf[19] = 'Z';
680 buf[20] = '\0';
681
682 tracker_string_to_date (buf, NULL, &error);
683
684 if (error != NULL) {
685 g_error_free (error);
686 return NULL;
687 }
688
689 return g_strdup (buf);
690 } else if (len == 21 && (date_string[14] == '-' || date_string[14] == '+' )) {
691 buf[0] = date_string[0];
692 buf[1] = date_string[1];
693 buf[2] = date_string[2];
694 buf[3] = date_string[3];
695 buf[4] = '-';
696 buf[5] = date_string[4];
697 buf[6] = date_string[5];
698 buf[7] = '-';
699 buf[8] = date_string[6];
700 buf[9] = date_string[7];
701 buf[10] = 'T';
702 buf[11] = date_string[8];
703 buf[12] = date_string[9];
704 buf[13] = ':';
705 buf[14] = date_string[10];
706 buf[15] = date_string[11];
707 buf[16] = ':';
708 buf[17] = date_string[12];
709 buf[18] = date_string[13];
710 buf[19] = date_string[14];
711 buf[20] = date_string[15];
712 buf[21] = date_string[16];
713 buf[22] = ':';
714 buf[23] = date_string[18];
715 buf[24] = date_string[19];
716 buf[25] = '\0';
717
718 tracker_string_to_date (buf, NULL, &error);
719
720 if (error != NULL) {
721 g_error_free (error);
722 return NULL;
723 }
724
725 return g_strdup (buf);
726 } else if ((len == 24) && (date_string[3] == ' ')) {
727 /* Check for msoffice date format "Mon Feb 9 10:10:00 2004" */
728 gint num_month;
729 gchar mon1;
730 gchar day1;
731
732 num_month = parse_month (date_string + 4);
733
734 if (num_month < 0) {
735 return NULL;
736 }
737
738 mon1 = imonths[num_month];
739
740 if (date_string[8] == ' ') {
741 day1 = '0';
742 } else {
743 day1 = date_string[8];
744 }
745
746 buf[0] = date_string[20];
747 buf[1] = date_string[21];
748 buf[2] = date_string[22];
749 buf[3] = date_string[23];
750 buf[4] = '-';
751
752 if (num_month < 10) {
753 buf[5] = '0';
754 buf[6] = mon1;
755 } else {
756 buf[5] = '1';
757 buf[6] = mon1;
758 }
759
760 buf[7] = '-';
761 buf[8] = day1;
762 buf[9] = date_string[9];
763 buf[10] = 'T';
764 buf[11] = date_string[11];
765 buf[12] = date_string[12];
766 buf[13] = ':';
767 buf[14] = date_string[14];
768 buf[15] = date_string[15];
769 buf[16] = ':';
770 buf[17] = date_string[17];
771 buf[18] = date_string[18];
772 buf[19] = '\0';
773
774 tracker_string_to_date (buf, NULL, &error);
775
776 if (error != NULL) {
777 g_error_free (error);
778 return NULL;
779 }
780
781 return g_strdup (buf);
782 } else if ((len == 19) && (date_string[4] == ':') && (date_string[7] == ':')) {
783 /* Check for Exif date format "2005:04:29 14:56:54" */
784 buf[0] = date_string[0];
785 buf[1] = date_string[1];
786 buf[2] = date_string[2];
787 buf[3] = date_string[3];
788 buf[4] = '-';
789 buf[5] = date_string[5];
790 buf[6] = date_string[6];
791 buf[7] = '-';
792 buf[8] = date_string[8];
793 buf[9] = date_string[9];
794 buf[10] = 'T';
795 buf[11] = date_string[11];
796 buf[12] = date_string[12];
797 buf[13] = ':';
798 buf[14] = date_string[14];
799 buf[15] = date_string[15];
800 buf[16] = ':';
801 buf[17] = date_string[17];
802 buf[18] = date_string[18];
803 buf[19] = '\0';
804
805 tracker_string_to_date (buf, NULL, &error);
806
807 if (error != NULL) {
808 g_error_free (error);
809 return NULL;
810 }
811
812 return g_strdup (buf);
813 }
814
815 tracker_string_to_date (date_string, NULL, &error);
816
817 if (error != NULL) {
818 g_error_free (error);
819 return NULL;
820 }
821
822 return g_strdup (date_string);
823 }
824
825 #ifndef HAVE_GETLINE
826
827 static gint
828 my_igetdelim (gchar **linebuf,
829 guint *linebufsz,
830 gint delimiter,
831 FILE *file)
832 {
833 gint ch;
834 gint idx;
835
836 if ((file == NULL || linebuf == NULL || *linebuf == NULL || *linebufsz == 0) &&
837 !(*linebuf == NULL && *linebufsz == 0)) {
838 errno = EINVAL;
839 return -1;
840 }
841
842 if (*linebuf == NULL && *linebufsz == 0) {
843 *linebuf = g_malloc (GROW_BY);
844
845 if (!*linebuf) {
846 errno = ENOMEM;
847 return -1;
848 }
849
850 *linebufsz += GROW_BY;
851 }
852
853 idx = 0;
854
855 while ((ch = fgetc (file)) != EOF) {
856 /* Grow the line buffer as necessary */
857 while (idx > *linebufsz - 2) {
858 *linebuf = g_realloc (*linebuf, *linebufsz += GROW_BY);
859
860 if (!*linebuf) {
861 errno = ENOMEM;
862 return -1;
863 }
864 }
865 (*linebuf)[idx++] = (gchar) ch;
866
867 if ((gchar) ch == delimiter) {
868 break;
869 }
870 }
871
872 if (idx != 0) {
873 (*linebuf)[idx] = 0;
874 } else if ( ch == EOF ) {
875 return -1;
876 }
877
878 return idx;
879 }
880
881 #endif /* HAVE_GETLINE */
882
883 /**
884 * tracker_getline:
885 * @lineptr: Buffer to write into
886 * @n: Max bytes of linebuf
887 * @stream: Filestream to read from
888 *
889 * Reads an entire line from stream, storing the address of the buffer
890 * containing the text into *lineptr. The buffer is null-terminated
891 * and includes the newline character, if one was found.
892 *
893 * Read GNU getline()'s manpage for more information
894 *
895 * Returns: the number of characters read, including the delimiter
896 * character, but not including the terminating %NULL byte. This value
897 * can be used to handle embedded %NULL bytes in the line read. Upon
898 * failure, -1 is returned.
899 *
900 * Since: 0.10
901 **/
902 gssize
903 tracker_getline (gchar **lineptr,
904 gsize *n,
905 FILE *stream)
906 {
907 #ifndef HAVE_GETLINE
908 return my_igetdelim (lineptr, n, '\n', stream);
909 #else /* HAVE_GETLINE */
910 return getline (lineptr, n, stream);
911 #endif /* HAVE_GETLINE */
912 }
913
914 /**
915 * tracker_keywords_parse:
916 * @store: Array where to store the keywords
917 * @keywords: Keywords line to parse
918 *
919 * Parses a keywords line into store, avoiding duplicates and stripping leading
920 * and trailing spaces from keywords. Allowed delimiters are , and ;
921 *
922 * Since: 0.10
923 **/
924 void
925 tracker_keywords_parse (GPtrArray *store,
926 const gchar *keywords)
927 {
928 gchar *orig, *keywords_d;
929 char *saveptr, *p;
930 size_t len;
931
932 keywords_d = orig = g_strdup (keywords);
933 p = keywords_d;
934 keywords_d = strchr (keywords_d, '"');
935
936 if (keywords_d) {
937 keywords_d++;
938 } else {
939 keywords_d = p;
940 }
941
942 len = strlen (keywords_d);
943 if (keywords_d[len - 1] == '"') {
944 keywords_d[len - 1] = '\0';
945 }
946
947 for (p = strtok_r (keywords_d, ",;", &saveptr); p;
948 p = strtok_r (NULL, ",;", &saveptr)) {
949 guint i;
950 gboolean found = FALSE;
951 gchar *p_do = g_strdup (p);
952 gchar *p_dup = p_do;
953 guint len = strlen (p_dup);
954
955 if (*p_dup == ' ')
956 p_dup++;
957
958 if (p_dup[len-1] == ' ')
959 p_dup[len-1] = '\0';
960
961 /* ignore keywords containing invalid UTF-8 */
962 if (!g_utf8_validate (p_dup, -1, NULL)) {
963 g_free (p_do);
964 continue;
965 }
966
967 for (i = 0; i < store->len; i++) {
968 const gchar *earlier = g_ptr_array_index (store, i);
969 if (g_strcmp0 (earlier, p_dup) == 0) {
970 found = TRUE;
971 break;
972 }
973 }
974
975 if (!found) {
976 g_ptr_array_add (store, g_strdup (p_dup));
977 }
978
979 g_free (p_do);
980 }
981
982 g_free (orig);
983 }