No issues found
1 /*
2 * Copyright (C) 2007, Jamie McCracken <jamiemcc@gnome.org>
3 * Copyright (C) 2008, Nokia <ivan.frade@nokia.com>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 */
20
21 #include "config.h"
22
23 #ifndef _GNU_SOURCE
24 #define _GNU_SOURCE
25 #endif
26
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <unistd.h>
33 #include <sys/mman.h>
34
35 #include <glib.h>
36 #include <glib/gstdio.h>
37
38 #include <libtracker-common/tracker-file-utils.h>
39
40 #include <libtracker-extract/tracker-extract.h>
41
/*
 * Identifies which metadata field the next text node belongs to.
 * ABW_PARSER_TAG_UNHANDLED must stay zero: parser state is zero-initialised
 * and relies on that meaning "no metadata tag pending".
 */
typedef enum {
	ABW_PARSER_TAG_UNHANDLED = 0,   /* no metadata tag pending */
	ABW_PARSER_TAG_TITLE,           /* key="dc.title" */
	ABW_PARSER_TAG_SUBJECT,         /* key="dc.subject" */
	ABW_PARSER_TAG_CREATOR,         /* key="dc.creator" */
	ABW_PARSER_TAG_KEYWORDS,        /* key="abiword.keywords" */
	ABW_PARSER_TAG_DESCRIPTION,     /* key="dc.description" */
	ABW_PARSER_TAG_GENERATOR        /* key="abiword.generator" */
} AbwParserTag;

/* Parser state shared by the markup callbacks; defined below. */
typedef struct AbwParserData AbwParserData;
52
/*
 * State passed as user_data to the GMarkup callbacks while an AbiWord
 * document is being parsed.
 */
struct AbwParserData {
	/* Builder that receives the extracted metadata statements. */
	TrackerSparqlBuilder *metadata;
	/* Pre-update builder; stored here but not used by the callbacks in this file. */
	TrackerSparqlBuilder *preupdate;
	/* Accumulated document text; created lazily on the first text node seen while in_text is set. */
	GString *content;

	/* AbwParserTag value selecting how the next text node is interpreted; reset after each text node. */
	guint cur_tag;
	/* Set once a "section" element has been opened; it is not cleared again in this file. */
	guint in_text : 1;
};
61
62 static void
63 abw_parser_start_elem (GMarkupParseContext *context,
64 const gchar *element_name,
65 const gchar **attribute_names,
66 const gchar **attribute_values,
67 gpointer user_data,
68 GError **error)
69 {
70 AbwParserData *data = user_data;
71
72 if (g_strcmp0 (element_name, "m") == 0 &&
73 g_strcmp0 (attribute_names[0], "key") == 0) {
74 if (g_strcmp0 (attribute_values[0], "dc.title") == 0) {
75 data->cur_tag = ABW_PARSER_TAG_TITLE;
76 } else if (g_strcmp0 (attribute_values[0], "dc.subject") == 0) {
77 data->cur_tag = ABW_PARSER_TAG_SUBJECT;
78 } else if (g_strcmp0 (attribute_values[0], "dc.creator") == 0) {
79 data->cur_tag = ABW_PARSER_TAG_CREATOR;
80 } else if (g_strcmp0 (attribute_values[0], "abiword.keywords") == 0) {
81 data->cur_tag = ABW_PARSER_TAG_KEYWORDS;
82 } else if (g_strcmp0 (attribute_values[0], "dc.description") == 0) {
83 data->cur_tag = ABW_PARSER_TAG_DESCRIPTION;
84 } else if (g_strcmp0 (attribute_values[0], "abiword.generator") == 0) {
85 data->cur_tag = ABW_PARSER_TAG_GENERATOR;
86 }
87 } else if (g_strcmp0 (element_name, "section") == 0) {
88 data->in_text = TRUE;
89 }
90 }
91
92 static void
93 abw_parser_text (GMarkupParseContext *context,
94 const gchar *text,
95 gsize text_len,
96 gpointer user_data,
97 GError **error)
98 {
99 AbwParserData *data = user_data;
100 gchar *str;
101
102 str = g_strndup (text, text_len);
103
104 switch (data->cur_tag) {
105 case ABW_PARSER_TAG_TITLE:
106 tracker_sparql_builder_predicate (data->metadata, "nie:title");
107 tracker_sparql_builder_object_unvalidated (data->metadata, str);
108 break;
109 case ABW_PARSER_TAG_SUBJECT:
110 tracker_sparql_builder_predicate (data->metadata, "nie:subject");
111 tracker_sparql_builder_object_unvalidated (data->metadata, str);
112 break;
113 case ABW_PARSER_TAG_CREATOR:
114 tracker_sparql_builder_predicate (data->metadata, "nco:creator");
115
116 tracker_sparql_builder_object_blank_open (data->metadata);
117 tracker_sparql_builder_predicate (data->metadata, "a");
118 tracker_sparql_builder_object (data->metadata, "nco:Contact");
119
120 tracker_sparql_builder_predicate (data->metadata, "nco:fullname");
121 tracker_sparql_builder_object_unvalidated (data->metadata, str);
122 tracker_sparql_builder_object_blank_close (data->metadata);
123 break;
124 case ABW_PARSER_TAG_DESCRIPTION:
125 tracker_sparql_builder_predicate (data->metadata, "nie:comment");
126 tracker_sparql_builder_object_unvalidated (data->metadata, str);
127 break;
128 case ABW_PARSER_TAG_GENERATOR:
129 tracker_sparql_builder_predicate (data->metadata, "nie:generator");
130 tracker_sparql_builder_object_unvalidated (data->metadata, str);
131 break;
132 case ABW_PARSER_TAG_KEYWORDS:
133 {
134 char *lasts, *keyword;
135
136 for (keyword = strtok_r (str, ",; ", &lasts); keyword;
137 keyword = strtok_r (NULL, ",; ", &lasts)) {
138 tracker_sparql_builder_predicate (data->metadata, "nie:keyword");
139 tracker_sparql_builder_object_unvalidated (data->metadata, keyword);
140 }
141 }
142 break;
143 default:
144 break;
145 }
146
147 if (data->in_text) {
148 if (G_UNLIKELY (!data->content)) {
149 data->content = g_string_new ("");
150 }
151
152 g_string_append_len (data->content, text, text_len);
153 }
154
155 data->cur_tag = ABW_PARSER_TAG_UNHANDLED;
156 g_free (str);
157 }
158
159 static GMarkupParser parser = {
160 abw_parser_start_elem,
161 NULL,
162 abw_parser_text,
163 NULL, NULL
164 };
165
166 G_MODULE_EXPORT gboolean
167 tracker_extract_get_metadata (TrackerExtractInfo *info)
168 {
169 TrackerSparqlBuilder *preupdate, *metadata;
170 int fd;
171 gchar *filename, *contents;
172 gboolean retval = FALSE;
173 GFile *f;
174 gsize len;
175 struct stat st;
176
177 preupdate = tracker_extract_info_get_preupdate_builder (info);
178 metadata = tracker_extract_info_get_metadata_builder (info);
179
180 f = tracker_extract_info_get_file (info);
181 filename = g_file_get_path (f);
182
183 fd = tracker_file_open_fd (filename);
184
185 if (fd == -1) {
186 g_warning ("Could not open abw file '%s': %s\n",
187 filename,
188 g_strerror (errno));
189 g_free (filename);
190 return retval;
191 }
192
193 if (fstat (fd, &st) == -1) {
194 g_warning ("Could not fstat abw file '%s': %s\n",
195 filename,
196 g_strerror (errno));
197 close (fd);
198 g_free (filename);
199 return retval;
200 }
201
202 if (st.st_size == 0) {
203 contents = NULL;
204 len = 0;
205 } else {
206 contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
207 if (contents == NULL) {
208 g_warning ("Could not mmap abw file '%s': %s\n",
209 filename,
210 g_strerror (errno));
211 close (fd);
212 g_free (filename);
213 return retval;
214 }
215 len = st.st_size;
216 }
217
218 g_free (filename);
219
220 if (contents) {
221 GError *error = NULL;
222 GMarkupParseContext *context;
223 AbwParserData data = { 0 };
224
225 data.metadata = metadata;
226 data.preupdate = preupdate;
227
228 tracker_sparql_builder_predicate (metadata, "a");
229 tracker_sparql_builder_object (metadata, "nfo:Document");
230
231 context = g_markup_parse_context_new (&parser, 0, &data, NULL);
232 g_markup_parse_context_parse (context, contents, len, &error);
233
234 if (error) {
235 g_warning ("Could not parse abw file: %s\n", error->message);
236 g_error_free (error);
237 } else {
238 if (data.content) {
239 tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
240 tracker_sparql_builder_object_unvalidated (metadata, data.content->str);
241 g_string_free (data.content, TRUE);
242 }
243
244 retval = TRUE;
245 }
246
247 g_markup_parse_context_free (context);
248 }
249
250
251 if (contents) {
252 munmap (contents, len);
253 }
254
255 close (fd);
256
257 return retval;
258 }