tracker-0.16.2/src/tracker-extract/tracker-extract-abw.c

No issues found

  1 /*
  2  * Copyright (C) 2007, Jamie McCracken <jamiemcc@gnome.org>
  3  * Copyright (C) 2008, Nokia <ivan.frade@nokia.com>
  4  *
  5  * This library is free software; you can redistribute it and/or
  6  * modify it under the terms of the GNU Lesser General Public
  7  * License as published by the Free Software Foundation; either
  8  * version 2.1 of the License, or (at your option) any later version.
  9  *
 10  * This library is distributed in the hope that it will be useful,
 11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13  * Lesser General Public License for more details.
 14  *
 15  * You should have received a copy of the GNU Lesser General Public
 16  * License along with this library; if not, write to the
 17  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 18  * Boston, MA  02110-1301, USA.
 19  */
 20 
 21 #include "config.h"
 22 
 23 #ifndef _GNU_SOURCE
 24 #define _GNU_SOURCE
 25 #endif
 26 
 27 #include <errno.h>
 28 #include <fcntl.h>
 29 #include <string.h>
 30 #include <sys/types.h>
 31 #include <sys/stat.h>
 32 #include <unistd.h>
 33 #include <sys/mman.h>
 34 
 35 #include <glib.h>
 36 #include <glib/gstdio.h>
 37 
 38 #include <libtracker-common/tracker-file-utils.h>
 39 
 40 #include <libtracker-extract/tracker-extract.h>
 41 
 42 typedef struct AbwParserData AbwParserData;
 43 typedef enum {
 44 	ABW_PARSER_TAG_UNHANDLED,
 45 	ABW_PARSER_TAG_TITLE,
 46 	ABW_PARSER_TAG_SUBJECT,
 47 	ABW_PARSER_TAG_CREATOR,
 48 	ABW_PARSER_TAG_KEYWORDS,
 49 	ABW_PARSER_TAG_DESCRIPTION,
 50 	ABW_PARSER_TAG_GENERATOR
 51 } AbwParserTag;
 52 
 53 struct AbwParserData {
 54 	TrackerSparqlBuilder *metadata;
 55 	TrackerSparqlBuilder *preupdate;
 56 	GString *content;
 57 
 58 	guint cur_tag;
 59 	guint in_text : 1;
 60 };
 61 
 62 static void
 63 abw_parser_start_elem (GMarkupParseContext *context,
 64                        const gchar         *element_name,
 65                        const gchar        **attribute_names,
 66                        const gchar        **attribute_values,
 67                        gpointer             user_data,
 68                        GError             **error)
 69 {
 70 	AbwParserData *data = user_data;
 71 
 72 	if (g_strcmp0 (element_name, "m") == 0 &&
 73 	    g_strcmp0 (attribute_names[0], "key") == 0) {
 74 		if (g_strcmp0 (attribute_values[0], "dc.title") == 0) {
 75 			data->cur_tag = ABW_PARSER_TAG_TITLE;
 76 		} else if (g_strcmp0 (attribute_values[0], "dc.subject") == 0) {
 77 			data->cur_tag = ABW_PARSER_TAG_SUBJECT;
 78 		} else if (g_strcmp0 (attribute_values[0], "dc.creator") == 0) {
 79 			data->cur_tag = ABW_PARSER_TAG_CREATOR;
 80 		} else if (g_strcmp0 (attribute_values[0], "abiword.keywords") == 0) {
 81 			data->cur_tag = ABW_PARSER_TAG_KEYWORDS;
 82 		} else if (g_strcmp0 (attribute_values[0], "dc.description") == 0) {
 83 			data->cur_tag = ABW_PARSER_TAG_DESCRIPTION;
 84 		} else if (g_strcmp0 (attribute_values[0], "abiword.generator") == 0) {
 85 			data->cur_tag = ABW_PARSER_TAG_GENERATOR;
 86 		}
 87 	} else if (g_strcmp0 (element_name, "section") == 0) {
 88 		data->in_text = TRUE;
 89 	}
 90 }
 91 
 92 static void
 93 abw_parser_text (GMarkupParseContext *context,
 94                  const gchar         *text,
 95                  gsize                text_len,
 96                  gpointer             user_data,
 97                  GError             **error)
 98 {
 99 	AbwParserData *data = user_data;
100 	gchar *str;
101 
102 	str = g_strndup (text, text_len);
103 
104 	switch (data->cur_tag) {
105 	case ABW_PARSER_TAG_TITLE:
106 		tracker_sparql_builder_predicate (data->metadata, "nie:title");
107 		tracker_sparql_builder_object_unvalidated (data->metadata, str);
108 		break;
109 	case ABW_PARSER_TAG_SUBJECT:
110 		tracker_sparql_builder_predicate (data->metadata, "nie:subject");
111 		tracker_sparql_builder_object_unvalidated (data->metadata, str);
112 		break;
113 	case ABW_PARSER_TAG_CREATOR:
114 		tracker_sparql_builder_predicate (data->metadata, "nco:creator");
115 
116 		tracker_sparql_builder_object_blank_open (data->metadata);
117 		tracker_sparql_builder_predicate (data->metadata, "a");
118 		tracker_sparql_builder_object (data->metadata, "nco:Contact");
119 
120 		tracker_sparql_builder_predicate (data->metadata, "nco:fullname");
121 		tracker_sparql_builder_object_unvalidated (data->metadata, str);
122 		tracker_sparql_builder_object_blank_close (data->metadata);
123 		break;
124 	case ABW_PARSER_TAG_DESCRIPTION:
125 		tracker_sparql_builder_predicate (data->metadata, "nie:comment");
126 		tracker_sparql_builder_object_unvalidated (data->metadata, str);
127 		break;
128 	case ABW_PARSER_TAG_GENERATOR:
129 		tracker_sparql_builder_predicate (data->metadata, "nie:generator");
130 		tracker_sparql_builder_object_unvalidated (data->metadata, str);
131 		break;
132 	case ABW_PARSER_TAG_KEYWORDS:
133 	{
134 		char *lasts, *keyword;
135 
136 		for (keyword = strtok_r (str, ",; ", &lasts); keyword;
137 		     keyword = strtok_r (NULL, ",; ", &lasts)) {
138 			tracker_sparql_builder_predicate (data->metadata, "nie:keyword");
139 			tracker_sparql_builder_object_unvalidated (data->metadata, keyword);
140 		}
141 	}
142 		break;
143 	default:
144 		break;
145 	}
146 
147 	if (data->in_text) {
148 		if (G_UNLIKELY (!data->content)) {
149 			data->content = g_string_new ("");
150 		}
151 
152 		g_string_append_len (data->content, text, text_len);
153 	}
154 
155 	data->cur_tag = ABW_PARSER_TAG_UNHANDLED;
156 	g_free (str);
157 }
158 
159 static GMarkupParser parser = {
160 	abw_parser_start_elem,
161 	NULL,
162 	abw_parser_text,
163 	NULL, NULL
164 };
165 
166 G_MODULE_EXPORT gboolean
167 tracker_extract_get_metadata (TrackerExtractInfo *info)
168 {
169 	TrackerSparqlBuilder *preupdate, *metadata;
170 	int fd;
171 	gchar *filename, *contents;
172 	gboolean retval = FALSE;
173 	GFile *f;
174 	gsize len;
175 	struct stat st;
176 
177 	preupdate = tracker_extract_info_get_preupdate_builder (info);
178 	metadata = tracker_extract_info_get_metadata_builder (info);
179 
180 	f = tracker_extract_info_get_file (info);
181 	filename = g_file_get_path (f);
182 
183 	fd = tracker_file_open_fd (filename);
184 
185 	if (fd == -1) {
186 		g_warning ("Could not open abw file '%s': %s\n",
187 		           filename,
188 		           g_strerror (errno));
189 		g_free (filename);
190 		return retval;
191 	}
192 
193 	if (fstat (fd, &st) == -1) {
194 		g_warning ("Could not fstat abw file '%s': %s\n",
195 		           filename,
196 		           g_strerror (errno));
197 		close (fd);
198 		g_free (filename);
199 		return retval;
200 	}
201 
202 	if (st.st_size == 0) {
203 		contents = NULL;
204 		len = 0;
205 	} else {
206 		contents = (gchar *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
207 		if (contents == NULL) {
208 			g_warning ("Could not mmap abw file '%s': %s\n",
209 			           filename,
210 			           g_strerror (errno));
211 			close (fd);
212 			g_free (filename);
213 			return retval;
214 		}
215 		len = st.st_size;
216 	}
217 
218 	g_free (filename);
219 
220 	if (contents) {
221 		GError *error = NULL;
222 		GMarkupParseContext *context;
223 		AbwParserData data = { 0 };
224 
225 		data.metadata = metadata;
226 		data.preupdate = preupdate;
227 
228 		tracker_sparql_builder_predicate (metadata, "a");
229 		tracker_sparql_builder_object (metadata, "nfo:Document");
230 
231 		context = g_markup_parse_context_new (&parser, 0, &data, NULL);
232 		g_markup_parse_context_parse (context, contents, len, &error);
233 
234 		if (error) {
235 			g_warning ("Could not parse abw file: %s\n", error->message);
236 			g_error_free (error);
237 		} else {
238 			if (data.content) {
239 				tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
240 				tracker_sparql_builder_object_unvalidated (metadata, data.content->str);
241 				g_string_free (data.content, TRUE);
242 			}
243 
244 			retval = TRUE;
245 		}
246 
247 		g_markup_parse_context_free (context);
248 	}
249 
250 
251 	if (contents) {
252 		munmap (contents, len);
253 	}
254 
255 	close (fd);
256 
257 	return retval;
258 }