tracker-0.16.2/src/tracker-extract/tracker-extract.c

No issues found

  1 /*
  2  * Copyright (C) 2008, Nokia <ivan.frade@nokia.com>
  3  *
  4  * This library is free software; you can redistribute it and/or
  5  * modify it under the terms of the GNU Lesser General Public
  6  * License as published by the Free Software Foundation; either
  7  * version 2.1 of the License, or (at your option) any later version.
  8  *
  9  * This library is distributed in the hope that it will be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 12  * Lesser General Public License for more details.
 13  *
 14  * You should have received a copy of the GNU Lesser General Public
 15  * License along with this library; if not, write to the
 16  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 17  * Boston, MA  02110-1301, USA.
 18  */
 19 
 20 #include "config.h"
 21 
 22 #include <string.h>
 23 #include <unistd.h>
 24 
 25 #include <gmodule.h>
 26 #include <gio/gio.h>
 27 
 28 #include <gio/gunixoutputstream.h>
 29 #include <gio/gunixinputstream.h>
 30 #include <gio/gunixfdlist.h>
 31 
 32 #include <libtracker-common/tracker-common.h>
 33 
 34 #include <libtracker-extract/tracker-extract.h>
 35 
 36 #include "tracker-extract.h"
 37 #include "tracker-main.h"
 38 #include "tracker-marshal.h"
 39 
 40 #ifdef HAVE_LIBSTREAMANALYZER
 41 #include "tracker-topanalyzer.h"
 42 #endif /* HAVE_STREAMANALYZER */
 43 
 44 #ifdef THREAD_ENABLE_TRACE
 45 #warning Main thread traces enabled
 46 #endif /* THREAD_ENABLE_TRACE */
 47 
 48 #define TRACKER_EXTRACT_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), TRACKER_TYPE_EXTRACT, TrackerExtractPrivate))
 49 
 50 extern gboolean debug;
 51 
 52 typedef struct {
 53 	gint extracted_count;
 54 	gint failed_count;
 55 } StatisticsData;
 56 
 57 typedef struct {
 58 	GHashTable *statistics_data;
 59 	GList *running_tasks;
 60 
 61 	/* used to maintain the running tasks
 62 	 * and stats from different threads
 63 	 */
 64 #if GLIB_CHECK_VERSION (2,31,0)
 65 	GMutex task_mutex;
 66 #else
 67 	GMutex *task_mutex;
 68 #endif
 69 
 70 	/* Thread pool for multi-threaded extractors */
 71 	GThreadPool *thread_pool;
 72 
 73 	/* module -> async queue hashtable
 74 	 * for single-threaded extractors
 75 	 */
 76 	GHashTable *single_thread_extractors;
 77 
 78 	gboolean disable_shutdown;
 79 	gboolean force_internal_extractors;
 80 	gboolean disable_summary_on_finalize;
 81 
 82 	gchar *force_module;
 83 
 84 	gint unhandled_count;
 85 } TrackerExtractPrivate;
 86 
 87 typedef struct {
 88 	TrackerExtract *extract;
 89 	GCancellable *cancellable;
 90 	GAsyncResult *res;
 91 	gchar *file;
 92 	gchar *mimetype;
 93 	gchar *graph;
 94 
 95 	TrackerMimetypeInfo *mimetype_handlers;
 96 
 97 	/* to be fed from mimetype_handlers */
 98 	TrackerExtractMetadataFunc cur_func;
 99 	GModule *cur_module;
100 
101 	guint signal_id;
102 	guint success : 1;
103 } TrackerExtractTask;
104 
105 static void tracker_extract_finalize (GObject *object);
106 static void report_statistics        (GObject *object);
107 static gboolean get_metadata         (TrackerExtractTask *task);
108 static gboolean dispatch_task_cb     (TrackerExtractTask *task);
109 
110 
111 G_DEFINE_TYPE(TrackerExtract, tracker_extract, G_TYPE_OBJECT)
112 
113 static void
114 tracker_extract_class_init (TrackerExtractClass *klass)
115 {
116 	GObjectClass *object_class;
117 
118 	object_class = G_OBJECT_CLASS (klass);
119 
120 	object_class->finalize = tracker_extract_finalize;
121 
122 	g_type_class_add_private (object_class, sizeof (TrackerExtractPrivate));
123 }
124 
125 static void
126 statistics_data_free (StatisticsData *data)
127 {
128 	g_slice_free (StatisticsData, data);
129 }
130 
131 static void
132 tracker_extract_init (TrackerExtract *object)
133 {
134 	TrackerExtractPrivate *priv;
135 
136 #ifdef HAVE_LIBSTREAMANALYZER
137 	tracker_topanalyzer_init ();
138 #endif /* HAVE_STREAMANALYZER */
139 
140 	priv = TRACKER_EXTRACT_GET_PRIVATE (object);
141 	priv->statistics_data = g_hash_table_new_full (NULL, NULL, NULL,
142 	                                               (GDestroyNotify) statistics_data_free);
143 	priv->single_thread_extractors = g_hash_table_new (NULL, NULL);
144 	priv->thread_pool = g_thread_pool_new ((GFunc) get_metadata,
145 	                                       NULL, 10, TRUE, NULL);
146 
147 #if GLIB_CHECK_VERSION (2,31,0)
148 	g_mutex_init (&priv->task_mutex);
149 #else
150 	priv->task_mutex = g_mutex_new ();
151 #endif
152 }
153 
154 static void
155 tracker_extract_finalize (GObject *object)
156 {
157 	TrackerExtractPrivate *priv;
158 
159 	priv = TRACKER_EXTRACT_GET_PRIVATE (object);
160 
161 	/* FIXME: Shutdown modules? */
162 
163 	g_hash_table_destroy (priv->single_thread_extractors);
164 	g_thread_pool_free (priv->thread_pool, TRUE, FALSE);
165 
166 	if (!priv->disable_summary_on_finalize) {
167 		report_statistics (object);
168 	}
169 
170 #ifdef HAVE_LIBSTREAMANALYZER
171 	tracker_topanalyzer_shutdown ();
172 #endif /* HAVE_STREAMANALYZER */
173 
174 	g_hash_table_destroy (priv->statistics_data);
175 
176 #if GLIB_CHECK_VERSION (2,31,0)
177 	g_mutex_clear (&priv->task_mutex);
178 #else
179 	g_mutex_free (priv->task_mutex);
180 #endif
181 
182 	G_OBJECT_CLASS (tracker_extract_parent_class)->finalize (object);
183 }
184 
185 static void
186 report_statistics (GObject *object)
187 {
188 	TrackerExtractPrivate *priv;
189 	GHashTableIter iter;
190 	gpointer key, value;
191 
192 	priv = TRACKER_EXTRACT_GET_PRIVATE (object);
193 
194 #if GLIB_CHECK_VERSION (2,31,0)
195 	g_mutex_lock (&priv->task_mutex);
196 #else
197 	g_mutex_lock (priv->task_mutex);
198 #endif
199 
200 	g_message ("--------------------------------------------------");
201 	g_message ("Statistics:");
202 
203 	g_hash_table_iter_init (&iter, priv->statistics_data);
204 
205 	while (g_hash_table_iter_next (&iter, &key, &value)) {
206 		GModule *module = key;
207 		StatisticsData *data = value;
208 
209 		if (data->extracted_count > 0 || data->failed_count > 0) {
210 			const gchar *name, *name_without_path;
211 
212 			name = g_module_name (module);
213 			name_without_path = strrchr (name, G_DIR_SEPARATOR) + 1;
214 
215 			g_message ("    Module:'%s', extracted:%d, failures:%d",
216 			           name_without_path,
217 			           data->extracted_count,
218 			           data->failed_count);
219 		}
220 	}
221 
222 	g_message ("Unhandled files: %d", priv->unhandled_count);
223 
224 	if (priv->unhandled_count == 0 &&
225 	    g_hash_table_size (priv->statistics_data) < 1) {
226 		g_message ("    No files handled");
227 	}
228 
229 	g_message ("--------------------------------------------------");
230 
231 #if GLIB_CHECK_VERSION (2,31,0)
232 	g_mutex_unlock (&priv->task_mutex);
233 #else
234 	g_mutex_unlock (priv->task_mutex);
235 #endif
236 }
237 
238 TrackerExtract *
239 tracker_extract_new (gboolean     disable_shutdown,
240                      gboolean     force_internal_extractors,
241                      const gchar *force_module)
242 {
243 	TrackerExtract *object;
244 	TrackerExtractPrivate *priv;
245 
246 	if (!tracker_extract_module_manager_init ()) {
247 		return NULL;
248 	}
249 
250 	/* Set extractors */
251 	object = g_object_new (TRACKER_TYPE_EXTRACT, NULL);
252 
253 	priv = TRACKER_EXTRACT_GET_PRIVATE (object);
254 
255 	priv->disable_shutdown = disable_shutdown;
256 	priv->force_internal_extractors = force_internal_extractors;
257 	priv->force_module = g_strdup (force_module);
258 
259 	return object;
260 }
261 
262 static void
263 notify_task_finish (TrackerExtractTask *task,
264                     gboolean            success)
265 {
266 	TrackerExtract *extract;
267 	TrackerExtractPrivate *priv;
268 	StatisticsData *stats_data;
269 
270 	extract = task->extract;
271 	priv = TRACKER_EXTRACT_GET_PRIVATE (extract);
272 
273 	/* Reports and ongoing tasks may be
274 	 * accessed from other threads.
275 	 */
276 #if GLIB_CHECK_VERSION (2,31,0)
277 	g_mutex_lock (&priv->task_mutex);
278 #else
279 	g_mutex_lock (priv->task_mutex);
280 #endif
281 
282 	stats_data = g_hash_table_lookup (priv->statistics_data,
283 	                                  task->cur_module);
284 
285 	if (!stats_data) {
286 		stats_data = g_slice_new0 (StatisticsData);
287 		g_hash_table_insert (priv->statistics_data,
288 		                     task->cur_module,
289 		                     stats_data);
290 	}
291 
292 	stats_data->extracted_count++;
293 
294 	if (!success) {
295 		stats_data->failed_count++;
296 	}
297 
298 	priv->running_tasks = g_list_remove (priv->running_tasks, task);
299 
300 #if GLIB_CHECK_VERSION (2,31,0)
301 	g_mutex_unlock (&priv->task_mutex);
302 #else
303 	g_mutex_unlock (priv->task_mutex);
304 #endif
305 }
306 
307 static gboolean
308 get_file_metadata (TrackerExtractTask  *task,
309                    TrackerExtractInfo **info_out)
310 {
311 	TrackerExtractInfo *info;
312 	GFile *file;
313 	gchar *mime_used = NULL;
314 #ifdef HAVE_LIBSTREAMANALYZER
315 	gchar *content_type = NULL;
316 #endif
317 	gint items = 0;
318 
319 	g_debug ("Extracting...");
320 
321 	*info_out = NULL;
322 
323 	file = g_file_new_for_uri (task->file);
324 	info = tracker_extract_info_new (file, task->mimetype, task->graph);
325 	g_object_unref (file);
326 
327 #ifdef HAVE_LIBSTREAMANALYZER
328 	/* FIXME: This entire section is completely broken,
329 	 * it doesn't even build these days. It should be removed or fixed.
330 	 * -mr (05/09/11)
331 	 */
332 	if (!priv->force_internal_extractors) {
333 		g_debug ("  Using libstreamanalyzer...");
334 
335 		tracker_topanalyzer_extract (task->file, statements, &content_type);
336 
337 		if (tracker_sparql_builder_get_length (statements) > 0) {
338 			g_free (content_type);
339 			tracker_sparql_builder_insert_close (statements);
340 
341 			*info_out = info;
342 
343 			return TRUE;
344 		}
345 	} else {
346 		g_debug ("  Using internal extractors ONLY...");
347 	}
348 #endif /* HAVE_LIBSTREAMANALYZER */
349 
350 	if (task->mimetype && *task->mimetype) {
351 		/* We know the mime */
352 		mime_used = g_strdup (task->mimetype);
353 	}
354 #ifdef HAVE_LIBSTREAMANALYZER
355 	else if (content_type && *content_type) {
356 		/* We know the mime from LSA */
357 		mime_used = content_type;
358 		g_strstrip (mime_used);
359 	}
360 #endif /* HAVE_LIBSTREAMANALYZER */
361 	else {
362 		tracker_extract_info_unref (info);
363 		return FALSE;
364 	}
365 
366 	/* Now we have sanity checked everything, actually get the
367 	 * data we need from the extractors.
368 	 */
369 	if (mime_used) {
370 		if (task->cur_func) {
371 			TrackerSparqlBuilder *statements;
372 
373 			g_debug ("  Using %s...", g_module_name (task->cur_module));
374 
375 			(task->cur_func) (info);
376 
377 			statements = tracker_extract_info_get_metadata_builder (info);
378 			items = tracker_sparql_builder_get_length (statements);
379 
380 			if (items > 0) {
381 				tracker_sparql_builder_insert_close (statements);
382 
383 				g_debug ("Done (%d items)", items);
384 
385 				task->success = TRUE;
386 			}
387 		}
388 
389 		g_free (mime_used);
390 	}
391 
392 	if (items == 0) {
393 		g_debug ("No extractor or failed");
394 		tracker_extract_info_unref (info);
395 		info = NULL;
396 	}
397 
398 	*info_out = info;
399 
400 	return (items > 0);
401 }
402 
403 /* This function is called on the thread calling g_cancellable_cancel() */
404 static void
405 task_cancellable_cancelled_cb (GCancellable       *cancellable,
406                                TrackerExtractTask *task)
407 {
408 	TrackerExtractPrivate *priv;
409 	TrackerExtract *extract;
410 
411 	extract = task->extract;
412 	priv = TRACKER_EXTRACT_GET_PRIVATE (extract);
413 
414 #if GLIB_CHECK_VERSION (2,31,0)
415 	g_mutex_lock (&priv->task_mutex);
416 #else
417 	g_mutex_lock (priv->task_mutex);
418 #endif
419 
420 	if (g_list_find (priv->running_tasks, task)) {
421 		g_message ("Cancelled task for '%s' was currently being "
422 		           "processed, _exit()ing immediately",
423 		           task->file);
424 		_exit (0);
425 	}
426 
427 #if GLIB_CHECK_VERSION (2,31,0)
428 	g_mutex_unlock (&priv->task_mutex);
429 #else
430 	g_mutex_unlock (priv->task_mutex);
431 #endif
432 }
433 
434 static TrackerExtractTask *
435 extract_task_new (TrackerExtract *extract,
436                   const gchar    *uri,
437                   const gchar    *mimetype,
438                   const gchar    *graph,
439                   GCancellable   *cancellable,
440                   GAsyncResult   *res,
441                   GError        **error)
442 {
443 	TrackerExtractTask *task;
444 	gchar *mimetype_used;
445 
446 	if (!mimetype || !*mimetype) {
447 		GFile *file;
448 		GFileInfo *info;
449 		GError *internal_error = NULL;
450 
451 		file = g_file_new_for_uri (uri);
452 		info = g_file_query_info (file,
453 		                          G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE,
454 		                          G_FILE_QUERY_INFO_NONE,
455 		                          NULL,
456 		                          &internal_error);
457 
458 		g_object_unref (file);
459 
460 		if (internal_error) {
461 			g_propagate_error (error, internal_error);
462 			return NULL;
463 		}
464 
465 		mimetype_used = g_strdup (g_file_info_get_content_type (info));
466 		g_debug ("Guessing mime type as '%s'", mimetype);
467 		g_object_unref (info);
468 	} else {
469 		mimetype_used = g_strdup (mimetype);
470 	}
471 
472 	task = g_slice_new0 (TrackerExtractTask);
473 	task->cancellable = (cancellable) ? g_object_ref (cancellable) : NULL;
474 	task->res = (res) ? g_object_ref (res) : NULL;
475 	task->file = g_strdup (uri);
476 	task->mimetype = mimetype_used;
477 	task->graph = g_strdup (graph);
478 	task->extract = extract;
479 
480 	if (task->cancellable) {
481 		task->signal_id = g_cancellable_connect (cancellable,
482 		                                         G_CALLBACK (task_cancellable_cancelled_cb),
483 		                                         task, NULL);
484 	}
485 
486 	return task;
487 }
488 
489 static void
490 extract_task_free (TrackerExtractTask *task)
491 {
492 	if (task->cancellable && task->signal_id != 0) {
493 		g_cancellable_disconnect (task->cancellable, task->signal_id);
494 	}
495 
496 	notify_task_finish (task, task->success);
497 
498 	if (task->res) {
499 		g_object_unref (task->res);
500 	}
501 
502 	if (task->cancellable) {
503 		g_object_unref (task->cancellable);
504 	}
505 
506 	if (task->mimetype_handlers) {
507 		tracker_mimetype_info_free (task->mimetype_handlers);
508 	}
509 
510 	g_free (task->graph);
511 	g_free (task->mimetype);
512 	g_free (task->file);
513 
514 	g_slice_free (TrackerExtractTask, task);
515 }
516 
517 static gboolean
518 filter_module (TrackerExtract *extract,
519                GModule        *module)
520 {
521 	TrackerExtractPrivate *priv;
522 	gchar *module_basename, *filter_name;
523 	gboolean filter;
524 
525 	priv = TRACKER_EXTRACT_GET_PRIVATE (extract);
526 
527 	if (!priv->force_module) {
528 		return FALSE;
529 	}
530 
531 	/* Module name is the full path to it */
532 	module_basename = g_path_get_basename (g_module_name (module));
533 
534 	if (g_str_has_prefix (priv->force_module, "lib") &&
535 	    g_str_has_suffix (priv->force_module, "." G_MODULE_SUFFIX)) {
536 		filter_name = g_strdup (priv->force_module);
537 	} else {
538 		filter_name = g_strdup_printf ("libextract-%s.so",
539 		                               priv->force_module);
540 	}
541 
542 	filter = strcmp (module_basename, filter_name) != 0;
543 
544 	if (filter) {
545 		g_debug ("Module filtered out '%s' (due to --force-module='%s')",
546 		         module_basename,
547 		         filter_name);
548 	} else {
549 		g_debug ("Module used '%s' (due to --force-module='%s')",
550 		         module_basename,
551 		         filter_name);
552 	}
553 
554 	g_free (module_basename);
555 	g_free (filter_name);
556 
557 	return filter;
558 }
559 
560 static gboolean
561 get_metadata (TrackerExtractTask *task)
562 {
563 	TrackerExtractInfo *info;
564 	TrackerSparqlBuilder *preupdate, *postupdate, *statements;
565 	gchar *where = NULL;
566 
567 	preupdate = postupdate = statements = NULL;
568 
569 #ifdef THREAD_ENABLE_TRACE
570 	g_debug ("Thread:%p --> File:'%s' - Extracted",
571 	         g_thread_self (),
572 	         task->file);
573 #endif /* THREAD_ENABLE_TRACE */
574 
575 	if (task->cancellable &&
576 	    g_cancellable_is_cancelled (task->cancellable)) {
577 		g_simple_async_result_set_error ((GSimpleAsyncResult *) task->res,
578 		                                 TRACKER_DBUS_ERROR, 0,
579 		                                 "Extraction of '%s' was cancelled",
580 		                                 task->file);
581 
582 		g_simple_async_result_complete_in_idle ((GSimpleAsyncResult *) task->res);
583 		extract_task_free (task);
584 		return FALSE;
585 	}
586 
587 	if (!filter_module (task->extract, task->cur_module) &&
588 	    get_file_metadata (task, &info)) {
589 		g_simple_async_result_set_op_res_gpointer ((GSimpleAsyncResult *) task->res,
590 		                                           info,
591 		                                           (GDestroyNotify) tracker_extract_info_unref);
592 
593 		g_simple_async_result_complete_in_idle ((GSimpleAsyncResult *) task->res);
594 		extract_task_free (task);
595 	} else {
596 		if (preupdate) {
597 			g_object_unref (preupdate);
598 		}
599 
600 		if (postupdate) {
601 			g_object_unref (postupdate);
602 		}
603 
604 		if (statements) {
605 			g_object_unref (statements);
606 		}
607 
608 		g_free (where);
609 
610 		/* Reinject the task into the main thread
611 		 * queue, so the next module kicks in.
612 		 */
613 		g_idle_add ((GSourceFunc) dispatch_task_cb, task);
614 	}
615 
616 	return FALSE;
617 }
618 
619 static void
620 single_thread_get_metadata (GAsyncQueue *queue)
621 {
622 	while (TRUE) {
623 		TrackerExtractTask *task;
624 
625 		task = g_async_queue_pop (queue);
626 		g_message ("Dispatching '%s' in dedicated thread", task->file);
627 		get_metadata (task);
628 	}
629 }
630 
631 /* This function is executed in the main thread, decides the
632  * module that's going to be run for a given task, and dispatches
633  * the task according to the threading strategy of that module.
634  */
635 static gboolean
636 dispatch_task_cb (TrackerExtractTask *task)
637 {
638 	TrackerModuleThreadAwareness thread_awareness;
639 	TrackerExtractPrivate *priv;
640 	GError *error = NULL;
641 	GModule *module;
642 
643 #ifdef THREAD_ENABLE_TRACE
644 	g_debug ("Thread:%p (Main) <-- File:'%s' - Dispatching\n",
645 	         g_thread_self (),
646 	         task->file);
647 #endif /* THREAD_ENABLE_TRACE */
648 
649 	priv = TRACKER_EXTRACT_GET_PRIVATE (task->extract);
650 
651 	if (!task->mimetype) {
652 		error = g_error_new (TRACKER_DBUS_ERROR, 0,
653 		                     "No mimetype for '%s'",
654 		                     task->file);
655 	} else {
656 		if (!task->mimetype_handlers) {
657 			/* First iteration for task, get the mimetype handlers */
658 			task->mimetype_handlers = tracker_extract_module_manager_get_mimetype_handlers (task->mimetype);
659 
660 			if (!task->mimetype_handlers) {
661 				error = g_error_new (TRACKER_DBUS_ERROR, 0,
662 				                     "No mimetype extractor handlers for uri:'%s' and mime:'%s'",
663 				                     task->file, task->mimetype);
664 			}
665 		} else {
666 			/* Any further iteration, should happen rarely if
667 			 * most specific handlers know nothing about the file
668 			 */
669 			g_message ("Trying next extractor for '%s'", task->file);
670 
671 			if (!tracker_mimetype_info_iter_next (task->mimetype_handlers)) {
672 				g_message ("  There's no next extractor");
673 
674 				error = g_error_new (TRACKER_DBUS_ERROR, 0,
675 				                     "Could not get any metadata for uri:'%s' and mime:'%s'",
676 				                     task->file, task->mimetype);
677 			}
678 		}
679 	}
680 
681 	if (error) {
682 		g_simple_async_result_set_from_error ((GSimpleAsyncResult *) task->res, error);
683 		g_simple_async_result_complete_in_idle ((GSimpleAsyncResult *) task->res);
684 		extract_task_free (task);
685 		g_error_free (error);
686 
687 		return FALSE;
688 	}
689 
690 	task->cur_module = module = tracker_mimetype_info_get_module (task->mimetype_handlers, &task->cur_func, &thread_awareness);
691 
692 	if (!module || !task->cur_func) {
693 		g_warning ("Discarding task with no module '%s'", task->file);
694 		priv->unhandled_count++;
695 		return FALSE;
696 	}
697 
698 #if GLIB_CHECK_VERSION (2,31,0)
699 	g_mutex_lock (&priv->task_mutex);
700 	priv->running_tasks = g_list_prepend (priv->running_tasks, task);
701 	g_mutex_unlock (&priv->task_mutex);
702 #else
703 	g_mutex_lock (priv->task_mutex);
704 	priv->running_tasks = g_list_prepend (priv->running_tasks, task);
705 	g_mutex_unlock (priv->task_mutex);
706 #endif
707 
708 	switch (thread_awareness) {
709 	case TRACKER_MODULE_NONE:
710 		/* Error out */
711 		g_simple_async_result_set_error ((GSimpleAsyncResult *) task->res,
712 		                                 TRACKER_DBUS_ERROR, 0,
713 		                                 "Module '%s' initialization failed",
714 		                                 g_module_name (module));
715 		g_simple_async_result_complete_in_idle ((GSimpleAsyncResult *) task->res);
716 		extract_task_free (task);
717 		break;
718 	case TRACKER_MODULE_MAIN_THREAD:
719 		/* Dispatch the task right away in this thread */
720 		g_message ("Dispatching '%s' in main thread", task->file);
721 		get_metadata (task);
722 		break;
723 	case TRACKER_MODULE_SINGLE_THREAD:
724 	{
725 		GAsyncQueue *async_queue;
726 
727 		async_queue = g_hash_table_lookup (priv->single_thread_extractors, module);
728 
729 		if (!async_queue) {
730 			/* No thread created yet for this module, create it
731 			 * together with the async queue used to pass data to it
732 			 */
733 			async_queue = g_async_queue_new ();
734 
735 #if GLIB_CHECK_VERSION (2,31,0)
736 			{
737 				GThread *thread;
738 
739 				thread = g_thread_try_new ("single",
740 				                           (GThreadFunc) single_thread_get_metadata,
741 				                           g_async_queue_ref (async_queue),
742 				                           &error);
743 				if (!thread) {
744 					g_simple_async_result_take_error ((GSimpleAsyncResult *) task->res, error);
745 					g_simple_async_result_complete_in_idle ((GSimpleAsyncResult *) task->res);
746 					extract_task_free (task);
747 					return FALSE;
748 				}
749 				/* We won't join the thread, so just unref it here */
750 				g_object_unref (thread);
751 			}
752 #else
753 			g_thread_create ((GThreadFunc) single_thread_get_metadata,
754 			                 g_async_queue_ref (async_queue),
755 			                 FALSE, &error);
756 
757 			if (error) {
758 				g_simple_async_result_set_from_error ((GSimpleAsyncResult *) task->res, error);
759 				g_simple_async_result_complete_in_idle ((GSimpleAsyncResult *) task->res);
760 				extract_task_free (task);
761 				g_error_free (error);
762 
763 				return FALSE;
764 			}
765 #endif
766 
767 			g_hash_table_insert (priv->single_thread_extractors, module, async_queue);
768 		}
769 
770 		g_async_queue_push (async_queue, task);
771 	}
772 		break;
773 	case TRACKER_MODULE_MULTI_THREAD:
774 		/* Put task in thread pool */
775 		g_message ("Dispatching '%s' in thread pool", task->file);
776 		g_thread_pool_push (priv->thread_pool, task, &error);
777 
778 		if (error) {
779 			g_simple_async_result_set_from_error ((GSimpleAsyncResult *) task->res, error);
780 			g_simple_async_result_complete_in_idle ((GSimpleAsyncResult *) task->res);
781 			extract_task_free (task);
782 			g_error_free (error);
783 
784 			return FALSE;
785 		}
786 
787 		break;
788 	}
789 
790 	return FALSE;
791 }
792 
793 /* This function can be called in any thread */
794 void
795 tracker_extract_file (TrackerExtract      *extract,
796                       const gchar         *file,
797                       const gchar         *mimetype,
798                       const gchar         *graph,
799                       GCancellable        *cancellable,
800                       GAsyncReadyCallback  cb,
801                       gpointer             user_data)
802 {
803 	GSimpleAsyncResult *res;
804 	GError *error = NULL;
805 	TrackerExtractTask *task;
806 
807 	g_return_if_fail (TRACKER_IS_EXTRACT (extract));
808 	g_return_if_fail (file != NULL);
809 	g_return_if_fail (cb != NULL);
810 
811 #ifdef THREAD_ENABLE_TRACE
812 	g_debug ("Thread:%p <-- File:'%s' - Extracting\n",
813 	         g_thread_self (),
814 	         file);
815 #endif /* THREAD_ENABLE_TRACE */
816 
817 	res = g_simple_async_result_new (G_OBJECT (extract), cb, user_data, NULL);
818 
819 	task = extract_task_new (extract, file, mimetype, graph,
820 	                         cancellable, G_ASYNC_RESULT (res), &error);
821 
822 	if (error) {
823 		g_warning ("Could not get mimetype, %s", error->message);
824 		g_simple_async_result_set_from_error (res, error);
825 		g_simple_async_result_complete_in_idle (res);
826 		g_error_free (error);
827 	} else {
828 		g_idle_add ((GSourceFunc) dispatch_task_cb, task);
829 	}
830 
831 	/* Task takes a ref and if this fails, we want to unref anyway */
832 	g_object_unref (res);
833 }
834 
835 void
836 tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
837                                          const gchar    *uri,
838                                          const gchar    *mime)
839 {
840 	GError *error = NULL;
841 	TrackerExtractPrivate *priv;
842 	TrackerExtractTask *task;
843 	TrackerExtractInfo *info;
844 	gboolean no_modules = TRUE;
845 
846 	priv = TRACKER_EXTRACT_GET_PRIVATE (object);
847 	priv->disable_summary_on_finalize = TRUE;
848 
849 	g_return_if_fail (uri != NULL);
850 
851 	task = extract_task_new (object, uri, mime, NULL, NULL, NULL, &error);
852 
853 	if (error) {
854 		g_printerr ("Extraction failed, %s\n", error->message);
855 		g_error_free (error);
856 
857 		return;
858 	}
859 
860 	task->mimetype_handlers = tracker_extract_module_manager_get_mimetype_handlers (task->mimetype);
861 	task->cur_module = tracker_mimetype_info_get_module (task->mimetype_handlers, &task->cur_func, NULL);
862 
863 	while (task->cur_module && task->cur_func) {
864 		if (!filter_module (object, task->cur_module) &&
865 		    get_file_metadata (task, &info)) {
866 			const gchar *preupdate_str, *postupdate_str, *statements_str, *where;
867 			TrackerSparqlBuilder *builder;
868 
869 			no_modules = FALSE;
870 			preupdate_str = statements_str = postupdate_str = NULL;
871 
872 			builder = tracker_extract_info_get_metadata_builder (info);
873 
874 			if (tracker_sparql_builder_get_length (builder) > 0) {
875 				statements_str = tracker_sparql_builder_get_result (builder);
876 			}
877 
878 			builder = tracker_extract_info_get_preupdate_builder (info);
879 
880 			if (tracker_sparql_builder_get_length (builder) > 0) {
881 				preupdate_str = tracker_sparql_builder_get_result (builder);
882 			}
883 
884 			builder = tracker_extract_info_get_postupdate_builder (info);
885 
886 			if (tracker_sparql_builder_get_length (builder) > 0) {
887 				postupdate_str = tracker_sparql_builder_get_result (builder);
888 			}
889 
890 			where = tracker_extract_info_get_where_clause (info);
891 
892 			g_print ("\n");
893 
894 			g_print ("SPARQL pre-update:\n--\n%s--\n\n",
895 			         preupdate_str ? preupdate_str : "");
896 			g_print ("SPARQL item:\n--\n%s--\n\n",
897 			         statements_str ? statements_str : "");
898 			g_print ("SPARQL where clause:\n--\n%s--\n\n",
899 			         where ? where : "");
900 			g_print ("SPARQL post-update:\n--\n%s--\n\n",
901 			         postupdate_str ? postupdate_str : "");
902 
903 			tracker_extract_info_unref (info);
904 			break;
905 		} else {
906 			if (!tracker_mimetype_info_iter_next (task->mimetype_handlers)) {
907 				break;
908 			}
909 
910 			task->cur_module = tracker_mimetype_info_get_module (task->mimetype_handlers,
911 			                                                     &task->cur_func,
912 			                                                     NULL);
913 		}
914 	}
915 
916 	if (no_modules) {
917 		g_print ("No modules found to handle metadata extraction\n\n");
918 	}
919 
920 	extract_task_free (task);
921 }