1 /*
  2  * Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
  3  *
  4  * This library is free software; you can redistribute it and/or
  5  * modify it under the terms of the GNU General Public
  6  * License as published by the Free Software Foundation; either
  7  * version 2 of the License, or (at your option) any later version.
  8  *
  9  * This library is distributed in the hope that it will be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 12  * General Public License for more details.
 13  *
 14  * You should have received a copy of the GNU General Public
 15  * License along with this library; if not, write to the
 16  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 17  * Boston, MA  02110-1301, USA.
 18  */
 19 
 20 #include "config.h"
 21 
 22 #include <string.h>
 23 #include <locale.h>
 24 
 25 #include <glib.h>
 26 #include <gio/gio.h>
 27 
 28 #include <libtracker-fts/tracker-parser.h>
 29 #include <libtracker-fts/tracker-fts-config.h>
 30 #include <libtracker-common/tracker-common.h>
 31 
 32 static gchar    *text;
 33 static gchar    *filename;
 34 static gboolean  verbose;
 35 
 36 /* Command Line options */
 37 static const GOptionEntry options [] = {
 38 	{
 39 		"verbose", 'v', G_OPTION_FLAG_NO_ARG,
 40 		G_OPTION_ARG_NONE, &verbose,
 41 		"Enable verbose output",
 42 		NULL
 43 	},
 44 	{
 45 		"text", 't', 0,
 46 		G_OPTION_ARG_STRING, &text,
 47 		"Specific text to parse",
 48 		NULL
 49 	},
 50 	{
 51 		"file", 'f', 0,
 52 		G_OPTION_ARG_STRING, &filename,
 53 		"Specific file to parse its contents",
 54 		NULL
 55 	},
 56 	{ NULL }
 57 };
 58 
 59 static gboolean
 60 setup_context (gint argc,
 61                gchar **argv)
 62 {
 63 	GOptionContext *context = NULL;
 64 	GError *error = NULL;
 65 
 66 	/* Setup command line options */
 67 	context = g_option_context_new ("- Test the Tracker FTS parser");
 68 	g_option_context_add_main_entries (context,
 69 	                                   options,
 70 	                                   argv[0]);
 71 
 72 	/* Parse input arguments */
 73 	if (!g_option_context_parse (context,
 74 	                             &argc,
 75 	                             &argv,
 76 	                             &error))
 77 	{
 78 		g_printerr ("%s\nRun '%s --help' to see a full list of available "
 79 		            "command line options.\n",
 80 		            error->message,
 81 		            argv[0]);
 82 		g_error_free (error);
 83 		return FALSE;
 84 	}
 85 
 86 	g_option_context_free (context);
 87 	return TRUE;
 88 }
 89 
 90 static gboolean
 91 load_file_contents (void)
 92 {
 93 	GError *error = NULL;
 94 	GFile *file;
 95 
 96 	file = g_file_new_for_commandline_arg (filename);
 97 	if (!g_file_load_contents (file, NULL, &text, NULL, NULL, &error)) {
 98 		g_printerr ("Error loading file '%s' contents: '%s'\n",
 99 		            filename,
100 		            error->message);
101 		g_error_free (error);
102 		g_object_unref (file);
103 		return FALSE;
104 	}
105 	g_object_unref (file);
106 	return TRUE;
107 }
108 
109 static gboolean
110 run_parsing (void)
111 {
112 	TrackerFTSConfig *config;
113 	TrackerLanguage *language;
114 	TrackerParser *parser;
115 	GTimer *timer;
116 
117 	/* Initialize timing */
118 	timer = g_timer_new ();
119 
120 	/* Read config file */
121 	config = tracker_fts_config_new ();
122 
123 	/* Setup language for parser */
124 	language = tracker_language_new (NULL);
125 	if (!language) {
126 		g_printerr ("Language setup failed!\n");
127 		return FALSE;
128 	}
129 
130 	/* Create the parser */
131 	parser = tracker_parser_new (language);
132 	if (!parser) {
133 		g_printerr ("Parser creation failed!\n");
134 		g_object_unref (language);
135 		return FALSE;
136 	}
137 
138 	/* Reset the parser with our string, reading the current FTS config */
139 	tracker_parser_reset (parser,
140 	                      text,
141 	                      strlen (text),
142 	                      tracker_fts_config_get_max_word_length (config),
143 	                      tracker_fts_config_get_enable_stemmer (config),
144 	                      tracker_fts_config_get_enable_unaccent (config),
145 	                      tracker_fts_config_get_ignore_stop_words (config),
146 	                      TRUE,
147 	                      tracker_fts_config_get_ignore_numbers (config));
148 
149 	/* Loop through all words! */
150 	while (1) {
151 		const gchar *word;
152 		gint position;
153 		gint byte_offset_start;
154 		gint byte_offset_end;
155 		gboolean stop_word;
156 		gint word_length;
157 
158 
159 		/* Process next word */
160 		word = tracker_parser_next (parser,
161 		                            &position,
162 		                            &byte_offset_start,
163 		                            &byte_offset_end,
164 		                            &stop_word,
165 		                            &word_length);
166 
167 		/* Stop loop if no more words */
168 		if (!word) {
169 			break;
170 		}
171 
172 		if (verbose) {
173 			gchar *word_hex;
174 			gchar *original_word;
175 			gchar *original_word_hex;
176 			gint original_word_length;
177 
178 			/* Get original word */
179 			original_word_length = byte_offset_end - byte_offset_start;
180 			original_word = g_malloc (original_word_length + 1);
181 			memcpy (original_word,
182 			        &text[byte_offset_start],
183 			        original_word_length);
184 			original_word[original_word_length] = '\0';
185 
186 			/* Get hex strings */
187 			word_hex = tracker_strhex (word, word_length, ':');
   pointer targets in passing argument 1 of 'tracker_strhex' differ in signedness
   (emitted by gcc)
 188 			original_word_hex = tracker_strhex (original_word,
189 			                                    original_word_length,
190 			                                    ':');
191 
192 			g_print ("WORD at %d [%d,%d] Original: '%s' (%s), "
193 			         "Processed: '%s' (%s) (stop? %s)\n",
194 			         position,
195 			         byte_offset_start,
196 			         byte_offset_end,
197 			         original_word,
198 			         original_word_hex,
199 			         word,
200 			         word_hex,
201 			         stop_word ? "yes" : "no");
202 
203 			g_free (word_hex);
204 			g_free (original_word_hex);
205 			g_free (original_word);
206 		}
207 	}
208 
209 	g_print ("\n----> Parsing finished after '%lf' seconds\n",
210 	         g_timer_elapsed (timer, NULL));
211 
212 	g_timer_destroy (timer);
213 
214 	tracker_parser_free (parser);
215 	g_object_unref (language);
216 	return TRUE;
217 }
218 
219 
220 int
221 main (int argc, char **argv)
222 {
223 	/* Setup locale */
224 	setlocale (LC_ALL, "");
225 
226 	/* Setup context */
227 	if (!setup_context (argc, argv)) {
228 		g_printerr ("Context setup failed... exiting\n");
229 		return -1;
230 	}
231 
232 	/* Either text or file must be given */
233 	if (filename == NULL &&
234 	    text == NULL) {
235 		g_printerr ("Either 'file' or 'text' options should be used\n"
236 		            "Run '%s --help' to see a full list of available "
237 		            "command line options.\n",
238 		            argv[0]);
239 		return -2;
240 	}
241 
242 	/* If required, load file contents */
243 	if (filename != NULL &&
244 	    !load_file_contents ()) {
245 		g_printerr ("Loading file '%s' contents failed... exiting\n",
246 		            filename);
247 		return -3;
248 	}
249 
250 	/* Run the parsing! */
251 	if (!run_parsing ()) {
252 		g_printerr ("Parsing operation failed... exiting\n");
253 		return -4;
254 	}
255 
256 	/* Clean exit */
257 	if (filename)
258 		g_free (text);
259 	return 0;
260 }