1 /*
2 * Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 #include "config.h"
21
22 #include <string.h>
23 #include <locale.h>
24
25 #include <glib.h>
26 #include <gio/gio.h>
27
28 #include <libtracker-fts/tracker-parser.h>
29 #include <libtracker-fts/tracker-fts-config.h>
30 #include <libtracker-common/tracker-common.h>
31
32 static gchar *text;
33 static gchar *filename;
34 static gboolean verbose;
35
36 /* Command Line options */
37 static const GOptionEntry options [] = {
38 {
39 "verbose", 'v', G_OPTION_FLAG_NO_ARG,
40 G_OPTION_ARG_NONE, &verbose,
41 "Enable verbose output",
42 NULL
43 },
44 {
45 "text", 't', 0,
46 G_OPTION_ARG_STRING, &text,
47 "Specific text to parse",
48 NULL
49 },
50 {
51 "file", 'f', 0,
52 G_OPTION_ARG_STRING, &filename,
53 "Specific file to parse its contents",
54 NULL
55 },
56 { NULL }
57 };
58
59 static gboolean
60 setup_context (gint argc,
61 gchar **argv)
62 {
63 GOptionContext *context = NULL;
64 GError *error = NULL;
65
66 /* Setup command line options */
67 context = g_option_context_new ("- Test the Tracker FTS parser");
68 g_option_context_add_main_entries (context,
69 options,
70 argv[0]);
71
72 /* Parse input arguments */
73 if (!g_option_context_parse (context,
74 &argc,
75 &argv,
76 &error))
77 {
78 g_printerr ("%s\nRun '%s --help' to see a full list of available "
79 "command line options.\n",
80 error->message,
81 argv[0]);
82 g_error_free (error);
83 return FALSE;
84 }
85
86 g_option_context_free (context);
87 return TRUE;
88 }
89
90 static gboolean
91 load_file_contents (void)
92 {
93 GError *error = NULL;
94 GFile *file;
95
96 file = g_file_new_for_commandline_arg (filename);
97 if (!g_file_load_contents (file, NULL, &text, NULL, NULL, &error)) {
98 g_printerr ("Error loading file '%s' contents: '%s'\n",
99 filename,
100 error->message);
101 g_error_free (error);
102 g_object_unref (file);
103 return FALSE;
104 }
105 g_object_unref (file);
106 return TRUE;
107 }
108
109 static gboolean
110 run_parsing (void)
111 {
112 TrackerFTSConfig *config;
113 TrackerLanguage *language;
114 TrackerParser *parser;
115 GTimer *timer;
116
117 /* Initialize timing */
118 timer = g_timer_new ();
119
120 /* Read config file */
121 config = tracker_fts_config_new ();
122
123 /* Setup language for parser */
124 language = tracker_language_new (NULL);
125 if (!language) {
126 g_printerr ("Language setup failed!\n");
127 return FALSE;
128 }
129
130 /* Create the parser */
131 parser = tracker_parser_new (language);
132 if (!parser) {
133 g_printerr ("Parser creation failed!\n");
134 g_object_unref (language);
135 return FALSE;
136 }
137
138 /* Reset the parser with our string, reading the current FTS config */
139 tracker_parser_reset (parser,
140 text,
141 strlen (text),
142 tracker_fts_config_get_max_word_length (config),
143 tracker_fts_config_get_enable_stemmer (config),
144 tracker_fts_config_get_enable_unaccent (config),
145 tracker_fts_config_get_ignore_stop_words (config),
146 TRUE,
147 tracker_fts_config_get_ignore_numbers (config));
148
149 /* Loop through all words! */
150 while (1) {
151 const gchar *word;
152 gint position;
153 gint byte_offset_start;
154 gint byte_offset_end;
155 gboolean stop_word;
156 gint word_length;
157
158
159 /* Process next word */
160 word = tracker_parser_next (parser,
161 &position,
162 &byte_offset_start,
163 &byte_offset_end,
164 &stop_word,
165 &word_length);
166
167 /* Stop loop if no more words */
168 if (!word) {
169 break;
170 }
171
172 if (verbose) {
173 gchar *word_hex;
174 gchar *original_word;
175 gchar *original_word_hex;
176 gint original_word_length;
177
178 /* Get original word */
179 original_word_length = byte_offset_end - byte_offset_start;
180 original_word = g_malloc (original_word_length + 1);
181 memcpy (original_word,
182 &text[byte_offset_start],
183 original_word_length);
184 original_word[original_word_length] = '\0';
185
186 /* Get hex strings */
187 word_hex = tracker_strhex (word, word_length, ':');
pointer targets in passing argument 1 of 'tracker_strhex' differ in signedness
(emitted by gcc)
188 original_word_hex = tracker_strhex (original_word,
189 original_word_length,
190 ':');
191
192 g_print ("WORD at %d [%d,%d] Original: '%s' (%s), "
193 "Processed: '%s' (%s) (stop? %s)\n",
194 position,
195 byte_offset_start,
196 byte_offset_end,
197 original_word,
198 original_word_hex,
199 word,
200 word_hex,
201 stop_word ? "yes" : "no");
202
203 g_free (word_hex);
204 g_free (original_word_hex);
205 g_free (original_word);
206 }
207 }
208
209 g_print ("\n----> Parsing finished after '%lf' seconds\n",
210 g_timer_elapsed (timer, NULL));
211
212 g_timer_destroy (timer);
213
214 tracker_parser_free (parser);
215 g_object_unref (language);
216 return TRUE;
217 }
218
219
220 int
221 main (int argc, char **argv)
222 {
223 /* Setup locale */
224 setlocale (LC_ALL, "");
225
226 /* Setup context */
227 if (!setup_context (argc, argv)) {
228 g_printerr ("Context setup failed... exiting\n");
229 return -1;
230 }
231
232 /* Either text or file must be given */
233 if (filename == NULL &&
234 text == NULL) {
235 g_printerr ("Either 'file' or 'text' options should be used\n"
236 "Run '%s --help' to see a full list of available "
237 "command line options.\n",
238 argv[0]);
239 return -2;
240 }
241
242 /* If required, load file contents */
243 if (filename != NULL &&
244 !load_file_contents ()) {
245 g_printerr ("Loading file '%s' contents failed... exiting\n",
246 filename);
247 return -3;
248 }
249
250 /* Run the parsing! */
251 if (!run_parsing ()) {
252 g_printerr ("Parsing operation failed... exiting\n");
253 return -4;
254 }
255
256 /* Clean exit */
257 if (filename)
258 g_free (text);
259 return 0;
260 }