1 /*
2 * Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 #include <errno.h>
21 #include <string.h>
22
23 #include <glib.h>
24
25 #include <libtracker-common/tracker-file-utils.h>
26
27 #include <gsf/gsf.h>
28 #include <gsf/gsf-infile.h>
29 #include <gsf/gsf-input-stdio.h>
30 #include <gsf/gsf-infile-zip.h>
31
32 #include "tracker-gsf.h"
33
34 /* Size of the buffer to use */
35 #define XML_BUFFER_SIZE 8192 /* bytes */
/* Upper bound on the uncompressed bytes handed to the XML parser; 20 MBytes is assumed to be a safe limit. */
37 #define XML_MAX_BYTES_READ (20u << 20) /* bytes */
38
39 /**
40 * based on find_member() from vsd_utils.c:
41 * http://vsdump.sourcearchive.com/documentation/0.0.44/vsd__utils_8c-source.html
42 */
43 static GsfInput *
44 find_member (GsfInfile *arch,
45 gchar const *name)
46 {
47 gchar const *slash;
48
49 slash = strchr (name, '/');
50
51 if (slash) {
52 gchar *dirname;
53 GsfInput *member;
54
55 dirname = g_strndup (name, slash - name);
56
57 if ((member = gsf_infile_child_by_name (arch, dirname)) != NULL) {
58 GsfInfile *dir;
59
60 dir = GSF_INFILE (member);
61 member = find_member (dir, slash + 1);
62 g_object_unref (dir);
63 }
64
65 g_free (dirname);
66 return member;
67 } else {
68 return gsf_infile_child_by_name (arch, name);
69 }
70 }
71
72 /**
73 * tracker_gsf_parse_xml_in_zip:
74 * @zip_file_uri: URI of the ZIP archive
75 * @xml_filename: Name of the XML file stored inside the ZIP archive
76 * @context: Markup context to be used when parsing the XML
77 *
78 * This function reads and parses the contents of an XML file stored
79 * inside a ZIP compressed archive. Reading and parsing is done buffered, and
80 * maximum size of the uncompressed XML file is limited to be to 20MBytes.
81 */
82 void
83 tracker_gsf_parse_xml_in_zip (const gchar *zip_file_uri,
84 const gchar *xml_filename,
85 GMarkupParseContext *context,
86 GError **err)
87 {
88 gchar *filename;
89 GError *error = NULL;
90 GsfInfile *infile = NULL;
91 GsfInput *src = NULL;
92 GsfInput *member = NULL;
93 FILE *file;
94
95 g_debug ("Parsing '%s' XML file from '%s' zip archive...",
96 xml_filename, zip_file_uri);
97
98 /* Get filename from the given URI */
99 if ((filename = g_filename_from_uri (zip_file_uri,
100 NULL, &error)) == NULL) {
101 g_warning ("Can't get filename from uri '%s': %s",
102 zip_file_uri, error ? error->message : "no error given");
103 } else { /* Create a new Input GSF object for the given file */
104
105 file = tracker_file_open (filename);
106 if (!file) {
107 g_warning ("Can't open file from uri '%s': %s",
108 zip_file_uri, g_strerror (errno));
109 } else if ((src = gsf_input_stdio_new_FILE (filename, file, TRUE)) == NULL) {
110 g_warning ("Failed creating a GSF Input object for '%s': %s",
111 zip_file_uri, error ? error->message : "no error given");
112 }
113 /* Input object is a Zip file */
114 else if ((infile = gsf_infile_zip_new (src, &error)) == NULL) {
115 g_warning ("'%s' Not a zip file: %s",
116 zip_file_uri, error ? error->message : "no error given");
117 }
118 /* Look for requested filename inside the ZIP file */
119 else if ((member = find_member (infile, xml_filename)) == NULL) {
120 g_warning ("No member '%s' in zip file '%s'",
121 xml_filename, zip_file_uri);
122 }
123 /* Load whole contents of the internal file in the xml buffer */
124 else {
125 guint8 buf[XML_BUFFER_SIZE];
126 size_t remaining_size, chunk_size, accum;
127
128 /* Get whole size of the contents to read */
129 remaining_size = (size_t) gsf_input_size (GSF_INPUT (member));
130
131 /* Note that gsf_input_read() needs to be able to read ALL specified
132 * number of bytes, or it will fail */
133 chunk_size = MIN (remaining_size, XML_BUFFER_SIZE);
134
135 accum = 0;
136 while (!error &&
137 accum <= XML_MAX_BYTES_READ &&
138 chunk_size > 0 &&
139 gsf_input_read (GSF_INPUT (member), chunk_size, buf) != NULL) {
140
141 /* update accumulated count */
142 accum += chunk_size;
143
144 /* Pass the read stream to the context parser... */
145 g_markup_parse_context_parse (context, buf, chunk_size, &error);
pointer targets in passing argument 2 of 'g_markup_parse_context_parse' differ in signedness
(emitted by gcc)
146
147 /* update bytes to be read */
148 remaining_size -= chunk_size;
149 chunk_size = MIN (remaining_size, XML_BUFFER_SIZE);
150 }
151 }
152
153 if (file) {
154 tracker_file_close (file, FALSE);
155 }
156 }
157
158 g_free (filename);
159
160 if (error)
161 g_propagate_error (err, error);
162 if (infile)
163 g_object_unref (infile);
164 if (src)
165 g_object_unref (src);
166 if (member)
167 g_object_unref (member);
168 }