Pileus Git - grits/blob - src/data/grits-http.c

   1 /*
   2  * Copyright (C) 2009-2010 Andy Spencer <andy753421@gmail.com>
   3  *
   4  * This program is free software: you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation, either version 3 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16  */
  17
  18 /**
  19  * SECTION:grits-http
  20  * @short_description: Hyper Text Transfer Protocol
  21  *
  22  * #GritsHttp is a small wrapper around libsoup to provide data access using
  23  * the Hyper Text Transfer Protocol. Each #GritsHttp should be associated with
  24  * a particular server or dataset, all the files downloaded for this dataset
  25  * will be cached together in $HOME/.cache/grits/
  26  */
  27
  28 #include <config.h>
  29 #include <glib.h>
  30 #include <glib/gstdio.h>
  31 #include <libsoup/soup.h>
  32
  33 #include "grits-http.h"
  34
  35 gchar *_get_cache_path(GritsHttp *http, const gchar *local)
  36 {
  37         return g_build_filename(g_get_user_cache_dir(), PACKAGE,
  38                         http->prefix, local, NULL);
  39 }
  40
  41 /**
  42  * grits_http_new:
  43  * @prefix: The prefix in the cache to store the downloaded files.
  44  *          For example: * "/nexrad/level2/".
  45  *
  46  * Create a new #GritsHttp for the given prefix
  47  *
  48  * Returns: the new #GritsHttp
  49  */
  50 GritsHttp *grits_http_new(const gchar *prefix)
  51 {
  52         g_debug("GritsHttp: new - %s", prefix);
  53         GritsHttp *http = g_new0(GritsHttp, 1);
  54         http->soup = soup_session_sync_new();
  55         http->prefix = g_strdup(prefix);
  56         g_object_set(http->soup, "user-agent", PACKAGE_STRING, NULL);
  57         g_object_set(http->soup, "timeout",    10,             NULL);
  58         return http;
  59 }
  60
  61 /**
  62  * grits_http_free:
  63  * @http: the #GritsHttp to free
  64  *
  65  * Frees resources used by @http and cancels any pending requests.
  66  */
  67 void grits_http_free(GritsHttp *http)
  68 {
  69         g_debug("GritsHttp: free - %s", http->prefix);
  70         soup_session_abort(http->soup);
  71         g_object_unref(http->soup);
  72         g_free(http->prefix);
  73         g_free(http);
  74 }
  75
  76 /* For passing data to the chunck callback */
  77 struct _CacheInfo {
  78         FILE  *fp;
  79         gchar *path;
  80         GritsChunkCallback callback;
  81         gpointer user_data;
  82 };
  83 struct _CacheInfoMain {
  84         gchar *path;
  85         GritsChunkCallback callback;
  86         gpointer user_data;
  87         goffset cur, total;
  88 };
  89
  90 /* call the user callback from the main thread,
  91  * since it's usually UI updates */
  92 static gboolean _chunk_main_cb(gpointer _infomain)
  93 {
  94         struct _CacheInfoMain *infomain = _infomain;
  95         infomain->callback(infomain->path,
  96                         infomain->cur, infomain->total,
  97                         infomain->user_data);
  98         g_free(infomain);
  99         return FALSE;
 100 }
 101
 102 /**
 103  * Append data to the file and call the users callback if they supplied one.
 104  */
 105 static void _chunk_cb(SoupMessage *message, SoupBuffer *chunk, gpointer _info)
 106 {
 107         struct _CacheInfo *info = _info;
 108
 109         if (!SOUP_STATUS_IS_SUCCESSFUL(message->status_code)) {
 110                 g_warning("GritsHttp: _chunk_cb - soup failed with %d",
 111                                 message->status_code);
 112                 return;
 113         }
 114
 115         if (!fwrite(chunk->data, chunk->length, 1, info->fp))
 116                 g_error("GritsHttp: _chunk_cb - Unable to write data");
 117
 118         if (info->callback) {
 119                 struct _CacheInfoMain *infomain = g_new0(struct _CacheInfoMain, 1);
 120                 infomain->path      = info->path;
 121                 infomain->callback  = info->callback;
 122                 infomain->user_data = info->user_data;
 123                 infomain->cur       = ftell(info->fp);
 124                 goffset st=0, end=0;
 125                 soup_message_headers_get_content_range(message->response_headers,
 126                                 &st, &end, &infomain->total);
 127                 g_idle_add(_chunk_main_cb, infomain);
 128         }
 129
 130 }
 131
 132 /**
 133  * grits_http_fetch:
 134  * @http:      the #GritsHttp connection to use
 135  * @uri:       the URI to fetch
 136  * @local:     the local name to give to the file
 137  * @mode:      the update type to use when fetching data
 138  * @callback:  callback to call when a chunk of data is received
 139  * @user_data: user data to pass to the callback
 140  *
 141  * Fetch a file from the cache. Whether the file is actually loaded from the
 142  * remote server depends on the value of @mode.
 143  *
 144  * Returns: The local path to the complete file
 145  */
 146 /* TODO: use .part extentions and continue even when using GRITS_ONCE */
 147 gchar *grits_http_fetch(GritsHttp *http, const gchar *uri, const char *local,
 148                 GritsCacheType mode, GritsChunkCallback callback, gpointer user_data)
 149 {
 150         g_debug("GritsHttp: fetch - %s mode=%d", local, mode);
 151         gchar *path = _get_cache_path(http, local);
 152
 153         /* Unlink the file if we're refreshing it */
 154         if (mode == GRITS_REFRESH)
 155                 g_remove(path);
 156
 157         /* Do the cache if necessasairy */
 158         if (!(mode == GRITS_ONCE && g_file_test(path, G_FILE_TEST_EXISTS)) &&
 159                         mode != GRITS_LOCAL) {
 160                 g_debug("GritsHttp: fetch - Caching file %s", local);
 161
 162                 /* Open the file for writting */
 163                 gchar *part = path;
 164                 if (!g_file_test(path, G_FILE_TEST_EXISTS))
 165                         part = g_strdup_printf("%s.part", path);
 166                 FILE *fp = fopen_p(part, "ab");
 167                 fseek(fp, 0, SEEK_END); // "a" is broken on Windows, twice
 168
 169                 /* Make temp data */
 170                 struct _CacheInfo info = {
 171                         .fp        = fp,
 172                         .path      = path,
 173                         .callback  = callback,
 174                         .user_data = user_data,
 175                 };
 176
 177                 /* Download the file */
 178                 SoupMessage *message = soup_message_new("GET", uri);
 179                 if (message == NULL)
 180                         g_error("message is null, cannot parse uri");
 181                 g_signal_connect(message, "got-chunk", G_CALLBACK(_chunk_cb), &info);
 182                 //if (ftell(fp) > 0)
 183                         soup_message_headers_set_range(message->request_headers, ftell(fp), -1);
 184                 if (mode == GRITS_REFRESH)
 185                         soup_message_headers_replace(message->request_headers,
 186                                         "Cache-Control", "max-age=0");
 187                 soup_session_send_message(http->soup, message);
 188
 189                 /* Close file */
 190                 fclose(fp);
 191                 if (path != part && SOUP_STATUS_IS_SUCCESSFUL(message->status_code)) {
 192                         g_rename(part, path);
 193                         g_free(part);
 194                 }
 195
 196                 /* Finished */
 197                 if (message->status_code == SOUP_STATUS_CANCELLED) {
 198                         return NULL;
 199                 } else if (message->status_code == SOUP_STATUS_REQUESTED_RANGE_NOT_SATISFIABLE) {
 200                         /* Range unsatisfiable, file already complete */
 201                 } else if (!SOUP_STATUS_IS_SUCCESSFUL(message->status_code)) {
 202                         g_warning("GritsHttp: done_cb - error copying file, status=%d\n"
 203                                         "\tsrc=%s\n"
 204                                         "\tdst=%s",
 205                                         message->status_code, uri, path);
 206                         return NULL;
 207                 }
 208         }
 209
 210
 211         /* TODO: free everything.. */
 212         return path;
 213 }
 214
 215 /**
 216  * grits_http_available:
 217  * @http:    the #GritsHttp connection to use
 218  * @filter:  filter used to extract files from the index, or NULL
 219  *           For example: "href=\"([^"]*)\""
 220  * @cache:   path to the local cache, or NULL to not search the cache
 221  * @extract: regex used to extract filenames from the page, should match the
 222  *           filename as $1, or NULL to use /http="([^"])"/
 223  * @index:   path to the index page, or NULL to not search online
 224  *
 225  * Look through the cache and an HTTP index page for a list of available files.
 226  * The name of each file that matches the filter is added to the returned list.
 227  *
 228  * The list as well as the strings contained in it should be freed afterwards.
 229  *
 230  * Returns the list of matching filenames
 231  */
 232 GList *grits_http_available(GritsHttp *http,
 233                 gchar *filter, gchar *cache,
 234                 gchar *extract, gchar *index)
 235 {
 236         g_debug("GritsHttp: available - %s~=%s %s~=%s",
 237                         filter, cache, extract, index);
 238         GRegex *filter_re = g_regex_new(filter, 0, 0, NULL);
 239         GList  *files = NULL;
 240
 241         /* Add cached files */
 242         if (cache) {
 243                 const gchar *file;
 244                 gchar *path = _get_cache_path(http, cache);
 245                 GDir  *dir  = g_dir_open(path, 0, NULL);
 246                 while ((file = g_dir_read_name(dir)))
 247                         if (g_regex_match(filter_re, file, 0, NULL))
 248                                 files = g_list_prepend(files, g_strdup(file));
 249                 g_free(path);
 250                 g_dir_close(dir);
 251         }
 252
 253         /* Add online files if online */
 254         if (index) {
 255                 gchar tmp[32];
 256                 g_snprintf(tmp, sizeof(tmp), ".index.%x", g_random_int());
 257                 gchar *path = grits_http_fetch(http, index, tmp,
 258                                 GRITS_REFRESH, NULL, NULL);
 259                 if (!path)
 260                         return files;
 261                 gchar *html;
 262                 g_file_get_contents(path, &html, NULL, NULL);
 263                 if (!html)
 264                         return files;
 265
 266                 /* Match hrefs by default, this regex is not very accurate */
 267                 GRegex *extract_re = g_regex_new(
 268                                 extract ?: "href=\"([^\"]*)\"", 0, 0, NULL);
 269                 GMatchInfo *info;
 270                 g_regex_match(extract_re, html, 0, &info);
 271                 while (g_match_info_matches(info)) {
 272                         gchar *file = g_match_info_fetch(info, 1);
 273                         if (file) {
 274                                 if (g_regex_match(filter_re, file, 0, NULL))
 275                                         files = g_list_prepend(files, file);
 276                                 else
 277                                         g_free(file);
 278                         }
 279                         g_match_info_next(info, NULL);
 280                 }
 281
 282                 g_regex_unref(extract_re);
 283                 g_match_info_free(info);
 284                 g_unlink(path);
 285                 g_free(path);
 286                 g_free(html);
 287         }
 288
 289         g_regex_unref(filter_re);
 290
 291         return files;
 292 }