X-Git-Url: http://pileus.org/git/?p=grits;a=blobdiff_plain;f=src%2Fdata%2Fgis-http.c;h=b5259e47108e15a92f0f95b44c24d4b19828a873;hp=78251bec02b8230a66f6719f2ba19ffc6d49e4db;hb=e7127ea74717754c48779467aca2f9a1596a99f0;hpb=7d7d6ae0531a85361ce0d8b997d988b653282847 diff --git a/src/data/gis-http.c b/src/data/gis-http.c index 78251be..b5259e4 100644 --- a/src/data/gis-http.c +++ b/src/data/gis-http.c @@ -15,6 +15,16 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +/** + * SECTION:gis-http + * @short_description: Hyper Text Transfer Protocol + * + * #GisHttp is a small wrapper around libsoup to provide data access using the + * Hyper Text Transfer Protocol. Each #GisHttp should be associated with a + * particular server or dataset, all the files downloaded for this dataset will + * be cached together under the user cache directory, typically $HOME/.cache/libgis/ + */ + #include #include #include @@ -22,6 +32,21 @@ #include "gis-http.h" +gchar *_get_cache_path(GisHttp *http, const gchar *local) +{ + return g_build_filename(g_get_user_cache_dir(), PACKAGE, + http->prefix, local, NULL); +} + +/** + * gis_http_new: + * @prefix: The prefix in the cache to store the downloaded files. + * For example: "/nexrad/level2/". + * + * Create a new #GisHttp for the given prefix + * + * Returns: the new #GisHttp + */ GisHttp *gis_http_new(const gchar *prefix) { g_debug("GisHttp: new - %s", prefix); @@ -32,6 +57,12 @@ GisHttp *gis_http_new(const gchar *prefix) return http; } +/** + * gis_http_free: + * @http: the #GisHttp to free + * + * Frees resources used by @http and cancels any pending requests. 
+ */ void gis_http_free(GisHttp *http) { g_debug("GisHttp: free - %s", http->prefix); @@ -42,7 +73,7 @@ void gis_http_free(GisHttp *http) } /* For passing data to the chunck callback */ -struct _cache_info { +struct _CacheInfo { FILE *fp; gchar *path; GisChunkCallback callback; @@ -54,7 +85,7 @@ struct _cache_info { */ static void _chunk_cb(SoupMessage *message, SoupBuffer *chunk, gpointer _info) { - struct _cache_info *info = _info; + struct _CacheInfo *info = _info; if (!SOUP_STATUS_IS_SUCCESSFUL(message->status_code)) { g_warning("GisHttp: _chunk_cb - soup failed with %d", @@ -74,15 +105,27 @@ static void _chunk_cb(SoupMessage *message, SoupBuffer *chunk, gpointer _info) } } +/** + * gis_http_fetch: + * @http: the #GisHttp connection to use + * @uri: the URI to fetch + * @local: the local name to give to the file + * @mode: the update type to use when fetching data + * @callback: callback to call when a chunk of data is received + * @user_data: user data to pass to the callback + * + * Fetch a file from the cache. Whether the file is actually loaded from the + * remote server depends on the value of @mode: GIS_LOCAL never contacts the server and GIS_ONCE only does so when the file is not cached yet. + * + * Returns: The local path to the complete file, or NULL if the transfer ended with an unsuccessful status other than 416 + */ /* TODO: use .part extentions and continue even when using GIS_ONCE */ gchar *gis_http_fetch(GisHttp *http, const gchar *uri, const char *local, GisCacheType mode, GisChunkCallback callback, gpointer user_data) { - g_debug("GisHttp: fetch - %.20s... >> %s/%s mode=%d", + g_debug("GisHttp: fetch - %s... 
>> %s/%s mode=%d", uri, http->prefix, local, mode); - - gchar *path = g_build_filename(g_get_user_cache_dir(), PACKAGE, - http->prefix, local, NULL); + gchar *path = _get_cache_path(http, local); /* Unlink the file if we're refreshing it */ if (mode == GIS_REFRESH) @@ -91,10 +134,14 @@ gchar *gis_http_fetch(GisHttp *http, const gchar *uri, const char *local, /* Do the cache if necessasairy */ if (!(mode == GIS_ONCE && g_file_test(path, G_FILE_TEST_EXISTS)) && mode != GIS_LOCAL) { - g_debug("GisHttp: do_cache - Caching file %s", local); + g_debug("GisHttp: fetch - Caching file %s", local); /* Open the file for writting */ - FILE *fp = fopen_p(path, "a"); + gchar *part = path; + if (!g_file_test(path, G_FILE_TEST_EXISTS)) + part = g_strdup_printf("%s.part", path); + FILE *fp = fopen_p(part, "ab"); + fseek(fp, 0, SEEK_END); // "a" is broken on Windows, twice /* Make temp data */ struct _CacheInfo info = { @@ -112,16 +159,95 @@ gchar *gis_http_fetch(GisHttp *http, const gchar *uri, const char *local, soup_message_headers_set_range(message->request_headers, ftell(fp), -1); soup_session_send_message(http->soup, message); + /* Close file */ + fclose(fp); + if (path != part && SOUP_STATUS_IS_SUCCESSFUL(message->status_code)) { + g_rename(part, path); + g_free(part); + } + /* Finished */ if (message->status_code == 416) { /* Range unsatisfiable, file already complete */ - } else if (!SOUP_STATUS_IS_SUCCESSFUL(message->status_code)) + } else if (!SOUP_STATUS_IS_SUCCESSFUL(message->status_code)) { g_warning("GisHttp: done_cb - error copying file, status=%d\n" "\tsrc=%s\n" "\tdst=%s", message->status_code, uri, path); + return NULL; + } } + /* TODO: free everything.. 
*/ return path; } + +/** + * gis_http_available: + * @http: the #GisHttp connection to use + * @filter: regex that a filename must match to be added to the result + * NOTE(review): passed to g_regex_new() without a NULL check - confirm whether NULL is supported + * @cache: directory below the cache path of @http to scan, or NULL to not search the cache + * @extract: regex used to extract filenames from the page, should match the + * filename as $1, or NULL to use /href="([^"]*)"/ + * @index: URI of the index page, or NULL to not search online + * + * Look through the cache and an HTTP index page for a list of available files. + * The name of each file that matches the filter is prepended to the returned list. The index page is fetched with GIS_REFRESH into a ".index.<random hex>" cache file that is unlinked again afterwards. + * + * The list as well as the strings contained in it should be freed afterwards. NOTE(review): the GDir and both GRegex objects created here are not released, and the results of g_dir_open() and gis_http_fetch() are used unchecked. + * + * Returns: the list of matching filenames + */ +GList *gis_http_available(GisHttp *http, + gchar *filter, gchar *cache, + gchar *extract, gchar *index) +{ + g_debug("GisHttp: available - %s~=%s %s~=%s", + filter, cache, extract, index); + GRegex *filter_re = g_regex_new(filter, 0, 0, NULL); + GList *files = NULL; + + /* Add cached files */ + if (cache) { + const gchar *file; + gchar *path = _get_cache_path(http, cache); + GDir *dir = g_dir_open(path, 0, NULL); + while ((file = g_dir_read_name(dir))) + if (g_regex_match(filter_re, file, 0, NULL)) + files = g_list_prepend(files, g_strdup(file)); + g_free(path); + } + + /* Add online files if online */ + if (index) { + gchar tmp[16]; + g_snprintf(tmp, sizeof(tmp), ".index.%x", g_random_int()); + gchar *path = gis_http_fetch(http, index, tmp, + GIS_REFRESH, NULL, NULL); + gchar *html; + g_file_get_contents(path, &html, NULL, NULL); + + /* Match hrefs by default, this regex is not very accurate */ + GRegex *extract_re = g_regex_new( + extract ?: "href=\"([^\"]*)\"", 0, 0, NULL); + GMatchInfo *info; + g_regex_match(extract_re, html, 0, &info); + while (g_match_info_matches(info)) { + gchar *file = g_match_info_fetch(info, 1); + if (g_regex_match(filter_re, file, 0, NULL)) + files = g_list_prepend(files, file); + else + 
g_free(file); + g_match_info_next(info, NULL); + } + + g_match_info_free(info); + g_unlink(path); + g_free(path); + g_free(html); + } + + return files; +}