From 273c0606742ddbc242b1e628d106c066dd8b922d Mon Sep 17 00:00:00 2001 From: William Brawner Date: Thu, 6 Aug 2020 17:19:31 -0700 Subject: [PATCH] WIP: parse both atom and rss feeds Signed-off-by: William Brawner --- .gitignore | 1 + src/feader.c | 149 ++++++++++++++++++++++++++++----------------------- src/feader.h | 15 +++--- 3 files changed, 92 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index e69dc83..8b2a1c8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ out/ tmp/ *.sln *.swp +tags diff --git a/src/feader.c b/src/feader.c index ba3afe0..adfb2a8 100755 --- a/src/feader.c +++ b/src/feader.c @@ -10,26 +10,23 @@ #include #include -int main(int argc, char **argv) { +int main(int argc, char** argv) { if (argc < 2) { printf("Please provide the URL of a feed to parse.\n"); return 1; } else { printf("Attempting to retrieve XML for URL: %s\n", argv[1]); } - - sqlite3 *db; + + sqlite3* db; setup_database(db); - fead_xml(argv[1]); - close_database(db); - return 0; } void fead_xml(char* url) { - xml *x = malloc(sizeof(xml)); + xml* x = malloc(sizeof(xml)); get_xml_ptr(x, url); int res = get_xml(x); @@ -37,13 +34,9 @@ void fead_xml(char* url) { printf("XML data retrieved from server\n"); } else { printf("Unable to retrieve XML for URL: %s\n", x->url); - printf("%s\n", x->errBuf); } - parse_xml_items(x); - print_xml_elements(x); - cleanup_xml(x); } @@ -54,101 +47,125 @@ void get_xml_ptr(xml* x, char* url) { x->url = url; } -size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x) { +size_t my_write_callback(char* ptr, size_t size, size_t nmemb, xml* x) { int byte_size = (size * nmemb); - printf("byte_size: %d\n", byte_size); int new_size = x->size + byte_size; - printf("new_size size: %d\n", new_size); x->data = realloc(x->data, new_size + 1); memcpy(x->data + x->size, ptr, byte_size); x->data[new_size] = '\0'; x->size = new_size; - printf("New xml size: %d\n", x->size); return byte_size; } size_t get_xml(xml* x) { - CURL *curl = curl_easy_init(); + CURL* curl = curl_easy_init(); if (!curl) { printf("Unable to instantiate curl object. Aborting"); return 1; - } + } curl_easy_setopt(curl, CURLOPT_URL, x->url); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, *my_write_callback); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, my_write_callback); curl_easy_setopt(curl, CURLOPT_WRITEDATA, x); - curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, x->errBuf); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); + // curl_easy_setopt(curl, CURLOPT_VERBOSE, 1); CURLcode res = curl_easy_perform(curl); curl_easy_cleanup(curl); curl_global_cleanup(); return res; } -void parse_xml_items(xml *x) { +void parse_xml_items(xml* x) { xmlChar* xChar = xmlCharStrndup(x->data, x->size); - x->xdp = xmlReadDoc( - xChar, - x->url, - NULL, - XML_PARSE_RECOVER - ); + x->xdp = xmlReadDoc(xChar, x->url, NULL, XML_PARSE_RECOVER); xmlFree(xChar); } +void parse_entry(xmlNode *root) { + +} + +article * parse_item(xmlNode *item) { + printf("Article info:\n"); + do { + if (strncmp("text", item->name, strlen(item->name)) != 0) { + if (strlen(item->children->content) > 200) { + printf("%s: %ld chars\n", item->name, + strlen(item->children->content)); + } else { + printf("%s: %s\n", item->name, item->children->content); + } + } + item = item->next; + } while (item != NULL); + printf("\n"); + return NULL; +} + +feed * parse_feed(xmlNode *feed) { + while (feed && strcmp(feed->name, "entry") != 0) { + feed = feed->next; + } + +} + +feed * parse_rss(xmlNode *rss) { + xmlNode* channel = rss->children->next; + xmlNode* channelChild; + while (channel && strcmp(channel->name, "channel") != 0) { + channel = channel->next; + } + while (channelChild != NULL) { + if (strncmp("title", channelChild->name, strlen(channelChild->name) - 1) == 0) { + if (channelChild->children != NULL) { + printf("%s: %s\n", channelChild->name, channelChild->children->content); + } + } else if (strncmp("description", channelChild->name, + strlen(channelChild->name)) == 0) { + if (channelChild->children != NULL) { + printf("%s: %s\n", channelChild->name, channelChild->children->content); + } + } else if (strncmp("item", channelChild->name, + strlen(channelChild->name)) == 0) { + article *article = parse_item(channelChild->children); + // TODO: Store articles in array to return + } + channelChild = channelChild->next; + } +} + void print_xml_elements(xml* x) { if (x->xdp == NULL || x->xdp->children == NULL) { printf("Unable to parse XML\n"); } - xmlNode* rss = x->xdp->children; - xmlNode* channel = rss->children->next; - xmlNode* channelChild = channel->children; - int articleCount = 0; - while (channelChild != NULL) { - if (strncmp("title", channelChild->name, strlen(channelChild->name)) == 0) { - if (channelChild->children != NULL) { - printf("%s: %s\n", channelChild->name, channelChild->children->content); - } - } else if (strncmp("description", channelChild->name, strlen(channelChild->name)) == 0) { - if (channelChild->children != NULL) { - printf("%s: %s\n", channelChild->name, channelChild->children->content); - } - } else if (strncmp("item", channelChild->name, strlen(channelChild->name)) == 0) { - articleCount++; - xmlNode* itemData = channelChild->children; - printf("Article info:\n"); - do { - if (strncmp("text", itemData->name, strlen(itemData->name)) != 0) { - if (strlen(itemData->children->content) > 200) { - printf("%s: %ld chars\n", itemData->name, strlen(itemData->children->content)); - } else { - printf("%s: %s\n", itemData->name, itemData->children->content); - } - } - itemData = itemData->next; - } while (itemData != NULL); - printf("\n"); - } - channelChild = channelChild->next; + xmlNode* root = x->xdp->children; + if (strcmp(root->name, "rss") == 0) { + parse_rss(root); + } else if (strcmp(root->name, "feed") == 0){ + parse_feed(root); + } else { + printf("Unable to parse XML\n"); + // TODO: Return error + return; } - rss = NULL; - channel = NULL; + int articleCount = 0; - printf("Found %d articles in the feed\n", articleCount); + root = NULL; + printf("Found %d articles in the feed\n", articleCount); } -void cleanup_xml(xml* x) { - free(x->data); - free(x->errBuf); - xmlFreeDoc(x->xdp); +void cleanup_xml(xml* x) { + if (!x) return; + if (x->data) free(x->data); + if (x->xdp) xmlFreeDoc(x->xdp); free(x); } void setup_database(sqlite3* handle) { - char* init_sql = + char* init_sql = #include "schema.sql" ; char* err; @@ -160,6 +177,4 @@ void setup_database(sqlite3* handle) { } } -void close_database(sqlite3* handle) { - sqlite3_close(handle); -} +void close_database(sqlite3* handle) { sqlite3_close(handle); } diff --git a/src/feader.h b/src/feader.h index eab6f6d..c1602e6 100644 --- a/src/feader.h +++ b/src/feader.h @@ -8,7 +8,6 @@ typedef struct { int size; long totalSize; char *url; - char *errBuf; xmlDocPtr xdp; } xml; @@ -19,11 +18,11 @@ typedef struct { long feed_id; char *author; bool is_favorite; - char *featured_image text; - char *content text; - char *excerpt text; + char *featured_image; + char *content; + char *excerpt; bool is_read; - char *published_date date; + char *published_date; } article; typedef struct { @@ -33,11 +32,15 @@ typedef struct { char *feed_url; char *icon; bool is_favorite; - char *last_polled date; + char *last_polled; char *filter_accept; char *filter_reject; } feed; +typedef struct { + +} feed_with_articles; + size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x); size_t get_xml(xml *x);