WIP: parse both atom and rss feeds

Signed-off-by: William Brawner <me@wbrawner.com>
This commit is contained in:
William Brawner 2020-08-06 17:19:31 -07:00
parent abe77be5a7
commit 273c060674
3 changed files with 92 additions and 73 deletions

1
.gitignore vendored
View file

@ -6,3 +6,4 @@ out/
tmp/
*.sln
*.swp
tags

View file

@ -10,26 +10,23 @@
#include <stdlib.h>
#include <string.h>
/*
 * Program entry point.
 *
 * Expects the URL of a feed as the first command-line argument. Sets up the
 * database, fetches and prints the feed, then closes the database.
 *
 * Returns 1 when no URL argument was supplied, 0 otherwise.
 */
int main(int argc, char** argv) {
  if (argc < 2) {
    printf("Please provide the URL of a feed to parse.\n");
    return 1;
  }
  printf("Attempting to retrieve XML for URL: %s\n", argv[1]);

  /*
   * Start from NULL: setup_database() receives the handle by value, so this
   * variable is never written by it. Left uninitialized, an indeterminate
   * pointer would be read here and later handed to sqlite3_close().
   */
  sqlite3* db = NULL;
  setup_database(db);
  fead_xml(argv[1]);
  close_database(db);
  return 0;
}
void fead_xml(char* url) {
xml *x = malloc(sizeof(xml));
xml* x = malloc(sizeof(xml));
get_xml_ptr(x, url);
int res = get_xml(x);
@ -37,13 +34,9 @@ void fead_xml(char* url) {
printf("XML data retrieved from server\n");
} else {
printf("Unable to retrieve XML for URL: %s\n", x->url);
printf("%s\n", x->errBuf);
}
parse_xml_items(x);
print_xml_elements(x);
cleanup_xml(x);
}
@ -54,101 +47,125 @@ void get_xml_ptr(xml* x, char* url) {
x->url = url;
}
/*
 * Write callback registered with libcurl (CURLOPT_WRITEFUNCTION): appends one
 * received chunk to the buffer in x->data and keeps it NUL-terminated.
 *
 * ptr    start of the received chunk (not NUL-terminated)
 * size   size of one element
 * nmemb  number of elements; the chunk is size * nmemb bytes long
 * x      download state; x->data is grown and x->size updated
 *
 * Returns the number of bytes consumed. When the buffer cannot be grown, the
 * existing buffer is left intact and 0 is returned, which differs from the
 * chunk length and therefore makes libcurl abort the transfer.
 */
size_t my_write_callback(char* ptr, size_t size, size_t nmemb, xml* x) {
  size_t byte_size = size * nmemb;
  printf("byte_size: %zu\n", byte_size);
  int new_size = x->size + (int)byte_size;
  printf("new_size size: %d\n", new_size);

  /* Grow through a temporary so the old buffer is not lost on failure. */
  void* grown = realloc(x->data, (size_t)new_size + 1);
  if (grown == NULL) {
    return 0;
  }
  x->data = grown;

  memcpy(x->data + x->size, ptr, byte_size);
  x->data[new_size] = '\0';
  x->size = new_size;
  printf("New xml size: %d\n", x->size);
  return byte_size;
}
size_t get_xml(xml* x) {
CURL *curl = curl_easy_init();
CURL* curl = curl_easy_init();
if (!curl) {
printf("Unable to instantiate curl object. Aborting");
return 1;
}
}
curl_easy_setopt(curl, CURLOPT_URL, x->url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, *my_write_callback);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, my_write_callback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, x);
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, x->errBuf);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
// curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
CURLcode res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
curl_global_cleanup();
return res;
}
/*
 * Parses the downloaded bytes in x->data (x->size bytes long) into a libxml2
 * document and stores it in x->xdp. x->url is passed as the document URL and
 * XML_PARSE_RECOVER asks the parser to keep going on malformed input.
 *
 * x->xdp is set to NULL when there is nothing to parse or parsing fails.
 */
void parse_xml_items(xml* x) {
  if (x->data == NULL || x->size <= 0) {
    x->xdp = NULL;
    return;
  }
  /* Parse the buffer in place; no intermediate copy of the document needed. */
  x->xdp = xmlReadMemory(x->data, x->size, x->url, NULL, XML_PARSE_RECOVER);
}
/* Placeholder for parsing a single entry node; the body is currently empty. */
void parse_entry(xmlNode *root) {
}
/*
 * Prints the fields found under one feed item.
 *
 * item is the first node of a sibling list (the caller passes the item
 * element's first child). Every sibling that is not named "text" and that has
 * a first child with content is printed as "name: value"; values longer than
 * 200 characters are replaced by their length.
 *
 * Always returns NULL: building an article from the fields is not
 * implemented yet.
 */
article* parse_item(xmlNode* item) {
  printf("Article info:\n");
  for (xmlNode* field = item; field != NULL; field = field->next) {
    const char* name = (const char*)field->name;
    /* Exact match: a prefix comparison would also skip "t", "te", "tex". */
    if (name == NULL || strcmp(name, "text") == 0) {
      continue;
    }
    /* Nodes without a first child or without content have nothing to print. */
    if (field->children == NULL || field->children->content == NULL) {
      continue;
    }
    const char* value = (const char*)field->children->content;
    size_t length = strlen(value);
    if (length > 200) {
      printf("%s: %zu chars\n", name, length);
    } else {
      printf("%s: %s\n", name, value);
    }
  }
  printf("\n");
  return NULL;
}
/*
 * Work-in-progress parser for a document whose root element is named "feed".
 *
 * Walks the given node and its following siblings until a node named "entry"
 * is found or the list ends. Nothing is done with the result yet.
 *
 * Always returns NULL; previously control fell off the end of this non-void
 * function, leaving the return value indeterminate.
 */
feed* parse_feed(xmlNode* feed) {
  while (feed != NULL && strcmp((const char*)feed->name, "entry") != 0) {
    feed = feed->next;
  }
  return NULL;
}
/*
 * Prints the title, description and items of an RSS document.
 *
 * rss is the root element. Its children are searched for the first node
 * named "channel"; that node's children are then visited in order:
 *   - "title" / "description": printed as "name: value" when they have a
 *     first child with content,
 *   - "item": its children are handed to parse_item().
 *
 * Always returns NULL: no feed object is built yet. NULL is also returned
 * when rss is NULL or no channel node exists.
 */
feed* parse_rss(xmlNode* rss) {
  if (rss == NULL) {
    return NULL;
  }

  xmlNode* channel = rss->children;
  while (channel != NULL &&
         (channel->name == NULL ||
          strcmp((const char*)channel->name, "channel") != 0)) {
    channel = channel->next;
  }
  if (channel == NULL) {
    return NULL;
  }

  for (xmlNode* child = channel->children; child != NULL; child = child->next) {
    const char* name = (const char*)child->name;
    if (name == NULL) {
      continue;
    }
    if (strcmp(name, "title") == 0 || strcmp(name, "description") == 0) {
      if (child->children != NULL && child->children->content != NULL) {
        printf("%s: %s\n", name, (const char*)child->children->content);
      }
    } else if (strcmp(name, "item") == 0) {
      /* An item without children has no fields to report. */
      if (child->children != NULL) {
        // TODO: Store articles in array to return
        parse_item(child->children);
      }
    }
  }
  return NULL;
}
void print_xml_elements(xml* x) {
if (x->xdp == NULL || x->xdp->children == NULL) {
printf("Unable to parse XML\n");
}
xmlNode* rss = x->xdp->children;
xmlNode* channel = rss->children->next;
xmlNode* channelChild = channel->children;
int articleCount = 0;
while (channelChild != NULL) {
if (strncmp("title", channelChild->name, strlen(channelChild->name)) == 0) {
if (channelChild->children != NULL) {
printf("%s: %s\n", channelChild->name, channelChild->children->content);
}
} else if (strncmp("description", channelChild->name, strlen(channelChild->name)) == 0) {
if (channelChild->children != NULL) {
printf("%s: %s\n", channelChild->name, channelChild->children->content);
}
} else if (strncmp("item", channelChild->name, strlen(channelChild->name)) == 0) {
articleCount++;
xmlNode* itemData = channelChild->children;
printf("Article info:\n");
do {
if (strncmp("text", itemData->name, strlen(itemData->name)) != 0) {
if (strlen(itemData->children->content) > 200) {
printf("%s: %ld chars\n", itemData->name, strlen(itemData->children->content));
} else {
printf("%s: %s\n", itemData->name, itemData->children->content);
}
}
itemData = itemData->next;
} while (itemData != NULL);
printf("\n");
}
channelChild = channelChild->next;
xmlNode* root = x->xdp->children;
if (strcmp(root->name, "rss") == 0) {
parse_rss(root);
} else if (strcmp(root->name, "feed") == 0){
parse_feed(root);
} else {
printf("Unable to parse XML\n");
// TODO: Return error
return;
}
rss = NULL;
channel = NULL;
int articleCount = 0;
printf("Found %d articles in the feed\n", articleCount);
root = NULL;
printf("Found %d articles in the feed\n", articleCount);
}
/*
 * Releases the download buffer and the parsed document held by x, then x
 * itself. x->url is left untouched. Passing NULL is allowed and does nothing.
 */
void cleanup_xml(xml* x) {
  if (x == NULL) {
    return;
  }
  free(x->data); /* free(NULL) is a no-op, so no guard is needed */
  if (x->xdp != NULL) {
    xmlFreeDoc(x->xdp);
  }
  free(x);
}
void setup_database(sqlite3* handle) {
char* init_sql =
char* init_sql =
#include "schema.sql"
;
char* err;
@ -160,6 +177,4 @@ void setup_database(sqlite3* handle) {
}
}
/*
 * Closes the SQLite connection referred to by handle.
 * The result of sqlite3_close() is deliberately discarded.
 */
void close_database(sqlite3* handle) {
  (void)sqlite3_close(handle);
}

View file

@ -8,7 +8,6 @@ typedef struct {
int size;
long totalSize;
char *url;
char *errBuf;
xmlDocPtr xdp;
} xml;
@ -19,11 +18,11 @@ typedef struct {
long feed_id;
char *author;
bool is_favorite;
char *featured_image text;
char *content text;
char *excerpt text;
char *featured_image;
char *content;
char *excerpt;
bool is_read;
char *published_date date;
char *published_date;
} article;
typedef struct {
@ -33,11 +32,15 @@ typedef struct {
char *feed_url;
char *icon;
bool is_favorite;
char *last_polled date;
char *last_polled;
char *filter_accept;
char *filter_reject;
} feed;
/*
 * Placeholder type with no members yet.
 * NOTE(review): a struct without members is not valid ISO C (it is accepted
 * as a compiler extension) — add the intended fields before relying on it.
 */
typedef struct {
} feed_with_articles;
size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x);
size_t get_xml(xml *x);