WIP: parse both atom and rss feeds
Signed-off-by: William Brawner <me@wbrawner.com>
This commit is contained in:
parent
abe77be5a7
commit
273c060674
3 changed files with 92 additions and 73 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -6,3 +6,4 @@ out/
|
|||
tmp/
|
||||
*.sln
|
||||
*.swp
|
||||
tags
|
||||
|
|
149
src/feader.c
149
src/feader.c
|
@ -10,26 +10,23 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int main(int argc, char** argv) {
|
||||
if (argc < 2) {
|
||||
printf("Please provide the URL of a feed to parse.\n");
|
||||
return 1;
|
||||
} else {
|
||||
printf("Attempting to retrieve XML for URL: %s\n", argv[1]);
|
||||
}
|
||||
|
||||
sqlite3 *db;
|
||||
|
||||
sqlite3* db;
|
||||
setup_database(db);
|
||||
|
||||
fead_xml(argv[1]);
|
||||
|
||||
close_database(db);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void fead_xml(char* url) {
|
||||
xml *x = malloc(sizeof(xml));
|
||||
xml* x = malloc(sizeof(xml));
|
||||
get_xml_ptr(x, url);
|
||||
|
||||
int res = get_xml(x);
|
||||
|
@ -37,13 +34,9 @@ void fead_xml(char* url) {
|
|||
printf("XML data retrieved from server\n");
|
||||
} else {
|
||||
printf("Unable to retrieve XML for URL: %s\n", x->url);
|
||||
printf("%s\n", x->errBuf);
|
||||
}
|
||||
|
||||
parse_xml_items(x);
|
||||
|
||||
print_xml_elements(x);
|
||||
|
||||
cleanup_xml(x);
|
||||
}
|
||||
|
||||
|
@ -54,101 +47,125 @@ void get_xml_ptr(xml* x, char* url) {
|
|||
x->url = url;
|
||||
}
|
||||
|
||||
size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x) {
|
||||
size_t my_write_callback(char* ptr, size_t size, size_t nmemb, xml* x) {
|
||||
int byte_size = (size * nmemb);
|
||||
printf("byte_size: %d\n", byte_size);
|
||||
int new_size = x->size + byte_size;
|
||||
printf("new_size size: %d\n", new_size);
|
||||
x->data = realloc(x->data, new_size + 1);
|
||||
memcpy(x->data + x->size, ptr, byte_size);
|
||||
x->data[new_size] = '\0';
|
||||
x->size = new_size;
|
||||
printf("New xml size: %d\n", x->size);
|
||||
return byte_size;
|
||||
}
|
||||
|
||||
size_t get_xml(xml* x) {
|
||||
CURL *curl = curl_easy_init();
|
||||
CURL* curl = curl_easy_init();
|
||||
|
||||
if (!curl) {
|
||||
printf("Unable to instantiate curl object. Aborting");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_URL, x->url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, *my_write_callback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, my_write_callback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, x);
|
||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, x->errBuf);
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
|
||||
// curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
|
||||
CURLcode res = curl_easy_perform(curl);
|
||||
curl_easy_cleanup(curl);
|
||||
curl_global_cleanup();
|
||||
return res;
|
||||
}
|
||||
|
||||
void parse_xml_items(xml *x) {
|
||||
void parse_xml_items(xml* x) {
|
||||
xmlChar* xChar = xmlCharStrndup(x->data, x->size);
|
||||
x->xdp = xmlReadDoc(
|
||||
xChar,
|
||||
x->url,
|
||||
NULL,
|
||||
XML_PARSE_RECOVER
|
||||
);
|
||||
x->xdp = xmlReadDoc(xChar, x->url, NULL, XML_PARSE_RECOVER);
|
||||
xmlFree(xChar);
|
||||
}
|
||||
|
||||
void parse_entry(xmlNode *root) {
|
||||
|
||||
}
|
||||
|
||||
article * parse_item(xmlNode *item) {
|
||||
printf("Article info:\n");
|
||||
do {
|
||||
if (strncmp("text", item->name, strlen(item->name)) != 0) {
|
||||
if (strlen(item->children->content) > 200) {
|
||||
printf("%s: %ld chars\n", item->name,
|
||||
strlen(item->children->content));
|
||||
} else {
|
||||
printf("%s: %s\n", item->name, item->children->content);
|
||||
}
|
||||
}
|
||||
item = item->next;
|
||||
} while (item != NULL);
|
||||
printf("\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
feed * parse_feed(xmlNode *feed) {
|
||||
while (feed && strcmp(feed->name, "entry") != 0) {
|
||||
feed = feed->next;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
feed * parse_rss(xmlNode *rss) {
|
||||
xmlNode* channel = rss->children->next;
|
||||
xmlNode* channelChild;
|
||||
while (channel && strcmp(channel->name, "channel") != 0) {
|
||||
channel = channel->next;
|
||||
}
|
||||
while (channelChild != NULL) {
|
||||
if (strncmp("title", channelChild->name, strlen(channelChild->name) - 1) == 0) {
|
||||
if (channelChild->children != NULL) {
|
||||
printf("%s: %s\n", channelChild->name, channelChild->children->content);
|
||||
}
|
||||
} else if (strncmp("description", channelChild->name,
|
||||
strlen(channelChild->name)) == 0) {
|
||||
if (channelChild->children != NULL) {
|
||||
printf("%s: %s\n", channelChild->name, channelChild->children->content);
|
||||
}
|
||||
} else if (strncmp("item", channelChild->name,
|
||||
strlen(channelChild->name)) == 0) {
|
||||
article *article = parse_item(channelChild->children);
|
||||
// TODO: Store articles in array to return
|
||||
}
|
||||
channelChild = channelChild->next;
|
||||
}
|
||||
}
|
||||
|
||||
void print_xml_elements(xml* x) {
|
||||
if (x->xdp == NULL || x->xdp->children == NULL) {
|
||||
printf("Unable to parse XML\n");
|
||||
}
|
||||
|
||||
xmlNode* rss = x->xdp->children;
|
||||
xmlNode* channel = rss->children->next;
|
||||
xmlNode* channelChild = channel->children;
|
||||
int articleCount = 0;
|
||||
while (channelChild != NULL) {
|
||||
if (strncmp("title", channelChild->name, strlen(channelChild->name)) == 0) {
|
||||
if (channelChild->children != NULL) {
|
||||
printf("%s: %s\n", channelChild->name, channelChild->children->content);
|
||||
}
|
||||
} else if (strncmp("description", channelChild->name, strlen(channelChild->name)) == 0) {
|
||||
if (channelChild->children != NULL) {
|
||||
printf("%s: %s\n", channelChild->name, channelChild->children->content);
|
||||
}
|
||||
} else if (strncmp("item", channelChild->name, strlen(channelChild->name)) == 0) {
|
||||
articleCount++;
|
||||
xmlNode* itemData = channelChild->children;
|
||||
printf("Article info:\n");
|
||||
do {
|
||||
if (strncmp("text", itemData->name, strlen(itemData->name)) != 0) {
|
||||
if (strlen(itemData->children->content) > 200) {
|
||||
printf("%s: %ld chars\n", itemData->name, strlen(itemData->children->content));
|
||||
} else {
|
||||
printf("%s: %s\n", itemData->name, itemData->children->content);
|
||||
}
|
||||
}
|
||||
itemData = itemData->next;
|
||||
} while (itemData != NULL);
|
||||
printf("\n");
|
||||
}
|
||||
channelChild = channelChild->next;
|
||||
xmlNode* root = x->xdp->children;
|
||||
if (strcmp(root->name, "rss") == 0) {
|
||||
parse_rss(root);
|
||||
} else if (strcmp(root->name, "feed") == 0){
|
||||
parse_feed(root);
|
||||
} else {
|
||||
printf("Unable to parse XML\n");
|
||||
// TODO: Return error
|
||||
return;
|
||||
}
|
||||
|
||||
rss = NULL;
|
||||
channel = NULL;
|
||||
int articleCount = 0;
|
||||
|
||||
printf("Found %d articles in the feed\n", articleCount);
|
||||
root = NULL;
|
||||
printf("Found %d articles in the feed\n", articleCount);
|
||||
}
|
||||
|
||||
void cleanup_xml(xml* x) {
|
||||
free(x->data);
|
||||
free(x->errBuf);
|
||||
xmlFreeDoc(x->xdp);
|
||||
void cleanup_xml(xml* x) {
|
||||
if (!x) return;
|
||||
if (x->data) free(x->data);
|
||||
if (x->xdp) xmlFreeDoc(x->xdp);
|
||||
free(x);
|
||||
}
|
||||
|
||||
void setup_database(sqlite3* handle) {
|
||||
char* init_sql =
|
||||
char* init_sql =
|
||||
#include "schema.sql"
|
||||
;
|
||||
char* err;
|
||||
|
@ -160,6 +177,4 @@ void setup_database(sqlite3* handle) {
|
|||
}
|
||||
}
|
||||
|
||||
void close_database(sqlite3* handle) {
|
||||
sqlite3_close(handle);
|
||||
}
|
||||
void close_database(sqlite3* handle) { sqlite3_close(handle); }
|
||||
|
|
15
src/feader.h
15
src/feader.h
|
@ -8,7 +8,6 @@ typedef struct {
|
|||
int size;
|
||||
long totalSize;
|
||||
char *url;
|
||||
char *errBuf;
|
||||
xmlDocPtr xdp;
|
||||
} xml;
|
||||
|
||||
|
@ -19,11 +18,11 @@ typedef struct {
|
|||
long feed_id;
|
||||
char *author;
|
||||
bool is_favorite;
|
||||
char *featured_image text;
|
||||
char *content text;
|
||||
char *excerpt text;
|
||||
char *featured_image;
|
||||
char *content;
|
||||
char *excerpt;
|
||||
bool is_read;
|
||||
char *published_date date;
|
||||
char *published_date;
|
||||
} article;
|
||||
|
||||
typedef struct {
|
||||
|
@ -33,11 +32,15 @@ typedef struct {
|
|||
char *feed_url;
|
||||
char *icon;
|
||||
bool is_favorite;
|
||||
char *last_polled date;
|
||||
char *last_polled;
|
||||
char *filter_accept;
|
||||
char *filter_reject;
|
||||
} feed;
|
||||
|
||||
typedef struct {
|
||||
|
||||
} feed_with_articles;
|
||||
|
||||
size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x);
|
||||
|
||||
size_t get_xml(xml *x);
|
||||
|
|
Loading…
Reference in a new issue