WIP: parse both Atom and RSS feeds
Signed-off-by: William Brawner <me@wbrawner.com>
This commit is contained in:
parent
abe77be5a7
commit
273c060674
3 changed files with 92 additions and 73 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -6,3 +6,4 @@ out/
|
||||||
tmp/
|
tmp/
|
||||||
*.sln
|
*.sln
|
||||||
*.swp
|
*.swp
|
||||||
|
tags
|
||||||
|
|
139
src/feader.c
139
src/feader.c
|
@ -10,7 +10,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char** argv) {
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
printf("Please provide the URL of a feed to parse.\n");
|
printf("Please provide the URL of a feed to parse.\n");
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -18,18 +18,15 @@ int main(int argc, char **argv) {
|
||||||
printf("Attempting to retrieve XML for URL: %s\n", argv[1]);
|
printf("Attempting to retrieve XML for URL: %s\n", argv[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
sqlite3 *db;
|
sqlite3* db;
|
||||||
setup_database(db);
|
setup_database(db);
|
||||||
|
|
||||||
fead_xml(argv[1]);
|
fead_xml(argv[1]);
|
||||||
|
|
||||||
close_database(db);
|
close_database(db);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void fead_xml(char* url) {
|
void fead_xml(char* url) {
|
||||||
xml *x = malloc(sizeof(xml));
|
xml* x = malloc(sizeof(xml));
|
||||||
get_xml_ptr(x, url);
|
get_xml_ptr(x, url);
|
||||||
|
|
||||||
int res = get_xml(x);
|
int res = get_xml(x);
|
||||||
|
@ -37,13 +34,9 @@ void fead_xml(char* url) {
|
||||||
printf("XML data retrieved from server\n");
|
printf("XML data retrieved from server\n");
|
||||||
} else {
|
} else {
|
||||||
printf("Unable to retrieve XML for URL: %s\n", x->url);
|
printf("Unable to retrieve XML for URL: %s\n", x->url);
|
||||||
printf("%s\n", x->errBuf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_xml_items(x);
|
parse_xml_items(x);
|
||||||
|
|
||||||
print_xml_elements(x);
|
print_xml_elements(x);
|
||||||
|
|
||||||
cleanup_xml(x);
|
cleanup_xml(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,21 +47,18 @@ void get_xml_ptr(xml* x, char* url) {
|
||||||
x->url = url;
|
x->url = url;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x) {
|
size_t my_write_callback(char* ptr, size_t size, size_t nmemb, xml* x) {
|
||||||
int byte_size = (size * nmemb);
|
int byte_size = (size * nmemb);
|
||||||
printf("byte_size: %d\n", byte_size);
|
|
||||||
int new_size = x->size + byte_size;
|
int new_size = x->size + byte_size;
|
||||||
printf("new_size size: %d\n", new_size);
|
|
||||||
x->data = realloc(x->data, new_size + 1);
|
x->data = realloc(x->data, new_size + 1);
|
||||||
memcpy(x->data + x->size, ptr, byte_size);
|
memcpy(x->data + x->size, ptr, byte_size);
|
||||||
x->data[new_size] = '\0';
|
x->data[new_size] = '\0';
|
||||||
x->size = new_size;
|
x->size = new_size;
|
||||||
printf("New xml size: %d\n", x->size);
|
|
||||||
return byte_size;
|
return byte_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t get_xml(xml* x) {
|
size_t get_xml(xml* x) {
|
||||||
CURL *curl = curl_easy_init();
|
CURL* curl = curl_easy_init();
|
||||||
|
|
||||||
if (!curl) {
|
if (!curl) {
|
||||||
printf("Unable to instantiate curl object. Aborting");
|
printf("Unable to instantiate curl object. Aborting");
|
||||||
|
@ -76,74 +66,101 @@ size_t get_xml(xml* x) {
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, x->url);
|
curl_easy_setopt(curl, CURLOPT_URL, x->url);
|
||||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, *my_write_callback);
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, my_write_callback);
|
||||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, x);
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, x);
|
||||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, x->errBuf);
|
|
||||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
|
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
|
||||||
|
// curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
|
||||||
CURLcode res = curl_easy_perform(curl);
|
CURLcode res = curl_easy_perform(curl);
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
curl_global_cleanup();
|
curl_global_cleanup();
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_xml_items(xml *x) {
|
void parse_xml_items(xml* x) {
|
||||||
xmlChar* xChar = xmlCharStrndup(x->data, x->size);
|
xmlChar* xChar = xmlCharStrndup(x->data, x->size);
|
||||||
x->xdp = xmlReadDoc(
|
x->xdp = xmlReadDoc(xChar, x->url, NULL, XML_PARSE_RECOVER);
|
||||||
xChar,
|
|
||||||
x->url,
|
|
||||||
NULL,
|
|
||||||
XML_PARSE_RECOVER
|
|
||||||
);
|
|
||||||
xmlFree(xChar);
|
xmlFree(xChar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void parse_entry(xmlNode *root) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
article * parse_item(xmlNode *item) {
|
||||||
|
printf("Article info:\n");
|
||||||
|
do {
|
||||||
|
if (strncmp("text", item->name, strlen(item->name)) != 0) {
|
||||||
|
if (strlen(item->children->content) > 200) {
|
||||||
|
printf("%s: %ld chars\n", item->name,
|
||||||
|
strlen(item->children->content));
|
||||||
|
} else {
|
||||||
|
printf("%s: %s\n", item->name, item->children->content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
item = item->next;
|
||||||
|
} while (item != NULL);
|
||||||
|
printf("\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
feed * parse_feed(xmlNode *feed) {
|
||||||
|
while (feed && strcmp(feed->name, "entry") != 0) {
|
||||||
|
feed = feed->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
feed * parse_rss(xmlNode *rss) {
|
||||||
|
xmlNode* channel = rss->children->next;
|
||||||
|
xmlNode* channelChild;
|
||||||
|
while (channel && strcmp(channel->name, "channel") != 0) {
|
||||||
|
channel = channel->next;
|
||||||
|
}
|
||||||
|
while (channelChild != NULL) {
|
||||||
|
if (strncmp("title", channelChild->name, strlen(channelChild->name) - 1) == 0) {
|
||||||
|
if (channelChild->children != NULL) {
|
||||||
|
printf("%s: %s\n", channelChild->name, channelChild->children->content);
|
||||||
|
}
|
||||||
|
} else if (strncmp("description", channelChild->name,
|
||||||
|
strlen(channelChild->name)) == 0) {
|
||||||
|
if (channelChild->children != NULL) {
|
||||||
|
printf("%s: %s\n", channelChild->name, channelChild->children->content);
|
||||||
|
}
|
||||||
|
} else if (strncmp("item", channelChild->name,
|
||||||
|
strlen(channelChild->name)) == 0) {
|
||||||
|
article *article = parse_item(channelChild->children);
|
||||||
|
// TODO: Store articles in array to return
|
||||||
|
}
|
||||||
|
channelChild = channelChild->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void print_xml_elements(xml* x) {
|
void print_xml_elements(xml* x) {
|
||||||
if (x->xdp == NULL || x->xdp->children == NULL) {
|
if (x->xdp == NULL || x->xdp->children == NULL) {
|
||||||
printf("Unable to parse XML\n");
|
printf("Unable to parse XML\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlNode* rss = x->xdp->children;
|
xmlNode* root = x->xdp->children;
|
||||||
xmlNode* channel = rss->children->next;
|
if (strcmp(root->name, "rss") == 0) {
|
||||||
xmlNode* channelChild = channel->children;
|
parse_rss(root);
|
||||||
int articleCount = 0;
|
} else if (strcmp(root->name, "feed") == 0){
|
||||||
while (channelChild != NULL) {
|
parse_feed(root);
|
||||||
if (strncmp("title", channelChild->name, strlen(channelChild->name)) == 0) {
|
} else {
|
||||||
if (channelChild->children != NULL) {
|
printf("Unable to parse XML\n");
|
||||||
printf("%s: %s\n", channelChild->name, channelChild->children->content);
|
// TODO: Return error
|
||||||
}
|
return;
|
||||||
} else if (strncmp("description", channelChild->name, strlen(channelChild->name)) == 0) {
|
|
||||||
if (channelChild->children != NULL) {
|
|
||||||
printf("%s: %s\n", channelChild->name, channelChild->children->content);
|
|
||||||
}
|
|
||||||
} else if (strncmp("item", channelChild->name, strlen(channelChild->name)) == 0) {
|
|
||||||
articleCount++;
|
|
||||||
xmlNode* itemData = channelChild->children;
|
|
||||||
printf("Article info:\n");
|
|
||||||
do {
|
|
||||||
if (strncmp("text", itemData->name, strlen(itemData->name)) != 0) {
|
|
||||||
if (strlen(itemData->children->content) > 200) {
|
|
||||||
printf("%s: %ld chars\n", itemData->name, strlen(itemData->children->content));
|
|
||||||
} else {
|
|
||||||
printf("%s: %s\n", itemData->name, itemData->children->content);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
itemData = itemData->next;
|
|
||||||
} while (itemData != NULL);
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
channelChild = channelChild->next;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rss = NULL;
|
int articleCount = 0;
|
||||||
channel = NULL;
|
|
||||||
|
|
||||||
|
root = NULL;
|
||||||
printf("Found %d articles in the feed\n", articleCount);
|
printf("Found %d articles in the feed\n", articleCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cleanup_xml(xml* x) {
|
void cleanup_xml(xml* x) {
|
||||||
free(x->data);
|
if (!x) return;
|
||||||
free(x->errBuf);
|
if (x->data) free(x->data);
|
||||||
xmlFreeDoc(x->xdp);
|
if (x->xdp) xmlFreeDoc(x->xdp);
|
||||||
free(x);
|
free(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -160,6 +177,4 @@ void setup_database(sqlite3* handle) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void close_database(sqlite3* handle) {
|
void close_database(sqlite3* handle) { sqlite3_close(handle); }
|
||||||
sqlite3_close(handle);
|
|
||||||
}
|
|
||||||
|
|
15
src/feader.h
15
src/feader.h
|
@ -8,7 +8,6 @@ typedef struct {
|
||||||
int size;
|
int size;
|
||||||
long totalSize;
|
long totalSize;
|
||||||
char *url;
|
char *url;
|
||||||
char *errBuf;
|
|
||||||
xmlDocPtr xdp;
|
xmlDocPtr xdp;
|
||||||
} xml;
|
} xml;
|
||||||
|
|
||||||
|
@ -19,11 +18,11 @@ typedef struct {
|
||||||
long feed_id;
|
long feed_id;
|
||||||
char *author;
|
char *author;
|
||||||
bool is_favorite;
|
bool is_favorite;
|
||||||
char *featured_image text;
|
char *featured_image;
|
||||||
char *content text;
|
char *content;
|
||||||
char *excerpt text;
|
char *excerpt;
|
||||||
bool is_read;
|
bool is_read;
|
||||||
char *published_date date;
|
char *published_date;
|
||||||
} article;
|
} article;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -33,11 +32,15 @@ typedef struct {
|
||||||
char *feed_url;
|
char *feed_url;
|
||||||
char *icon;
|
char *icon;
|
||||||
bool is_favorite;
|
bool is_favorite;
|
||||||
char *last_polled date;
|
char *last_polled;
|
||||||
char *filter_accept;
|
char *filter_accept;
|
||||||
char *filter_reject;
|
char *filter_reject;
|
||||||
} feed;
|
} feed;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
|
||||||
|
} feed_with_articles;
|
||||||
|
|
||||||
size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x);
|
size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x);
|
||||||
|
|
||||||
size_t get_xml(xml *x);
|
size_t get_xml(xml *x);
|
||||||
|
|
Loading…
Reference in a new issue