Add XML parsing

This commit is contained in:
William Brawner 2018-02-08 21:42:23 -06:00
parent 3af94c0e98
commit 4175231648

View file

@ -1,28 +1,81 @@
#include "curl/curl.h"
#include "curl/easy.h"
#include "libxml/parser.h"
#include "libxml/tree.h"
#include "libxml/xmlreader.h"
#include "libxml/xmlstring.h"
#include <stdlib.h>
#include <string.h>
typedef struct {
char* data;
int size;
char* url;
char* errBuf;
} xml;
size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x);
size_t get_xml(xml *x, char *url);
void parse_xml_items(xml *x, xmlDocPtr *doc);
xml x;
xmlDocPtr xdp;
int main(int argc, char **argv) {
xml x;
if (argc < 2) {
printf("Please provide the URL of a feed to parse.\n");
return 1;
} else {
printf("Attempting to retrieve XML for URL: %s\n", argv[1]);
}
x.size = 0;
x.data = malloc(1);
x.data[0] = '\0';
x.url = argv[1];
int res = get_xml(&x, x.url);
// printf("\n%s\n", x.data);
if (res == CURLE_OK) {
printf("XML data retrieved from server\n");
} else {
printf("Unable to retrieve XML for URL: %s\n", x.url);
printf("%s\n", x.errBuf);
}
parse_xml_items(&x, &xdp);
if (xdp == NULL || xdp->children == NULL) {
printf("Unable to parse XML\n");
}
xmlNode* rss = xdp->children;
xmlNode* channel = rss->children->next;
xmlNode* channelChild = channel->children;
int articleCount = 0;
while (channelChild != NULL) {
if (strncmp("title", channelChild->name, strlen(channelChild->name)) == 0) {
if (channelChild->children != NULL) {
printf("%s: %s\n", channelChild->name, channelChild->children->content);
}
} else if (strncmp("description", channelChild->name, strlen(channelChild->name)) == 0) {
if (channelChild->children != NULL) {
printf("%s: %s\n", channelChild->name, channelChild->children->content);
}
} else if (strncmp("item", channelChild->name, strlen(channelChild->name)) == 0) {
articleCount++;
}
channelChild = channelChild->next;
}
printf("Found %d articles in the feed\n", articleCount);
int res = get_xml(&x, "http://127.0.0.1/wbrawner-jekyll/_site/feed.xml");
printf("\n%s\n", x.data);
free(x.data);
return res;
free(xdp);
return 0;
}
size_t my_write_callback(char *ptr, size_t size, size_t nmemb, xml *x) {
@ -48,7 +101,20 @@ size_t get_xml(xml* x, char* url) {
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, *my_write_callback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, x);
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, x->errBuf);
CURLcode res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
return res;
}
}
void parse_xml_items(xml *x, xmlDocPtr *doc) {
xmlChar* xChar = malloc(x->size);
xChar = xmlCharStrndup(x->data, x->size);
(*doc) = xmlReadDoc(
xChar,
x->url,
NULL,
XML_PARSE_RECOVER
);
free(xChar);
}