static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) { if (!b) b = ""; htmlParserCtxtPtr parser_context = htmlNewParserCtxt(); htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, strlen(d), b, NULL /* encoding */, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR | HTML_PARSE_RECOVER); htmlFreeParserCtxt(parser_context); return document; } static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) { xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document); xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx); xmlXPathFreeContext(xpath_ctx); return nodes; } typedef void (*node_function_t)(xmlNodePtr node, void * data); static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) { xmlNodeSetPtr nodeset = nodes->nodesetval; int i, size = nodeset->nodeNr; for (i = 0; i < size; i++) { xmlNodePtr cur; cur = (xmlNodePtr)nodeset->nodeTab[i]; f(cur, data); } } void printLinkNode(xmlNodePtr node, void * data) { if (node->type == XML_ELEMENT_NODE) { xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href"); if (href) { printf("-> Link to '%s'\n", xmlGetProp(node, BAD_CAST "href")); } } }