00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include <stdio.h>
00031 #include <tidy/tidy.h>
00032 #include <tidy/buffio.h>
00033 #include <curl/curl.h>
00034
00035
00036 uint write_cb(char *in, uint size, uint nmemb, TidyBuffer *out)
00037 {
00038 uint r;
00039 r = size * nmemb;
00040 tidyBufAppend(out, in, r);
00041 return r;
00042 }
00043
00044
00045 void dumpNode(TidyDoc doc, TidyNode tnod, int indent)
00046 {
00047 TidyNode child;
00048 for(child = tidyGetChild(tnod); child; child = tidyGetNext(child) ) {
00049 ctmbstr name = tidyNodeGetName(child);
00050 if(name) {
00051
00052 TidyAttr attr;
00053 printf("%*.*s%s ", indent, indent, "<", name);
00054
00055 for(attr=tidyAttrFirst(child); attr; attr=tidyAttrNext(attr) ) {
00056 printf(tidyAttrName(attr));
00057 tidyAttrValue(attr)?printf("=\"%s\" ",
00058 tidyAttrValue(attr)):printf(" ");
00059 }
00060 printf(">\n");
00061 }
00062 else {
00063
00064 TidyBuffer buf;
00065 tidyBufInit(&buf);
00066 tidyNodeGetText(doc, child, &buf);
00067 printf("%*.*s\n", indent, indent, buf.bp?(char *)buf.bp:"");
00068 tidyBufFree(&buf);
00069 }
00070 dumpNode(doc, child, indent + 4);
00071 }
00072 }
00073
00074
00075 int main(int argc, char **argv)
00076 {
00077 CURL *curl;
00078 char curl_errbuf[CURL_ERROR_SIZE];
00079 TidyDoc tdoc;
00080 TidyBuffer docbuf = {0};
00081 TidyBuffer tidy_errbuf = {0};
00082 int err;
00083 if(argc == 2) {
00084 curl = curl_easy_init();
00085 curl_easy_setopt(curl, CURLOPT_URL, argv[1]);
00086 curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
00087 curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
00088 curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
00089 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
00090
00091 tdoc = tidyCreate();
00092 tidyOptSetBool(tdoc, TidyForceOutput, yes);
00093 tidyOptSetInt(tdoc, TidyWrapLen, 4096);
00094 tidySetErrorBuffer(tdoc, &tidy_errbuf);
00095 tidyBufInit(&docbuf);
00096
00097 curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf);
00098 err=curl_easy_perform(curl);
00099 if(!err) {
00100 err = tidyParseBuffer(tdoc, &docbuf);
00101 if(err >= 0) {
00102 err = tidyCleanAndRepair(tdoc);
00103 if(err >= 0) {
00104 err = tidyRunDiagnostics(tdoc);
00105 if(err >= 0) {
00106 dumpNode(tdoc, tidyGetRoot(tdoc), 0);
00107 fprintf(stderr, "%s\n", tidy_errbuf.bp);
00108 }
00109 }
00110 }
00111 }
00112 else
00113 fprintf(stderr, "%s\n", curl_errbuf);
00114
00115
00116 curl_easy_cleanup(curl);
00117 tidyBufFree(&docbuf);
00118 tidyBufFree(&tidy_errbuf);
00119 tidyRelease(tdoc);
00120 return err;
00121
00122 }
00123 else
00124 printf("usage: %s <url>\n", argv[0]);
00125
00126 return 0;
00127 }