00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <stdio.h>
00034 #include <string.h>
00035 #include <stdlib.h>
00036 #include <string>
00037 #include <curl/curl.h>
00038 #include <libxml/HTMLparser.h>
00039
00040
00041
00042
00043
// COMPARE(a, b): case-insensitive equality test for two C strings.
// Evaluates to non-zero when the strings match ignoring case, zero otherwise.
// MSVC spells the comparison routine _stricmp; other toolchains use strcasecmp.
#if defined(_MSC_VER)
#  define COMPARE(a, b) (_stricmp((a), (b)) == 0)
#else
#  define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
#endif
00049
00050
00051
00052
00053
// Parser state shared by the SAX callbacks.
struct Context
{
  // True while the parser is between an opening and a closing TITLE tag.
  bool addTitle;

  // Text collected from inside the TITLE element so far.
  std::string title;

  // Starts outside any TITLE element with an empty title.
  Context()
    : addTitle(false),
      title()
  {
  }
};
00061
00062
00063
00064
// Registered with the easy handle via CURLOPT_ERRORBUFFER in init(); its
// contents are printed in the failure messages of init() and main().
00065 static char errorBuffer[CURL_ERROR_SIZE];
// Accumulates the downloaded body: init() registers it as CURLOPT_WRITEDATA,
// writer() appends to it, and main() hands it to parseHtml().
00066 static std::string buffer;
00067
00068
00069
00070
00071
// libcurl write callback: appends one received chunk to *writerData.
//
//   data        start of the received bytes (not NUL-terminated)
//   size, nmemb element size and element count; the chunk is size * nmemb bytes
//   writerData  destination string registered through CURLOPT_WRITEDATA
//
// Returns the number of bytes consumed, or 0 when no destination was
// supplied. The return type is size_t because that is what the
// CURLOPT_WRITEFUNCTION contract specifies; returning int narrowed the byte
// count and did not match the callback type libcurl invokes.
static size_t writer(char *data, size_t size, size_t nmemb,
                     std::string *writerData)
{
  if (writerData == NULL)
    return 0;

  const size_t total = size * nmemb;
  writerData->append(data, total);

  return total;
}
00082
00083
00084
00085
00086
00087 static bool init(CURL *&conn, char *url)
00088 {
00089 CURLcode code;
00090
00091 conn = curl_easy_init();
00092
00093 if (conn == NULL)
00094 {
00095 fprintf(stderr, "Failed to create CURL connection\n");
00096
00097 exit(EXIT_FAILURE);
00098 }
00099
00100 code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
00101 if (code != CURLE_OK)
00102 {
00103 fprintf(stderr, "Failed to set error buffer [%d]\n", code);
00104
00105 return false;
00106 }
00107
00108 code = curl_easy_setopt(conn, CURLOPT_URL, url);
00109 if (code != CURLE_OK)
00110 {
00111 fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
00112
00113 return false;
00114 }
00115
00116 code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L);
00117 if (code != CURLE_OK)
00118 {
00119 fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
00120
00121 return false;
00122 }
00123
00124 code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer);
00125 if (code != CURLE_OK)
00126 {
00127 fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
00128
00129 return false;
00130 }
00131
00132 code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer);
00133 if (code != CURLE_OK)
00134 {
00135 fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
00136
00137 return false;
00138 }
00139
00140 return true;
00141 }
00142
00143
00144
00145
00146
00147 static void StartElement(void *voidContext,
00148 const xmlChar *name,
00149 const xmlChar **attributes)
00150 {
00151 Context *context = (Context *)voidContext;
00152
00153 if (COMPARE((char *)name, "TITLE"))
00154 {
00155 context->title = "";
00156 context->addTitle = true;
00157 }
00158 (void) attributes;
00159 }
00160
00161
00162
00163
00164
00165 static void EndElement(void *voidContext,
00166 const xmlChar *name)
00167 {
00168 Context *context = (Context *)voidContext;
00169
00170 if (COMPARE((char *)name, "TITLE"))
00171 context->addTitle = false;
00172 }
00173
00174
00175
00176
00177
00178 static void handleCharacters(Context *context,
00179 const xmlChar *chars,
00180 int length)
00181 {
00182 if (context->addTitle)
00183 context->title.append((char *)chars, length);
00184 }
00185
00186
00187
00188
00189
00190 static void Characters(void *voidContext,
00191 const xmlChar *chars,
00192 int length)
00193 {
00194 Context *context = (Context *)voidContext;
00195
00196 handleCharacters(context, chars, length);
00197 }
00198
00199
00200
00201
00202
00203 static void cdata(void *voidContext,
00204 const xmlChar *chars,
00205 int length)
00206 {
00207 Context *context = (Context *)voidContext;
00208
00209 handleCharacters(context, chars, length);
00210 }
00211
00212
00213
00214
00215
// SAX callback table handed to htmlCreatePushParserCtxt() in parseHtml().
// Only four slots are populated: StartElement, EndElement, Characters and
// cdata; every other listed slot is NULL.
// NOTE(review): the slot labels below follow the member order of libxml2's
// xmlSAXHandler structure as documented upstream -- confirm against the
// installed <libxml/parser.h> before relying on them.
00216 static htmlSAXHandler saxHandler =
00217 {
00218 NULL, // internalSubset
00219 NULL, // isStandalone
00220 NULL, // hasInternalSubset
00221 NULL, // hasExternalSubset
00222 NULL, // resolveEntity
00223 NULL, // getEntity
00224 NULL, // entityDecl
00225 NULL, // notationDecl
00226 NULL, // attributeDecl
00227 NULL, // elementDecl
00228 NULL, // unparsedEntityDecl
00229 NULL, // setDocumentLocator
00230 NULL, // startDocument
00231 NULL, // endDocument
00232 StartElement, // startElement
00233 EndElement, // endElement
00234 NULL, // reference
00235 Characters, // characters
00236 NULL, // ignorableWhitespace
00237 NULL, // processingInstruction
00238 NULL, // comment
00239 NULL, // warning
00240 NULL, // error
00241 NULL, // fatalError
00242 NULL, // getParameterEntity
00243 cdata, // cdataBlock
00244 NULL // externalSubset
00245 };
00246
00247
00248
00249
00250
00251 static void parseHtml(const std::string &html,
00252 std::string &title)
00253 {
00254 htmlParserCtxtPtr ctxt;
00255 Context context;
00256
00257 ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
00258 XML_CHAR_ENCODING_NONE);
00259
00260 htmlParseChunk(ctxt, html.c_str(), html.size(), 0);
00261 htmlParseChunk(ctxt, "", 0, 1);
00262
00263 htmlFreeParserCtxt(ctxt);
00264
00265 title = context.title;
00266 }
00267
00268 int main(int argc, char *argv[])
00269 {
00270 CURL *conn = NULL;
00271 CURLcode code;
00272 std::string title;
00273
00274
00275
00276 if (argc != 2)
00277 {
00278 fprintf(stderr, "Usage: %s <url>\n", argv[0]);
00279
00280 exit(EXIT_FAILURE);
00281 }
00282
00283 curl_global_init(CURL_GLOBAL_DEFAULT);
00284
00285
00286
00287 if (!init(conn, argv[1]))
00288 {
00289 fprintf(stderr, "Connection initializion failed\n");
00290
00291 exit(EXIT_FAILURE);
00292 }
00293
00294
00295
00296 code = curl_easy_perform(conn);
00297 curl_easy_cleanup(conn);
00298
00299 if (code != CURLE_OK)
00300 {
00301 fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
00302
00303 exit(EXIT_FAILURE);
00304 }
00305
00306
00307
00308 parseHtml(buffer, title);
00309
00310
00311
00312 printf("Title: %s\n", title.c_str());
00313
00314 return EXIT_SUCCESS;
00315 }