Public Member Functions | |
| def | __init__ |
| def | extract_germandeli_structure |
| def | load_structure_from_file |
| def | log |
| def | process_node |
| def | process_page |
| def | process_product_page |
| def | save_document |
| def | save_product |
| def | start |
| def | trace_structure_node_to_xml |
| def | wait_for_threads |
Public Attributes | |
| base_url | |
| data_directory | |
| max_threads | |
Static Public Attributes | |
| string | base_url = "http://www.germandeli.com/" |
| string | data_directory = "" |
| doc = None | |
| logger = None | |
Definition at line 68 of file crawler.py.
| def crawler.GermanDeliCrawler.__init__ | ( | self, | |
| data_directory, | |||
| max_threads = 999999, | |||
| base_url = "http://www.germandeli.com/" | |||
| ) |
Definition at line 76 of file crawler.py.
Definition at line 98 of file crawler.py (extract_germandeli_structure).
Definition at line 136 of file crawler.py (load_structure_from_file).
| def crawler.GermanDeliCrawler.log | ( | self, | |
| message | |||
| ) |
Definition at line 290 of file crawler.py.
| def crawler.GermanDeliCrawler.process_node | ( | self, | |
| node, | |||
| data_directory | |||
| ) |
Definition at line 153 of file crawler.py.
| def crawler.GermanDeliCrawler.process_page | ( | self, | |
| page, | |||
| xml_node, | |||
| data_directory | |||
| ) |
Definition at line 178 of file crawler.py.
| def crawler.GermanDeliCrawler.process_product_page | ( | self, | |
| product_page | |||
| ) |
Definition at line 224 of file crawler.py.
| def crawler.GermanDeliCrawler.save_document | ( | self | ) |
Definition at line 141 of file crawler.py.
| def crawler.GermanDeliCrawler.save_product | ( | self, | |
| product_id, | |||
| picture_url, | |||
| data_directory | |||
| ) |
Definition at line 279 of file crawler.py.
| def crawler.GermanDeliCrawler.start | ( | self | ) |
Definition at line 287 of file crawler.py.
| def crawler.GermanDeliCrawler.trace_structure_node_to_xml | ( | self, | |
| node, | |||
| xml_node | |||
| ) |
Definition at line 119 of file crawler.py.
| def crawler.GermanDeliCrawler.wait_for_threads | ( | self | ) |
Definition at line 213 of file crawler.py.
string crawler.GermanDeliCrawler::base_url = "http://www.germandeli.com/" [static] |
Definition at line 70 of file crawler.py.
Definition at line 76 of file crawler.py.
string crawler.GermanDeliCrawler::data_directory = "" [static] |
Definition at line 71 of file crawler.py.
Definition at line 76 of file crawler.py.
crawler.GermanDeliCrawler::doc = None [static] |
Definition at line 73 of file crawler.py.
crawler.GermanDeliCrawler::logger = None [static] |
Definition at line 74 of file crawler.py.
Definition at line 76 of file crawler.py.