Public Member Functions
def __init__
def extract_germandeli_structure
def load_structure_from_file
def log
def process_node
def process_page
def process_product_page
def save_document
def save_product
def start
def trace_structure_node_to_xml
def wait_for_threads
Public Attributes
base_url
data_directory
max_threads
Static Public Attributes
string base_url = "http://www.germandeli.com/"
string data_directory = ""
doc = None
logger = None
Definition at line 68 of file crawler.py.
def crawler.GermanDeliCrawler.__init__(self, data_directory, max_threads = 999999, base_url = "http://www.germandeli.com/")
Definition at line 76 of file crawler.py.
Definition at line 98 of file crawler.py.
Definition at line 136 of file crawler.py.
def crawler.GermanDeliCrawler.log(self, message)
Definition at line 290 of file crawler.py.
def crawler.GermanDeliCrawler.process_node(self, node, data_directory)
Definition at line 153 of file crawler.py.
def crawler.GermanDeliCrawler.process_page(self, page, xml_node, data_directory)
Definition at line 178 of file crawler.py.
def crawler.GermanDeliCrawler.process_product_page(self, product_page)
Definition at line 224 of file crawler.py.
def crawler.GermanDeliCrawler.save_document(self)
Definition at line 141 of file crawler.py.
def crawler.GermanDeliCrawler.save_product(self, product_id, picture_url, data_directory)
Definition at line 279 of file crawler.py.
def crawler.GermanDeliCrawler.start(self)
Definition at line 287 of file crawler.py.
def crawler.GermanDeliCrawler.trace_structure_node_to_xml(self, node, xml_node)
Definition at line 119 of file crawler.py.
def crawler.GermanDeliCrawler.wait_for_threads(self)
Definition at line 213 of file crawler.py.
string crawler.GermanDeliCrawler::base_url = "http://www.germandeli.com/" [static] |
Definition at line 70 of file crawler.py.
Definition at line 76 of file crawler.py.
string crawler.GermanDeliCrawler::data_directory = "" [static] |
Definition at line 71 of file crawler.py.
Definition at line 76 of file crawler.py.
crawler.GermanDeliCrawler::doc = None [static] |
Definition at line 73 of file crawler.py.
crawler.GermanDeliCrawler::logger = None [static] |
Definition at line 74 of file crawler.py.
Definition at line 76 of file crawler.py.