crawler::GermanDeliCrawler Class Reference
Public Member Functions
def __init__
def extract_germandeli_structure
def load_structure_from_file
def log
def process_node
def process_page
def process_product_page
def save_document
def save_product
def start
def trace_structure_node_to_xml
def wait_for_threads
Public Attributes
base_url
data_directory
max_threads
Static Public Attributes
string base_url = "http://www.germandeli.com/"
string data_directory = ""
doc = None
logger = None
Definition at line 68 of file crawler.py.
def crawler::GermanDeliCrawler::__init__(self, data_directory, max_threads = 999999, base_url = "http://www.germandeli.com/")
Definition at line 76 of file crawler.py.
def crawler::GermanDeliCrawler::extract_germandeli_structure(self)
Definition at line 98 of file crawler.py.
def crawler::GermanDeliCrawler::load_structure_from_file(self)
Definition at line 136 of file crawler.py.
def crawler::GermanDeliCrawler::log(self, message)
Definition at line 290 of file crawler.py.
def crawler::GermanDeliCrawler::process_node(self, node, data_directory)
Definition at line 153 of file crawler.py.
def crawler::GermanDeliCrawler::process_page(self, page, xml_node, data_directory)
Definition at line 178 of file crawler.py.
def crawler::GermanDeliCrawler::process_product_page(self, product_page)
Definition at line 224 of file crawler.py.
def crawler::GermanDeliCrawler::save_document(self)
Definition at line 141 of file crawler.py.
def crawler::GermanDeliCrawler::save_product(self, product_id, picture_url, data_directory)
Definition at line 279 of file crawler.py.
def crawler::GermanDeliCrawler::start(self)
Definition at line 287 of file crawler.py.
def crawler::GermanDeliCrawler::trace_structure_node_to_xml(self, node, xml_node)
Definition at line 119 of file crawler.py.
def crawler::GermanDeliCrawler::wait_for_threads(self)
Definition at line 213 of file crawler.py.
crawler::GermanDeliCrawler::base_url
Definition at line 77 of file crawler.py.
string crawler::GermanDeliCrawler::base_url = "http://www.germandeli.com/" [static]
Definition at line 70 of file crawler.py.
crawler::GermanDeliCrawler::data_directory
Definition at line 79 of file crawler.py.
string crawler::GermanDeliCrawler::data_directory = "" [static]
Definition at line 71 of file crawler.py.
crawler::GermanDeliCrawler::doc = None [static]
Definition at line 73 of file crawler.py.
crawler::GermanDeliCrawler::logger = None [static]
Definition at line 74 of file crawler.py.
crawler::GermanDeliCrawler::max_threads
Definition at line 78 of file crawler.py.