$search
# Aseba - an event-based framework for distributed robot control
# Copyright (C) 2007--2011:
#     Stephane Magnenat <stephane at magnenat dot net>
#     (http://stephane.magnenat.net)
#     and other contributors, see authors.txt for details
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Fix the links of the HTML files of a directory so they point to local files."""

# System lib
import os
import os.path
import sys
import urlparse

# Custom lib
from wikidot.myparser import MyParser
from wikidot.urltoname import urltoname


class FixURL(MyParser):
    """Fix HTML links (as well as images), so they point to
    the local files. If a local file is not available, the full
    link to the remote file is built.

    The list of available local files is given at initialization."""

    # Tags whose link gets rewritten, mapped to the attribute holding it
    _LINK_ATTRIBUTES = {'a': 'href', 'img': 'src'}

    def __init__(self, links, host):
        """Initialization.

        links:  set of available local files (any container supporting
                the 'in' operator works; a set gives fast lookups)
        host:   full path to remote host"""
        MyParser.__init__(self)
        self.local_links = links
        self.remote_host = host
        self.reset()

    def reset(self):
        """Reset the parser and forget the links collected so far."""
        MyParser.reset(self)
        self.local_set = set()      # Set of local links
        self.remote_set = set()     # Set of remote links

    # Public functions
    def get_local_links(self):
        """Return the set of links rewritten to local files since the last reset."""
        return self.local_set

    def get_remote_links(self):
        """Return the set of links rewritten to the remote host since the last reset."""
        return self.remote_set

    # Private functions
    def __is_link_local__(self, link):
        """Private - Tell if a link matches a local file.

        Output:
            True if a local file matches the link
            False otherwise"""
        return urltoname(link) in self.local_links

    def __is_link_toc__(self, link):
        """Private - Tell if the link is part of the Table of Content.

        Output:
            True if the link is of the form #tocXYZ
            False otherwise"""
        return link.startswith('#toc')

    def __fix_link__(self, link):
        """Private - Take a link and convert it,
        either as a local link, or as a link pointing
        to the remote host.

        Table of content anchors (#tocXYZ) and missing values (None)
        are returned unchanged and are not recorded in any set.

        Output:
            The converted link"""
        if link is None:
            # NOTE(review): presumably MyParser reports a valueless attribute
            # (e.g. <a href>) as None, like the standard HTMLParser does.
            # There is nothing to convert, and the string operations below
            # would fail on it.
            return link

        if self.__is_link_toc__(link):
            # don't touch it!
            return link

        if self.__is_link_local__(link):
            # Convert link
            new_link = urltoname(link)
            self.local_set.add(new_link)
        else:
            # Remote link
            new_link = urlparse.urljoin(self.remote_host, link)
            self.remote_set.add(new_link)
        return new_link

    # Inherited functions
    def handle_starttag(self, tag, attrs):
        """Overridden - Parse links and convert them.

        <a> and <img> tags are looked for links. Only the first 'href'
        (resp. 'src') attribute of the tag is converted, in place, before
        the tag is handed over to MyParser."""
        link_attribute = self._LINK_ATTRIBUTES.get(tag)
        if link_attribute is not None:
            for index, attr in enumerate(attrs):
                if attr[0] == link_attribute:
                    attrs[index] = attr[0], self.__fix_link__(attr[1])
                    break

        MyParser.handle_starttag(self, tag, attrs)


def fixurls(directory, base_url):
    """Iterate over the files of a directory, and fix the links to point to
    local files.

    directory:  directory holding the files; its HTML files are rewritten
                in place
    base_url:   remote host used to build the links having no local file

    Progress and the sorted lists of local / remote links are printed
    on stderr."""
    # List all files, then HTML files to be fixed.
    # Only names ending in '.html' are processed, so that files such as
    # 'page.html~' or 'page.html.bak' are not parsed and overwritten.
    files = os.listdir(directory)
    html_files = [x for x in files if x.endswith('.html')]
    # Create the 'fixer'; a set makes each link lookup constant time
    fix = FixURL(set(files), base_url)
    print >> sys.stderr, "\nFixing URLs..."
    local_set = set()
    remote_set = set()
    for x in html_files:
        file_name = os.path.join(directory, x)
        print >> sys.stderr, "Processing ", file_name
        # Parse the file
        with open(file_name, 'r') as f:
            fix.feed(f.read())
        # Build the result *before* opening the file for writing: opening
        # truncates it, and a failure here must not destroy the page
        fixed_doc = fix.get_doc()
        # Write result to the file
        with open(file_name, 'w') as f:
            f.write(fixed_doc)
        # Collect the links, then reset the parser for the next file
        local_set.update(fix.get_local_links())
        remote_set.update(fix.get_remote_links())
        fix.reset()

    print >> sys.stderr, "\nUpdated local URLs: "
    for x in sorted(local_set):
        print >> sys.stderr, " ", x
    print >> sys.stderr, "\nRemote URLs: "
    for x in sorted(remote_set):
        print >> sys.stderr, " ", x