aseba: tools.py Source File

Go to the documentation of this file.
00001 #   Aseba - an event-based framework for distributed robot control
00002 #   Copyright (C) 2007--2011:
00003 #           Stephane Magnenat <stephane at magnenat dot net>
00004 #           (http://stephane.magnenat.net)
00005 #           and other contributors, see authors.txt for details
00006 #
00007 #   This program is free software: you can redistribute it and/or modify
00008 #   it under the terms of the GNU Lesser General Public License as published
00009 #   by the Free Software Foundation, version 3 of the License.
00010 #
00011 #   This program is distributed in the hope that it will be useful,
00012 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 #   GNU Lesser General Public License for more details.
00015 #
00016 #   You should have received a copy of the GNU Lesser General Public License
00017 #   along with this program. If not, see <http://www.gnu.org/licenses/>.
00018 
00019 # System lib
00020 import os
00021 import os.path
00022 import sys
00023 import urllib2
00024 import mimetypes
00025 import subprocess
00026 
00027 # Custom lib
00028 import wikidot.debug
00029 import wikidot.structure
00030 from wikidot.urltoname import urltoname
00031 from wikidot.parser import WikidotParser
00032 import wikidot.tex2png
00033 
00034 def __fix_breadcrumbs__(breadcrumbs, toplevel):
00035     # check if toplevel is part of the breadcrumbs,
00036     # and rebase the basecrumb on this level
00037     if toplevel == '':
00038         return breadcrumbs
00039     output = list()
00040     output_enable = False
00041     for bc in breadcrumbs:
00042         if output_enable:
00043             output.append(bc)
00044         if toplevel in bc:
00045             output_enable = True
00046     return output
00047 
00048 def fetchurl(page, offline_name, breadcrumbs = ''):
00049     """Given a wikidot URL, fetch it, convert it and store it locally.
00050 
00051     Inputs:
00052         page:           URL of the wikidot page
00053         offline_name    Local file name for storage purpose
00054         breadcrumbs     (Optional) Name of the root page. If given,
00055                         checks that the breadcrumbs reference this page
00056                         (child page). If not, page is discarded
00057     Output:
00058         Links to other pages / images."""
00059     try:
00060         # Get the page
00061         print >> sys.stderr, "Connecting to {}...".format(page)
00062         response = urllib2.urlopen(page)
00063     except urllib2.HTTPError, e:
00064         print >> sys.stderr, e.code
00065     except urllib2.URLError, e:
00066         print >> sys.stderr, e.reason
00067     else:
00068         retval = dict()
00069         retval['links'] = set()
00070         retval['breadcrumbs'] = list()
00071         # Check MIME type
00072         mime = mimetypes.guess_type(page)[0]
00073         if (mime == None) or ('html' in mime):
00074             # HTML or unknown type
00075             # Convert the wikidot page and check the breakcrumbs
00076             print >> sys.stderr, "Parsing..."
00077             parser = WikidotParser()
00078             parser.feed(response.read())
00079             # Test if the breadcrumbs links to the main page
00080             page_breadcrumbs = parser.get_breadcrumbs()
00081             page_breadcrumbs = __fix_breadcrumbs__(page_breadcrumbs, breadcrumbs)
00082             retval['breadcrumbs'].extend(page_breadcrumbs)
00083             if (breadcrumbs == '') or (len(page_breadcrumbs) > 0):
00084                 # Ok, page valid
00085                 wikidot.structure.insert(parser.get_title(), page, page_breadcrumbs if breadcrumbs != '' else set())
00086                 data = parser.get_doc()
00087                 retval['links'] = parser.get_links()
00088             else:
00089                 # Page is not linked to the main page
00090                 print >> sys.stderr, "*** Page is not part of the documentation. Page skipped."
00091                 return retval
00092         elif ('image' in mime):
00093             # Image
00094             data = response.read()
00095         else:
00096             # Type is not supported
00097             if wikidot.debug.ENABLE_DEBUG == True:
00098                 print >> sys.stderr, "*** This is not a supported type of file. File skipped."
00099             return retval
00100         # Save
00101         print >> sys.stderr, "Saving the result to {}...".format(offline_name)
00102         f = open(offline_name, 'w')
00103         f.write(data)
00104         f.close()
00105         if wikidot.debug.ENABLE_DEBUG == True:
00106             print >> sys.stderr, "***DEBUG: links: ", reval['links']
00107         return retval
00108 
00109 def tidy(directory):
00110     html_files = [x for x in os.listdir(directory) if '.html' in x]
00111     print >> sys.stderr, "\nCleaning HTML files..."
00112     for x in html_files:
00113         filename = os.path.join(directory, x)
00114         print >> sys.stderr, "Processing ", filename
00115         retcode = subprocess.call(["tidy","-config", "wikidot/tidy.config", "-q", "-o", filename, filename])
00116 
00117 def fix_latex(directory):
00118     """Given a directory, convert LaTeX code to PNG images for every HTML file.
00119 
00120     Inputs:
00121         directory:      Source directory
00122 
00123     Output:
00124         No output."""
00125 
00126     html_files = [x for x in os.listdir(directory) if '.html' in x]
00127     print >> sys.stderr, "\nConverting LaTeX equations embedded in HTML files..."
00128     for x in html_files:
00129         filename = os.path.join(directory, x)
00130         print >> sys.stderr, "Processing ", filename
00131         wikidot.tex2png.from_html(filename, os.path.splitext(filename)[0])
00132