00001
00002
00003 import sys
00004 import os
00005 import re
00006 import urllib2
00007 import time
00008 from xml.dom.minidom import Document
00009
00010 try:
00011 import xml.etree.ElementTree as ElementTree
00012 except ImportError:
00013 try:
00014 import cElementTree as ElementTree
00015 except ImportError:
00016 try:
00017 import elementtree.ElementTree as ElementTree
00018 except ImportError:
00019 import lxml.etree as ElementTree
00020
00021 missing_deps = False
00022 try:
00023 import json
00024 except ImportError:
00025 try:
00026 import simplejson as json
00027 except ImportError, E:
00028 missing_deps = E
00029
00030 try:
00031 from BeautifulSoup import BeautifulSoup
00032 except ImportError, E:
00033 missing_deps = E
00034
# Name of the generated Atom feed file and the base URL under which the
# examples (and the feed itself) are published.
feedName = "example-list.xml"
feedPath = "http://openlayers.org/dev/examples/"
00037
def getListOfOnlineExamples(baseUrl):
    """
    Return the hrefs of all .html example links listed on the page at
    baseUrl.  Useful if you want to get a list of examples from a url;
    not used by default.
    """
    html = urllib2.urlopen(baseUrl)
    soup = BeautifulSoup(html)
    # Guard against list items without an anchor, and anchors without an
    # href attribute, which would otherwise raise AttributeError.
    anchors = [item.find('a') for item in soup.findAll('li')]
    hrefs = [a.get('href') for a in anchors if a is not None]
    return [href for href in hrefs if href and href.endswith('.html')]
00049
def getListOfExamples(relPath):
    """
    Return the .html filenames found directly under relPath, excluding
    the generated example-list.html itself.
    """
    entries = os.listdir(relPath)
    return [name for name in entries
            if name.endswith('.html') and name != "example-list.html"]
00057
00058
00059 def getExampleHtml(location):
00060 """
00061 returns html of a specific example that is available online or locally
00062 """
00063 print '.',
00064 if location.startswith('http'):
00065 return urllib2.urlopen(location).read()
00066 else:
00067 f = open(location)
00068 html = f.read()
00069 f.close()
00070 return html
00071
00072
def extractById(soup, tagId, value=None):
    """
    Return the rendered contents of the tag with id tagId, stripped and
    with all tabs and newlines removed.  Returns `value` (None by
    default) when the tag is missing or empty.
    """
    tag = soup.find(id=tagId)
    if tag and tag.contents:
        rendered = str(tag.renderContents()).strip()
        value = rendered.replace('\t', '').replace('\n', '')
    return value
00084
def getRelatedClasses(html):
    """
    Parse the html and return a list of all OpenLayers classes invoked
    within it (ie what parts of OL the javascript uses).
    """
    pattern = re.compile(r'''(?P<class>OpenLayers\..*?)\(''')
    return pattern.findall(html)
00092
def parseHtml(html, ids):
    """
    Return a dictionary of items of interest: one entry per tag id in
    `ids` (extracted via extractById) plus a 'classes' entry listing the
    OpenLayers classes the example uses.
    """
    soup = BeautifulSoup(html)
    info = dict((tagId, extractById(soup, tagId)) for tagId in ids)
    info['classes'] = getRelatedClasses(html)
    return info
00105
def getSvnInfo(path):
    """
    Return a dict with the svn 'url', last-commit 'author' and 'date'
    for `path`, parsed from the XML output of `svn info`.
    """
    pipe = os.popen("svn info %s --xml" % path)
    tree = ElementTree.fromstring(pipe.read())
    pipe.close()
    return {
        'url': tree.findtext('entry/url'),
        'author': tree.findtext('entry/commit/author'),
        'date': tree.findtext('entry/commit/date'),
    }
00116
def createFeed(examples):
    """
    Build and return an Atom feed (xml.dom.minidom Document) for the
    given example dicts, newest first by modification date.  Each
    example dict must provide 'example', 'title', 'tags', 'shortdesc',
    'modified' and 'author' keys.
    """
    doc = Document()
    atomuri = "http://www.w3.org/2005/Atom"
    feed = doc.createElementNS(atomuri, "feed")
    feed.setAttribute("xmlns", atomuri)

    title = doc.createElementNS(atomuri, "title")
    title.appendChild(doc.createTextNode("OpenLayers Examples"))
    feed.appendChild(title)

    # rel="self" link; the original built this element but never
    # attached it to the feed.
    link = doc.createElementNS(atomuri, "link")
    link.setAttribute("rel", "self")
    link.setAttribute("href", feedPath + feedName)
    feed.appendChild(link)

    # %H gives the 24-hour clock; %I (12-hour) made the timestamp
    # ambiguous in the feed's id/updated values.
    modtime = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    id = doc.createElementNS(atomuri, "id")
    id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, feedName, modtime)))
    feed.appendChild(id)

    updated = doc.createElementNS(atomuri, "updated")
    updated.appendChild(doc.createTextNode(modtime))
    feed.appendChild(updated)

    # Iterate newest-first without mutating the caller's list (the
    # original sorted in place and then re-sorted with sorted()).
    for example in sorted(examples, key=lambda x: x["modified"], reverse=True):
        entry = doc.createElementNS(atomuri, "entry")

        title = doc.createElementNS(atomuri, "title")
        title.appendChild(doc.createTextNode(example["title"] or example["example"]))
        entry.appendChild(title)

        tags = doc.createElementNS(atomuri, "tags")
        tags.appendChild(doc.createTextNode(example["tags"] or example["example"]))
        entry.appendChild(tags)

        link = doc.createElementNS(atomuri, "link")
        link.setAttribute("href", "%s%s" % (feedPath, example["example"]))
        entry.appendChild(link)

        summary = doc.createElementNS(atomuri, "summary")
        summary.appendChild(doc.createTextNode(example["shortdesc"] or example["example"]))
        entry.appendChild(summary)

        updated = doc.createElementNS(atomuri, "updated")
        updated.appendChild(doc.createTextNode(example["modified"]))
        entry.appendChild(updated)

        author = doc.createElementNS(atomuri, "author")
        name = doc.createElementNS(atomuri, "name")
        name.appendChild(doc.createTextNode(example["author"]))
        author.appendChild(name)
        entry.appendChild(author)

        id = doc.createElementNS(atomuri, "id")
        id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, example["example"], example["modified"])))
        entry.appendChild(id)

        feed.appendChild(entry)

    doc.appendChild(feed)
    return doc
00176
def wordIndex(examples):
    """
    Create an inverted index based on words in shortdesc, title and
    tags.  Keys are lower-cased words.  Values are dictionaries mapping
    example index to occurrence count.
    """
    index = {}
    unword = re.compile(r"\W+")
    keys = ["shortdesc", "title", "tags"]
    for i in range(len(examples)):
        for key in keys:
            text = examples[i][key]
            if not text:
                continue
            for word in unword.split(text):
                if not word:
                    continue
                word = word.lower()
                # dict.has_key is gone in Python 3; setdefault/get is
                # the portable, idiomatic equivalent.
                counts = index.setdefault(word, {})
                counts[i] = counts.get(i, 0) + 1
    return index
00202
00203 if __name__ == "__main__":
00204
00205 if missing_deps:
00206 print "This script requires json or simplejson and BeautifulSoup. You don't have them. \n(%s)" % E
00207 sys.exit()
00208
00209 if len(sys.argv) > 1:
00210 outFile = open(sys.argv[1],'w')
00211 else:
00212 outFile = open('../examples/example-list.js','w')
00213
00214 examplesLocation = '../examples'
00215 print 'Reading examples from %s and writing out to %s' % (examplesLocation, outFile.name)
00216
00217 exampleList = []
00218 docIds = ['title','shortdesc','tags']
00219
00220
00221
00222
00223
00224 examples = getListOfExamples(examplesLocation)
00225
00226 modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime())
00227
00228 for example in examples:
00229 url = os.path.join(examplesLocation,example)
00230 html = getExampleHtml(url)
00231 tagvalues = parseHtml(html,docIds)
00232 tagvalues['example'] = example
00233
00234 d = getSvnInfo(url)
00235 tagvalues["modified"] = d["date"] or modtime
00236 tagvalues["author"] = d["author"] or "anonymous"
00237 tagvalues['link'] = example
00238
00239 exampleList.append(tagvalues)
00240
00241 print
00242
00243 exampleList.sort(key=lambda x:x['example'].lower())
00244
00245 index = wordIndex(exampleList)
00246
00247 json = json.dumps({"examples": exampleList, "index": index})
00248
00249 json = 'var info=' + json
00250 outFile.write(json)
00251 outFile.close()
00252
00253 print "writing feed to ../examples/%s " % feedName
00254 atom = open('../examples/%s' % feedName, 'w')
00255 doc = createFeed(exampleList)
00256 atom.write(doc.toxml())
00257 atom.close()
00258
00259
00260 print 'complete'
00261
00262