# Source code for catkin.tidy_xml

# Software License Agreement (BSD License)
#
# Copyright (c) 2008, Willow Garage, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following
#    disclaimer in the documentation and/or other materials provided
#    with the distribution.
#  * Neither the name of Willow Garage, Inc. nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import unicode_literals
import codecs
import os
import re

# unit test suites are not good about screening out illegal unicode characters (#603)
# recipe from http://boodebr.org/main/python/all-about-python-and-unicode#UNI_XML
# code copied from rosunit/src/junitxml.py
try:
    char = unichr
except NameError:
    char = chr
RE_XML_ILLEGAL = ('([%s-%s%s-%s%s-%s%s-%s])' + \
    '|' + \
    '([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])') % \
    (char(0x0000), char(0x0008), char(0x000b), char(0x000c),
     char(0x000e), char(0x001f), char(0xfffe), char(0xffff),
     char(0xd800), char(0xdbff), char(0xdc00), char(0xdfff),
     char(0xd800), char(0xdbff), char(0xdc00), char(0xdfff),
     char(0xd800), char(0xdbff), char(0xdc00), char(0xdfff))
_SAFE_XML_REGEX = re.compile(RE_XML_ILLEGAL)


def tidy_xml(filename):
    '''
    Read in file, screen out unsafe unicode characters, write back file in utf-8.

    The file is rewritten in place: every span matched by
    ``_SAFE_XML_REGEX`` is replaced with a single ``'?'``.

    :param filename: path of the file to clean, ``str``
    :returns: ``True`` once the file has been rewritten, ``False`` if the
      content could not be decoded with any of the attempted encodings
    :raises ValueError: if ``filename`` is not an existing file
    '''
    if not os.path.isfile(filename):
        raise ValueError('file does not exist')

    # try first utf-8 then iso. This is ugly, but the files in
    # question that are problematic do not declare unicode type
    data = None
    for ftype in ['utf-8', 'iso8859-1']:
        try:
            with codecs.open(filename, 'r', ftype) as fhand:
                data = fhand.read()
            break
        except ValueError:
            # decoding errors are ValueError subclasses: try the next codec
            continue

    if data is None:
        return False

    # Substitute in a single pass. Slicing ``data`` inside a ``finditer``
    # loop would apply offsets of the original string to an already
    # modified one, which goes wrong once a match is longer than one
    # character.
    data = _SAFE_XML_REGEX.sub('?', data)

    # Encode explicitly so the output is utf-8 regardless of the locale.
    with codecs.open(filename, 'w', 'utf-8') as fhand:
        fhand.write(data)
    return True
# Navigation
#
# Quick search
#
# Source code for catkin.tidy_xml
#
# Navigation