#!/var/www/html/soyintegro/venv/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2009 Søren Roug, European Environment Agency
#
# This is free software.  You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Contributor(s):
#
"""Command-line checker for OpenDocument packages.

The XML members of the zip archive are parsed with SAX and every element,
attribute and attribute value is checked against the tables in
``odf.grammar`` and ``odf.attrconverters``.  Each distinct problem is
printed once on standard output.
"""
import zipfile
from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
import sys
from odf.opendocument import OpenDocument
from odf import element, grammar
from odf.namespaces import *
from odf.attrconverters import attrconverters, cnv_string
from io import BytesIO

if sys.version_info[0]==3:
    unicode=str

# Attributes that are reported as a product-specific extension (warning)
# rather than as a plain "not allowed" error.
# Layout: product name -> {element tag: tuple of attribute names}, where
# tags and attribute names are (namespace, localname) pairs.
extension_attributes = {
    "OpenOffice.org" : {
        (METANS,u'template'): (
            (XLINKNS,u'role'),
        ),
        (STYLENS,u'graphic-properties'): (
            (STYLENS,u'background-transparency'),
        ),
        (STYLENS,u'paragraph-properties'): (
            (TEXTNS,u'enable-numbering'),
            (STYLENS,u'join-border'),
        ),
        (STYLENS,u'table-cell-properties'): (
            (STYLENS,u'writing-mode'),
        ),
        (STYLENS,u'table-row-properties'): (
            (STYLENS,u'keep-together'),
        ),
    },
    "KOffice" : {
        (STYLENS,u'graphic-properties'): (
            (KOFFICENS,u'frame-behavior-on-new-page'),
        ),
        (DRAWNS,u'page'): (
            (KOFFICENS,u'name'),
        ),
        (PRESENTATIONNS,u'show-shape'): (
            (KOFFICENS,u'order-id'),
        ),
        (PRESENTATIONNS,u'hide-shape'): (
            (KOFFICENS,u'order-id'),
        ),
        (CHARTNS,u'legend'): (
            (KOFFICENS,u'title'),
        ),
    }
}

# Elements that are exempt from the "is this child allowed in its parent"
# check performed by ODFElementHandler.startElementNS.
_ROOT_TAGS = (
    (OFFICENS, 'document'),
    (OFFICENS, 'document-content'),
    (OFFICENS, 'document-styles'),
    (OFFICENS, 'document-meta'),
    (OFFICENS, 'document-settings'),
    (MANIFESTNS,'manifest'),
)

# Archive members that lint() parses and validates.
_XML_MEMBERS = (
    'META-INF/manifest.xml',
    'content.xml',
    'meta.xml',
    'styles.xml',
    'settings.xml',
)

# Messages already printed; used by print_error() to suppress duplicates.
printed_errors = []


def print_error(str):
    """Print a message unless the identical message was printed before.

    The parameter name shadows the builtin ``str``; it is kept so that
    existing callers passing it by keyword continue to work.
    """
    if str not in printed_errors:
        printed_errors.append(str)
        print (str)


def chop_arg(arg):
    """Return ``arg`` shortened to 20 characters plus "..." if it is longer."""
    if len(arg) > 20:
        return "%s..." % arg[0:20]
    return arg


def make_qname(tag):
    """Format a (namespace, localname) pair as "prefix:localname".

    The prefix is looked up in ``nsdict``; a namespace that is not in the
    table is printed as-is.
    """
    return "%s:%s" % (nsdict.get(tag[0],tag[0]), tag[1])


def allowed_attributes(tag):
    """Return the grammar's allowed-attribute entry for ``tag`` (or None)."""
    return grammar.allowed_attributes.get(tag)


class ODFElementHandler(handler.ContentHandler):
    """ SAX content handler that validates elements, attributes and text
        against the tables in odf.grammar and reports problems through
        print_error().
    """

    def __init__(self, document):
        self.doc = document
        self.tagstack = []      # tags of the currently open elements
        self.data = []          # text chunks seen since the last start tag
        self.currtag = None     # tag of the most recently closed element

    def characters(self, data):
        """ Collect character data for the check done in endElementNS """
        self.data.append(data)

    def startElementNS(self, tag, qname, attrs):
        """ Pseudo-create an element: check its attributes, its position
            in the parent and its required attributes
        """
        allowed_attrs = grammar.allowed_attributes.get(tag)
        for (att,value) in attrs.items():
            prefix = nsdict.get(att[0],att[0])
            # Check if it is a known extension
            notan_extension = True
            for product, ext_attrs in extension_attributes.items():
                allowed_ext_attrs = ext_attrs.get(tag)
                if allowed_ext_attrs and att in allowed_ext_attrs:
                    print_error("Warning: Attribute %s in element <%s> is illegal - %s extension" % (
                        make_qname(att), make_qname(tag), product))
                    notan_extension = False
            # Check if it is an allowed attribute
            if notan_extension and allowed_attrs and att not in allowed_attrs:
                print_error("Error: Attribute %s:%s is not allowed in element <%s>" % (
                    prefix, att[1], make_qname(tag)))
            # Check the value
            try:
                convert = attrconverters.get(att, cnv_string)
                convert(att, value, tag)
            except ValueError as res:
                print_error("Error: Bad value '%s' for attribute %s:%s in tag: <%s> - %s" %
                    (chop_arg(value), prefix, att[1], make_qname(tag), res))

        self.tagstack.append(tag)
        self.data = []

        # Check that the parent allows this child element
        if tag not in _ROOT_TAGS:
            if len(self.tagstack) < 2:
                # No parent on the stack: a non-root element opened the document
                print_error("Error: This document starts with the wrong tag: <%s>" % make_qname(tag))
            else:
                parent = self.tagstack[-2]
                allowed_children = grammar.allowed_children.get(parent)
                if allowed_children and tag not in allowed_children:
                    print_error("Error: Element %s is not allowed in element %s" % (
                        make_qname(tag), make_qname(parent)))

        # Test that all mandatory attributes have been added.
        required = grammar.required_attributes.get(tag)
        if required:
            for r in required:
                if attrs.get(r) is None:
                    print_error("Error: Required attribute missing: %s in <%s>" % (
                        make_qname(r), make_qname(tag)))

    def endElementNS(self, tag, qname):
        """ Close the element and check whether it was allowed to hold text """
        self.currtag = self.tagstack.pop()
        text = ''.join(self.data).strip()
        # Check that only elements that can take text have text
        # But only elements we know exist in grammar
        if tag in grammar.allowed_children:
            if text != '' and tag not in grammar.allows_text:
                print_error("Error: %s does not allow text data" % make_qname(tag))
        self.data = []


class ODFDTDHandler(handler.DTDHandler):
    def notationDecl(self, name, public_id, system_id):
        """ Ignore DTDs """
        print_error("Warning: ODF doesn't use DOCTYPEs")


def exitwithusage(exitcode=2):
    """ print out usage information """
    sys.stderr.write("Usage: %s inputfile\n" % sys.argv[0])
    sys.stderr.write("\tInputfile must be OpenDocument format\n")
    sys.exit(exitcode)


def lint(odffile):
    """Check the OpenDocument package ``odffile`` and print what is wrong.

    The function verifies that the file is a zip archive, inspects the
    first archive member (expected to be an uncompressed 'mimetype' entry
    without a comment) and runs every member listed in ``_XML_MEMBERS``
    through a namespace-aware SAX parser driven by ODFElementHandler.

    Returns None.  Findings are reported through print_error(); XML that
    is not well-formed makes the parser raise, as the default SAX
    ErrorHandler is installed.
    """
    if not zipfile.is_zipfile(odffile):
        print_error("Error: This is not a zipped file")
        return
    # The context manager closes the archive even if parsing raises.
    with zipfile.ZipFile(odffile) as zfd:
        try:
            # read() returns bytes; decode instead of str()-ing the bytes,
            # which on Python 3 would yield the repr "b'...'".
            mimetype = zfd.read('mimetype').decode('utf-8', 'replace')
        except KeyError:
            # The archive has no 'mimetype' member
            mimetype = u''
        d = OpenDocument(mimetype)
        first = True
        for zi in zfd.infolist():
            if first:
                if zi.filename == 'mimetype':
                    if zi.compress_type != zipfile.ZIP_STORED:
                        print_error("Error: The 'mimetype' member must be stored - not deflated")
                    # ZipInfo.comment is bytes on Python 3, so comparing it
                    # with "" was always unequal; test for emptiness instead.
                    if zi.comment:
                        print_error("Error: The 'mimetype' member must not have extra header info")
                else:
                    print_error("Warning: The first member in the archive should be the mimetype")
            first = False
            if zi.filename in _XML_MEMBERS:
                content = zfd.read(zi.filename)
                parser = make_parser()
                parser.setFeature(handler.feature_namespaces, True)
                # Do not fetch external general entities
                parser.setFeature(handler.feature_external_ges, False)
                parser.setContentHandler(ODFElementHandler(d))
                dtdh = ODFDTDHandler()
                parser.setDTDHandler(dtdh)
                parser.setErrorHandler(handler.ErrorHandler())

                inpsrc = InputSource()
                inpsrc.setByteStream(BytesIO(content))
                parser.parse(inpsrc)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        exitwithusage()
    lint(unicode(sys.argv[1]))


# Local Variables: ***
# mode: python     ***
# End: ***