#!/var/www/html/soyintegro/venv/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2006 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
from __future__ import print_function

import sys
import xml.sax.saxutils
import zipfile
from io import BytesIO
from xml.sax import make_parser, handler
from xml.sax.xmlreader import InputSource

from odf.namespaces import TEXTNS, TABLENS, DRAWNS

try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO
def getxmlpart(odffile, xmlfile):
    """ Get one member out of the ODF (zip) package.

    odffile -- whatever zipfile.ZipFile accepts as its file argument.
    xmlfile -- name of the archive member to read, e.g. 'content.xml'.

    Returns the value of ZipFile.read() for that member. Exceptions
    raised by zipfile (bad archive, missing member) propagate unchanged.
    """
    # The context manager closes the archive even when read() raises,
    # which the previous explicit close() call did not guarantee.
    with zipfile.ZipFile(odffile) as archive:
        return archive.read(xmlfile)
#
# Extract headings from content.xml
#
class ODTHeadingHandler(handler.ContentHandler):
    """ Extract headings from content.xml of an ODT file

    For every text:h element one string is appended to the list passed
    to the constructor. The string is the outline level, followed by
    that many spaces, followed by the text found inside the heading.
    A heading without an outline-level attribute is reported as level 0.
    """

    def __init__(self, eater):
        # Explicit base-class call instead of super(): this file still
        # caters for Python 2 (note the cStringIO fallback import).
        handler.ContentHandler.__init__(self)
        self.r = eater      # list that receives the formatted headings
        self.data = []      # text fragments of the heading being read
        self.level = 0      # outline level of the heading being read
        self._depth = 0     # number of text:h elements currently open

    def characters(self, data):
        # Text outside a heading is never reported, so do not buffer it.
        if self._depth > 0:
            self.data.append(data)

    def startElementNS(self, tag, qname, attrs):
        if tag != (TEXTNS, 'h'):
            return
        self._depth += 1
        outline_level = attrs.get((TEXTNS, 'outline-level'))
        self.level = int(outline_level) if outline_level is not None else 0
        self.data = []

    def endElementNS(self, tag, qname):
        if tag != (TEXTNS, 'h'):
            return
        text = ''.join(self.data)
        self.data = []
        self._depth -= 1
        # "%*s" with an empty string yields `level` spaces of indentation.
        self.r.append("%d%*s%s" % (self.level, self.level, '', text))
class ODTSheetHandler(handler.ContentHandler):
    """ Extract sheet names from content.xml of an ODS file

    The table:name attribute of every table:table element is appended
    to the list passed to the constructor. Tables whose name is missing
    or empty are skipped.
    """

    def __init__(self, eater):
        # Explicit base-class call instead of super(): this file still
        # caters for Python 2 (note the cStringIO fallback import).
        handler.ContentHandler.__init__(self)
        self.r = eater      # list that receives the sheet names

    def startElementNS(self, tag, qname, attrs):
        if tag != (TABLENS, 'table'):
            return
        sheetname = attrs.get((TABLENS, 'name'))
        if sheetname:
            self.r.append(sheetname)
class ODTSlideHandler(handler.ContentHandler):
    """ Extract slide titles and paragraph text from content.xml of an
    ODP file

    Every draw:page element appends "SLIDE <n>: <name>" to the list
    passed to the constructor, where <n> counts pages from 1 and <name>
    is the draw:name attribute (empty when absent). Every non-empty
    text:p element appends its text prefixed with a space.
    """

    def __init__(self, eater):
        # Explicit base-class call instead of super(): this file still
        # caters for Python 2 (note the cStringIO fallback import).
        handler.ContentHandler.__init__(self)
        self.r = eater      # list that receives the output lines
        self.data = []      # text fragments of the paragraph being read
        self.pagenum = 0    # number of draw:page elements seen so far
        self._depth = 0     # number of text:p elements currently open

    def characters(self, data):
        # Text outside a paragraph is never reported, so do not buffer it.
        if self._depth > 0:
            self.data.append(data)

    def startElementNS(self, tag, qname, attrs):
        if tag == (DRAWNS, 'page'):
            self.pagenum += 1
            slide_name = attrs.get((DRAWNS, 'name'), '')
            self.r.append("SLIDE %d: %s" % (self.pagenum, slide_name))
        elif tag == (TEXTNS, 'p'):
            self._depth += 1
            self.data = []

    def endElementNS(self, tag, qname):
        if tag != (TEXTNS, 'p'):
            return
        text = ''.join(self.data)
        self.data = []
        self._depth -= 1
        if text:
            self.r.append(" " + text)
def odtheadings(odtfile):
    """ Return the outline of an OpenDocument file as a list of strings.

    The 'mimetype' member of the package selects the content handler:
    text documents yield headings, spreadsheets yield sheet names and
    presentations yield slide titles with their paragraph text. The
    template variant of each type is handled the same way.

    odtfile -- passed on to getxmlpart() to open the package.

    For any other mimetype "Unsupported fileformat" is printed and the
    process exits with status 2 (SystemExit).
    """
    mimetype = getxmlpart(odtfile, 'mimetype')
    content = getxmlpart(odtfile, 'content.xml')
    lines = []
    parser = make_parser()
    parser.setFeature(handler.feature_namespaces, 1)
    if not isinstance(mimetype, str):
        mimetype = mimetype.decode("utf-8")
    if mimetype in ('application/vnd.oasis.opendocument.text',
                    'application/vnd.oasis.opendocument.text-template'):
        parser.setContentHandler(ODTHeadingHandler(lines))
    elif mimetype in ('application/vnd.oasis.opendocument.spreadsheet',
                      'application/vnd.oasis.opendocument.spreadsheet-template'):
        parser.setContentHandler(ODTSheetHandler(lines))
    # The comma between the two entries matters: without it they merge
    # into a single string and `in` degrades to a substring test that
    # also accepts empty or partial mimetypes.
    elif mimetype in ('application/vnd.oasis.opendocument.presentation',
                      'application/vnd.oasis.opendocument.presentation-template'):
        parser.setContentHandler(ODTSlideHandler(lines))
    else:
        print("Unsupported fileformat")
        sys.exit(2)
    parser.setErrorHandler(handler.ErrorHandler())
    inpsrc = InputSource()
    # Give the parser the raw bytes so it applies the encoding declared
    # in the document itself, instead of decoding to text first and then
    # presenting that text as a "byte" stream.
    inpsrc.setByteStream(BytesIO(content))
    parser.parse(inpsrc)
    return lines
if __name__ == "__main__":
    # Command-line entry point: print the outline of the document named
    # by the first argument, one entry per line.
    if len(sys.argv) < 2:
        # Report a missing argument instead of failing with IndexError.
        sys.stderr.write("Usage: %s inputfile\n" % sys.argv[0])
        sys.exit(2)
    for heading in odtheadings(sys.argv[1]):
        print(heading)
# Local Variables: ***
# mode: python ***
# End: ***