"""
weasyprint.tests.test_api
-------------------------
Test the public API.
:copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import gzip
import io
import math
import os
import sys
import unicodedata
import zlib
from pathlib import Path
from urllib.parse import urljoin, uses_relative
import cairocffi as cairo
import py
import pytest
from .. import CSS, HTML, __main__, default_url_fetcher
from ..urls import path2url
from .test_draw import assert_pixels_equal, image_to_pixels, parse_pixels
from .testing_utils import (
FakeHTML, assert_no_logs, capture_logs, http_server, resource_filename)
def _test_resource(class_, basename, check, **kwargs):
    """Exercise every supported way of constructing HTML/CSS objects.

    Builds ``class_`` instances from the same resource through filenames,
    :class:`pathlib.Path` objects, URLs, open file objects, raw bytes and
    decoded text, then runs ``check`` on each resulting object.
    """
    abs_name = resource_filename(basename)
    abs_path = Path(abs_name)
    resource_url = path2url(abs_name)
    # Absolute filename and Path: positional, ``guess=`` and ``filename=``.
    check(class_(abs_name, **kwargs))
    check(class_(abs_path, **kwargs))
    check(class_(guess=abs_name, **kwargs))
    check(class_(guess=abs_path, **kwargs))
    check(class_(filename=abs_name, **kwargs))
    check(class_(filename=abs_path, **kwargs))
    # file:// URL: positional, ``guess=`` and ``url=``.
    check(class_(resource_url, **kwargs))
    check(class_(guess=resource_url, **kwargs))
    check(class_(url=resource_url, **kwargs))
    # Open file objects; a fresh handle per constructor call, since the
    # constructor consumes the stream.
    with open(abs_name, 'rb') as fd:
        check(class_(fd, **kwargs))
    with open(abs_name, 'rb') as fd:
        check(class_(guess=fd, **kwargs))
    with open(abs_name, 'rb') as fd:
        check(class_(file_obj=fd, **kwargs))
    with open(abs_name, 'rb') as fd:
        content = fd.read()
    # Relative filename and Path, resolved from the tests directory.
    py.path.local(os.path.dirname(__file__)).chdir()
    rel_name = os.path.join('resources', basename)
    rel_path = Path(rel_name)
    check(class_(rel_name, **kwargs))
    check(class_(rel_path, **kwargs))
    # Raw bytes, then decoded text, through the ``string`` keyword.
    check(class_(string=content, base_url=rel_name, **kwargs))
    encoding = kwargs.get('encoding') or 'utf8'
    check(class_(string=content.decode(encoding),  # unicode
                 base_url=rel_name, **kwargs))
    # Mutually exclusive sources must be rejected.
    with pytest.raises(TypeError):
        class_(filename='foo', url='bar')
def _check_doc1(html, has_base_url=True):
"""Check that a parsed HTML document looks like resources/doc1.html"""
root = html.etree_element
assert root.tag == 'html'
assert [child.tag for child in root] == ['head', 'body']
_head, body = root
assert [child.tag for child in body] == ['h1', 'p', 'ul', 'div']
h1, p, ul, div = body
assert h1.text == 'WeasyPrint test document (with Ünicōde)'
if has_base_url:
url = urljoin(html.base_url, 'pattern.png')
assert url.startswith('file:')
assert url.endswith('weasyprint/tests/resources/pattern.png')
else:
assert html.base_url is None
def _run(args, stdin=b''):
    """Run the command-line entry point with the HTML class faked.

    ``args`` is a single string split on whitespace, ``stdin`` the bytes
    presented as standard input.  Returns the bytes written to stdout.
    """
    fake_stdin = io.BytesIO(stdin)
    fake_stdout = io.BytesIO()
    # Patch in FakeHTML so the CLI uses the test-friendly class, and
    # always restore the real one afterwards.
    try:
        __main__.HTML = FakeHTML
        __main__.main(args.split(), stdin=fake_stdin, stdout=fake_stdout)
    finally:
        __main__.HTML = HTML
    return fake_stdout.getvalue()
class _fake_file(object):
def __init__(self):
self.chunks = []
def write(self, data):
self.chunks.append(bytes(data[:]))
def getvalue(self):
return b''.join(self.chunks)
def _png_size(result):
    """Return (width, height) from a ``(png_bytes, width, height)`` triple.

    Also asserts that the decoded PNG's pixel size matches the reported
    dimensions.
    """
    png_bytes, width, height = result
    image = cairo.ImageSurface.create_from_png(io.BytesIO(png_bytes))
    actual_size = (image.get_width(), image.get_height())
    assert actual_size == (width, height)
    return width, height
def _round_meta(pages):
"""Eliminate errors of floating point arithmetic for metadata.
(eg. 49.99999999999994 instead of 50)
"""
for page in pages:
anchors = page.anchors
for anchor_name, (pos_x, pos_y) in anchors.items():
anchors[anchor_name] = round(pos_x, 6), round(pos_y, 6)
links = page.links
for i, link in enumerate(links):
link_type, target, (pos_x, pos_y, width, height) = link
link = (
link_type, target, (round(pos_x, 6), round(pos_y, 6),
round(width, 6), round(height, 6)))
links[i] = link
bookmarks = page.bookmarks
for i, (level, label, (pos_x, pos_y), state) in enumerate(bookmarks):
bookmarks[i] = (level, label,
(round(pos_x, 6), round(pos_y, 6)), state)
@assert_no_logs
def test_html_parsing():
"""Test the constructor for the HTML class."""
_test_resource(FakeHTML, 'doc1.html', _check_doc1)
_test_resource(FakeHTML, 'doc1_UTF-16BE.html', _check_doc1,
encoding='UTF-16BE')
py.path.local(os.path.dirname(__file__)).chdir()
filename = os.path.join('resources', 'doc1.html')
with open(filename, encoding='utf-8') as fd:
string = fd.read()
_check_doc1(FakeHTML(string=string, base_url=filename))
_check_doc1(FakeHTML(string=string), has_base_url=False)
string_with_meta = string.replace(
''
combined = b'' + html
linked = b'' + html
py.path.local(resource_filename('')).chdir()
# Reference
html_obj = FakeHTML(string=combined, base_url='dummy.html')
# pdf_bytes = html_obj.write_pdf()
png_bytes = html_obj.write_png()
x2_png_bytes = html_obj.write_png(resolution=192)
rotated_png_bytes = FakeHTML(string=combined, base_url='dummy.html',
media_type='screen').write_png()
empty_png_bytes = FakeHTML(
string=b'').write_png()
check_png_pattern(png_bytes)
check_png_pattern(rotated_png_bytes, rotated=True)
check_png_pattern(empty_png_bytes, blank=True)
tmpdir.chdir()
with open(resource_filename('pattern.png'), 'rb') as pattern_fd:
pattern_bytes = pattern_fd.read()
tmpdir.join('pattern.png').write_binary(pattern_bytes)
tmpdir.join('no_css.html').write_binary(html)
tmpdir.join('combined.html').write_binary(combined)
tmpdir.join('combined-UTF-16BE.html').write_binary(
combined.decode('ascii').encode('UTF-16BE'))
tmpdir.join('linked.html').write_binary(linked)
tmpdir.join('style.css').write_binary(css)
_run('combined.html out1.png')
_run('combined.html out2.pdf')
assert tmpdir.join('out1.png').read_binary() == png_bytes
# TODO: check PDF content? How?
# assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes
_run('combined-UTF-16BE.html out3.png --encoding UTF-16BE')
assert tmpdir.join('out3.png').read_binary() == png_bytes
_run(tmpdir.join('combined.html').strpath + ' out4.png')
assert tmpdir.join('out4.png').read_binary() == png_bytes
_run(path2url(tmpdir.join('combined.html').strpath) + ' out5.png')
assert tmpdir.join('out5.png').read_binary() == png_bytes
_run('linked.html out6.png') # test relative URLs
assert tmpdir.join('out6.png').read_binary() == png_bytes
_run('combined.html out7 -f png')
_run('combined.html out8 --format pdf')
assert tmpdir.join('out7').read_binary() == png_bytes
# assert tmpdir.join('out8').read_binary(), pdf_bytes
_run('no_css.html out9.png')
_run('no_css.html out10.png -s style.css')
assert tmpdir.join('out9.png').read_binary() != png_bytes
# assert tmpdir.join('out10.png').read_binary() == png_bytes
stdout = _run('--format png combined.html -')
assert stdout == png_bytes
_run('- out11.png', stdin=combined)
check_png_pattern(tmpdir.join('out11.png').read_binary())
assert tmpdir.join('out11.png').read_binary() == png_bytes
stdout = _run('--format png - -', stdin=combined)
assert stdout == png_bytes
_run('combined.html out13.png --media-type screen')
_run('combined.html out12.png -m screen')
_run('linked.html out14.png -m screen')
assert tmpdir.join('out12.png').read_binary() == rotated_png_bytes
assert tmpdir.join('out13.png').read_binary() == rotated_png_bytes
assert tmpdir.join('out14.png').read_binary() == rotated_png_bytes
stdout = _run('-f pdf combined.html -')
assert stdout.count(b'attachment') == 0
stdout = _run('-f pdf -a pattern.png combined.html -')
assert stdout.count(b'attachment') == 1
stdout = _run('-f pdf -a style.css -a pattern.png combined.html -')
assert stdout.count(b'attachment') == 2
stdout = _run('-f png -r 192 linked.html -')
assert stdout == x2_png_bytes
stdout = _run('-f png --resolution 192 linked.html -')
assert _run('linked.html - -f png --resolution 192') == x2_png_bytes
assert stdout == x2_png_bytes
os.mkdir('subdirectory')
py.path.local('subdirectory').chdir()
with capture_logs() as logs:
stdout = _run('--format png - -', stdin=combined)
assert len(logs) == 1
assert logs[0].startswith('ERROR: Failed to load image')
assert stdout == empty_png_bytes
stdout = _run('--format png --base-url .. - -', stdin=combined)
assert stdout == png_bytes
@assert_no_logs
def test_unicode_filenames(tmpdir):
"""Test non-ASCII filenames both in Unicode or bytes form."""
# Replicate pattern.png in CSS so that base_url does not matter.
html = b'''
depth 1
depth 2
depth 1
depth 2
depth 3
''', [[ (2, 'A', (0, 0), 'open'), (4, 'B', (0, 20), 'open'), (2, 'C', (0, 40), 'open'), (3, 'D', (0, 60), 'open'), (4, 'E', (0, 80), 'open'), ]], [ ('A', (0, 0, 0), [ ('B', (0, 0, 20), [], 'open')], 'open'), ('C', (0, 0, 40), [ ('D', (0, 0, 60), [ ('E', (0, 0, 80), [], 'open')], 'open')], 'open'), ], False), ('''h2 depth 1
h4 depth 2
h3 depth 2
h5 depth 3
h1 depth 1
h2 depth 2
h2 depth 2
h4 depth 3
h1 depth 1
''', [[ (2, 'A', (0, 0), 'open'), (4, 'B', (0, 20), 'open'), (3, 'C', (0, 40), 'open'), (5, 'D', (0, 60), 'open'), (1, 'E', (0, 70), 'open'), (2, 'F', (0, 90), 'open'), (2, 'G', (0, 110), 'open'), (4, 'H', (0, 130), 'open'), (1, 'I', (0, 150), 'open'), ]], [ ('A', (0, 0, 0), [ ('B', (0, 0, 20), [], 'open'), ('C', (0, 0, 40), [ ('D', (0, 0, 60), [], 'open')], 'open')], 'open'), ('E', (0, 0, 70), [ ('F', (0, 0, 90), [], 'open'), ('G', (0, 0, 110), [ ('H', (0, 0, 130), [], 'open')], 'open')], 'open'), ('I', (0, 0, 150), [], 'open'), ], False), ('Hello, World
''', [ [ ('external', 'http://weasyprint.org', (0, 0, 30, 20)), ('external', 'http://weasyprint.org', (0, 0, 30, 30)), ('internal', 'lipsum', (10, 100, 32, 20)), ('internal', 'lipsum', (10, 100, 32, 32)) ], [('internal', 'hello', (0, 0, 200, 30))], ], [ {'hello': (0, 200)}, {'lipsum': (0, 0)} ], [ ( [ ('external', 'http://weasyprint.org', (0, 0, 30, 20)), ('external', 'http://weasyprint.org', (0, 0, 30, 30)), ('internal', 'lipsum', (10, 100, 32, 20)), ('internal', 'lipsum', (10, 100, 32, 32)) ], [('hello', 0, 200)], ), ( [ ('internal', 'hello', (0, 0, 200, 30)) ], [('lipsum', 0, 0)]), ]) assert_links( ''' ''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9', (5, 10, 190, 0))]], [{}], [([('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9', (5, 10, 190, 0))], [])], base_url='http://weasyprint.org/foo/bar/') assert_links( '''