',
base_url=None).write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
with pytest.raises(AttributeError):
pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)
assert len(logs) == 1
assert 'WARNING: Ignored `-weasy-link: url("../lipsum")`' in logs[0]
assert 'Relative URI reference without a base URI' in logs[0]
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_relative_links_internal():
    """Check that a fragment-only link is kept when there is no base URI."""
    # Internal URI reference without a base URI: OK
    fileobj = io.BytesIO()
    # NOTE(review): the markup of this literal was lost (the string was left
    # unterminated). It is rebuilt from the assertions below: a block-level
    # anchor that targets its own id, followed by the text "a". Confirm
    # against the original fixture.
    FakeHTML(
        string='<a href="#lipsum" id="lipsum" style="display: block"></a>a',
        base_url=None).write_pdf(target=fileobj)
    pdf_file = pdf.PDFFile(fileobj)
    annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0]
    dest = annots.get_value('Dest', '(.*)')
    assert dest == b'(lipsum)'
    names = (
        pdf_file.catalog
        .get_indirect_dict('Names', pdf_file)
        .get_indirect_dict('Dests', pdf_file)
        .byte_string).decode('ascii')
    match = re.search(
        '\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names)
    # Fail with the names dictionary in the message instead of an opaque
    # AttributeError on ``None.group`` when the destination is absent.
    assert match is not None, names
    assert_rect_almost_equal(match.group(1), (0, TOP, 0))
    assert_rect_almost_equal(
        annots.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP))
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_relative_links_anchors():
    """Check an internal link declared through CSS, without a base URI."""
    fileobj = io.BytesIO()
    # NOTE(review): the markup of this literal was lost (the string was left
    # unterminated). It is rebuilt on the assumption that this test covers
    # the ``-weasy-link`` property with a fragment-only URL, since the
    # assertions require a "lipsum" destination and a zero-height,
    # full-width link rectangle. Confirm against the original fixture.
    FakeHTML(
        string='<div style="-weasy-link: url(#lipsum)" id="lipsum"></div>a',
        base_url=None).write_pdf(target=fileobj)
    pdf_file = pdf.PDFFile(fileobj)
    annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0]
    dest = annots.get_value('Dest', '(.*)')
    assert dest == b'(lipsum)'
    names = (
        pdf_file.catalog
        .get_indirect_dict('Names', pdf_file)
        .get_indirect_dict('Dests', pdf_file)
        .byte_string).decode('ascii')
    match = re.search(
        '\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names)
    # Fail with the names dictionary in the message instead of an opaque
    # AttributeError on ``None.group`` when the destination is absent.
    assert match is not None, names
    assert_rect_almost_equal(match.group(1), (0, TOP, 0))
    assert_rect_almost_equal(
        annots.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP))
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_missing_links():
    """Check that a link to a missing anchor is logged while valid ones work.

    Exactly one log record is expected, reporting the ``#missing`` anchor.
    """
    fileobj = io.BytesIO()
    with capture_logs() as logs:
        # NOTE(review): the markup of this document was lost; only the text
        # "a" remained. It is rebuilt from the assertions below: two 15pt
        # high block anchors, the first linking to the second ("lipsum"),
        # the second linking to a non-existent "#missing" target. Confirm
        # against the original fixture.
        FakeHTML(string='''
            <style> a { display: block; height: 15pt } </style>
            <a href="#lipsum"></a>
            <a href="#missing" id="lipsum"></a>
            a
        ''', base_url=None).write_pdf(target=fileobj)
    pdf_file = pdf.PDFFile(fileobj)
    annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0]
    dest = annots.get_value('Dest', '(.*)')
    assert dest == b'(lipsum)'
    names = (
        pdf_file.catalog
        .get_indirect_dict('Names', pdf_file)
        .get_indirect_dict('Dests', pdf_file)
        .byte_string).decode('ascii')
    match = re.search(
        '\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names)
    # Fail with the names dictionary in the message instead of an opaque
    # AttributeError on ``None.group`` when the destination is absent.
    assert match is not None, names
    assert_rect_almost_equal(match.group(1), (0, TOP - 15, 0))
    assert_rect_almost_equal(
        annots.get_value('Rect', '(.*)'), (0, TOP - 15, RIGHT, TOP))
    assert len(logs) == 1
    assert 'ERROR: No anchor #missing for internal URI reference' in logs[0]
@assert_no_logs
def test_embed_gif():
    """Check that a GIF image is not embedded as a DCT (JPEG) stream."""
    # NOTE(review): the <img> markup was lost (the string was left
    # unterminated). The file name below is an assumption; confirm that it
    # exists next to the ``dummy.html`` resource used as base URL.
    assert b'/Filter /DCTDecode' not in FakeHTML(
        base_url=resource_filename('dummy.html'),
        string='<img src="pattern.gif">').write_pdf()
@assert_no_logs
def test_embed_jpeg():
    """Check that a JPEG image is embedded in the PDF as a DCT stream."""
    # JPEG-encoded image, embedded in PDF:
    # NOTE(review): the <img> markup was lost (the string was left
    # unterminated). The file name below is an assumption; confirm that it
    # exists next to the ``dummy.html`` resource used as base URL.
    assert b'/Filter /DCTDecode' in FakeHTML(
        base_url=resource_filename('dummy.html'),
        string='<img src="blue.jpg">').write_pdf()
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_document_info():
    """Check that HTML metadata ends up in the PDF information dictionary."""
    fileobj = io.BytesIO()
    # NOTE(review): the <meta> elements were lost from this document and the
    # angle-bracketed hex strings were lost from the Creator and Subject
    # assertions (one of them was left unterminated). Both are rebuilt so
    # that they agree with each other and with the surviving assertions;
    # confirm against the original fixture.
    # ``\xa0`` (no-break space) and ``\u2026`` (ellipsis) are non-ASCII, so
    # those values are expected as UTF-16BE hex strings with a BOM.
    FakeHTML(string='''
      <meta name=author content="I Me &amp; Myself">
      <title>Test document</title>
      <h1>Another title</h1>
      <meta name=generator content="Human\xa0after\xa0all">
      <meta name=keywords content="html ,\tcss,
                                   pdf,css">
      <meta name=description content="Blah\u2026 ">
      <meta name=dcterms.created content=2011-04-21T23:00:00Z>
      <meta name=dcterms.modified content=2013-07-21T23:46+01:00>
    ''').write_pdf(target=fileobj)
    info = pdf.PDFFile(fileobj).info
    assert info.get_value('Author', '(.*)') == b'(I Me & Myself)'
    assert info.get_value('Title', '(.*)') == b'(Test document)'
    assert info.get_value('Creator', '(.*)') == (
        b'<feff00480075006d0061006e00a00061006600740065007200a00061006c006c>')
    assert info.get_value('Keywords', '(.*)') == b'(html, css, pdf)'
    assert info.get_value('Subject', '(.*)') == (
        b'<feff0042006c0061006820260020>')
    assert info.get_value('CreationDate', '(.*)') == b"(20110421230000+00'00)"
    assert info.get_value('ModDate', '(.*)') == b"(20130721234600+01'00)"
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_embedded_files_attachments(tmpdir):
    """Check attachments given in the document and through ``write_pdf``.

    Two temporary files are attached by URL (one absolute, one relative to
    the base URL), one attachment comes from a ``data:`` URL in the
    document, and three more are passed to ``write_pdf`` directly.
    """
    absolute_tmp_file = tmpdir.join('some_file.txt').strpath
    adata = b'12345678'
    with open(absolute_tmp_file, 'wb') as afile:
        afile.write(adata)
    absolute_url = path2url(absolute_tmp_file)
    assert absolute_url.startswith('file://')

    relative_tmp_file = tmpdir.join('äöü.txt').strpath
    rdata = b'abcdefgh'
    with open(relative_tmp_file, 'wb') as rfile:
        rfile.write(rdata)

    fileobj = io.BytesIO()
    # NOTE(review): the markup of this document was lost, leaving ``format``
    # with two arguments and no placeholder. It is rebuilt from the
    # assertions below (a titled ``data:,hi there`` attachment, one
    # attachment per temporary file, and headings for the outline); confirm
    # against the original fixture.
    FakeHTML(
        string='''
          <title>Test document</title>
          <meta charset="utf-8">
          <link
            rel="attachment"
            title="some file attachment äöü"
            href="data:,hi%20there">
          <link rel="attachment" href="{0}">
          <link rel="attachment" href="{1}">
          <h1>Heading 1</h1>
          <h2>Heading 2</h2>
        '''.format(absolute_url, os.path.basename(relative_tmp_file)),
        base_url=tmpdir.strpath,
    ).write_pdf(
        target=fileobj,
        attachments=[
            Attachment('data:,oob attachment', description='Hello'),
            'data:,raw URL',
            io.BytesIO(b'file like obj')
        ]
    )
    pdf_bytes = fileobj.getvalue()

    # Attachment declared in the document with a data: URL and a title.
    assert (
        '<{}>'.format(hashlib.md5(b'hi there').hexdigest()).encode('ascii')
        in pdf_bytes)
    assert b'/F ()' in pdf_bytes
    assert (
        b'/UF (\xfe\xff\x00a\x00t\x00t\x00a\x00c\x00h\x00m\x00e\x00n'
        b'\x00t\x00.\x00b\x00i\x00n)' in pdf_bytes)
    assert (
        b'/Desc (\xfe\xff\x00s\x00o\x00m\x00e\x00 \x00f\x00i\x00l\x00e'
        b'\x00 \x00a\x00t\x00t\x00a\x00c\x00h\x00m\x00e\x00n\x00t\x00 '
        b'\x00\xe4\x00\xf6\x00\xfc)' in pdf_bytes)

    # Attachments referenced by absolute and relative file URLs.
    assert hashlib.md5(adata).hexdigest().encode('ascii') in pdf_bytes
    assert (
        os.path.basename(absolute_tmp_file).encode('utf-16-be')
        in pdf_bytes)
    assert hashlib.md5(rdata).hexdigest().encode('ascii') in pdf_bytes
    assert (
        os.path.basename(relative_tmp_file).encode('utf-16-be')
        in pdf_bytes)

    # Attachments passed through the ``attachments`` argument.
    assert (
        hashlib.md5(b'oob attachment').hexdigest().encode('ascii')
        in pdf_bytes)
    assert b'/Desc (\xfe\xff\x00H\x00e\x00l\x00l\x00o)' in pdf_bytes
    assert (
        hashlib.md5(b'raw URL').hexdigest().encode('ascii')
        in pdf_bytes)
    assert (
        hashlib.md5(b'file like obj').hexdigest().encode('ascii')
        in pdf_bytes)

    assert b'/EmbeddedFiles' in pdf_bytes
    assert b'/Outlines' in pdf_bytes
@assert_no_logs
def test_attachments_data():
    """Check that a ``data:`` URL attachment is embedded with its checksum."""
    fileobj = io.BytesIO()
    # NOTE(review): the markup of this document was lost, leaving nothing to
    # attach. It is rebuilt from the assertion below, which expects the MD5
    # of b'some data'; confirm against the original fixture.
    FakeHTML(string='''
      <title>Test document 2</title>
      <meta charset="utf-8">
      <link rel="attachment" href="data:,some data">
    ''').write_pdf(target=fileobj)
    md5 = '<{}>'.format(hashlib.md5(b'some data').hexdigest()).encode('ascii')
    assert md5 in fileobj.getvalue()
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_attachments_none():
    """Check a document with a heading but no attachment.

    The output must contain an outline but no ``Names`` entry.
    """
    fileobj = io.BytesIO()
    # NOTE(review): the markup of this document was lost, so no heading was
    # left to produce the outline asserted below. It is rebuilt from the
    # surviving text; confirm against the original fixture.
    FakeHTML(string='''
      <title>Test document 3</title>
      <meta charset="utf-8">
      <h1>Heading</h1>
    ''').write_pdf(target=fileobj)
    pdf_bytes = fileobj.getvalue()
    assert b'Names' not in pdf_bytes
    assert b'Outlines' in pdf_bytes
@assert_no_logs
def test_attachments_none_empty():
    """Check that a bare document yields neither names nor outlines."""
    # NOTE(review): sibling tests lost their HTML tags; this literal is
    # reproduced exactly as found, which may be missing markup too.
    output = io.BytesIO()
    document = FakeHTML(string='''
Test document 3
''')
    document.write_pdf(target=output)
    rendered = output.getvalue()
    for marker in (b'Names', b'Outlines'):
        assert marker not in rendered
@assert_no_logs
def test_annotations():
    """Check that an attachment link becomes a file attachment annotation.

    The data must be present in the output, but not as a document-level
    embedded file.
    """
    # NOTE(review): the markup of this document was lost, leaving plain text
    # with nothing to attach. It is rebuilt from the assertions below (MD5
    # of b'some data', a /FileAttachment entry) and from the surviving link
    # text; confirm against the original fixture.
    pdf_bytes = FakeHTML(string='''
      <title>Test document</title>
      <meta charset="utf-8">
      <a
        rel="attachment"
        href="data:,some data"
        download>A link that lets you download an attachment</a>
    ''').write_pdf()

    assert hashlib.md5(b'some data').hexdigest().encode('ascii') in pdf_bytes
    assert b'/FileAttachment' in pdf_bytes
    assert b'/EmbeddedFiles' not in pdf_bytes
@pytest.mark.parametrize('style, media, bleed, trim', (
    ('bleed: 30pt; size: 10pt',
     [0, 0, 70, 70],
     [20.0, 20.0, 50.0, 50.0],
     [30.0, 30.0, 40.0, 40.0]),
    ('bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt',
     [0, 0, 33, 36],
     [8.0, 5.0, 33.0, 36.0],
     [18.0, 15.0, 30.0, 30.0]),
))
@assert_no_logs
def test_bleed(style, media, bleed, trim):
    """Check the page boxes written for a page with bleed.

    ``style`` holds the declarations placed in the ``@page`` rule; ``media``,
    ``bleed`` and ``trim`` are the four numbers expected in the MediaBox,
    BleedBox and TrimBox entries.
    """
    fileobj = io.BytesIO()
    # NOTE(review): the markup of this document was lost, leaving no ``%s``
    # for the ``%`` operator (a TypeError at run time). The <style> element
    # is rebuilt so the parametrized declarations land in an @page rule;
    # confirm against the original fixture.
    # ``%`` formatting is kept because the CSS braces would need escaping
    # with ``str.format``.
    FakeHTML(string='''
      <title>Test document</title>
      <style>@page { %s }</style>
      <body>test
    ''' % style).write_pdf(target=fileobj)
    pdf_bytes = fileobj.getvalue()
    assert (
        '/MediaBox [ {} {} {} {} ]'.format(*media).encode('ascii')
        in pdf_bytes)
    assert (
        '/BleedBox [ {} {} {} {} ]'.format(*bleed).encode('ascii')
        in pdf_bytes)
    assert (
        '/TrimBox [ {} {} {} {} ]'.format(*trim).encode('ascii')
        in pdf_bytes)