Hello, World

')).write_pdf(target=fileobj) pdf_file = pdf.PDFFile(fileobj) links = [ annot for page in pdf_file.pages for annot in page.get_indirect_dict_array('Annots', pdf_file)] # 30pt wide (like the image), 20pt high (like line-height) assert links[0].get_value('URI', '(.*)') == b'(http://weasyprint.org)' assert links[0].get_value('S', '(.*)') == b'/URI' assert_rect_almost_equal( links[0].get_value('Rect', '(.*)'), (0, TOP - 20, 30, TOP)) # The image itself: 30*30pt assert links[1].get_value('URI', '(.*)') == b'(http://weasyprint.org)' assert links[1].get_value('S', '(.*)') == b'/URI' assert_rect_almost_equal( links[1].get_value('Rect', '(.*)'), (0, TOP - 30, 30, TOP)) # 32pt wide (image + 2 * 1pt of border), 20pt high # TODO: replace these commented tests now that we use named destinations # assert links[2].get_value('Subtype', '(.*)') == b'/Link' # dest = links[2].get_value('Dest', '(.*)').strip(b'[]').split() # assert dest[-4] == b'/XYZ' # assert [round(float(value)) for value in dest[-3:]] == […] assert_rect_almost_equal( links[2].get_value('Rect', '(.*)'), (10, TOP - 100 - 20, 10 + 32, TOP - 100)) # The image itself: 32*32pt # TODO: same as above # assert links[3].get_value('Subtype', '(.*)') == b'/Link' # dest = links[3].get_value('Dest', '(.*)').strip(b'[]').split() # assert dest[-4] == b'/XYZ' # assert [round(float(value)) for value in dest[-3:]] == […] assert_rect_almost_equal( links[3].get_value('Rect', '(.*)'), (10, TOP - 100 - 32, 10 + 32, TOP - 100)) # 100% wide (block), 30pt high assert links[4].get_value('Subtype', '(.*)') == b'/Link' dest = links[4].get_value('Dest', '(.*)').strip(b'[]').split() assert dest == [b'(hello)'] names = ( pdf_file.catalog .get_indirect_dict('Names', pdf_file) .get_indirect_dict('Dests', pdf_file) .byte_string).decode('ascii') assert_rect_almost_equal( re.search( '\\(hello\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names ).group(1), (0, TOP - 200, 0)) assert_rect_almost_equal( links[4].get_value('Rect', '(.*)'), (0, TOP - 30, RIGHT, TOP)) # 100% wide (block), 0pt high fileobj = io.BytesIO() FakeHTML( string='a', base_url='http://weasyprint.org/foo/bar/').write_pdf(target=fileobj) pdf_file = pdf.PDFFile(fileobj) link, = [ annot for page in pdf_file.pages for annot in page.get_indirect_dict_array('Annots', pdf_file)] assert ( link.get_value('URI', '(.*)') == b'(http://weasyprint.org/foo/lipsum)') assert link.get_value('S', '(.*)') == b'/URI' assert_rect_almost_equal( link.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP)) @assert_no_logs @requires('cairo', (1, 15, 4)) def test_relative_links(): # Relative URI reference without a base URI: allowed for anchors fileobj = io.BytesIO() FakeHTML( string='a', base_url=None).write_pdf(target=fileobj) pdf_file = pdf.PDFFile(fileobj) annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0] assert annots.get_value('URI', '(.*)') == b'(../lipsum)' assert annots.get_value('S', '(.*)') == b'/URI' assert_rect_almost_equal( annots.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP)) @assert_no_logs def test_relative_links_missing_base(): # Relative URI reference without a base URI: not supported for -weasy-link fileobj = io.BytesIO() with capture_logs() as logs: FakeHTML( string='

', base_url=None).write_pdf(target=fileobj) pdf_file = pdf.PDFFile(fileobj) with pytest.raises(AttributeError): pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file) assert len(logs) == 1 assert 'WARNING: Ignored `-weasy-link: url("../lipsum")`' in logs[0] assert 'Relative URI reference without a base URI' in logs[0] @assert_no_logs @requires('cairo', (1, 15, 4)) def test_relative_links_internal(): # Internal URI reference without a base URI: OK fileobj = io.BytesIO() FakeHTML( string='a', base_url=None).write_pdf(target=fileobj) pdf_file = pdf.PDFFile(fileobj) annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0] dest = annots.get_value('Dest', '(.*)') assert dest == b'(lipsum)' names = ( pdf_file.catalog .get_indirect_dict('Names', pdf_file) .get_indirect_dict('Dests', pdf_file) .byte_string).decode('ascii') assert_rect_almost_equal( re.search( '\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names ).group(1), (0, TOP, 0)) assert_rect_almost_equal( annots.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP)) @assert_no_logs @requires('cairo', (1, 15, 4)) def test_relative_links_anchors(): fileobj = io.BytesIO() FakeHTML( string='

a', base_url=None).write_pdf(target=fileobj) pdf_file = pdf.PDFFile(fileobj) annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0] dest = annots.get_value('Dest', '(.*)') assert dest == b'(lipsum)' names = ( pdf_file.catalog .get_indirect_dict('Names', pdf_file) .get_indirect_dict('Dests', pdf_file) .byte_string).decode('ascii') assert_rect_almost_equal( re.search( '\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names ).group(1), (0, TOP, 0)) assert_rect_almost_equal( annots.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP)) @assert_no_logs @requires('cairo', (1, 15, 4)) def test_missing_links(): fileobj = io.BytesIO() with capture_logs() as logs: FakeHTML(string=''' a ''', base_url=None).write_pdf(target=fileobj) pdf_file = pdf.PDFFile(fileobj) annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0] dest = annots.get_value('Dest', '(.*)') assert dest == b'(lipsum)' names = ( pdf_file.catalog .get_indirect_dict('Names', pdf_file) .get_indirect_dict('Dests', pdf_file) .byte_string).decode('ascii') assert_rect_almost_equal( re.search( '\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names ).group(1), (0, TOP - 15, 0)) assert_rect_almost_equal( annots.get_value('Rect', '(.*)'), (0, TOP - 15, RIGHT, TOP)) assert len(logs) == 1 assert 'ERROR: No anchor #missing for internal URI reference' in logs[0] @assert_no_logs def test_embed_gif(): assert b'/Filter /DCTDecode' not in FakeHTML( base_url=resource_filename('dummy.html'), string='

').write_pdf() @assert_no_logs def test_embed_jpeg(): # JPEG-encoded image, embedded in PDF: assert b'/Filter /DCTDecode' in FakeHTML( base_url=resource_filename('dummy.html'), string='

').write_pdf() @assert_no_logs @requires('cairo', (1, 15, 4)) def test_document_info(): fileobj = io.BytesIO() FakeHTML(string=''' Test document

Another title

''').write_pdf(target=fileobj) info = pdf.PDFFile(fileobj).info assert info.get_value('Author', '(.*)') == b'(I Me & Myself)' assert info.get_value('Title', '(.*)') == b'(Test document)' assert info.get_value('Creator', '(.*)') == ( b'') assert info.get_value('Keywords', '(.*)') == b'(html, css, pdf)' assert info.get_value('Subject', '(.*)') == ( b'') assert info.get_value('CreationDate', '(.*)') == b"(20110421230000+00'00)" assert info.get_value('ModDate', '(.*)') == b"(20130721234600+01'00)" @assert_no_logs @requires('cairo', (1, 15, 4)) def test_embedded_files_attachments(tmpdir): absolute_tmp_file = tmpdir.join('some_file.txt').strpath adata = b'12345678' with open(absolute_tmp_file, 'wb') as afile: afile.write(adata) absolute_url = path2url(absolute_tmp_file) assert absolute_url.startswith('file://') relative_tmp_file = tmpdir.join('äöü.txt').strpath rdata = b'abcdefgh' with open(relative_tmp_file, 'wb') as rfile: rfile.write(rdata) fileobj = io.BytesIO() FakeHTML( string=''' Test document

Heading 1

Heading 2

'''.format(absolute_url, os.path.basename(relative_tmp_file)), base_url=tmpdir.strpath, ).write_pdf( target=fileobj, attachments=[ Attachment('data:,oob attachment', description='Hello'), 'data:,raw URL', io.BytesIO(b'file like obj') ] ) pdf_bytes = fileobj.getvalue() assert ( '<{}>'.format(hashlib.md5(b'hi there').hexdigest()).encode('ascii') in pdf_bytes) assert b'/F ()' in pdf_bytes assert ( b'/UF (\xfe\xff\x00a\x00t\x00t\x00a\x00c\x00h\x00m\x00e\x00n' b'\x00t\x00.\x00b\x00i\x00n)' in pdf_bytes) assert ( b'/Desc (\xfe\xff\x00s\x00o\x00m\x00e\x00 \x00f\x00i\x00l\x00e' b'\x00 \x00a\x00t\x00t\x00a\x00c\x00h\x00m\x00e\x00n\x00t\x00 ' b'\x00\xe4\x00\xf6\x00\xfc)' in pdf_bytes) assert hashlib.md5(adata).hexdigest().encode('ascii') in pdf_bytes assert ( os.path.basename(absolute_tmp_file).encode('utf-16-be') in pdf_bytes) assert hashlib.md5(rdata).hexdigest().encode('ascii') in pdf_bytes assert ( os.path.basename(relative_tmp_file).encode('utf-16-be') in pdf_bytes) assert ( hashlib.md5(b'oob attachment').hexdigest().encode('ascii') in pdf_bytes) assert b'/Desc (\xfe\xff\x00H\x00e\x00l\x00l\x00o)' in pdf_bytes assert ( hashlib.md5(b'raw URL').hexdigest().encode('ascii') in pdf_bytes) assert ( hashlib.md5(b'file like obj').hexdigest().encode('ascii') in pdf_bytes) assert b'/EmbeddedFiles' in pdf_bytes assert b'/Outlines' in pdf_bytes @assert_no_logs def test_attachments_data(): fileobj = io.BytesIO() FakeHTML(string=''' Test document 2 ''').write_pdf(target=fileobj) md5 = '<{}>'.format(hashlib.md5(b'some data').hexdigest()).encode('ascii') assert md5 in fileobj.getvalue() @assert_no_logs @requires('cairo', (1, 15, 4)) def test_attachments_none(): fileobj = io.BytesIO() FakeHTML(string=''' Test document 3

Heading

''').write_pdf(target=fileobj) pdf_bytes = fileobj.getvalue() assert b'Names' not in pdf_bytes assert b'Outlines' in pdf_bytes @assert_no_logs def test_attachments_none_empty(): fileobj = io.BytesIO() FakeHTML(string=''' Test document 3 ''').write_pdf(target=fileobj) pdf_bytes = fileobj.getvalue() assert b'Names' not in pdf_bytes assert b'Outlines' not in pdf_bytes @assert_no_logs def test_annotations(): pdf_bytes = FakeHTML(string=''' Test document A link that lets you download an attachment ''').write_pdf() assert hashlib.md5(b'some data').hexdigest().encode('ascii') in pdf_bytes assert b'/FileAttachment' in pdf_bytes assert b'/EmbeddedFiles' not in pdf_bytes @pytest.mark.parametrize('style, media, bleed, trim', ( ('bleed: 30pt; size: 10pt', [0, 0, 70, 70], [20.0, 20.0, 50.0, 50.0], [30.0, 30.0, 40.0, 40.0]), ('bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt', [0, 0, 33, 36], [8.0, 5.0, 33.0, 36.0], [18.0, 15.0, 30.0, 30.0]), )) @assert_no_logs def test_bleed(style, media, bleed, trim): fileobj = io.BytesIO() FakeHTML(string=''' Test document test ''' % style).write_pdf(target=fileobj) pdf_bytes = fileobj.getvalue() assert ( '/MediaBox [ {} {} {} {} ]'.format(*media).encode('ascii') in pdf_bytes) assert ( '/BleedBox [ {} {} {} {} ]'.format(*bleed).encode('ascii') in pdf_bytes) assert ( '/TrimBox [ {} {} {} {} ]'.format(*trim).encode('ascii') in pdf_bytes)

Title 1

Title 2

Title 3

Title 4

Title 5

Title 6

Title 7

Title 8

Title 9

Title 10

Title 11

1

2

3

4

5

1

2

3

4

5

6

7

8

9

a

a

a

b

c

d

e

f

g

Another title

Heading 1

Heading 2

Heading