Files
Code/python/pdfcreator/tests.py
local a5934e45b2 improved pdfcreator
1- use CLI
2- refactor code
2026-01-19 23:06:56 +00:00

129 lines
4.1 KiB
Python

import unittest
from pypdf import PdfWriter, PageObject
from types import SimpleNamespace
from pdfaggregator import parse_inputs, strip_numbering, crop_page, extract_page_range, extract_section_prefix, parse_page_range, find_section_with_level, find_end_page
class TestPdfExtractionFunctions(unittest.TestCase):
    """Check the extractors that copy pages from a source PDF into a writer.

    Every test runs against an in-memory document of five blank pages, so
    nothing is read from or written to disk.
    """

    def setUp(self):
        """Build the five-page source, an empty target writer and an outline."""
        # Dummy PDF with 5 blank 600x800 pages.
        self.writer = PdfWriter()
        for _ in range(5):
            self.writer.add_page(
                PageObject.create_blank_page(width=600, height=800))
        # The writer doubles as the "reader" argument; presumably the
        # extractors only need its pages list -- confirm against pdfaggregator.
        self.reader = self.writer
        self.content_writer = PdfWriter()
        # One top-level section on page index 0 with one child on page index 1.
        self.outline_tree = [{"title": "Section1", "page": 0, "children": [
            {"title": "Section1.1", "page": 1, "children": []}]}]

    def test_extract_page_range(self):
        """An explicit "@1-3" range is expected to copy exactly three pages."""
        # The middle element of the returned triple is not checked here.
        current_page, _reference_box, toc = extract_page_range(
            "@1-3", self.reader, self.content_writer, 0, None)
        # "@1-3" is 1-based and inclusive, i.e. page indices 0, 1 and 2.
        self.assertEqual(len(self.content_writer.pages), 3)
        self.assertEqual(toc["title"], "Pages 1-3")
        self.assertEqual(current_page, 3)

    def test_extract_section_prefix(self):
        """Selecting "Section1" is expected to copy all five pages."""
        # The middle element of the returned triple is not checked here.
        current_page, _reference_box, toc = extract_section_prefix(
            "Section1", self.reader, self.content_writer, 0, None,
            self.outline_tree)
        # Section1 is the only top-level outline entry; presumably the section
        # therefore runs to the end of the document -- confirm against the
        # end-page logic in pdfaggregator.
        self.assertEqual(len(self.content_writer.pages), 5)
        self.assertEqual(toc["title"], "Section1")
        self.assertEqual(current_page, 5)
class TestPdfAggregator(unittest.TestCase):
    """Unit tests for the small parsing and cropping helpers."""

    def test_parse_page_range(self):
        """"@a-b" yields zero-based page indices; anything else yields None."""
        valid_specs = (
            ("@1-5", [0, 1, 2, 3, 4]),
            ("@10-12", [9, 10, 11]),
        )
        for spec, expected_indices in valid_specs:
            self.assertEqual(parse_page_range(spec), expected_indices)

        # Section numbers and plain titles are not page ranges.
        for not_a_range in ("1.3", "Introduction-Overview"):
            self.assertIsNone(parse_page_range(not_a_range))

    def test_strip_numbering(self):
        """A leading dotted section number is removed from a title."""
        cases = (
            ("1.3 Background", "Background"),
            ("2.1.5 Experimental Setup", "Experimental Setup"),
            ("NoNumberingHere", "NoNumberingHere"),
        )
        for raw_title, bare_title in cases:
            self.assertEqual(strip_numbering(raw_title), bare_title)

    def test_crop_page(self):
        """Cropping 10% off the top and 5% off the bottom leaves 85% height."""
        blank = PageObject.create_blank_page(width=600, height=800)
        crop_page(blank, top_ratio=0.1, bottom_ratio=0.05)

        # Only the vertical extent is checked; x coordinates are discarded.
        _, bottom_y = blank.cropbox.lower_left
        _, top_y = blank.cropbox.upper_right
        self.assertAlmostEqual(top_y - bottom_y, 800 * 0.85)
class TestParseInputs(unittest.TestCase):
    """Tests for turning "file:section,section" CLI inputs into dicts."""

    @staticmethod
    def _parse(*specs):
        """Run parse_inputs on a namespace whose ``inputs`` list is *specs*."""
        return parse_inputs(SimpleNamespace(inputs=list(specs)))

    def test_single_pdf_single_section(self):
        """One file with one section produces a single entry."""
        expected = [{"file": "doc1.pdf", "sections": ["1.3"]}]
        self.assertEqual(self._parse("doc1.pdf:1.3"), expected)

    def test_single_pdf_multiple_sections(self):
        """Comma-separated sections are split into a list, order preserved."""
        expected = [
            {"file": "doc1.pdf", "sections": ["1.3", "2.1", "@10-20"]},
        ]
        self.assertEqual(self._parse("doc1.pdf:1.3,2.1,@10-20"), expected)

    def test_multiple_pdfs(self):
        """Each input string produces its own entry, in input order."""
        parsed = self._parse(
            "doc1.pdf:1.3,@5-10",
            "doc2.pdf:Introduction,3.2",
        )
        expected = [
            {"file": "doc1.pdf", "sections": ["1.3", "@5-10"]},
            {"file": "doc2.pdf", "sections": ["Introduction", "3.2"]},
        ]
        self.assertEqual(parsed, expected)

    def test_whitespace_is_trimmed(self):
        """Spaces around section names are stripped."""
        parsed = self._parse("doc1.pdf: 1.3 , @5-10 , Introduction ")
        first_entry = parsed[0]
        self.assertEqual(
            first_entry["sections"], ["1.3", "@5-10", "Introduction"])

    def test_missing_colon_raises_error(self):
        """An input without a "file:sections" separator raises ValueError."""
        self.assertRaises(ValueError, self._parse, "doc1.pdf")
# Discover and run every TestCase in this module when it is executed directly.
if __name__ == "__main__":
    unittest.main()