improved pdfcreator
1- use CLI 2- refactor code
This commit is contained in:
128
python/pdfcreator/tests.py
Normal file
128
python/pdfcreator/tests.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import unittest
|
||||
from pypdf import PdfWriter, PageObject
|
||||
from types import SimpleNamespace
|
||||
from pdfaggregator import parse_inputs, strip_numbering, crop_page, extract_page_range, extract_section_prefix, parse_page_range, find_section_with_level, find_end_page
|
||||
|
||||
|
||||
class TestPdfExtractionFunctions(unittest.TestCase):
    """Exercise the extraction helpers against a five-page blank PDF."""

    def setUp(self):
        """Create the source document, an empty target writer and an outline."""
        # Dummy PDF with 5 blank pages.
        self.writer = PdfWriter()
        for _ in range(5):
            self.writer.add_page(
                PageObject.create_blank_page(width=600, height=800))
        # The writer is passed where a reader is expected; per the original
        # author's note, a pypdf writer can be used as a reader for its
        # pages list.
        self.reader = self.writer
        self.content_writer = PdfWriter()
        # Minimal outline: one top-level entry on page 0 with one child on
        # page 1.
        self.outline_tree = [{"title": "Section1", "page": 0, "children": [
            {"title": "Section1.1", "page": 1, "children": []}]}]

    def test_extract_page_range(self):
        """An "@1-3" request yields three pages and a "Pages 1-3" TOC entry."""
        # The middle element of the returned triple is not checked here.
        current_page, _reference_box, toc = extract_page_range(
            "@1-3", self.reader, self.content_writer, 0, None)
        # All three requested pages (1, 2 and 3) are expected in the output.
        self.assertEqual(len(self.content_writer.pages), 3)
        self.assertEqual(toc["title"], "Pages 1-3")
        # Started at 0 and three pages were added.
        self.assertEqual(current_page, 3)

    def test_extract_section_prefix(self):
        """Extracting "Section1" yields all five pages of the dummy PDF."""
        # The middle element of the returned triple is not checked here.
        current_page, _reference_box, toc = extract_section_prefix(
            "Section1", self.reader, self.content_writer, 0, None,
            self.outline_tree)
        # The whole 5-page document is expected, presumably because
        # "Section1" is the only top-level outline entry and nothing bounds
        # it -- confirm against the end-page lookup in pdfaggregator.
        self.assertEqual(len(self.content_writer.pages), 5)
        self.assertEqual(toc["title"], "Section1")
        self.assertEqual(current_page, 5)
|
||||
|
||||
|
||||
class TestPdfAggregator(unittest.TestCase):
    """Tests for page-range parsing, title clean-up and page cropping."""

    def test_parse_page_range(self):
        """"@A-B" specs map to zero-based page indices; other specs give None."""
        self.assertEqual(parse_page_range("@1-5"), [0, 1, 2, 3, 4])
        self.assertEqual(parse_page_range("@10-12"), [9, 10, 11])
        # Specs without the "@A-B" shape are not page ranges.
        self.assertIsNone(parse_page_range("1.3"))
        self.assertIsNone(parse_page_range("Introduction-Overview"))

    def test_strip_numbering(self):
        """A leading dotted section number is dropped; other titles are kept."""
        self.assertEqual(strip_numbering("1.3 Background"), "Background")
        self.assertEqual(strip_numbering(
            "2.1.5 Experimental Setup"), "Experimental Setup")
        self.assertEqual(strip_numbering("NoNumberingHere"), "NoNumberingHere")

    def test_crop_page(self):
        """Cropping 10% off the top and 5% off the bottom leaves 85% of the height."""
        page = PageObject.create_blank_page(width=600, height=800)
        crop_page(page, top_ratio=0.1, bottom_ratio=0.05)
        # Only the vertical extent is checked, so the x coordinates are
        # discarded.
        _, lower_y = page.cropbox.lower_left
        _, upper_y = page.cropbox.upper_right
        self.assertAlmostEqual(upper_y - lower_y, 800 * 0.85)
|
||||
|
||||
|
||||
class TestParseInputs(unittest.TestCase):
    """Check how ``parse_inputs`` handles ``file:section,...`` input specs."""

    @staticmethod
    def _parse(*specs):
        """Call ``parse_inputs`` with a namespace whose ``inputs`` is *specs*."""
        return parse_inputs(SimpleNamespace(inputs=list(specs)))

    def test_single_pdf_single_section(self):
        """One file with one section produces a single entry."""
        expected = [{"file": "doc1.pdf", "sections": ["1.3"]}]
        self.assertEqual(self._parse("doc1.pdf:1.3"), expected)

    def test_single_pdf_multiple_sections(self):
        """Comma-separated sections are split and kept in order."""
        expected = [
            {"file": "doc1.pdf", "sections": ["1.3", "2.1", "@10-20"]},
        ]
        self.assertEqual(self._parse("doc1.pdf:1.3,2.1,@10-20"), expected)

    def test_multiple_pdfs(self):
        """Each input spec produces its own entry, in input order."""
        parsed = self._parse(
            "doc1.pdf:1.3,@5-10",
            "doc2.pdf:Introduction,3.2",
        )
        expected = [
            {"file": "doc1.pdf", "sections": ["1.3", "@5-10"]},
            {"file": "doc2.pdf", "sections": ["Introduction", "3.2"]},
        ]
        self.assertEqual(parsed, expected)

    def test_whitespace_is_trimmed(self):
        """Whitespace around section names is stripped."""
        first_entry = self._parse("doc1.pdf: 1.3 , @5-10 , Introduction ")[0]
        self.assertEqual(
            first_entry["sections"], ["1.3", "@5-10", "Introduction"])

    def test_missing_colon_raises_error(self):
        """A spec without a ``:`` separator raises ``ValueError``."""
        namespace = SimpleNamespace(inputs=["doc1.pdf"])
        self.assertRaises(ValueError, parse_inputs, namespace)
|
||||
|
||||
|
||||
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user