update on various python tools

This commit is contained in:
local
2026-01-19 15:21:44 +00:00
parent 31684ecded
commit 7891956d52
7 changed files with 164 additions and 1 deletions

16
python/.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}

5
python/.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"python-envs.defaultEnvManager": "ms-python.python:conda",
"python-envs.defaultPackageManager": "ms-python.python:conda",
"python-envs.pythonProjects": []
}

View File

@@ -0,0 +1,13 @@
import struct
def float_bin(number):
    """Return the 32-bit float encoding of *number* as a ``bin()`` string.

    The value is packed as a little-endian single-precision float and the
    same four bytes are then read back as an unsigned integer.  Because
    ``bin()`` does not pad with leading zeros, the result has fewer than
    32 digits whenever the most significant bit (the sign bit) is 0.
    """
    raw_bytes = struct.pack('<f', number)
    as_unsigned = int.from_bytes(raw_bytes, 'little')
    return bin(as_unsigned)
def main():
    """Print the bit pattern that float_bin produces for 5.75."""
    bit_string = float_bin(5.75)
    print(bit_string)


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

Binary file not shown.

BIN
python/pdfcreator/input.pdf Normal file

Binary file not shown.

129
python/pdfcreator/main.py Normal file
View File

@@ -0,0 +1,129 @@
from pypdf import PdfReader, PdfWriter
# Path of the PDF that extract_section() reads (relative to the working directory).
INPUT_PDF = "pdfcreator/input.pdf"
# Path the extracted, cropped pages are written to (opened in "wb" mode).
OUTPUT_PDF = "pdfcreator/extracted_section.pdf"
# Outline title to look for; find_section() accepts an exact match or a title
# that starts with this text followed by a space.
TARGET_SECTION_TITLE = "1.3"
def crop_page(page, top_ratio=0.12, bottom_ratio=0.12):
    """
    Shrink the crop box of *page* vertically, in place.

    The new crop box keeps the media box's horizontal extent and trims a
    fraction of the media box height from each vertical edge.

    top_ratio: fraction of the media box height trimmed from the top
    bottom_ratio: fraction of the media box height trimmed from the bottom

    Raises ValueError when the trimmed box would have no height left.
    """
    left, bottom, right, top = page.mediabox
    full_height = top - bottom

    # Amounts to cut away at each edge, in the page's own units.
    trim_bottom = full_height * bottom_ratio
    trim_top = full_height * top_ratio

    cropped_bottom = bottom + trim_bottom
    cropped_top = top - trim_top

    if cropped_bottom >= cropped_top:
        raise ValueError("Invalid crop ratios: page height would be negative")

    # Only the crop box is modified; the media box is left untouched.
    page.cropbox.lower_left = (left, cropped_bottom)
    page.cropbox.upper_right = (right, cropped_top)
def build_outline_tree(reader):
    """
    Convert ``reader.outline`` into a tree of plain dictionaries.

    The outline is expected to be a list in which each entry is either an
    outline item or a nested list holding the children of the item that
    precedes it.  Every item becomes a dict with the keys:

    "title":    the item's title with surrounding whitespace removed
                ("" when the item has no title)
    "page":     whatever ``reader.get_destination_page_number(item)`` returns;
                this may be None when the destination cannot be resolved
    "children": list of dicts of the same shape

    Returns the list of top-level nodes.
    """

    def _build(outline):
        tree = []
        for item in outline:
            if isinstance(item, list):
                children = _build(item)
                if tree:
                    # Extend rather than assign so that several consecutive
                    # nested lists do not overwrite each other.
                    tree[-1]["children"].extend(children)
                else:
                    # A nested list with no preceding item has no parent to
                    # attach to; keep its entries at the current level
                    # instead of failing on tree[-1].
                    tree.extend(children)
            else:
                tree.append({
                    "title": (item.title or "").strip(),
                    "page": reader.get_destination_page_number(item),
                    "children": [],
                })
        return tree

    return _build(reader.outline)
def find_section(nodes, title):
    """
    Return the first outline node whose title matches *title*, or None.

    Nodes are visited depth-first in document order (a node before its
    children, children before the next sibling).  A node matches when its
    title equals *title* exactly or starts with *title* followed by a space,
    so "1.3" matches "1.3 Scope" but not "1.30 Other".
    """
    # Explicit stack; entries are pushed reversed so they pop in order.
    pending = list(reversed(nodes))
    while pending:
        candidate = pending.pop()
        heading = candidate["title"]
        if heading == title or heading.startswith(title + " "):
            return candidate
        pending.extend(reversed(candidate["children"]))
    return None
def collect_subtree_pages(node, pages=None):
    """
    Gather the page number of *node* and of every descendant.

    Pages are appended in depth-first order (parent first).  When a list is
    passed as *pages* it is appended to in place and returned; otherwise a
    new list is created.
    """
    if pages is None:
        pages = []
    # Explicit stack; children are pushed reversed so they pop in order.
    todo = [node]
    while todo:
        current = todo.pop()
        pages.append(current["page"])
        todo.extend(reversed(current["children"]))
    return pages
def flatten_outline_pages(nodes, pages=None):
    """
    List the page number of every outline entry in document order.

    Each node's page is emitted before those of its children.  When a list
    is passed as *pages* it is extended in place and returned; otherwise a
    new list is created.
    """

    def _preorder(level):
        # Yield a node's page, then everything beneath it.
        for entry in level:
            yield entry["page"]
            yield from _preorder(entry["children"])

    collected = [] if pages is None else pages
    collected.extend(_preorder(nodes))
    return collected
def find_end_page(target_node, outline_tree, total_pages):
    """
    Return the exclusive end page index for the target section.

    The end page is the smallest outline page number that is strictly
    greater than the largest page number found in the target's subtree.
    If no outline entry lies beyond the section, *total_pages* is returned.

    target_node: outline node (dict with "page" and "children") to extract
    outline_tree: list of all top-level outline nodes
    total_pages: value returned when the section runs to the end

    Raises ValueError when neither the target nor any of its descendants
    has a resolved page number.
    """
    # Outline entries can carry page None when their destination could not
    # be resolved; comparing None with ints would raise TypeError, so such
    # entries are ignored.
    subtree_pages = [
        p for p in collect_subtree_pages(target_node) if p is not None
    ]
    if not subtree_pages:
        raise ValueError("Target section has no resolvable page number")
    last_section_page = max(subtree_pages)

    # NOTE(review): an entry that starts on the same page as the section's
    # last bookmark is not treated as the end of the section; confirm that
    # this is the intended behavior for very short sections.
    later_pages = (
        p
        for p in flatten_outline_pages(outline_tree)
        if p is not None and p > last_section_page
    )
    return min(later_pages, default=total_pages)
def extract_section():
    """
    Copy one outline section of INPUT_PDF into OUTPUT_PDF.

    The section is located by TARGET_SECTION_TITLE in the document outline.
    Pages from the section's start page up to (not including) the page
    returned by find_end_page() are cropped with crop_page() and written to
    OUTPUT_PDF.  A one-line summary is printed at the end.

    Raises ValueError when the section is not in the outline or when its
    start page cannot be resolved.
    """
    reader = PdfReader(INPUT_PDF)
    outline_tree = build_outline_tree(reader)
    total_pages = len(reader.pages)

    target = find_section(outline_tree, TARGET_SECTION_TITLE)
    if target is None:
        raise ValueError(f"Section '{TARGET_SECTION_TITLE}' not found")

    start_page = target["page"]
    if start_page is None:
        # Without a start page range() below would fail with a TypeError.
        raise ValueError(
            f"Section '{target['title']}' has no resolvable start page"
        )
    end_page = find_end_page(target, outline_tree, total_pages)

    writer = PdfWriter()
    for p in range(start_page, end_page):
        page = reader.pages[p]
        crop_page(page)
        writer.add_page(page)

    with open(OUTPUT_PDF, "wb") as f:
        writer.write(f)

    # start_page is a 0-based index and end_page is exclusive, so the
    # 1-based inclusive range shown to the user is start_page + 1 .. end_page.
    print(
        f"Extracted '{target['title']}' "
        f"(pages {start_page + 1}-{end_page})"
    )


# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    extract_section()

View File

@@ -94,7 +94,7 @@ def main():
print("No speech detected. Try again.") print("No speech detected. Try again.")
continue continue
# print(f"You said: {text}") print(f"You said: {text}")
pyperclip.copy(text) pyperclip.copy(text)
if (args.nollm == False): if (args.nollm == False):