update on various python tools
This commit is contained in:
16
python/.vscode/launch.json
vendored
Normal file
16
python/.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Current File",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
5
python/.vscode/settings.json
vendored
Normal file
5
python/.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"python-envs.defaultEnvManager": "ms-python.python:conda",
|
||||||
|
"python-envs.defaultPackageManager": "ms-python.python:conda",
|
||||||
|
"python-envs.pythonProjects": []
|
||||||
|
}
|
||||||
13
python/floatingpoint/main.py
Normal file
13
python/floatingpoint/main.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
import struct
|
||||||
|
|
||||||
|
def float_bin(number):
    """Return the bit pattern of *number* stored as a 32-bit float.

    The value is packed as a little-endian single-precision float and those
    same four bytes are then read back as an unsigned integer. The result is
    produced by ``bin``, so it carries a ``0b`` prefix and leading zero bits
    are not shown (a positive float therefore yields fewer than 32 digits).
    """
    raw_bytes = struct.pack('<f', number)
    as_unsigned = int.from_bytes(raw_bytes, 'little')
    return bin(as_unsigned)
|
||||||
|
|
||||||
|
def main():
    """Print the 32-bit float bit pattern of the sample value 5.75."""
    sample = 5.75
    bit_string = float_bin(sample)
    print(bit_string)


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||||
BIN
python/pdfcreator/extracted_section.pdf
Normal file
BIN
python/pdfcreator/extracted_section.pdf
Normal file
Binary file not shown.
BIN
python/pdfcreator/input.pdf
Normal file
BIN
python/pdfcreator/input.pdf
Normal file
Binary file not shown.
129
python/pdfcreator/main.py
Normal file
129
python/pdfcreator/main.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
from pypdf import PdfReader, PdfWriter
|
||||||
|
|
||||||
|
# Source PDF opened by extract_section(). The path is relative, so it is
# resolved against the current working directory of the process.
INPUT_PDF = "pdfcreator/input.pdf"
# Destination file that extract_section() writes the extracted pages to.
OUTPUT_PDF = "pdfcreator/extracted_section.pdf"
# Outline title to extract. find_section() accepts an exact match or any
# title that starts with this text followed by a space.
TARGET_SECTION_TITLE = "1.3"
|
||||||
|
|
||||||
|
|
||||||
|
def crop_page(page, top_ratio=0.12, bottom_ratio=0.12):
    """Trim a strip off the top and bottom of a page's visible area.

    The strips are sized as fractions of the media-box height, and the
    result is written to the page's crop box; the left and right edges are
    taken unchanged from the media box. The page is modified in place and
    nothing is returned.

    Args:
        page: Page object exposing ``mediabox`` (unpackable into four
            coordinates) and ``cropbox`` with assignable ``lower_left`` and
            ``upper_right`` corners.
        top_ratio: Fraction of the page height to remove from the top.
        bottom_ratio: Fraction of the page height to remove from the bottom.

    Raises:
        ValueError: If the two strips together leave no height.
    """
    left, bottom, right, top = page.mediabox
    page_height = top - bottom

    cropped_top = top - page_height * top_ratio
    cropped_bottom = bottom + page_height * bottom_ratio

    # The lower edge must stay strictly below the upper edge.
    if cropped_bottom >= cropped_top:
        raise ValueError("Invalid crop ratios: page height would be negative")

    page.cropbox.lower_left = (left, cropped_bottom)
    page.cropbox.upper_right = (right, cropped_top)
|
||||||
|
|
||||||
|
|
||||||
|
def build_outline_tree(reader):
    """Convert a reader's nested outline into a tree of plain dictionaries.

    ``reader.outline`` is walked as a list in which a bookmark item may be
    followed by a nested list holding that bookmark's children.

    Args:
        reader: Object exposing ``outline`` and
            ``get_destination_page_number(item)``; this script passes a
            pypdf ``PdfReader``.

    Returns:
        A list of nodes in outline order. Each node is a dict with the keys
        ``"title"`` (the bookmark title with surrounding whitespace
        stripped), ``"page"`` (whatever ``get_destination_page_number``
        returned for the bookmark) and ``"children"`` (a list of nodes).
    """
    def _build(outline):
        tree = []
        for item in outline:
            if isinstance(item, list):
                children = _build(item)
                if tree:
                    # Extend instead of assigning so that a second nested
                    # list after the same bookmark does not discard the
                    # children collected from the first one.
                    tree[-1]["children"].extend(children)
                else:
                    # A nested list with no preceding bookmark has no parent
                    # to attach to; keep its entries at the current level
                    # rather than failing with IndexError on tree[-1].
                    tree.extend(children)
            else:
                tree.append({
                    "title": item.title.strip(),
                    "page": reader.get_destination_page_number(item),
                    "children": [],
                })
        return tree

    return _build(reader.outline)
|
||||||
|
|
||||||
|
|
||||||
|
def find_section(nodes, title):
    """Return the first outline node whose title matches *title*, or None.

    Nodes are visited depth-first in document order: a node is checked
    before its children, and its children before its later siblings. A node
    matches when its title equals *title* exactly or starts with *title*
    followed by a space (so "1.3" matches "1.3 Details" but not "1.30 Other").
    """
    # Explicit stack; entries are pushed reversed so they pop in document order.
    pending = list(reversed(nodes))
    while pending:
        candidate = pending.pop()
        heading = candidate["title"]
        if heading == title or heading.startswith(title + " "):
            return candidate
        pending.extend(reversed(candidate["children"]))
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def collect_subtree_pages(node, pages=None):
    """Gather the page numbers of *node* and every node beneath it.

    Pages are listed parent-first, then each child's subtree in order. When
    *pages* is supplied, the numbers are appended to that list and the same
    list is returned; otherwise a new list is created.
    """
    if pages is None:
        pages = []

    # Explicit stack; children are pushed reversed so they pop in order.
    pending = [node]
    while pending:
        current = pending.pop()
        pages.append(current["page"])
        pending.extend(reversed(current["children"]))

    return pages
|
||||||
|
|
||||||
|
|
||||||
|
def flatten_outline_pages(nodes, pages=None):
    """List the page number of every outline entry in document order.

    Each node's page comes before the pages of its children, and duplicates
    are kept. When *pages* is supplied, the numbers are appended to that list
    and the same list is returned; otherwise a new list is created.
    """
    def _walk(level):
        # Yield a node's page, then everything beneath it, before moving on.
        for entry in level:
            yield entry["page"]
            yield from _walk(entry["children"])

    collected = [] if pages is None else pages
    collected.extend(_walk(nodes))
    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def find_end_page(target_node, outline_tree, total_pages):
    """Return the exclusive end page index for the target section.

    The end page is the smallest outline page number that is greater than
    every page in the target's subtree. If no outline entry lies beyond the
    section, *total_pages* is returned so the section runs to the end of the
    document.

    Outline entries whose ``"page"`` is ``None`` are ignored, because
    comparing ``None`` with page numbers would raise ``TypeError``.
    NOTE(review): ``None`` is expected when the PDF library cannot resolve a
    bookmark to a page -- confirm against the pypdf documentation.

    Args:
        target_node: Outline node (dict with ``"page"`` and ``"children"``)
            of the section being extracted.
        outline_tree: List of all top-level outline nodes.
        total_pages: Value returned when nothing follows the section.

    Raises:
        ValueError: If no entry in the target's subtree has a page number.
    """
    subtree_pages = [
        page for page in collect_subtree_pages(target_node)
        if page is not None
    ]
    if not subtree_pages:
        raise ValueError("Target section has no resolvable page number")
    last_section_page = max(subtree_pages)

    later_pages = (
        page for page in flatten_outline_pages(outline_tree)
        if page is not None and page > last_section_page
    )
    # min() over the later pages equals the first hit of a sorted scan.
    return min(later_pages, default=total_pages)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_section(input_pdf=None, output_pdf=None, section_title=None):
    """Copy one outlined section of a PDF into a new, cropped PDF.

    The section is located through the document outline. Its pages run from
    the section's own page up to (not including) the page returned by
    ``find_end_page``. Every copied page is passed through ``crop_page`` with
    its default ratios before being added to the output.

    Args:
        input_pdf: Path of the PDF to read; defaults to ``INPUT_PDF``.
        output_pdf: Path of the PDF to write; defaults to ``OUTPUT_PDF``.
        section_title: Outline title to look up with ``find_section``;
            defaults to ``TARGET_SECTION_TITLE``.

    Raises:
        ValueError: If the section is not in the outline, or its outline
            entry has no page number.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for a plain ``extract_section()`` call.
    if input_pdf is None:
        input_pdf = INPUT_PDF
    if output_pdf is None:
        output_pdf = OUTPUT_PDF
    if section_title is None:
        section_title = TARGET_SECTION_TITLE

    reader = PdfReader(input_pdf)
    writer = PdfWriter()

    outline_tree = build_outline_tree(reader)
    total_pages = len(reader.pages)

    target = find_section(outline_tree, section_title)
    if not target:
        raise ValueError(f"Section '{section_title}' not found")

    start_page = target["page"]
    if start_page is None:
        # Without a start page there is no page range to copy; fail with a
        # clear message instead of a TypeError further down.
        raise ValueError(f"Section '{section_title}' has no page number")
    end_page = find_end_page(target, outline_tree, total_pages)

    for p in range(start_page, end_page):
        page = reader.pages[p]
        crop_page(page)
        writer.add_page(page)

    with open(output_pdf, "wb") as f:
        writer.write(f)

    # start_page is 0-based and end_page is exclusive, so the 1-based
    # inclusive range shown to the user is start_page + 1 .. end_page.
    print(
        f"Extracted '{target['title']}' "
        f"(pages {start_page + 1}–{end_page})"
    )


# Run the extraction with the module-level defaults when executed as a script.
if __name__ == "__main__":
    extract_section()
|
||||||
@@ -94,7 +94,7 @@ def main():
|
|||||||
print("No speech detected. Try again.")
|
print("No speech detected. Try again.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# print(f"You said: {text}")
|
print(f"You said: {text}")
|
||||||
pyperclip.copy(text)
|
pyperclip.copy(text)
|
||||||
|
|
||||||
if (args.nollm == False):
|
if (args.nollm == False):
|
||||||
|
|||||||
Reference in New Issue
Block a user