update on various python tools

2026-01-19 15:21:44 +00:00
parent 31684ecded
commit 7891956d52
7 changed files with 164 additions and 1 deletions
--- a/python/.vscode/launch.json
+++ b/python/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        
+        {
+            "name": "Python Debugger: Current File",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal"
+        }
+    ]
+}
--- a/python/.vscode/settings.json
+++ b/python/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "python-envs.defaultEnvManager": "ms-python.python:conda",
+    "python-envs.defaultPackageManager": "ms-python.python:conda",
+    "python-envs.pythonProjects": []
+}
--- a/python/floatingpoint/main.py
+++ b/python/floatingpoint/main.py
@@ -0,0 +1,13 @@
+
+import struct
+
+def float_bin(number, *, pad=False):
+    """Return the IEEE 754 single-precision bit pattern of ``number``.
+
+    The value is packed as a little-endian 32-bit float and the same four
+    bytes are then reinterpreted as an unsigned 32-bit integer.
+
+    Args:
+        number: Value to encode as a 32-bit float.
+        pad: When true, zero-pad the result to all 32 bits so the sign,
+            exponent and fraction fields line up. The default keeps the
+            plain ``bin()`` form, which drops leading zero bits (for
+            example ``0.0`` yields ``'0b0'``).
+
+    Returns:
+        A string of binary digits prefixed with ``0b``.
+
+    Raises:
+        OverflowError: If ``number`` is too large for a 32-bit float.
+        struct.error: If ``number`` cannot be packed as a float.
+    """
+    bits = struct.unpack('<I', struct.pack('<f', number))[0]
+    if pad:
+        # Width 34 = the two-character "0b" prefix plus 32 binary digits.
+        return format(bits, '#034b')
+    return bin(bits)
+
+def main():
+    """Print the binary representation of 5.75 as produced by float_bin."""
+    sample = 5.75
+    bit_string = float_bin(sample)
+    print(bit_string)
+
+
+# Run the demo only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    main()
--- a/python/pdfcreator/extracted_section.pdf
+++ b/python/pdfcreator/extracted_section.pdf
--- a/python/pdfcreator/input.pdf
+++ b/python/pdfcreator/input.pdf
--- a/python/pdfcreator/main.py
+++ b/python/pdfcreator/main.py
@@ -0,0 +1,129 @@
+from pypdf import PdfReader, PdfWriter
+
+# PDF that extract_section() reads. The path is relative, so it is resolved
+# against the current working directory.
+INPUT_PDF = "pdfcreator/input.pdf"
+# File that extract_section() writes the extracted pages to.
+OUTPUT_PDF = "pdfcreator/extracted_section.pdf"
+# Outline title to extract. find_section() matches an entry whose title is
+# exactly this text or starts with this text followed by a space.
+TARGET_SECTION_TITLE = "1.3"
+
+
+def crop_page(page, top_ratio=0.12, bottom_ratio=0.12):
+    """
+    Hide a horizontal band at the top and bottom of a PDF page.
+
+    The new visible area is derived from the page's media box and written
+    to the page's crop box; the horizontal extent is left unchanged.
+
+    top_ratio: share of the media-box height to cut off at the top
+    bottom_ratio: share of the media-box height to cut off at the bottom
+
+    Raises ValueError when the two bands leave no visible height.
+    """
+    left, bottom, right, top = page.mediabox
+    full_height = top - bottom
+
+    visible_bottom = bottom + full_height * bottom_ratio
+    visible_top = top - full_height * top_ratio
+
+    # The bands meet or overlap: nothing would remain visible.
+    if visible_bottom >= visible_top:
+        raise ValueError("Invalid crop ratios: page height would be negative")
+
+    lower_left_corner = (left, visible_bottom)
+    upper_right_corner = (right, visible_top)
+    page.cropbox.lower_left = lower_left_corner
+    page.cropbox.upper_right = upper_right_corner
+
+
+def build_outline_tree(reader):
+    """
+    Convert ``reader.outline`` into a tree of plain dictionaries.
+
+    The outline is read in nested-list form: a list that follows an entry
+    holds that entry's children.
+
+    reader: object exposing ``outline`` and
+        ``get_destination_page_number(item)``
+
+    Returns a list of nodes. Each node is a dict with the keys "title"
+    (the entry title with surrounding whitespace stripped), "page" (whatever
+    ``get_destination_page_number`` returns for the entry) and "children"
+    (a list of nodes of the same shape).
+    """
+    def _build(outline):
+        tree = []
+        for item in outline:
+            if isinstance(item, list):
+                children = _build(item)
+                if tree:
+                    # Extend rather than assign so that a second nested list
+                    # after the same entry does not discard the first one.
+                    tree[-1]["children"].extend(children)
+                else:
+                    # A nested list with no preceding entry has no parent at
+                    # this level; keep its entries here instead of failing
+                    # on tree[-1].
+                    tree.extend(children)
+            else:
+                tree.append({
+                    "title": item.title.strip(),
+                    "page": reader.get_destination_page_number(item),
+                    "children": []
+                })
+        return tree
+
+    return _build(reader.outline)
+
+
+def find_section(nodes, title):
+    """
+    Return the first outline node matching ``title``, or None.
+
+    Nodes are visited depth-first, parents before their children. A node
+    matches when its title equals ``title`` or starts with ``title``
+    followed by a space.
+    """
+    # Explicit stack; entries are pushed reversed so the leftmost pops first.
+    pending = list(reversed(nodes))
+    while pending:
+        candidate = pending.pop()
+        heading = candidate["title"]
+        if heading == title or heading.startswith(title + " "):
+            return candidate
+        pending.extend(reversed(candidate["children"]))
+    return None
+
+
+def collect_subtree_pages(node, pages=None):
+    """
+    Gather the page numbers of ``node`` and every node below it.
+
+    Pages are appended parent-first, depth-first. When ``pages`` is given,
+    the numbers are appended to that list and the same list is returned;
+    otherwise a new list is created.
+    """
+    collected = [] if pages is None else pages
+
+    # Iterative depth-first walk; children are pushed reversed to keep order.
+    stack = [node]
+    while stack:
+        current = stack.pop()
+        collected.append(current["page"])
+        stack.extend(reversed(current["children"]))
+
+    return collected
+
+
+def flatten_outline_pages(nodes, pages=None):
+    """
+    List the page number of every outline entry, in outline order.
+
+    Each entry's page comes before the pages of its children. When ``pages``
+    is given, the numbers are appended to that list and the same list is
+    returned; otherwise a new list is created.
+    """
+    def _page_numbers(level):
+        for entry in level:
+            yield entry["page"]
+            yield from _page_numbers(entry["children"])
+
+    result = pages if pages is not None else []
+    result.extend(_page_numbers(nodes))
+    return result
+
+
+def find_end_page(target_node, outline_tree, total_pages):
+    """
+    Return the exclusive end page of the section rooted at ``target_node``.
+
+    The end is taken from the first outline entry that follows the section
+    (and all of its descendants) in outline order:
+
+    * if that entry starts on a later page, its page is the end;
+    * if it starts on or before the section's last known page, the section
+      ends right after that last page, so entries sharing a page with the
+      section no longer cause following sections to be included;
+    * if nothing follows the section, the end is ``total_pages``.
+
+    target_node: node dict with "page" and "children"; expected to be one of
+        the node objects inside ``outline_tree``
+    outline_tree: list of node dicts describing the whole outline
+    total_pages: number of pages in the document
+
+    When ``target_node`` is not found in ``outline_tree`` by identity, the
+    result falls back to the smallest outline page greater than the
+    section's last page, or ``total_pages`` if there is none.
+
+    NOTE(review): only page numbers are considered, so the page on which
+    the next section starts is never included even if the current section
+    continues onto it.
+    """
+    def _walk(nodes):
+        # Outline order: each node before its children.
+        for node in nodes:
+            yield node
+            yield from _walk(node["children"])
+
+    subtree = list(_walk([target_node]))
+    last_section_page = max(node["page"] for node in subtree)
+
+    entries = list(_walk(outline_tree))
+    for index, entry in enumerate(entries):
+        if entry is target_node:
+            # The subtree occupies a contiguous run starting at the target.
+            following = entries[index + len(subtree):]
+            if not following:
+                return total_pages
+            return max(following[0]["page"], last_section_page + 1)
+
+    later_pages = [
+        entry["page"] for entry in entries
+        if entry["page"] > last_section_page
+    ]
+    return min(later_pages, default=total_pages)
+
+
+def extract_section(input_pdf=None, output_pdf=None, section_title=None):
+    """
+    Copy one outline section of a PDF into a new, cropped PDF.
+
+    The section is located through the document outline, every page from
+    its start up to (not including) the end page from find_end_page() is
+    cropped with crop_page()'s default ratios and written to the output
+    file. A one-line summary is printed afterwards.
+
+    input_pdf: path of the PDF to read; defaults to INPUT_PDF
+    output_pdf: path of the PDF to write; defaults to OUTPUT_PDF
+    section_title: outline title to extract, matched as in find_section();
+        defaults to TARGET_SECTION_TITLE
+
+    Raises ValueError when no outline entry matches the title or when the
+    matching entry does not resolve to a page.
+    """
+    # Resolve defaults at call time so the module constants stay the single
+    # source of truth for argument-less calls.
+    if input_pdf is None:
+        input_pdf = INPUT_PDF
+    if output_pdf is None:
+        output_pdf = OUTPUT_PDF
+    if section_title is None:
+        section_title = TARGET_SECTION_TITLE
+
+    reader = PdfReader(input_pdf)
+    writer = PdfWriter()
+
+    outline_tree = build_outline_tree(reader)
+    total_pages = len(reader.pages)
+
+    target = find_section(outline_tree, section_title)
+    if not target:
+        raise ValueError(f"Section '{section_title}' not found")
+
+    start_page = target["page"]
+    if start_page is None:
+        # The outline entry exists but its destination is not a page of this
+        # document, so there is no page range to copy.
+        raise ValueError(
+            f"Section '{section_title}' does not point to a page"
+        )
+    end_page = find_end_page(target, outline_tree, total_pages)
+
+    for p in range(start_page, end_page):
+        page = reader.pages[p]
+        # Cropping modifies the page object obtained from the reader before
+        # it is added to the writer.
+        crop_page(page)
+        writer.add_page(page)
+
+    with open(output_pdf, "wb") as f:
+        writer.write(f)
+
+    # start_page is a 0-based index and end_page is exclusive, so the
+    # 1-based inclusive range shown to the user is start_page + 1 .. end_page.
+    print(
+        f"Extracted '{target['title']}' "
+        f"(pages {start_page + 1}–{end_page})"
+    )
+
+
+# Run the extraction only when this file is executed directly, not when
+# it is imported.
+if __name__ == "__main__":
+    extract_section()
--- a/python/tool-speechtotext/assistant.py
+++ b/python/tool-speechtotext/assistant.py
@@ -94,7 +94,7 @@ def main():
                print("No speech detected. Try again.")
                continue

-            # print(f"You said: {text}")
+            print(f"You said: {text}")
            pyperclip.copy(text)

            if (args.nollm == False):