Update various Python tools

2026-01-19 15:21:44 +00:00
parent 31684ecded
commit 7891956d52
7 changed files with 164 additions and 1 deletions
--- a/python/.vscode/launch.json
+++ b/python/.vscode/launch.json
@@ -0,0 +1,16 @@
 {
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python Debugger: Current File",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal"
        }
    ]
 }
--- a/python/.vscode/settings.json
+++ b/python/.vscode/settings.json
@@ -0,0 +1,5 @@
 {
    "python-envs.defaultEnvManager": "ms-python.python:conda",
    "python-envs.defaultPackageManager": "ms-python.python:conda",
    "python-envs.pythonProjects": []
 }
--- a/python/floatingpoint/main.py
+++ b/python/floatingpoint/main.py
@@ -0,0 +1,13 @@
 import struct
 def float_bin(number):
    """
    Return the single-precision bit pattern of ``number`` as a binary string.

    The value is packed as a little-endian 4-byte float and the same bytes
    are reinterpreted as an unsigned 32-bit integer, which is then formatted
    with ``bin()``. Because ``bin()`` omits leading zero bits, the result is
    shorter than 32 digits whenever the sign bit is 0 (e.g. ``0.0`` yields
    ``'0b0'``).
    """
    raw_bytes = struct.pack('<f', number)
    (as_uint32,) = struct.unpack('<I', raw_bytes)
    return bin(as_uint32)
 def main():
    """Print the binary representation of a sample value (5.75)."""
    sample_value = 5.75
    bit_string = float_bin(sample_value)
    print(bit_string)
 # Run the demo only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
--- a/python/pdfcreator/extracted_section.pdf
+++ b/python/pdfcreator/extracted_section.pdf
--- a/python/pdfcreator/input.pdf
+++ b/python/pdfcreator/input.pdf
--- a/python/pdfcreator/main.py
+++ b/python/pdfcreator/main.py
@@ -0,0 +1,129 @@
 from pypdf import PdfReader, PdfWriter
 # Source PDF to read. The path is relative, so it is resolved against the
 # current working directory of the process.
 INPUT_PDF = "pdfcreator/input.pdf"
 # Destination file for the extracted pages (also a relative path).
 OUTPUT_PDF = "pdfcreator/extracted_section.pdf"
 # Outline title of the section to extract; find_section() accepts a title
 # that equals this string or starts with it followed by a space.
 TARGET_SECTION_TITLE = "1.3"
 def crop_page(page, top_ratio=0.12, bottom_ratio=0.12):
    """
    Shrink the page's crop box vertically, measured against its media box.

    top_ratio: share of the media-box height trimmed off the top edge
    bottom_ratio: share of the media-box height trimmed off the bottom edge

    The page is modified in place and nothing is returned. The horizontal
    extent is taken unchanged from the media box.

    Raises ValueError when the trimmed box would not have a positive height.
    """
    left, bottom, right, top = page.mediabox
    page_height = top - bottom

    trimmed_bottom = bottom + page_height * bottom_ratio
    trimmed_top = top - page_height * top_ratio

    # Reject ratios that leave nothing (or less than nothing) visible.
    if trimmed_bottom >= trimmed_top:
        raise ValueError("Invalid crop ratios: page height would be negative")

    page.cropbox.lower_left = (left, trimmed_bottom)
    page.cropbox.upper_right = (right, trimmed_top)
 def build_outline_tree(reader):
    """
    Convert the reader's nested outline into a tree of plain dicts.

    ``reader.outline`` is walked as a list in which a nested list holds the
    children of the entry that precedes it. Each entry becomes
    ``{"title": ..., "page": ..., "children": [...]}`` where ``title`` is the
    entry's title with surrounding whitespace stripped and ``page`` is
    whatever ``reader.get_destination_page_number`` returns for the entry.

    A nested list that has no preceding entry at its level is hoisted into
    the current level instead of raising IndexError, and consecutive nested
    lists are merged rather than overwriting one another, so no outline
    entry is silently lost.

    Returns the list of top-level nodes.
    """
    def _build(outline):
        tree = []
        for item in outline:
            if isinstance(item, list):
                children = _build(item)
                if tree:
                    # Extend rather than assign so a second nested list does
                    # not discard the children collected so far.
                    tree[-1]["children"].extend(children)
                else:
                    # Orphan sub-outline: there is no parent to attach to.
                    tree.extend(children)
            else:
                tree.append({
                    "title": item.title.strip(),
                    "page": reader.get_destination_page_number(item),
                    "children": [],
                })
        return tree
    return _build(reader.outline)
 def find_section(nodes, title):
    """
    Locate an outline node by title.

    Nodes are visited depth-first in document order (a node before its
    children, children before the next sibling). A node matches when its
    title equals ``title`` or starts with ``title`` followed by a space.

    Returns the first matching node dict, or None when nothing matches.
    """
    # Explicit stack; entries are pushed reversed so pop() yields them in
    # their original left-to-right order.
    pending = list(nodes)[::-1]
    while pending:
        candidate = pending.pop()
        heading = candidate["title"]
        if heading == title or heading.startswith(title + " "):
            return candidate
        pending.extend(list(candidate["children"])[::-1])
    return None
 def collect_subtree_pages(node, pages=None):
    """
    Gather the page value of ``node`` and of every descendant.

    Values are appended in depth-first document order, starting with
    ``node`` itself. When ``pages`` is given, that list is appended to in
    place and returned; otherwise a new list is created.
    """
    collected = [] if pages is None else pages
    # Explicit stack; children are pushed reversed so they pop in order.
    pending = [node]
    while pending:
        current = pending.pop()
        collected.append(current["page"])
        pending.extend(list(current["children"])[::-1])
    return collected
 def flatten_outline_pages(nodes, pages=None):
    """
    List the page value of every outline entry, in document order.

    Each node's page is emitted before those of its children, and a node's
    whole subtree before its next sibling. When ``pages`` is given, that
    list is extended in place and returned; otherwise a new list is created.
    """
    def _walk(level):
        for entry in level:
            yield entry["page"]
            yield from _walk(entry["children"])

    flat = pages if pages is not None else []
    flat.extend(_walk(nodes))
    return flat
 def find_end_page(target_node, outline_tree, total_pages):
    """
    Return the exclusive end page index for the target section.

    The end is the smallest outline page number that is strictly greater
    than the largest page number found in the target's subtree. When no
    outline entry lies beyond the section, ``total_pages`` is returned.

    Entries whose page is None are ignored, so an outline entry without a
    resolvable page number does not abort the search with a TypeError.

    Raises ValueError if no entry in the target's subtree has a page number.
    """
    subtree_pages = [
        page for page in collect_subtree_pages(target_node) if page is not None
    ]
    if not subtree_pages:
        raise ValueError("Target section has no resolvable page number")
    last_section_page = max(subtree_pages)

    # The minimum over the later pages equals "first match in sorted order",
    # without needing to sort or de-duplicate the whole list.
    later_pages = (
        page
        for page in flatten_outline_pages(outline_tree)
        if page is not None and page > last_section_page
    )
    return min(later_pages, default=total_pages)
 def extract_section():
    """
    Copy the section named by TARGET_SECTION_TITLE from INPUT_PDF to OUTPUT_PDF.

    The section is looked up in the document outline; its pages run from the
    section's own page up to (not including) the page returned by
    find_end_page(). Every copied page is cropped with crop_page() using the
    default ratios before being written.

    Raises ValueError when the outline has no matching section.
    """
    reader = PdfReader(INPUT_PDF)
    tree = build_outline_tree(reader)
    page_count = len(reader.pages)

    section = find_section(tree, TARGET_SECTION_TITLE)
    if not section:
        raise ValueError(f"Section '{TARGET_SECTION_TITLE}' not found")

    start_page = section["page"]
    end_page = find_end_page(section, tree, page_count)

    writer = PdfWriter()
    for index in range(start_page, end_page):
        selected = reader.pages[index]
        crop_page(selected)
        writer.add_page(selected)

    with open(OUTPUT_PDF, "wb") as out_file:
        writer.write(out_file)

    # start_page is zero-based and end_page is exclusive, so the printed
    # range is one-based and inclusive.
    summary = (
        f"Extracted '{section['title']}' "
        f"(pages {start_page + 1}–{end_page})"
    )
    print(summary)
 # Perform the extraction only when this file is executed as a script.
 if __name__ == "__main__":
    extract_section()
--- a/python/tool-speechtotext/assistant.py
+++ b/python/tool-speechtotext/assistant.py
@@ -94,7 +94,7 @@ def main():
                print("No speech detected. Try again.")
                continue
-            # print(f"You said: {text}")
+            print(f"You said: {text}")
            pyperclip.copy(text)
            if (args.nollm == False):