misc python code

This commit is contained in:
ys
2024-12-20 21:50:09 +00:00
parent 6dc40ba6af
commit d3dc84416d
44 changed files with 24998 additions and 0 deletions

View File

@@ -0,0 +1,209 @@
import asyncio
import sys
import re
import zlib
import json
import csv
import requests
from mitmproxy import http, ctx
from mitmproxy import options
from mitmproxy.tools import dump
import sqlite3
from bs4 import BeautifulSoup
# Method 1: Load blocked words from a CSV file
def load_blocked_words_from_csv(file_path):
    """Read every cell of a CSV file and return the cells as one flat list.

    Args:
        file_path (str): Path to the CSV file of blocked words.

    Returns:
        list: All values found in the file, in row order.
    """
    with open(file_path, newline='') as csvfile:
        # Flatten every row into a single list of words.
        return [word for row in csv.reader(csvfile) for word in row]
# Method 2: Load blocked words from a web service
def load_blocked_words_from_web_service(url):
    """Fetch the blocked-word list from a remote JSON endpoint.

    Args:
        url (str): Web-service endpoint returning a JSON array of words.

    Returns:
        list: The words returned by the service, or an empty list when the
            request fails for any reason.
    """
    try:
        resp = requests.get(url)
        resp.raise_for_status()  # turn 4xx/5xx into RequestException
        # The endpoint is expected to return a JSON array of strings.
        return resp.json()
    except requests.RequestException as exc:
        ctx.log.error(f"Error fetching blocked words: {exc}")
        return []
# Load the blocked-word list. The CSV/web-service loaders above are the
# intended sources; a hard-coded list is in use here (presumably a test stub).
blocked_words = ['fdhfghrth']  # load_blocked_words_from_csv('Terms-to-Block.csv')
# blocked_words = load_blocked_words_from_web_service('https://example.com/api/blocked_words')

# Compile one case-insensitive, word-boundary pattern for all blocked words.
# Guard against an empty list: '|'.join([]) would produce r'\b(?:)\b', which
# matches the empty string at every word boundary and would flag every
# response (the web-service loader returns [] on error).
if blocked_words:
    pattern = re.compile(
        r'\b(?:' + '|'.join(re.escape(word) for word in blocked_words) + r')\b',
        re.IGNORECASE,
    )
else:
    pattern = re.compile(r'(?!)')  # negative lookahead: matches nothing
def check_string_in_list(target_string, string_list):
    """Report whether the target string contains any entry of the list.

    Args:
        target_string (str): Text to scan.
        string_list (list): Substrings to look for.

    Returns:
        bool: True when at least one list entry occurs in the target,
            False otherwise.
    """
    for candidate in string_list:
        if candidate in target_string:
            return True
    return False
def search_blocked_words_in_json(data, pattern):
    """Recursively collect regex matches from every string in a JSON value.

    Args:
        data (dict | list | str): Parsed JSON data to walk. Any other type
            (numbers, booleans, None) contributes no matches.
        pattern (re.Pattern): Compiled blocked-word pattern.

    Returns:
        list: Every match found, in traversal order.
    """
    # Leaf case: scan the string directly.
    if isinstance(data, str):
        return pattern.findall(data)
    # Containers: pick the child values to recurse into.
    if isinstance(data, dict):
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        return []
    hits = []
    for child in children:
        hits += search_blocked_words_in_json(child, pattern)
    return hits
class RequestLogger:
    """mitmproxy addon that enforces a host whitelist on requests and
    replaces responses whose body contains a blocked word.
    """

    def __init__(self):
        """Start the background task that keeps the whitelist fresh.

        NOTE(review): until the first DB load completes, ``self.whitelist``
        is empty, so every request is blocked — confirm this fail-closed
        behavior is intended.
        """
        self.whitelist = []
        # Requires a running event loop (mitmproxy creates addons inside one).
        asyncio.create_task(self.load_whitelist_periodically())

    async def load_whitelist(self):
        """Reload approved hosts from the SQLite database into self.whitelist.

        NOTE(review): sqlite3 is a blocking API and this runs on the event
        loop thread; acceptable for a small table — confirm if the DB grows.
        """
        conn = sqlite3.connect('whitelist.db')
        try:
            c = conn.cursor()
            c.execute("SELECT url FROM whitelist WHERE status='approved'")
            self.whitelist = [row[0] for row in c.fetchall()]
        finally:
            # Close the connection even if the query raises.
            conn.close()

    async def load_whitelist_periodically(self):
        """Refresh the whitelist every 20 seconds, forever."""
        while True:
            await self.load_whitelist()
            await asyncio.sleep(20)

    def request(self, flow: http.HTTPFlow):
        """Block any request whose host is not on the whitelist.

        Args:
            flow (http.HTTPFlow): The intercepted request/response pair.
        """
        if flow.request.pretty_host not in self.whitelist:
            ctx.log.info('Blocked request to %s' % flow.request.host)
            # Setting flow.response short-circuits the request: the upstream
            # server is never contacted.
            flow.response = http.Response.make(
                403,  # Forbidden status code
                b"Blocked by URL whitelist",  # Response content
                {"Content-Type": "text/plain"}  # Response headers
            )
            return

    def response(self, flow: http.HTTPFlow):
        """Replace any response whose body contains a blocked word.

        HTML bodies are scanned on their visible text only; JSON bodies are
        walked recursively; other content types pass through unscanned.

        Args:
            flow (http.HTTPFlow): The HTTP flow object.
        """
        content_type = flow.response.headers.get("Content-Type", "")
        content = flow.response.content
        encoding = flow.response.headers.get("Content-Encoding", "")
        # Decompress gzip/deflate bodies before scanning.
        if "gzip" in encoding:
            # MAX_WBITS | 16 tells zlib to expect a gzip header.
            content = zlib.decompress(content, zlib.MAX_WBITS | 16)
        if "deflate" in encoding:
            content = zlib.decompress(content)
        # Decode to text; ignore undecodable bytes rather than crashing.
        if isinstance(content, bytes):
            content = content.decode('utf-8', errors='ignore')
        if "text/html" in content_type:
            # Scan only visible text, not markup or attributes.
            soup = BeautifulSoup(content, 'html.parser')
            matches = pattern.findall(soup.get_text())
        elif "application/json" in content_type:
            # Fix: malformed JSON used to raise out of the addon; treat it
            # as unscannable and pass it through instead.
            try:
                data = json.loads(content)
            except json.JSONDecodeError:
                matches = []
            else:
                matches = search_blocked_words_in_json(data, pattern)
        else:
            matches = []
        if matches:
            ctx.log.error(matches[0])  # log the first blocked word found
            # Replace the whole response. (Fix: the original also assigned
            # flow.response.text first, which this line immediately
            # discarded — dead code removed.)
            flow.response = http.Response.make(
                200,  # (optional) status code
                b"Hello World",  # (optional) content
                {"Content-Type": "text/html"},  # (optional) headers
            )
async def start_proxy(host, port):
    """Run a mitmproxy dump master with the RequestLogger addon installed.

    Args:
        host (str): Address to listen on.
        port (int): Port to listen on.
    """
    opts = options.Options(listen_host=host, listen_port=port)
    master = dump.DumpMaster(opts, with_termlog=False, with_dumper=False)
    master.addons.add(RequestLogger())
    # run() blocks until the proxy shuts down.
    await master.run()
    return master
if __name__ == '__main__':
    # Fix: missing arguments used to die with a bare IndexError; print a
    # usage line instead.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <host> <port>")
    host = sys.argv[1]
    port = int(sys.argv[2])  # ValueError here means a non-numeric port
    asyncio.run(start_proxy(host, port))