Initial commit

2026-03-08 01:34:54 +08:00
commit 1f104f73c8
441 changed files with 64911 additions and 0 deletions
--- a/tools/clipboard/save_from_clipboard.py
+++ b/tools/clipboard/save_from_clipboard.py
@@ -0,0 +1,250 @@
+import os
+import datetime
+import re
+import base64
+import shutil
+import requests
+import random
+from urllib.parse import unquote, urlparse
+from io import BytesIO
+
+# Third-party libraries
+import pyperclip
+from PIL import ImageGrab, Image
+import win32clipboard
+from bs4 import BeautifulSoup
+from markdownify import markdownify as md
+
+# Configuration
+# PROJECT_ROOT is the directory three levels above this file.
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+# Directory that receives the generated Markdown notes.
+MARKDOWN_DIR = os.path.join(PROJECT_ROOT, 'notebook', 'markdowns')
+# Directory that receives images extracted from the clipboard.
+IMAGES_DIR = os.path.join(PROJECT_ROOT, 'notebook', 'images')
+
+def sanitize_filename(name):
+    """Sanitize string to be used as filename"""
+    safe_name = "".join([c for c in name if c.isalnum() or c in (' ', '-', '_')]).strip()
+    return safe_name[:100]
+
+def get_html_from_clipboard():
+    """Extract HTML format from Windows Clipboard"""
+    try:
+        win32clipboard.OpenClipboard()
+        
+        # Register/Get HTML Format ID
+        html_format = win32clipboard.RegisterClipboardFormat("HTML Format")
+        
+        if win32clipboard.IsClipboardFormatAvailable(html_format):
+            raw_data = win32clipboard.GetClipboardData(html_format)
+            win32clipboard.CloseClipboard()
+            
+            # Raw data contains headers, we need to decode and parse them
+            # Example Header:
+            # Version:0.9
+            # StartHTML:00000097
+            # EndHTML:00000170
+            # StartFragment:00000133
+            # EndFragment:00000134
+            
+            try:
+                html_str = raw_data.decode('utf-8')
+            except:
+                html_str = raw_data.decode('cp1252', errors='ignore')
+                
+            # Extract the actual HTML fragment using regex or string splitting
+            start_html = re.search(r'StartHTML:(\d+)', html_str)
+            end_html = re.search(r'EndHTML:(\d+)', html_str)
+            
+            if start_html and end_html:
+                start_idx = int(start_html.group(1))
+                end_idx = int(end_html.group(1))
+                return html_str[start_idx:end_idx]
+                
+            return html_str # Fallback to full string if parsing fails
+            
+        win32clipboard.CloseClipboard()
+        return None
+    except Exception as e:
+        print(f"Error reading clipboard HTML: {e}")
+        try:
+            win32clipboard.CloseClipboard()
+        except:
+            pass
+        return None
+
+def process_html_images(html_content, timestamp):
+    """Find images in HTML, save them locally, and update src"""
+    soup = BeautifulSoup(html_content, 'html.parser')
+    
+    for img in soup.find_all('img'):
+        src = img.get('src')
+        if not src:
+            continue
+            
+        new_filename = None
+        
+        # Case 1: Base64 Image
+        if src.startswith('data:image'):
+            try:
+                # Extract header and data
+                # data:image/png;base64,xxxx
+                match = re.match(r'data:image/(\w+);base64,(.+)', src)
+                if match:
+                    ext = match.group(1)
+                    if ext == 'jpeg': ext = 'jpg'
+                    data_str = match.group(2)
+                    
+                    img_data = base64.b64decode(data_str)
+                    new_filename = f"paste_img_{timestamp}_{random.randint(1000,9999)}.{ext}"
+                    dest_path = os.path.join(IMAGES_DIR, new_filename)
+                    
+                    with open(dest_path, 'wb') as f:
+                        f.write(img_data)
+                    print(f"  - Saved Base64 image: {new_filename}")
+            except Exception as e:
+                print(f"  - Failed to process base64 image: {e}")
+
+        # Case 2: Local File (file://)
+        elif src.startswith('file://'):
+            try:
+                # Remove file:// prefix and decode URL encoded chars
+                local_path = unquote(src[7:])
+                # On Windows, it might be file:///C:/... -> /C:/... -> C:/...
+                if local_path.startswith('/') and ':' in local_path:
+                    local_path = local_path[1:]
+                
+                if os.path.exists(local_path):
+                    ext = os.path.splitext(local_path)[1]
+                    if not ext: ext = '.png'
+                    
+                    new_filename = f"paste_img_{timestamp}_{random.randint(1000,9999)}{ext}"
+                    dest_path = os.path.join(IMAGES_DIR, new_filename)
+                    
+                    shutil.copy2(local_path, dest_path)
+                    print(f"  - Copied local image: {new_filename}")
+            except Exception as e:
+                print(f"  - Failed to copy local image: {e}")
+                
+        # Case 3: Remote URL (http/https)
+        # Optional: We could download it, but for now let's keep it as is 
+        # or download if it's a direct image link.
+        # Let's try to download to make it truly local/offline
+        elif src.startswith('http'):
+            try:
+                # Basic check if it's an image
+                # We skip downloading if user wants to keep remote links, but usually local is better for notes
+                # Let's try downloading
+                response = requests.get(src, timeout=5)
+                if response.status_code == 200 and 'image' in response.headers.get('content-type', ''):
+                    ext = '.jpg' # default
+                    if 'png' in response.headers['content-type']: ext = '.png'
+                    elif 'gif' in response.headers['content-type']: ext = '.gif'
+                    
+                    new_filename = f"paste_img_{timestamp}_{random.randint(1000,9999)}{ext}"
+                    dest_path = os.path.join(IMAGES_DIR, new_filename)
+                    
+                    with open(dest_path, 'wb') as f:
+                        f.write(response.content)
+                    print(f"  - Downloaded remote image: {new_filename}")
+            except Exception as e:
+                print(f"  - Failed to download remote image: {e}")
+
+        # Update src in HTML if we saved a file
+        if new_filename:
+            # We use bare filename as requested
+            img['src'] = new_filename
+
+    return str(soup)
+
+def extract_title_from_markdown(md_text):
+    """Try to find the first H1 header to use as title"""
+    lines = md_text.strip().split('\n')
+    for line in lines[:10]:
+        if line.strip().startswith('# '):
+            return sanitize_filename(line.strip()[2:])
+    return None
+
+def save_clipboard_content():
+    # Ensure directories exist
+    os.makedirs(MARKDOWN_DIR, exist_ok=True)
+    os.makedirs(IMAGES_DIR, exist_ok=True)
+
+    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
+    
+    # Priority 1: Check for HTML (Rich Text)
+    # This covers most "Copy from Note App" scenarios
+    html_content = get_html_from_clipboard()
+    
+    if html_content:
+        print("Detected HTML/Rich Text in clipboard...")
+        
+        # 1. Process Images in HTML (Save to disk)
+        processed_html = process_html_images(html_content, timestamp)
+        
+        # 2. Convert to Markdown
+        # heading_style='atx' ensures # Header style instead of underlines
+        md_text = md(processed_html, heading_style='atx')
+        
+        # 3. Clean up extra newlines often introduced by conversion
+        md_text = re.sub(r'\n{3,}', '\n\n', md_text).strip()
+        
+        # 4. Determine Filename
+        title = extract_title_from_markdown(md_text)
+        markdown_filename = f"{title}.md" if title else f"Note_{timestamp}.md"
+        
+        # 5. Save
+        markdown_path = os.path.join(MARKDOWN_DIR, markdown_filename)
+        # Avoid collision
+        if os.path.exists(markdown_path):
+            markdown_filename = f"{title}_{timestamp}.md" if title else f"Note_{timestamp}_1.md"
+            markdown_path = os.path.join(MARKDOWN_DIR, markdown_filename)
+            
+        with open(markdown_path, 'w', encoding='utf-8') as f:
+            f.write(md_text)
+            
+        print(f"Saved Rich Text Note to: {markdown_path}")
+        return
+
+    # Priority 2: Check for Bitmap Image (Direct Screenshot Copy)
+    try:
+        image = ImageGrab.grabclipboard()
+        if image and not isinstance(image, list):
+            print("Detected BITMAP image in clipboard...")
+            image_filename = f"clip_image_{timestamp}.png"
+            image_path = os.path.join(IMAGES_DIR, image_filename)
+            image.save(image_path, 'PNG')
+            
+            markdown_filename = f"Image_{timestamp}.md"
+            markdown_path = os.path.join(MARKDOWN_DIR, markdown_filename)
+            
+            # Use bare filename as requested
+            content = f"# Clipboard Image {timestamp}\n\n![{image_filename}]({image_filename})"
+            
+            with open(markdown_path, 'w', encoding='utf-8') as f:
+                f.write(content)
+            
+            print(f"Saved image to: {image_path}")
+            print(f"Saved markdown to: {markdown_path}")
+            return
+    except Exception as e:
+        print(f"Error checking bitmap: {e}")
+
+    # Priority 3: Fallback to Plain Text
+    text = pyperclip.paste()
+    if text:
+        print("Detected PLAIN TEXT in clipboard...")
+        title = extract_title_from_markdown(text)
+        markdown_filename = f"{title}.md" if title else f"Note_{timestamp}.md"
+        markdown_path = os.path.join(MARKDOWN_DIR, markdown_filename)
+        
+        if os.path.exists(markdown_path):
+            markdown_filename = f"{title}_{timestamp}.md" if title else f"Note_{timestamp}_1.md"
+            markdown_path = os.path.join(MARKDOWN_DIR, markdown_filename)
+
+        with open(markdown_path, 'w', encoding='utf-8') as f:
+            f.write(text)
+        print(f"Saved Text Note to: {markdown_path}")
+    else:
+        print("Clipboard is empty.")
+
+# Script entry point: capture the clipboard once when run directly.
+if __name__ == "__main__":
+    save_clipboard_content()