Initial commit
This commit is contained in:
250
tools/clipboard/save_from_clipboard.py
Normal file
250
tools/clipboard/save_from_clipboard.py
Normal file
@@ -0,0 +1,250 @@
|
||||
import os
|
||||
import datetime
|
||||
import re
|
||||
import base64
|
||||
import shutil
|
||||
import requests
|
||||
import random
|
||||
from urllib.parse import unquote, urlparse
|
||||
from io import BytesIO
|
||||
|
||||
# Third-party libraries
|
||||
import pyperclip
|
||||
from PIL import ImageGrab, Image
|
||||
import win32clipboard
|
||||
from bs4 import BeautifulSoup
|
||||
from markdownify import markdownify as md
|
||||
|
||||
# Configuration
# PROJECT_ROOT: this file's directory, then two parents up
# (i.e. three nested os.path.dirname calls on the absolute file path).
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Directory that receives the generated Markdown notes.
MARKDOWN_DIR = os.path.join(PROJECT_ROOT, 'notebook', 'markdowns')
# Directory that receives images extracted from the clipboard content.
IMAGES_DIR = os.path.join(PROJECT_ROOT, 'notebook', 'images')
|
||||
|
||||
def sanitize_filename(name):
    """Sanitize a string so it can be used as a filename.

    Keeps alphanumeric characters (as defined by ``str.isalnum``), spaces,
    hyphens and underscores and drops everything else. The result is limited
    to 100 characters and never starts or ends with whitespace.

    Args:
        name: Arbitrary text, e.g. a note title.

    Returns:
        The sanitized name. May be an empty string when ``name`` contains no
        usable characters.
    """
    allowed_punctuation = (' ', '-', '_')
    safe_name = "".join(
        c for c in name if c.isalnum() or c in allowed_punctuation
    ).strip()
    # Strip again after truncating: the cut can land right after a space,
    # which would otherwise leave trailing whitespace in the returned name.
    return safe_name[:100].strip()
|
||||
|
||||
def _decode_cf_html(raw_data):
    """Return the HTML text contained in a raw "HTML Format" payload.

    The payload starts with an ASCII header such as::

        Version:0.9
        StartHTML:00000097
        EndHTML:00000170
        StartFragment:00000133
        EndFragment:00000134

    StartHTML/EndHTML are *byte* offsets into the payload, so the slice is
    taken on the raw bytes and only the selected span is decoded. Slicing the
    decoded string instead shifts the span whenever the content holds
    multi-byte (non-ASCII) characters.
    """
    start_html = re.search(rb'StartHTML:(\d+)', raw_data)
    end_html = re.search(rb'EndHTML:(\d+)', raw_data)

    if start_html and end_html:
        payload = raw_data[int(start_html.group(1)):int(end_html.group(1))]
    else:
        # Fallback to the full payload if the header cannot be parsed.
        payload = raw_data

    try:
        return payload.decode('utf-8')
    except UnicodeDecodeError:
        return payload.decode('cp1252', errors='ignore')


def get_html_from_clipboard():
    """Extract the "HTML Format" content from the Windows clipboard.

    Returns:
        The HTML text delimited by the StartHTML/EndHTML header offsets (or
        the whole decoded payload when those offsets are missing), or None
        when the clipboard holds no HTML or reading it fails. Failures are
        reported on stdout rather than raised.
    """
    try:
        win32clipboard.OpenClipboard()
        try:
            # Register/Get HTML Format ID
            html_format = win32clipboard.RegisterClipboardFormat("HTML Format")
            if not win32clipboard.IsClipboardFormatAvailable(html_format):
                return None
            raw_data = win32clipboard.GetClipboardData(html_format)
        finally:
            # Always release the clipboard once it has been opened.
            win32clipboard.CloseClipboard()

        return _decode_cf_html(raw_data)
    except Exception as e:
        print(f"Error reading clipboard HTML: {e}")
        return None
|
||||
|
||||
# File extensions for the image content types a remote server may report.
_IMAGE_EXTENSIONS = {
    'image/png': '.png',
    'image/gif': '.gif',
    'image/jpeg': '.jpg',
    'image/jpg': '.jpg',
    'image/webp': '.webp',
    'image/bmp': '.bmp',
    'image/svg+xml': '.svg',
    'image/tiff': '.tiff',
    'image/avif': '.avif',
    'image/x-icon': '.ico',
    'image/vnd.microsoft.icon': '.ico',
}


def _new_image_name(timestamp, ext):
    """Return a ``paste_img_*`` filename that is not yet used in IMAGES_DIR."""
    candidate = None
    # The random suffix has only 9000 values, so check for an existing file
    # instead of silently overwriting an image saved earlier in the same run.
    for _ in range(100):
        candidate = f"paste_img_{timestamp}_{random.randint(1000,9999)}{ext}"
        if not os.path.exists(os.path.join(IMAGES_DIR, candidate)):
            break
    return candidate


def _save_base64_image(src, timestamp):
    """Decode a ``data:image/<type>;base64,...`` URI into IMAGES_DIR.

    Returns the new filename, or None when ``src`` does not match that shape.
    """
    match = re.match(r'data:image/(\w+);base64,(.+)', src)
    if not match:
        return None

    ext = match.group(1)
    if ext == 'jpeg':
        ext = 'jpg'
    img_data = base64.b64decode(match.group(2))

    new_filename = _new_image_name(timestamp, f".{ext}")
    with open(os.path.join(IMAGES_DIR, new_filename), 'wb') as f:
        f.write(img_data)
    print(f" - Saved Base64 image: {new_filename}")
    return new_filename


def _copy_local_image(src, timestamp):
    """Copy the file behind a ``file://`` URL into IMAGES_DIR.

    Returns the new filename, or None when the referenced path does not exist.
    """
    # Remove file:// prefix and decode URL encoded chars
    local_path = unquote(src[7:])
    # On Windows, it might be file:///C:/... -> /C:/... -> C:/...
    if local_path.startswith('/') and ':' in local_path:
        local_path = local_path[1:]

    if not os.path.exists(local_path):
        return None

    ext = os.path.splitext(local_path)[1] or '.png'
    new_filename = _new_image_name(timestamp, ext)
    shutil.copy2(local_path, os.path.join(IMAGES_DIR, new_filename))
    print(f" - Copied local image: {new_filename}")
    return new_filename


def _download_remote_image(src, timestamp):
    """Download an http(s) image into IMAGES_DIR.

    Returns the new filename, or None when the response is not a 200 with an
    image content type.
    """
    response = requests.get(src, timeout=5)
    content_type = response.headers.get('content-type', '')
    if response.status_code != 200 or 'image' not in content_type:
        return None

    # Drop parameters such as "; charset=..." before the lookup.
    mime = content_type.split(';', 1)[0].strip().lower()
    ext = _IMAGE_EXTENSIONS.get(mime)
    if ext is None:
        # Unknown subtype: fall back to the loose substring match, with
        # .jpg as the default.
        if 'png' in content_type:
            ext = '.png'
        elif 'gif' in content_type:
            ext = '.gif'
        else:
            ext = '.jpg'

    new_filename = _new_image_name(timestamp, ext)
    with open(os.path.join(IMAGES_DIR, new_filename), 'wb') as f:
        f.write(response.content)
    print(f" - Downloaded remote image: {new_filename}")
    return new_filename


def process_html_images(html_content, timestamp):
    """Find images in HTML, save them locally, and update their ``src``.

    Handles three kinds of ``<img src>``: base64 ``data:image`` URIs,
    ``file://`` URLs and remote ``http(s)://`` URLs. Each image that is saved
    successfully gets its ``src`` replaced by the bare filename inside
    IMAGES_DIR; an image that cannot be saved keeps its original ``src`` and
    the failure is printed.

    Args:
        html_content: HTML markup to scan.
        timestamp: String embedded in the generated image filenames.

    Returns:
        The HTML markup, serialized back from the parsed tree.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            continue

        # Only set once the file has really been written, so a failed save
        # never leaves the note pointing at a file that does not exist.
        new_filename = None

        # Case 1: Base64 Image
        if src.startswith('data:image'):
            try:
                new_filename = _save_base64_image(src, timestamp)
            except Exception as e:
                print(f" - Failed to process base64 image: {e}")

        # Case 2: Local File (file://)
        elif src.startswith('file://'):
            try:
                new_filename = _copy_local_image(src, timestamp)
            except Exception as e:
                print(f" - Failed to copy local image: {e}")

        # Case 3: Remote URL (http/https), downloaded so the note works offline
        elif src.startswith(('http://', 'https://')):
            try:
                new_filename = _download_remote_image(src, timestamp)
            except Exception as e:
                print(f" - Failed to download remote image: {e}")

        # Update src in HTML if we saved a file (bare filename, no directory)
        if new_filename:
            img['src'] = new_filename

    return str(soup)
|
||||
|
||||
def extract_title_from_markdown(md_text):
    """Return a filename-safe title taken from the first H1 heading.

    Only the first ten lines of the stripped text are inspected. A line counts
    as an H1 heading when, after stripping, it starts with ``'# '``.

    Returns:
        The heading text passed through ``sanitize_filename``, or None when
        no such heading is found.
    """
    leading_lines = md_text.strip().split('\n')[:10]
    stripped_lines = (raw_line.strip() for raw_line in leading_lines)
    heading = next(
        (candidate for candidate in stripped_lines if candidate.startswith('# ')),
        None,
    )
    if heading is None:
        return None
    return sanitize_filename(heading[2:])
|
||||
|
||||
def _write_markdown_note(title, timestamp, text):
    """Write ``text`` to a new ``.md`` file in MARKDOWN_DIR and return its path.

    The file is named after ``title`` when one is given, otherwise
    ``Note_<timestamp>``. An existing file is never overwritten: on a
    collision the timestamp (or ``_1``) is appended, followed by a running
    counter if that name is taken as well.
    """
    base_name = title if title else f"Note_{timestamp}"
    markdown_path = os.path.join(MARKDOWN_DIR, f"{base_name}.md")

    # Avoid collision
    if os.path.exists(markdown_path):
        fallback = f"{title}_{timestamp}" if title else f"Note_{timestamp}_1"
        markdown_path = os.path.join(MARKDOWN_DIR, f"{fallback}.md")
        counter = 2
        while os.path.exists(markdown_path):
            markdown_path = os.path.join(MARKDOWN_DIR, f"{fallback}_{counter}.md")
            counter += 1

    with open(markdown_path, 'w', encoding='utf-8') as f:
        f.write(text)
    return markdown_path


def save_clipboard_content():
    """Save the current clipboard content as a Markdown note.

    The clipboard is checked in this order and the first match wins:

    1. HTML / rich text: images are saved to IMAGES_DIR and the HTML is
       converted to Markdown.
    2. Bitmap image: saved as PNG in IMAGES_DIR, plus a note embedding it.
    3. Plain text: saved as-is.

    Progress and errors are printed; nothing is returned.
    """
    # Ensure directories exist
    os.makedirs(MARKDOWN_DIR, exist_ok=True)
    os.makedirs(IMAGES_DIR, exist_ok=True)

    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    # Priority 1: Check for HTML (Rich Text)
    html_content = get_html_from_clipboard()
    if html_content:
        print("Detected HTML/Rich Text in clipboard...")

        # 1. Process images in HTML (save to disk)
        processed_html = process_html_images(html_content, timestamp)

        # 2. Convert to Markdown; heading_style='atx' gives "# Header" style
        #    instead of underlines
        md_text = md(processed_html, heading_style='atx')

        # 3. Clean up extra newlines often introduced by conversion
        md_text = re.sub(r'\n{3,}', '\n\n', md_text).strip()

        # 4. Determine filename and save
        title = extract_title_from_markdown(md_text)
        markdown_path = _write_markdown_note(title, timestamp, md_text)

        print(f"Saved Rich Text Note to: {markdown_path}")
        return

    # Priority 2: Check for Bitmap Image (Direct Screenshot Copy)
    try:
        image = ImageGrab.grabclipboard()
        # A list result is skipped; only an image object is saved.
        if image and not isinstance(image, list):
            print("Detected BITMAP image in clipboard...")
            image_filename = f"clip_image_{timestamp}.png"
            image_path = os.path.join(IMAGES_DIR, image_filename)
            image.save(image_path, 'PNG')

            markdown_filename = f"Image_{timestamp}.md"
            markdown_path = os.path.join(MARKDOWN_DIR, markdown_filename)

            # Reference the image by its bare filename so the note actually
            # embeds the picture that was just saved.
            content = (
                f"# Clipboard Image {timestamp}\n\n"
                f"![Clipboard Image]({image_filename})\n"
            )

            with open(markdown_path, 'w', encoding='utf-8') as f:
                f.write(content)

            print(f"Saved image to: {image_path}")
            print(f"Saved markdown to: {markdown_path}")
            return
    except Exception as e:
        print(f"Error checking bitmap: {e}")

    # Priority 3: Fallback to Plain Text
    text = pyperclip.paste()
    if text:
        print("Detected PLAIN TEXT in clipboard...")
        # Clipboard text can use CRLF line endings; a text-mode write would
        # turn each "\r\n" into "\r\r\n" on Windows, so normalize first.
        text = text.replace('\r\n', '\n')
        title = extract_title_from_markdown(text)
        markdown_path = _write_markdown_note(title, timestamp, text)
        print(f"Saved Text Note to: {markdown_path}")
    else:
        print("Clipboard is empty.")
|
||||
|
||||
# Run the clipboard export only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    save_clipboard_content()
|
||||
Reference in New Issue
Block a user