251 lines
9.7 KiB
Python
251 lines
9.7 KiB
Python
import os
|
|
import datetime
|
|
import re
|
|
import base64
|
|
import shutil
|
|
import requests
|
|
import random
|
|
from urllib.parse import unquote, urlparse
|
|
from io import BytesIO
|
|
|
|
# Third-party libraries
|
|
import pyperclip
|
|
from PIL import ImageGrab, Image
|
|
import win32clipboard
|
|
from bs4 import BeautifulSoup
|
|
from markdownify import markdownify as md
|
|
|
|
# Configuration
|
|
# Absolute path of this script; the project root is three directory levels up.
_THIS_FILE = os.path.abspath(__file__)
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(_THIS_FILE)))

# Notes and their images live side by side under <PROJECT_ROOT>/notebook.
_NOTEBOOK_DIR = os.path.join(PROJECT_ROOT, 'notebook')
MARKDOWN_DIR = os.path.join(_NOTEBOOK_DIR, 'markdowns')
IMAGES_DIR = os.path.join(_NOTEBOOK_DIR, 'images')
|
|
|
|
def sanitize_filename(name):
    """Return *name* reduced to characters that are safe in a filename.

    Only alphanumeric characters, spaces, hyphens and underscores are kept.
    The result is trimmed of surrounding whitespace and limited to 100
    characters. It may be an empty string when nothing usable remains.
    """
    allowed_punctuation = (' ', '-', '_')
    kept = "".join(c for c in name if c.isalnum() or c in allowed_punctuation)
    # Strip again after truncating: the cut at 100 characters can land right
    # after a space, which would leave a trailing blank in the filename.
    return kept.strip()[:100].strip()
|
|
|
|
def _extract_html_payload(raw_data):
    """Decode the HTML carried by a raw "HTML Format" clipboard payload.

    The payload starts with an ASCII header such as::

        Version:0.9
        StartHTML:00000097
        EndHTML:00000170
        StartFragment:00000133
        EndFragment:00000134

    The offsets are byte offsets from the start of the payload, so the
    slice is taken on the raw bytes *before* decoding. Slicing the decoded
    string instead would drift as soon as the content contains multi-byte
    characters.
    """
    payload = raw_data
    # Prefer the full HTML context; fall back to the fragment offsets when
    # the StartHTML/EndHTML pair is absent or not a usable positive range
    # (a negative value such as "-1" does not match the digit pattern).
    for start_key, end_key in ((rb'StartHTML', rb'EndHTML'),
                               (rb'StartFragment', rb'EndFragment')):
        start_match = re.search(start_key + rb':(\d+)', raw_data)
        end_match = re.search(end_key + rb':(\d+)', raw_data)
        if not (start_match and end_match):
            continue
        start_idx = int(start_match.group(1))
        end_idx = int(end_match.group(1))
        if start_idx < end_idx:
            payload = raw_data[start_idx:end_idx]
            break

    try:
        return payload.decode('utf-8')
    except UnicodeDecodeError:
        # Best effort for payloads that are not valid UTF-8.
        return payload.decode('cp1252', errors='ignore')


def get_html_from_clipboard():
    """Extract HTML format from Windows Clipboard.

    Returns:
        The HTML text found on the clipboard, or ``None`` when the
        clipboard holds no "HTML Format" data or it cannot be read.
        If the header offsets cannot be parsed, the whole decoded payload
        (headers included) is returned as a fallback.
    """
    try:
        win32clipboard.OpenClipboard()
        try:
            # Register/Get HTML Format ID
            html_format = win32clipboard.RegisterClipboardFormat("HTML Format")
            if not win32clipboard.IsClipboardFormatAvailable(html_format):
                return None
            raw_data = win32clipboard.GetClipboardData(html_format)
        finally:
            # Always release the clipboard once it has been opened, on
            # success and on failure alike.
            win32clipboard.CloseClipboard()

        return _extract_html_payload(raw_data)
    except Exception as e:
        print(f"Error reading clipboard HTML: {e}")
        return None
|
|
|
|
def _unique_image_name(timestamp, ext):
    """Return a ``paste_img_*`` filename that is not yet used in IMAGES_DIR.

    *ext* must include its leading dot.
    """
    for _ in range(50):
        candidate = f"paste_img_{timestamp}_{random.randint(1000,9999)}{ext}"
        if not os.path.exists(os.path.join(IMAGES_DIR, candidate)):
            return candidate
    # Practically unreachable: widen the random range instead of looping on.
    return f"paste_img_{timestamp}_{random.randint(10000, 99999999)}{ext}"


def _save_data_uri_image(src, timestamp):
    """Decode a base64 ``data:image/...`` URI into IMAGES_DIR.

    Returns the new bare filename, or ``None`` if nothing was saved.
    """
    try:
        # data:image/png;base64,xxxx  (optional ";key=value" parameters may
        # precede ";base64", and the payload may contain line breaks)
        match = re.match(
            r'data:image/([\w.+-]+)(?:;[^;,]+)*?;base64,(.+)', src, re.DOTALL
        )
        if not match:
            return None

        subtype = match.group(1).lower()
        known_extensions = {
            'jpeg': 'jpg',
            'svg+xml': 'svg',
            'x-icon': 'ico',
            'vnd.microsoft.icon': 'ico',
        }
        # Unknown subtypes are reduced to word characters so that the
        # extension can never contain path or separator characters.
        ext = known_extensions.get(subtype) or re.sub(r'\W', '', subtype) or 'png'

        img_data = base64.b64decode(match.group(2))
        new_filename = _unique_image_name(timestamp, f".{ext}")
        with open(os.path.join(IMAGES_DIR, new_filename), 'wb') as f:
            f.write(img_data)
        print(f" - Saved Base64 image: {new_filename}")
        return new_filename
    except Exception as e:
        print(f" - Failed to process base64 image: {e}")
        return None


def _copy_local_image(src, timestamp):
    """Copy the file referenced by a ``file://`` URL into IMAGES_DIR.

    Returns the new bare filename, or ``None`` if nothing was copied.
    """
    try:
        # Remove file:// prefix and decode URL encoded chars
        local_path = unquote(src[7:])
        # On Windows, it might be file:///C:/... -> /C:/... -> C:/...
        if local_path.startswith('/') and ':' in local_path:
            local_path = local_path[1:]

        if not os.path.exists(local_path):
            return None

        ext = os.path.splitext(local_path)[1] or '.png'
        new_filename = _unique_image_name(timestamp, ext)
        shutil.copy2(local_path, os.path.join(IMAGES_DIR, new_filename))
        print(f" - Copied local image: {new_filename}")
        return new_filename
    except Exception as e:
        print(f" - Failed to copy local image: {e}")
        return None


def _download_remote_image(src, timestamp):
    """Download an http(s) image into IMAGES_DIR so the note works offline.

    Returns the new bare filename, or ``None`` if the response is not a
    successful image response or the download fails.
    """
    try:
        response = requests.get(src, timeout=5)
        content_type = response.headers.get('content-type', '').lower()
        if response.status_code != 200 or 'image' not in content_type:
            return None

        ext = '.jpg'  # default
        for token, candidate in (('png', '.png'), ('gif', '.gif'),
                                 ('webp', '.webp'), ('svg', '.svg'),
                                 ('bmp', '.bmp'), ('icon', '.ico')):
            if token in content_type:
                ext = candidate
                break

        new_filename = _unique_image_name(timestamp, ext)
        with open(os.path.join(IMAGES_DIR, new_filename), 'wb') as f:
            f.write(response.content)
        print(f" - Downloaded remote image: {new_filename}")
        return new_filename
    except Exception as e:
        print(f" - Failed to download remote image: {e}")
        return None


def process_html_images(html_content, timestamp):
    """Find images in HTML, save them locally, and update src.

    Args:
        html_content: HTML markup to scan for ``<img>`` tags.
        timestamp: String embedded in every generated image filename.

    Returns:
        The HTML as a string. Every image that was saved has its ``src``
        replaced by the bare filename of the copy in IMAGES_DIR. Images
        that could not be saved keep their original ``src``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            continue

        if src.startswith('data:image'):
            new_filename = _save_data_uri_image(src, timestamp)
        elif src.startswith('file://'):
            new_filename = _copy_local_image(src, timestamp)
        elif src.startswith(('http://', 'https://')):
            new_filename = _download_remote_image(src, timestamp)
        else:
            new_filename = None

        # Update src in HTML only if a file was actually written.
        if new_filename:
            # We use bare filename as requested
            img['src'] = new_filename

    return str(soup)
|
|
|
|
def extract_title_from_markdown(md_text):
    """Return a filename-safe title taken from the first H1 heading.

    Only the first ten lines of the stripped text are inspected. A line
    counts as a heading when, after trimming, it starts with ``'# '``. The
    text after that marker is passed through ``sanitize_filename``.
    Returns ``None`` when no such line is found.
    """
    leading_lines = md_text.strip().split('\n')[:10]
    for raw_line in leading_lines:
        candidate = raw_line.strip()
        if not candidate.startswith('# '):
            continue
        return sanitize_filename(candidate[2:])
    return None
|
|
|
|
def _unique_markdown_path(title, timestamp):
    """Return a path in MARKDOWN_DIR that does not overwrite an existing note.

    Names are tried in this order: ``<title>.md`` (or ``Note_<timestamp>.md``
    without a title), then ``<title>_<timestamp>.md``, then the last stem
    with ``_1``, ``_2``, ... appended until a free name is found.
    """
    stem = title if title else f"Note_{timestamp}"
    candidate = os.path.join(MARKDOWN_DIR, f"{stem}.md")
    if not os.path.exists(candidate):
        return candidate

    if title:
        stem = f"{title}_{timestamp}"
        candidate = os.path.join(MARKDOWN_DIR, f"{stem}.md")
        if not os.path.exists(candidate):
            return candidate

    counter = 1
    while True:
        candidate = os.path.join(MARKDOWN_DIR, f"{stem}_{counter}.md")
        if not os.path.exists(candidate):
            return candidate
        counter += 1


def _write_markdown(path, text):
    """Write *text* to *path* as UTF-8."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)


def save_clipboard_content():
    """Save the current clipboard content as a Markdown note.

    The clipboard is inspected in priority order:

    1. HTML / rich text: images are saved to IMAGES_DIR, the HTML is
       converted to Markdown and written to MARKDOWN_DIR.
    2. Bitmap image: saved as a PNG in IMAGES_DIR together with a small
       Markdown note that references it.
    3. Plain text: written to MARKDOWN_DIR as is.

    Progress is reported with ``print``; nothing is returned.
    """
    # Ensure directories exist
    os.makedirs(MARKDOWN_DIR, exist_ok=True)
    os.makedirs(IMAGES_DIR, exist_ok=True)

    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    # Priority 1: Check for HTML (Rich Text)
    # This covers most "Copy from Note App" scenarios
    html_content = get_html_from_clipboard()
    if html_content:
        print("Detected HTML/Rich Text in clipboard...")

        # 1. Process Images in HTML (Save to disk)
        processed_html = process_html_images(html_content, timestamp)

        # 2. Convert to Markdown
        # heading_style='atx' ensures # Header style instead of underlines
        md_text = md(processed_html, heading_style='atx')

        # 3. Clean up extra newlines often introduced by conversion
        md_text = re.sub(r'\n{3,}', '\n\n', md_text).strip()

        # 4. Determine a collision-free filename and save
        title = extract_title_from_markdown(md_text)
        markdown_path = _unique_markdown_path(title, timestamp)
        _write_markdown(markdown_path, md_text)

        print(f"Saved Rich Text Note to: {markdown_path}")
        return

    # Priority 2: Check for Bitmap Image (Direct Screenshot Copy)
    try:
        image = ImageGrab.grabclipboard()
        # Only an actual image object is handled here; any other result
        # (nothing, or a list) falls through to the plain-text branch.
        if isinstance(image, Image.Image):
            print("Detected BITMAP image in clipboard...")
            image_filename = f"clip_image_{timestamp}.png"
            image_path = os.path.join(IMAGES_DIR, image_filename)
            image.save(image_path, 'PNG')

            markdown_filename = f"Image_{timestamp}.md"
            markdown_path = os.path.join(MARKDOWN_DIR, markdown_filename)

            # Use bare filename as requested, and actually reference the
            # saved image so the note displays it.
            content = (
                f"# Clipboard Image {timestamp}\n\n"
                f"![Clipboard Image]({image_filename})\n"
            )
            _write_markdown(markdown_path, content)

            print(f"Saved image to: {image_path}")
            print(f"Saved markdown to: {markdown_path}")
            return
    except Exception as e:
        print(f"Error checking bitmap: {e}")

    # Priority 3: Fallback to Plain Text
    text = pyperclip.paste()
    if not text:
        print("Clipboard is empty.")
        return

    print("Detected PLAIN TEXT in clipboard...")
    title = extract_title_from_markdown(text)
    markdown_path = _unique_markdown_path(title, timestamp)
    _write_markdown(markdown_path, text)
    print(f"Saved Text Note to: {markdown_path}")
|
|
|
|
# Script entry point: save the clipboard content only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    save_clipboard_content()
|