## Copyright 2024, Spirion LLC All Rights Reserved ##

import os
from sys import argv
from io import BytesIO
from pathlib import Path
from datetime import datetime

# pip install pillow pypdf python-docx openpyxl python-pptx exiftool

# Images
from PIL import Image, ImageDraw, ImageFont
from exiftool import ExifToolHelper, ExifTool
# PDFs
from pypdf import PdfWriter, PdfReader
# Word
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
# Excel
import openpyxl
# PowerPoint
from pptx import Presentation

# # # # # # # # # # # # #  C U S T O M I Z E  # # # # # # # # # # # #
#                                                                   #
# MARK COUNT: the number of times the watermark appears,            #
# a value between 1 and 5 for most document sizes.                  #
MARK_COUNT = 1                                                      #
#                                                                   #
# WATERMARK TEXT: the string to be applied as a watermark.          #
#   NOTE: font size decreases as more chars are added; keep         #
#   watermarks between 3 and 30 characters for images and pdfs.     #
WATERMARK_TEXT = "SPIRION WATERMARK"                                #
#                                                                   #
# LOG PATH: the path for logging.                                   #
LOG_PATH = "c:\\temp"                                               #
#                                                                   #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

def process_file(file_path, WATERMARK_TEXT):
    """Apply a content mark to ``file_path`` according to its extension.

    Images and PDFs/PPTX files receive a generated watermark image; DOCX and
    XLSX files receive the watermark as text. Any other extension is routed
    to ``mark_other``, which logs the file as unsupported.

    Args:
        file_path: Path of the file to mark (modified in place).
        WATERMARK_TEXT: Text to apply as the watermark.

    Returns:
        The result of the marking function: True when a mark was applied,
        False for unsupported file types.
    """
    _, file_extension = os.path.splitext(file_path)
    file_extension = file_extension.lower()
    marked = False
    # TODO: Add GIF support
    if file_extension in ('.jpg', '.jpeg', '.png', '.tiff', '.tif', '.bmp'):
        file_dimensions = get_file_dimensions(file_path)
        watermark_stamp = generate_watermark(WATERMARK_TEXT, file_dimensions)
        # XMP is only backed up (and later restored) for JPEG and PNG files;
        # the other image types skip the exiftool round trip entirely.
        has_metadata = (
            file_extension in ('.jpg', '.jpeg', '.png')
            and get_metadata(file_path)
        )
        marked = mark_image(file_path, file_extension, watermark_stamp)
        if has_metadata:
            set_metadata(file_path)
    elif file_extension == '.pdf':
        file_dimensions = get_file_dimensions(file_path)
        watermark_stamp = generate_watermark(WATERMARK_TEXT, file_dimensions)
        marked = mark_pdf(file_path, watermark_stamp, file_dimensions)
    elif file_extension == '.docx':
        marked = mark_docx(file_path, WATERMARK_TEXT)
    elif file_extension == '.xlsx':
        marked = mark_xlsx(file_path, WATERMARK_TEXT)
    elif file_extension == '.pptx':
        file_dimensions = get_file_dimensions(file_path)
        watermark_stamp = generate_watermark(WATERMARK_TEXT, file_dimensions)
        marked = mark_pptx(file_path, watermark_stamp)
    else:
        marked = mark_other(file_path)
    if marked:
        log_event(log_location, f"MARKED ---- Successfully applied content mark ('{ file_path }')")

    # The script entry point binds this result, so report the outcome
    # instead of implicitly returning None.
    return marked

def mark_image(file_path, file_extension, watermark_stamp):
    """Paste ``watermark_stamp`` onto the image at ``file_path`` and save it in place.

    Args:
        file_path: Path of the image to mark; the file is overwritten.
        file_extension: Lower-case extension including the dot (e.g. '.jpg').
            It is used, minus the dot, as the Pillow save format.
        watermark_stamp: RGBA Pillow image used both as the pasted content
            and as its own transparency mask.

    Returns:
        True once the marked image has been saved.
    """
    # Map the short extensions to the format names passed to ``save``.
    format_aliases = {'.jpg': '.jpeg', '.tif': '.tiff'}
    file_extension = format_aliases.get(file_extension, file_extension)

    source_image = Image.open(file_path)
    try:
        # PNGs are converted to RGBA before pasting; ``convert`` returns a
        # new image, so the originally opened one must be closed separately.
        if file_extension == '.png':
            target_image = source_image.convert('RGBA')
        else:
            target_image = source_image
        try:
            target_image.paste(watermark_stamp, (0, 0), watermark_stamp)
            target_image.save(file_path, format=file_extension[1:])
        finally:
            target_image.close()
    finally:
        # Release the opened file even when pasting or saving fails.
        source_image.close()

    return True
    
# PIL image saving doesn't preserve XMP metadata
# TODO: Replace "tmp.xmp" with in-memory temp
def get_metadata(file_path):
    """Back up the XMP metadata of ``file_path`` into the sidecar file "tmp.xmp".

    The sidecar is written to the current working directory by exiftool.

    Args:
        file_path: Path of the image whose XMP block should be copied.

    Returns:
        True if "tmp.xmp" exists after exiftool ran (there is something to
        restore later), False otherwise.
    """
    # Drop any sidecar left behind by an interrupted run so it cannot be
    # mistaken for (or merged with) this file's metadata.
    if os.path.exists("tmp.xmp"):
        os.remove("tmp.xmp")
    with ExifTool() as et:
        et.execute("-tagsFromFile", file_path, "-xmp", "tmp.xmp")

    # NOTE(review): exiftool is expected to skip creating the sidecar when
    # the source carries no XMP; report what actually happened rather than
    # assuming success.
    return os.path.exists("tmp.xmp")

def set_metadata(file_path):
    """Restore XMP metadata from the sidecar "tmp.xmp" into ``file_path``.

    The sidecar is deleted afterwards, whether or not exiftool succeeded.

    Args:
        file_path: Path of the image that should receive the XMP block.
    """
    try:
        with ExifTool() as et:
            et.execute("-tagsFromFile", "tmp.xmp", "-xmp", file_path, "-overwrite_original")
    finally:
        # Always clean up the temporary sidecar; tolerate it being absent
        # so a missing backup does not abort the run after marking.
        if os.path.exists("tmp.xmp"):
            os.remove("tmp.xmp")

def mark_pdf(file_path, watermark_stamp, dimensions):
    """Overlay a watermark on every page of the PDF at ``file_path``.

    Pages whose media box differs from ``dimensions`` get a stamp generated
    for their own size. Stamps are cached per page size, so a page that
    follows a differently sized one still receives the stamp matching its
    own dimensions.

    Args:
        file_path: Path of the PDF to mark; the file is overwritten.
        watermark_stamp: Pillow image already generated for ``dimensions``.
        dimensions: (width, height) the supplied stamp was generated for.

    Returns:
        True once the marked PDF has been written.
    """
    def stamp_page_from(stamp_image):
        # Each stamp gets its own buffer so a previous stamp's bytes can
        # never be mixed into the PDF being parsed.
        buffer = BytesIO()
        stamp_image.save(buffer, 'PDF')
        buffer.seek(0)
        return PdfReader(buffer).pages[0]

    def size_key(size):
        # generate_watermark truncates dimensions to int, so two sizes that
        # truncate to the same pair produce the same stamp.
        stamp_width, stamp_height = size
        return int(stamp_width), int(stamp_height)

    stamps = {size_key(dimensions): stamp_page_from(watermark_stamp)}
    writer = PdfWriter(clone_from=file_path)
    for page in writer.pages:
        page_dimensions = [page.mediabox.width, page.mediabox.height]
        key = size_key(page_dimensions)
        if key not in stamps:
            new_stamp = generate_watermark(WATERMARK_TEXT, page_dimensions)
            stamps[key] = stamp_page_from(new_stamp)
        page.merge_page(stamps[key], over=True)
    writer.write(file_path)

    return True

def mark_docx(file_path, WATERMARK_TEXT):
    """Add ``WATERMARK_TEXT`` to the headers and footers of a Word document.

    Args:
        file_path: Path of the .docx file to mark; the file is overwritten.
        WATERMARK_TEXT: Text to insert into each header and footer.

    Returns:
        True once the document has been saved.
    """
    doc = Document(file_path)
    for index, section in enumerate(doc.sections):
        for part in (section.header, section.footer):
            if part is None:
                continue
            # A header/footer linked to the previous section shares that
            # section's definition; labelling it again would stack another
            # copy of the watermark onto the same header/footer.
            if index > 0 and part.is_linked_to_previous:
                continue
            label_docx(part, WATERMARK_TEXT)

    doc.save(file_path)

    return True

def label_docx(part, WATERMARK_TEXT):
    """Insert ``WATERMARK_TEXT`` at the top of a header or footer.

    Args:
        part: A python-docx header or footer object.
        WATERMARK_TEXT: Text to insert.
    """
    paragraphs = part.paragraphs
    if paragraphs:
        # Put the watermark in its own paragraph ahead of the existing one.
        # Assigning to ``paragraph.text`` would replace every run in that
        # paragraph and discard images, fields and run formatting.
        first_paragraph = paragraphs[0]
        first_paragraph.insert_paragraph_before(
            WATERMARK_TEXT, style=first_paragraph.style
        )
    else:
        paragraph = part.add_paragraph()
        run = paragraph.add_run(WATERMARK_TEXT)
        run.font.size = Pt(10)  # Adjust font size as needed
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
def mark_xlsx(file_path, WATERMARK_TEXT):
    """Write ``WATERMARK_TEXT`` below the last used row of every worksheet.

    The workbook at ``file_path`` is saved in place. Returns True.
    """
    workbook = openpyxl.load_workbook(file_path)
    for worksheet in workbook.worksheets:
        # The mark goes in as a "footer" row to minimize impact on
        # existing content.
        target_row = worksheet.max_row + 1
        worksheet.insert_rows(target_row)
        worksheet[f"A{target_row}"].value = WATERMARK_TEXT

    workbook.save(file_path)

    return True

def mark_pptx(file_path, watermark_stamp):
    """Place ``watermark_stamp`` as a picture on every slide of a presentation.

    The stamp is serialized to PNG once and added at the top-left corner of
    each slide; the presentation at ``file_path`` is saved in place.
    Returns True.
    """
    deck = Presentation(file_path)
    stamp_png = BytesIO()
    watermark_stamp.save(stamp_png, 'PNG')
    stamp_png.seek(0)
    for slide in deck.slides:
        shapes = slide.shapes
        stamp_element = shapes.add_picture(stamp_png, 0, 0)._element
        # NOTE: the picture element is re-inserted into the shape tree; the
        # index given to "insert" controls its position on the z axis.
        shape_tree = shapes._spTree
        shape_tree.remove(stamp_element)
        shape_tree.insert(99, stamp_element)
    deck.save(file_path)

    return True

def mark_other(file_path):
    """Log ``file_path`` as an unsupported file type and report no mark applied."""
    message = f"ERROR ----- Unsupported file type ('{ file_path }')"
    log_event(log_location, message)

    return False

## Dimensions ##

# NOTE: Some "get_dimensions" functions went unused ("docx" and "xlsx").
# I had originally wanted to embed background images to all file types,
# and hadn't considered that PDFs can have different image sizes per page.

def get_file_dimensions(file_path):
    """Return the dimensions of ``file_path`` using the reader for its extension.

    Image, PDF, DOCX and PPTX files are passed to their dedicated helper.
    '.xlsx' files and unrecognized extensions yield None.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix in ('.jpg', '.jpeg', '.png', '.tiff', '.tif', '.bmp', '.gif'):
        return get_image_dimensions(file_path)
    if suffix == '.pdf':
        return get_pdf_dimensions(file_path)
    if suffix == '.docx':
        return get_docx_dimensions(file_path)
    if suffix == '.pptx':
        return get_pptx_dimensions(file_path)
    # '.xlsx' has no dimension lookup wired in; like any other extension
    # it falls through to None.
    return None

def get_image_dimensions(file_path):
    """Return the ``size`` Pillow reports for the image at ``file_path``."""
    with Image.open(file_path) as opened_image:
        image_size = opened_image.size
    return image_size

def get_pdf_dimensions(file_path):
    """Return [width, height] of the first page's media box in the PDF."""
    first_page_box = PdfReader(file_path).pages[0].mediabox

    return [first_page_box.width, first_page_box.height]

def get_docx_dimensions(file_path):
    """Return (page_width, page_height) in points for the first section.

    Logs an error and returns None when the document has no sections.
    """
    sections = Document(file_path).sections
    if not sections:
        log_event(log_location, f"ERROR ----- No sections found in file ('{ file_path }')")
        return None

    first_section = sections[0]
    return first_section.page_width.pt, first_section.page_height.pt

def get_xlsx_dimensions(file_path):
    """Return (max_row, max_column) of the workbook's active sheet."""
    active_sheet = openpyxl.load_workbook(file_path).active

    return active_sheet.max_row, active_sheet.max_column

def get_pptx_dimensions(file_path):
    """Return the slide (width, height) in points, truncated to integers."""
    deck = Presentation(file_path)

    return int(deck.slide_width.pt), int(deck.slide_height.pt)

## Drawing ##

# Font scaling 
def get_font_size(text, width, height):
    """Scale the font so ``text`` fits the shorter side of the target area.

    The size is 1.25 times the shorter of ``width``/``height``, floor-divided
    by the number of characters in ``text``.

    Args:
        text: Watermark text; an empty string is treated as one character so
            the division cannot fail.
        width: Width of the target area.
        height: Height of the target area.

    Returns:
        The font size as an int, never smaller than 1.
    """
    shorter_side = min(width, height)
    # Guard against an empty watermark string (division by zero).
    char_count = max(len(text), 1)
    # Very small targets would otherwise floor to a font size of 0.
    return max(1, int(1.25 * shorter_side // char_count))

def get_tile_size(font_size, width, height, MARK_COUNT):
    """Return the (tile_x, tile_y) size of one watermark tile.

    Tiles span the full width, and the height is split into ``MARK_COUNT``
    rows. Both values are clamped to at least 1 so callers can divide by
    them safely.

    Args:
        font_size: Unused; kept so existing callers keep working.
        width: Width of the area being tiled.
        height: Height of the area being tiled.
        MARK_COUNT: Number of watermark rows ("MARK_COUNT" is set in the
            CUSTOMIZE section of this script); values below 1 are treated
            as 1.

    Returns:
        A ``(tile_x, tile_y)`` tuple.
    """
    row_count = max(1, MARK_COUNT)
    tile_x = max(1, width)
    # A height smaller than the row count would floor to 0.
    tile_y = max(1, height // row_count)

    return tile_x, tile_y
    
# TODO: Devise different patterns for watermark stamp
def generate_watermark(text, dimensions):
    """Build a transparent RGBA image with ``text`` drawn centered in each tile.

    Args:
        text: Watermark text to draw.
        dimensions: (width, height) of the stamp; values are truncated to int.

    Returns:
        A Pillow RGBA image of the requested size carrying the watermark.
    """
    width, height = (int(value) for value in dimensions)
    image = Image.new('RGBA', (width, height), (255, 255, 255, 0))
    font_size = get_font_size(text, width, height)
    tile_width, tile_height = get_tile_size(font_size, width, height, MARK_COUNT)
    font = ImageFont.load_default(font_size)
    draw = ImageDraw.Draw(image, 'RGBA')

    # The text, font and tile size are identical for every tile, so the
    # text is measured and its centering offset computed once.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x_offset = (tile_width - (right - left)) // 2
    y_offset = (tile_height - (bottom - top)) // 2

    # NOTE: Tile width is currently set to page width, meaning
    # 1 column of stamps until adjusted in get_tile_size
    num_tiles_x = (width + tile_width - 1) // tile_width
    num_tiles_y = (height + tile_height - 1) // tile_height
    for i in range(num_tiles_x):
        for j in range(num_tiles_y):
            position = (i * tile_width + x_offset, j * tile_height + y_offset)
            # Adjust "fill" value to modify RGBA of watermark
            draw.text(position, text, font=font, fill=(160, 0, 0, 64))

    return image

## Logging ##

def get_log_path():
    """Return the log file Path under LOG_PATH, named with the current date and hour."""
    stamp = datetime.now().strftime("%Y%m%d%H")
    log_name = f"spirion_marking_{ stamp }"

    return Path(f"{ LOG_PATH }\\{ log_name }.log")

def get_log_timestamp():
    """Return the current local time formatted as "YYYY-MM-DD HH:MM:SS"."""
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    
# Creates log if it doesn't exist
# NOTE: log files rotate hourly by default (the file name carries the hour)
def set_log(LOG_PATH):
    """Create the log file if needed and write the column header when it is empty.

    Args:
        LOG_PATH: Location of the log file, as a ``pathlib.Path`` or a
            string path.
    """
    log_file = Path(LOG_PATH)
    # The context manager closes the handle even if the write fails.
    with open(log_file, "a+") as f:
        # Opening in append mode creates the file; a size of 0 means the
        # header has not been written yet.
        if log_file.stat().st_size == 0:
            f.write("TIMESTAMP" + "\t\t" + "LOG MESSAGE" + "\n" + "--------------------------------------" + "\n")

def log_event(log_location, message):
    """Append ``message`` to the log file, prefixed with the current timestamp.

    Args:
        log_location: Path of the log file; it is created if missing.
        message: Text to record on a single tab-separated line.
    """
    # The context manager closes the handle even if the write fails.
    with open(log_location, "a+") as f:
        f.write(get_log_timestamp() + "\t" + f"{ message }\n")

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    
if __name__ == "__main__":
    # Set log path from variable in "CUSTOMIZE"
    log_location = get_log_path()
    set_log(log_location)
    # Handle input #
    # Define match location from Spirion scan result; joining the arguments
    # re-assembles a path that was split on spaces
    spirion_path = " ".join(argv[1:])
    # Remove quotes (if present -- depends on Spirion script output)
    if spirion_path and spirion_path[0] == spirion_path[-1] == "'":
        spirion_path = spirion_path[1:-1]
    # Without a path there is nothing to mark; record it and stop instead of
    # failing with an IndexError on the empty string
    if not spirion_path:
        log_event(log_location, "ERROR ----- No file path supplied")
        raise SystemExit(1)
    marked_file = process_file(spirion_path, WATERMARK_TEXT)

## Copyright 2024, Spirion LLC All Rights Reserved ##