## Copyright 2023, Spirion LLC All Rights Reserved

## Dependencies ##
# pip install boxsdk
# pip install "boxsdk[jwt]"

from boxsdk import JWTAuth, Client
from pathlib import Path  # Parse match location
from sys import argv  # Read input from the Spirion script
from datetime import datetime  # Handle timestamps for logging

## Customization ##

# Path to JWT credentials
path_to_config = "C:\\temp\\creds.json"

# Log file parent directory
path_to_log = "C:\\temp"

# Box Shield label to apply
shield_label = "Label Name"


## Functions ##

# Logging #

def get_log_path():
    """Return the Path of the current log file.

    The file name embeds the date and hour (``%Y%m%d%H``), so a new log
    file is used every hour. The parent directory is ``path_to_log``.
    """
    stamp = datetime.now().strftime("%Y%m%d%H")
    return Path(f"{path_to_log}\\box_api_log_{stamp}.log")


def get_log_timestamp():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def _append_log(log_path, text):
    """Append *text* to the file at *log_path*, creating it if needed.

    The context manager guarantees the handle is closed even when the
    write raises. UTF-8 is forced so that file or user names outside the
    Windows locale code page cannot abort the run with an encoding error.
    """
    with open(log_path, "a+", encoding="utf-8") as log_file:
        log_file.write(text)


def _resolve_log_path(log_path=None):
    """Pick the log file to write to.

    Order of preference: the explicit argument, the module-level
    ``log_path`` assigned in the Main section, then a freshly computed
    ``get_log_path()``.
    """
    if log_path is not None:
        return log_path
    return globals().get("log_path") or get_log_path()


# Create log file if it doesn't exist
# Initial write confirms API session context
# NOTE: logs rotate hourly by default
def create_log(log_path):
    """Create the log file and write its header if the file is empty.

    Arguments:
        log_path {Path} -- Log file location (``.stat()`` is called on it)

    Reads the module-level ``client`` (set in the Main section) to look up
    the authenticated user that is recorded in the header.
    """
    with open(log_path, "a+", encoding="utf-8") as log_file:
        if log_path.stat().st_size != 0:
            # Header was already written by an earlier run in this hour.
            return
        # Query the API before writing anything: if the call fails the file
        # stays empty, so the next run writes a complete header instead of
        # finding a half-written one.
        user = client.user('me').get()
        log_file.write("".join([
            f"Box API connection established at {get_log_timestamp()}",
            "\n\n" + "Authenticated User",
            "\n" + f"Name: {user.name}",
            "\n" + f"Login: {user.login}",
            "\n\n" + "TIMESTAMP" + "\t\t" + "LOG MESSAGE" + "\n",
            "--------------------------------------" + "\n",
        ]))


# Log function for added verbosity - shows all input for API calls
def log_variables(spirion_path, result_path, search_term, search_type,
                  match_root, owner_id, shield_label, log_path):
    """Append an INPUT entry listing every value used for the API calls."""
    indent = "\t\t\t"
    rows = [
        "=====",
        f"Match Path: {spirion_path}",
        f"Cleaned Path: {result_path}",
        f"Search Term: {search_term}",
        f"Search Type: {search_type}",
        f"Box Email: {match_root}",
        f"Box ID: {owner_id}",
        f"Classification: {shield_label}",
        f"Log Path: {log_path}",
        "=====",
    ]
    details = "".join(f"{indent}{row}\n" for row in rows)
    _append_log(log_path, get_log_timestamp() + "\t" + "INPUT\n" + details)


# Log function for file path returned in Spirion match
def log_match(result_path, log_path, match_owner_id):
    """Append a SEARCHING entry for the matched path and Box account id."""
    _append_log(
        log_path,
        get_log_timestamp() + "\t"
        + f"SEARCHING -- '{result_path}' for Box Account - {match_owner_id}\n",
    )


# Log function if classification succeeds
def log_label_applied(shield_label, file_id, log_path=None):
    """Append a SUCCESS entry for a classification that was applied.

    *log_path* is optional; when omitted the module-level ``log_path`` set
    in the Main section is used.
    """
    _append_log(
        _resolve_log_path(log_path),
        get_log_timestamp() + "\t"
        + f"SUCCESS -- Classification '{shield_label}' applied for Box File - {file_id}\n",
    )


# Log function if Box API search fails for user account in Spirion match
def log_search_failed(search_term, search_type, match_root, log_path=None):
    """Append a FAILURE entry for a file the Box search did not find.

    *log_path* is optional; when omitted the module-level ``log_path`` set
    in the Main section is used.
    """
    _append_log(
        _resolve_log_path(log_path),
        get_log_timestamp() + "\t"
        + f"FAILURE -- Box File '{search_term}.{search_type}' not found for {match_root}\n",
    )


# Log function if Box Shield label isn't found (by name)
def log_label_failed(shield_label, file_id, log_path=None):
    """Append a FAILURE entry for a classification that was not applied.

    *log_path* is optional; when omitted the module-level ``log_path`` set
    in the Main section is used.
    """
    _append_log(
        _resolve_log_path(log_path),
        get_log_timestamp() + "\t"
        + f"FAILURE -- Classification '{shield_label}' could not be applied to Box File - {file_id}\n",
    )


# Box API calls #

def box_get_authenticated_client(configPath, log_path):
    """
    Get an authenticated Box client for a JWT service account

    Arguments:
        configPath {str} -- Path to the JSON config file for your Box JWT app
        log_path -- Accepted for interface compatibility; not used here

    Returns:
        Client -- A Box client for the JWT service account, or None when
        authentication fails. On failure an error note is appended to
        "C:\\temp\\DELETE ME.txt".
    """
    try:
        auth = JWTAuth.from_settings_file(configPath)
        auth.authenticate_instance()
        return Client(auth)
    except Exception:
        # "except Exception" instead of a bare "except" so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        _append_log(
            "C:\\temp\\DELETE ME.txt",
            get_log_timestamp() + "\t"
            + "ERROR -- configPath must be a path to the JSON config file for your Box JWT app\n\nDELETE THIS FILE AFTER CORRECTING",
        )
        return None


def get_owner_id(match_root):
    """
    API user search to get owner Box id -- filter should always return 1 result

    Reads the module-level ``client`` set in the Main section and returns
    the id of the first user produced by the search.
    NOTE(review): if the search yields no user, ``.next()`` presumably
    raises StopIteration -- confirm against the SDK.
    """
    match_owner = client.users(user_type='all', filter_term=match_root)
    return match_owner.next().id


def _folder_name(box_folder):
    """Return the display name of one ``path_collection`` entry.

    Prefers the entry's ``name`` attribute. The previous implementation cut
    the name out of ``str(box_folder)`` between the first "(" and the first
    ")", which truncated folder names that themselves contain parentheses.
    That parse is kept only as a fallback, using the last ")" instead.
    NOTE(review): entries are presumably SDK folder objects exposing
    ``name`` -- confirm against the SDK.
    """
    name = getattr(box_folder, "name", None)
    if name is not None:
        return name
    text = str(box_folder)
    return text[text.find("(") + 1:text.rfind(")")]


def box_confirm_match(match_root, path_object, search_term, search_type):
    """
    Confirms matches by evaluating:
        1. The owned_by user, to match with the Spirion path's root
        2. The nested file structure using the API's path_collection

    Needed for Box API filename searches, which cannot be filtered by parent folder(?)
    NOTE: Refine search query to better limit scope https://developer.box.com/reference/get-search/

    Reads the module-level ``user_client``, ``result_path`` and ``log_path``
    set in the Main section.

    Returns:
        The first search result whose folder chain and file name both match
        the Spirion path, or None when no result matches.
    """
    match_owner_id = get_owner_id(match_root)

    # Folder names from the Spirion result path used as cross reference:
    # drop the first part (account root) and the last part (file name).
    match_list = list(path_object.parts[1:-1])
    expected_name = f"{search_term}.{search_type}"

    # API file search to get Box id of match location -- filter returns all
    # files of the same name/type
    box_files = user_client.search().query(
        query=search_term,
        file_extensions=[search_type],
        owner_user_ids=[match_owner_id],
    )

    # Evaluate Box API search results to validate Spirion match location
    for box_file in box_files:
        # Skip the first path_collection entry, as the original did, before
        # comparing the folder chain with the Spirion path.
        box_list = [
            _folder_name(box_folder)
            for box_folder in box_file.path_collection["entries"][1:]
        ]
        if box_list == match_list and box_file.name == expected_name:
            log_match(result_path, log_path, match_owner_id)
            return box_file
    return None


def box_set_classification(box_file, shield_label):
    """
    Apply classification label to single file using the Box File id
    Overwrites current classification

    Arguments:
        box_file -- Result of box_confirm_match, or None when nothing matched
        shield_label {str} -- Classification name to apply

    Reads the module-level ``user_client``, ``search_term``, ``search_type``
    and ``match_root`` set in the Main section. Every outcome is written to
    the log; nothing is returned.
    """
    if box_file is None:
        log_search_failed(search_term, search_type, match_root)
        return

    classification = {
        "Box__Security__Classification__Key": shield_label
    }
    try:
        applied_metadata = user_client.file(file_id=box_file.id).metadata(
            scope="enterprise",
            template="securityClassification-6VMVochwUWo",
        ).set(classification)
    except Exception:
        log_label_failed(shield_label, box_file.id)
        return

    # The success log call sits outside the try block so that a logging
    # error is not misreported as a labeling failure.
    if applied_metadata:
        log_label_applied(shield_label, box_file.id)
    else:
        # The original logged nothing when the call returned a falsy result;
        # record it as a failure so every run leaves an outcome in the log.
        log_label_failed(shield_label, box_file.id)


## Main ##

if __name__ == "__main__":

    # Handle input #

    # Define match location from Spirion scan result
    spirion_path = " ".join(argv[1:])
    if not spirion_path:
        # Without this guard the quote check below fails with IndexError.
        raise SystemExit("ERROR -- no Spirion match location was passed to the script")

    # Remove quotes (if present -- depends on Spirion script output)
    if spirion_path[0] == spirion_path[-1] == "'":
        spirion_path = spirion_path[1:-1]

    # Remove 'Box Sync: ' prefix from Spirion match location
    result_path = spirion_path[10:]

    # Parse input path #

    # Create object from pathlib
    path = Path(result_path)
    if not path.parts:
        # Nothing is left after removing the prefix, so there is no account
        # root or file name to search for.
        raise SystemExit(f"ERROR -- could not parse a Box path from '{spirion_path}'")

    # User account from base of result_path
    match_root = path.parts[0]

    # Search term for API calls -- filename without extension
    search_term = path.stem

    # File type to limit search results (without the dot)
    # NOTE: partition removes any Spirion applied tags (e.g. mdb files)
    search_type = path.suffix[1:].partition(" ")[0]

    # Set log path from variable in Configuration section
    log_path = get_log_path()

    # Box API context #

    # Authenticate service account
    client = box_get_authenticated_client(path_to_config, log_path)
    if client is None:
        # Authentication failed and the error note has already been written;
        # stop here instead of failing later with an unrelated AttributeError.
        raise SystemExit(1)

    # Proxy auth for "as-user" API context relative to Spirion match root
    spirion_user = client.user(user_id=get_owner_id(match_root))

    # Functions reference "user_client" for API calls completed as match owner
    user_client = client.as_user(spirion_user)

    # Generate log (hourly) if it doesn't exist already
    create_log(log_path)

    # Uncomment below for added log verbosity
    # log_variables(spirion_path, result_path, search_term, search_type, match_root, get_owner_id(match_root), shield_label, log_path)

    # Box Shield labeling #

    # 1. Confirm match location
    box_file = box_confirm_match(match_root, path, search_term, search_type)

    # 2. Apply the classification defined by "shield_label"
    box_set_classification(box_file, shield_label)