#!/usr/bin/env python3
# encoding: utf-8

##
# This script checks every media file in the repository and reports the ones
# that were added or modified without a matching update to the csv file that
# tracks image, sound, and music copyright.
##

import argparse
import contextlib
import csv
import hashlib
from operator import itemgetter
import os
from pathlib import Path
from subprocess import check_output
import sys

##
# csv file layout:
# [0] = current git commit date
# [1] = file path, relative to the repository root
# [2] = license name(s)
# [3] = authorship information
# [4] = notes
# [5] = new git commit date, if different from the value in [0]
# [6] = current md5 hash
##

# Number of columns in a complete csv row (see the layout above).
COLUMN_COUNT = 7

# File extensions (case-sensitive) of the media files that are tracked.
MEDIA_SUFFIXES = frozenset({".png", ".jpg", ".webp", ".wav", ".ogg"})

# Header line written at the top of the output file.
CSV_HEADER = "Date,File,License,Author - Real Name(other name);Real Name(other name);etc,Notes,Needs Update,MD5"


def do_git(file):
    """Return the date of the last commit that touched *file*, as YYYY/MM/DD.

    Runs ``git log`` in the current working directory. The result is whatever
    git prints with its trailing newlines removed, so it is an empty string
    when git prints nothing. ``subprocess.CalledProcessError`` is raised when
    git exits with a non-zero status.
    """
    # "--" makes git treat the argument as a path even when it could be
    # mistaken for a revision name or an option.
    output = check_output(
        ["git", "log", "-1", "--format=%ad", "--date=format:%Y/%m/%d", "--", file]
    )
    return str(output, 'UTF-8').rstrip('\n')


def do_hash(file):
    """Return the hexadecimal md5 digest of the contents of *file*."""
    md5 = hashlib.md5()
    with open(file, 'rb') as f:
        # Read in 64 KiB chunks so large media files are never held in memory
        # in one piece; iteration stops at the first empty read.
        for data in iter(lambda: f.read(65536), b""):
            md5.update(data)
    return md5.hexdigest()


def read_tracked_rows(input_path):
    """Load the existing copyright csv file.

    Returns a tuple ``(csv_data, removed, needs_sorting)``:

    * ``csv_data`` maps each listed file path that exists on disk to its row
      (a list of ``COLUMN_COUNT`` strings),
    * ``removed`` lists the paths that are in the csv file but not on disk,
    * ``needs_sorting`` is True when some path sorts before the path on the
      preceding data row, i.e. the input is not ordered by file path.

    Paths are checked relative to the current working directory.
    """
    csv_data = {}
    removed = []
    needs_sorting = False
    previous_file = ""
    # newline="" is what the csv module asks for so that it can do its own
    # line-ending handling.
    with open(input_path, encoding="utf-8", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields [] for blank lines; rows starting with "Date"
            # are header lines.
            if not row or row[0] == "Date":
                continue
            # Pad rows with missing trailing fields so every column can be
            # indexed later; an absent hash then simply counts as "changed".
            if len(row) < COLUMN_COUNT:
                row.extend([""] * (COLUMN_COUNT - len(row)))

            file = row[1]
            if file < previous_file:
                needs_sorting = True
            previous_file = file

            if os.path.exists(file):
                csv_data[file] = row
            else:
                removed.append(file)
    return csv_data, removed, needs_sorting


def scan_media_files(csv_data, top="."):
    """Classify every media file found below *top*.

    Returns a tuple ``(added, changed, unchanged)`` of row lists, each sorted
    by file path. Files missing from *csv_data* get a fresh row in ``added``;
    files whose md5 differs from column [6] have columns [5] and [6] of their
    row updated in place and are put in ``changed``.
    """
    added = []
    changed = []
    unchanged = []
    for root, _, files in os.walk(top):
        for filename in files:
            if Path(filename).suffix not in MEDIA_SUFFIXES:
                continue
            # normpath strips the leading "./" so the path has the same form
            # as the paths stored in the csv file.
            file = os.path.normpath(os.path.join(root, filename))
            digest = do_hash(file)
            row = csv_data.get(file)
            if row is None:
                added.append(["", file, "", "", "", do_git(file), digest])
            elif row[6] != digest:
                row[5] = do_git(file)
                row[6] = digest
                changed.append(row)
            else:
                unchanged.append(row)

    for rows in (added, changed, unchanged):
        rows.sort(key=itemgetter(1))
    return added, changed, unchanged


def write_rows(rows, out):
    """Write *rows* to the text stream *out* as csv, one "\\n"-ended line each.

    csv.writer is used so that fields containing commas or quotes are quoted
    again; the input is parsed with csv.reader, which strips that quoting.
    """
    csv.writer(out, lineterminator="\n").writerows(rows)


def main(argv=None):
    """Run the check and return the process exit status.

    *argv* is the list of command line arguments; None means ``sys.argv[1:]``.
    Returns 1 when files were removed, added or changed, or when the input
    file is not sorted by file path, and 0 otherwise.
    """
    args = argparse.ArgumentParser()
    args.add_argument("--repo", default=".", help="The directory of the Wesnoth repository to run this script against.")
    args.add_argument("--output", default="output.csv", help="The file to write the results of this script to.")
    args.add_argument("--input", default="copyrights.csv", help="The file to read the existing copyright data from.")
    options = args.parse_args(argv)
    # Everything below works with paths relative to the repository root.
    os.chdir(options.repo)

    with contextlib.suppress(FileNotFoundError):
        os.remove(options.output)

    # Sanity-check that the input is in the same order as the output would be
    csv_data, removed, csvfile_needs_sorting = read_tracked_rows(options.input)

    # The working directory already is the repository root, so walk "." here:
    # walking options.repo again would resolve a relative path twice.
    added, changed, unchanged = scan_media_files(csv_data, ".")

    final_output = added + changed + unchanged
    if options.output != "":
        with open(options.output, 'w', encoding="utf-8", newline="") as f:
            f.write(CSV_HEADER + "\n")
            write_rows(final_output, f)
    else:
        write_rows(final_output, sys.stdout)

    any_check_failed = False
    if removed:
        any_check_failed = True
        print("There are "+str(len(removed))+" removed images")

    if added or changed:
        any_check_failed = True
        print("There are "+str(len(added))+" new images")
        print("There are "+str(len(changed))+" changed images")

    if csvfile_needs_sorting:
        any_check_failed = True
        print("The input file isn’t sorted by filename")
        print(" Changed or newly added lines are put at the top for easy editing,\n but you should run the tool again after editing to sort the file.")

    return 1 if any_check_failed else 0


##
# program logic start
##

if __name__ == "__main__":
    sys.exit(main())