#!/usr/bin/env python3
# encoding: utf-8

##
# This script checks all media files in the repository for whether they've been
# modified or added without updating the file tracking image, sound, and music
# copyright
##

import argparse
import contextlib
import csv
import hashlib
from operator import itemgetter
import os
from pathlib import Path
from subprocess import check_output
import sys

##
# csv file layout:
# [0] = current git commit date
# [1] = file path, relative to the repository root
# [2] = license name(s)
# [3] = authorship information
# [4] = notes
# [5] = new git commit date, if different from the value in [0]
# [6] = current md5 hash
##

# Number of columns in a complete csv row (see the layout above).
COLUMN_COUNT = 7

# File extensions (case-sensitive, as returned by Path.suffix) that are checked.
MEDIA_SUFFIXES = frozenset((".png", ".jpg", ".webp", ".wav", ".ogg"))

# Header line written at the top of the output file.
CSV_HEADER = "Date,File,License,Author - Real Name(other name);Real Name(other name);etc,Notes,Needs Update,MD5\n"


def do_git(file: str) -> str:
    """Return the author date of the last commit touching *file* as YYYY/MM/DD.

    git is run in the current working directory. The "--" separator makes git
    treat *file* strictly as a path, even if it starts with a dash.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    output = check_output(
        ["git", "log", "-1", "--format=%ad", "--date=format:%Y/%m/%d", "--", file]
    )
    return output.decode("utf-8").rstrip("\n")


def do_hash(file: str) -> str:
    """Return the hexadecimal md5 digest of the contents of *file*.

    The file is read in 64 KiB chunks so large media files are never loaded
    into memory in one piece.
    """
    md5 = hashlib.md5()
    with open(file, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            md5.update(chunk)
    return md5.hexdigest()


def _read_tracked(input_path):
    """Load the tracking csv; return (rows keyed by path, paths no longer on disk)."""
    csv_data = {}
    removed = []
    # newline="" is what the csv module expects; utf-8 matches this file's encoding
    # declaration instead of depending on the platform locale.
    with open(input_path, encoding="utf-8", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines come back as empty lists; "Date" marks the header row.
            if not row or row[0] == "Date":
                continue
            # Pad short rows so that every column index used later exists.
            row.extend([""] * (COLUMN_COUNT - len(row)))
            path = row[1]
            if not os.path.exists(path):
                removed.append(path)
                continue
            csv_data[path] = row
    return csv_data, removed


def _scan_media(csv_data):
    """Hash every media file below the current directory; return (added, changed, unchanged) rows."""
    added = []
    changed = []
    unchanged = []
    # The caller has already changed into the repository, so walking "." yields
    # paths relative to the repository root, which is how the csv stores them.
    for root, _, files in os.walk("."):
        for filename in files:
            if Path(filename).suffix not in MEDIA_SUFFIXES:
                continue
            path = os.path.normpath(os.path.join(root, filename))
            digest = do_hash(path)
            row = csv_data.get(path)
            if row is None:
                added.append(["", path, "", "", "", do_git(path), digest])
            elif row[6] != digest:
                row[5] = do_git(path)
                row[6] = digest
                changed.append(row)
            else:
                unchanged.append(row)
    return added, changed, unchanged


def _write_rows(rows, output):
    """Write *rows* as csv to the file *output*, or to stdout (without header) if *output* is empty."""
    # csv.writer quotes fields containing commas or quotes, so whatever
    # csv.reader parsed from the input is written back without corruption.
    if output != "":
        with open(output, "w", encoding="utf-8", newline="") as f:
            f.write(CSV_HEADER)
            csv.writer(f, lineterminator="\n").writerows(rows)
    else:
        csv.writer(sys.stdout, lineterminator="\n").writerows(rows)


##
# program logic start
##

def main(argv=None) -> int:
    """Compare the media files in the repository against the tracking csv.

    Args:
        argv: command line arguments without the program name; None means
            sys.argv[1:].

    Returns:
        1 if any media file is new or has changed content, otherwise 0.

    The working directory is changed to --repo, so relative --input and
    --output paths are resolved inside the repository.
    """
    args = argparse.ArgumentParser()
    args.add_argument("--repo", default=".", help="The directory of the Wesnoth repository to run this script against.")
    args.add_argument("--output", default="output.csv", help="The file to write the results of this script to.")
    args.add_argument("--input", default="copyrights.csv", help="The file to read the existing copyright data from.")
    options = args.parse_args(argv)

    os.chdir(options.repo)

    # Remove a stale result file from a previous run; an empty --output also
    # ends up here and is ignored via the suppressed FileNotFoundError.
    with contextlib.suppress(FileNotFoundError):
        os.remove(options.output)

    csv_data, removed = _read_tracked(options.input)
    added, changed, unchanged = _scan_media(csv_data)

    for rows in (added, changed, unchanged):
        rows.sort(key=itemgetter(1))

    _write_rows(added + changed + unchanged, options.output)

    if removed:
        print("There are {} removed images".format(len(removed)))

    if added or changed:
        print("There are {} new images".format(len(added)))
        print("There are {} changed images".format(len(changed)))
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())