mirror of
https://github.com/wesnoth/wesnoth
synced 2025-04-23 21:19:32 +00:00

When the tool finds new or changed files, it puts them at the top of the output .csv file for easy editing. However, this means that those lines move when update_copyrights is run again. It also means that any two PRs touching images are likely to have merge conflicts, as they'll change line 2 of copyrights.csv. Make the CI fail unless the file has been sorted again after editing.
130 lines
3.8 KiB
Python
Executable File
130 lines
3.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
# encoding: utf-8
|
||
|
||
##
|
||
# This script checks whether any media file in the repository has been added or modified without a matching update to the file that tracks image, sound, and music copyright
|
||
##
|
||
|
||
import argparse
|
||
import contextlib
|
||
import csv
|
||
import hashlib
|
||
from operator import itemgetter
|
||
import os
|
||
from pathlib import Path
|
||
from subprocess import check_output
|
||
import sys
|
||
|
||
##
|
||
# csv file layout:
|
||
# [0] = current git commit date
|
||
# [1] = file path, relative to the repository root
|
||
# [2] = license name(s)
|
||
# [3] = authorship information
|
||
# [4] = notes
|
||
# [5] = new git commit date, if different from the value in [0]
|
||
# [6] = current md5 hash
|
||
##
|
||
|
||
def do_git(file):
    """Return the date of the most recent commit that touched *file*.

    Runs ``git log -1`` in the current working directory and returns the
    author date (``%ad``) formatted as ``YYYY/MM/DD``, decoded as UTF-8 with
    trailing newlines stripped.

    Raises ``subprocess.CalledProcessError`` if git exits with a non-zero
    status.
    """
    # "--" tells git that everything after it is a path, so a file whose name
    # starts with "-" or collides with a branch/tag name is never parsed as an
    # option or a revision.
    output = check_output(
        ["git", "log", "-1", "--format=%ad", "--date=format:%Y/%m/%d", "--", file]
    )
    return str(output, 'UTF-8').rstrip('\n')
|
||
|
||
def do_hash(file):
    """Return the hexadecimal MD5 digest of the contents of *file*.

    The file is opened in binary mode and consumed in 64 KiB chunks, so the
    whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(file, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # which is how a binary file signals end-of-file.
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
||
|
||
##
|
||
# program logic start
|
||
##
|
||
|
||
# Command-line interface. Every option has a default, so the script can be
# started without arguments from the repository root.
args = argparse.ArgumentParser()
args.add_argument(
    "--repo",
    default=".",
    help="The directory of the Wesnoth repository to run this script against.",
)
args.add_argument(
    "--output",
    default="output.csv",
    help="The file to write the results of this script to.",
)
args.add_argument(
    "--input",
    default="copyrights.csv",
    help="The file to read the existing copyright data from.",
)
options = args.parse_args()

# From here on, relative paths are resolved against the repository directory.
os.chdir(options.repo)

# Discard the output of a previous run; a missing file is not an error.
try:
    os.remove(options.output)
except FileNotFoundError:
    pass
|
||
|
||
# Rows of the input file whose media file still exists, keyed by file path.
csv_data = {}
# Result buckets. `removed` is filled while reading the input file below; the
# other three are populated later in the script.
added = []
changed = []
unchanged = []
removed = []
# Sanity-check that the input is in the same order as the output would be
csvfile_needs_sorting = False

# newline="" is what the csv module asks for when reading, so that line breaks
# inside quoted fields reach the parser untouched.
with open(options.input, newline="") as csvfile:
    previous_file = ""
    for row in csv.reader(csvfile):
        # A blank line parses as an empty list; indexing it would raise
        # IndexError, so skip it.
        if not row:
            continue

        # Skip the header line.
        if row[0] == "Date":
            continue

        # Pad truncated rows to the seven documented columns so that later
        # accesses to the "needs update" [5] and md5 [6] columns cannot fail.
        # An empty md5 never matches a real hash, so such a row is reported
        # as changed instead of crashing the script.
        if len(row) < 7:
            row.extend([""] * (7 - len(row)))

        file = row[1]
        # Plain string comparison against the previous path: any row that
        # sorts before its predecessor means the file is out of order.
        if file < previous_file:
            csvfile_needs_sorting = True
        previous_file = file

        # Entries whose file is gone are reported and dropped from the output.
        if not os.path.exists(file):
            removed.append(file)
            continue

        csv_data[file] = row
|
||
|
||
# Extensions of the media files that must be listed in the copyright file.
media_suffixes = (".png", ".jpg", ".webp", ".wav", ".ogg")

# The working directory is already the repository (see the os.chdir call
# above), so walk "." rather than options.repo: a relative --repo would
# otherwise be resolved a second time from inside the repository, and an
# absolute one would yield absolute paths that never match the
# repository-relative paths stored in the csv file.
for root, _, files in os.walk("."):
    for filename in files:
        if Path(filename).suffix not in media_suffixes:
            continue

        # normpath strips the leading "./" so the path has the same form as
        # the keys of csv_data.
        file = os.path.normpath(os.path.join(root, filename))
        digest = do_hash(file)

        if file not in csv_data:
            # New file: only the path, the new commit date and the hash are
            # known; license, author and notes are left blank for editing.
            added.append(["", file, "", "", "", do_git(file), digest])
        elif csv_data[file][6] != digest:
            # Known file whose content changed: record the new commit date
            # in the "needs update" column and refresh the stored hash.
            csv_data[file][5] = do_git(file)
            csv_data[file][6] = digest
            changed.append(csv_data[file])
        else:
            unchanged.append(csv_data[file])
|
||
|
||
# Order each bucket by file path (column 1).
added.sort(key=itemgetter(1))
changed.sort(key=itemgetter(1))
unchanged.sort(key=itemgetter(1))

# New and changed rows go first so they are easy to find and edit.
final_output = added + changed + unchanged

# Rows are written with csv.writer instead of ",".join(): the reader strips
# the quoting from fields that contain commas, quotes or line breaks, so
# joining the raw values would write such rows back with the wrong number of
# columns. Fields without special characters are written exactly as before.
if options.output != "":
    # newline="" hands line-ending control to the csv module; os.linesep
    # keeps the same row terminator that text mode produced previously.
    with open(options.output, 'w', newline="") as f:
        f.write("Date,File,License,Author - Real Name(other name);Real Name(other name);etc,Notes,Needs Update,MD5" + os.linesep)
        csv.writer(f, lineterminator=os.linesep).writerows(final_output)
else:
    # No output file requested: emit the rows (without a header) on stdout.
    csv.writer(sys.stdout, lineterminator="\n").writerows(final_output)
|
||
|
||
# Any single problem makes the run fail, but every problem found is reported
# before exiting so that one run shows the complete picture.
any_check_failed = bool(removed) or bool(added) or bool(changed) or csvfile_needs_sorting

if removed:
    print("There are ", len(removed), " removed images", sep="")

# The new and changed counts are always reported together, even if one is 0.
if added or changed:
    print("There are ", len(added), " new images", sep="")
    print("There are ", len(changed), " changed images", sep="")

if csvfile_needs_sorting:
    print("The input file isn’t sorted by filename")
    print(" Changed or newly added lines are put at the top for easy editing,\n but you should run the tool again after editing to sort the file.")

# A non-zero exit status is what makes the calling job (e.g. CI) fail.
if any_check_failed:
    sys.exit(1)
|