wesnoth/update_copyrights
P. J. McDermott 4c87a4be74 Fix license on LordBob's portraits, and check licenses
Commit 97c8feb8ca3b (pull #7903) specified for 58 of LordBob's portraits
a license of "GNU GPL v2+;CC BY-SA 4.0".  I don't know if the semicolon
is supposed to mean "and" or "or".

"And" (e.g. a GNU GPL v2+ file with CC BY-SA 4.0 modifications) isn't
legally possible.  "Or" appears incorrect, because LordBob [licensed][1]
his portraits under "the GNU GPL" and I can't find any evidence of him
also licensing them under CC BY-SA 4.0.

Also make update_copyrights check for possibly invalid licenses like
"GNU GPL v2+;CC BY-SA 4.0".

[1]: https://forums.wesnoth.org/viewtopic.php?p=329342#p329342
2024-01-30 21:20:10 -06:00

168 lines
5.4 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# encoding: utf-8
##
# This script checks whether any media file in the repository has been added or modified without a matching update to the CSV file that tracks image, sound, and music copyrights
##
import argparse
import contextlib
import csv
import hashlib
from operator import itemgetter
import os
from pathlib import Path
from subprocess import check_output
import sys
##
# csv file layout:
# [0] = current git commit date
# [1] = file path, relative to the repository root
# [2] = license name(s)
# [3] = authorship information
# [4] = notes
# [5] = new git commit date, if different from the value in [0]
# [6] = current md5 hash
##
##
# Add new licenses to this list:
# Avoid things like "GNU GPL v2+;CC BY-SA 4.0", unless you mean to dual license
# under either GNU GPL v2+ or CC BY-SA 4.0. GNU GPL v2+ and CC BY-SA 4.0 (e.g.
# a GNU GPL v2+ file with CC BY-SA 4.0 modifications) isn't legally possible.
##
# License names accepted verbatim in the license column ([2]) of the CSV file;
# any other value is reported as an unknown license.
known_licenses = (
    "CC BY-SA 4.0",
    "CC0",
    "GNU GPL v2+",
)
def do_git(file):
    """Return the date of the most recent commit that touched *file*.

    Runs ``git log -1`` in the current working directory and returns its
    output, formatted as ``YYYY/MM/DD``, with trailing newlines removed.
    The result is whatever git printed, so it is an empty string when git
    prints nothing for the path.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    output = check_output([
        "git", "log", "-1", "--format=%ad", "--date=format:%Y/%m/%d",
        # "--" ends option/revision parsing, so a path that starts with "-" or that
        # happens to match a branch or tag name is still treated as a path
        "--", file,
    ])
    return str(output, 'UTF-8').rstrip('\n')
def do_hash(file):
    """Return the hexadecimal MD5 digest of the contents of *file*.

    The file is opened in binary mode and fed to the hash in 65536-byte
    chunks, so it is never read into memory in one piece.
    """
    digest = hashlib.md5()
    with open(file, 'rb') as stream:
        # read() returns b"" at end of file, which ends the iteration
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
##
# program logic start
##
# Command-line interface: every option is optional and has a default
args = argparse.ArgumentParser()
for flag, default, description in (
    ("--repo", ".", "The directory of the Wesnoth repository to run this script against."),
    ("--output", "output.csv", "The file to write the results of this script to."),
    ("--input", "copyrights.csv", "The file to read the existing copyright data from."),
):
    args.add_argument(flag, default=default, help=description)
options = args.parse_args()

# Relative paths used from here on resolve against the repository directory
os.chdir(options.repo)

# Remove output left behind by an earlier run; a missing file is not an error
try:
    os.remove(options.output)
except FileNotFoundError:
    pass
# Rows of the input CSV file, keyed by file path
csv_data = dict()

# Media files found in the repository that the CSV file does not mention
added = list()
# Mentioned in the CSV file, but the MD5 hash no longer matches the file
changed = list()
# Mentioned in the CSV file, but lacking something in either the license or author fields
incomplete = list()
# Mentioned in the CSV file, but with something in the needs update field
update = list()
# Mentioned in the CSV file and not placed in any of the lists above
unchanged = list()
# Mentioned in the CSV file, but no longer present on disk
removed = list()

# Becomes True when the input is not in the same order as the output would be
csvfile_needs_sorting = False

# License values found in the CSV file that are not one of the known licenses
unknown_licenses = list()
# Load the existing copyright records.
# newline="" passes line endings through to the csv module untouched, which is how
# the csv documentation says a file given to csv.reader should be opened.
with open(options.input, newline="") as csvfile:
    previous_file = ""
    for row in csv.reader(csvfile):
        # csv.reader yields an empty list for a blank line; indexing it would raise IndexError
        if not row:
            continue
        # Skip the header row
        if row[0] == "Date":
            continue

        file = row[1]
        # The rows are expected in ascending order of file path (plain string comparison)
        if file < previous_file:
            csvfile_needs_sorting = True
        previous_file = file

        # Records for files that no longer exist are only reported, not kept
        if not os.path.exists(file):
            removed.append(file)
            continue

        csv_data[file] = row
# File name suffixes (case-sensitive) of the media files that need a copyright record
media_suffixes = (".png", ".jpg", ".webp", ".wav", ".ogg")

# os.chdir(options.repo) has already run, so the repository root is now ".".
# Walking options.repo here instead would resolve a relative path a second time, and
# would produce absolute paths for an absolute one; neither matches the
# repository-relative paths used as keys of csv_data.
for root, _, files in os.walk("."):
    for filename in files:
        if Path(filename).suffix not in media_suffixes:
            continue

        # normpath drops the leading "./" so the path has the same form as in the CSV file
        file = os.path.normpath(os.path.join(root, filename))
        md5sum = do_hash(file)
        record = csv_data.get(file)

        # The order of these checks decides which single list a file ends up in
        if record is None:
            # Not in the CSV file yet: license, author and notes are left for a human to fill in
            added.append(["", file, "", "", "", do_git(file), md5sum])
        elif record[5] != "":
            # Already flagged as needing an update
            update.append(record)
        elif record[6] != md5sum:
            # Content differs from the recorded hash: record the new commit date and hash
            record[5] = do_git(file)
            record[6] = md5sum
            changed.append(record)
        elif record[2].strip() == "" or record[3].strip() == "":
            # License or author is blank
            incomplete.append(record)
        elif record[2] not in known_licenses:
            # The license field must match one known license exactly
            unknown_licenses.append(record[2])
            incomplete.append(record)
        else:
            unchanged.append(record)
# Order every list by file path (column [1]), then put the lists that need attention
# ahead of the unchanged rows
for bucket in (added, changed, incomplete, update, unchanged):
    bucket.sort(key=itemgetter(1))
final_output = added + changed + incomplete + update + unchanged

# The rows were parsed by csv.reader, which removes quoting, so they have to be written
# back through csv.writer: joining the fields with "," would split any field that
# contains a comma or a quote into extra columns the next time the file is read.
# With lineterminator="\n", fields without such characters are written exactly as
# ",".join(row) + "\n" would write them.
if options.output != "":
    with open(options.output, 'w') as f:
        f.write("Date,File,License,Author - Real Name(other name);Real Name(other name);etc,Notes,Needs Update,MD5\n")
        csv.writer(f, lineterminator="\n").writerows(final_output)
else:
    # An empty --output sends the rows, without the header, to standard output
    csv.writer(sys.stdout, lineterminator="\n").writerows(final_output)
# Print a summary of every check; the exit status is 1 if any of them failed
any_check_failed = False

removed_count = len(removed)
if removed_count:
    any_check_failed = True
    print("There are "+str(removed_count)+" removed images")

# The four counts are printed together as soon as any one of them is non-zero
if added or changed or incomplete or update:
    any_check_failed = True
    print("There are "+str(len(added))+" new images")
    print("There are "+str(len(changed))+" changed images")
    print("There are "+str(len(incomplete))+" images that lack license or author information")
    print("There are "+str(len(update))+" images that need updated information")
    if options.output != "":
        print("These are at the top of the output for easy editing, run the tool again after editing to sort the file")

if csvfile_needs_sorting:
    any_check_failed = True
    print("The input file isnt sorted by filename")
    # When every row is unchanged, the output that was just written is the sorted file
    if options.output != "" and final_output == unchanged:
        print("The new output file is correctly sorted")

if unknown_licenses:
    any_check_failed = True
    print("Unknown licenses:")
    print(" " + "\n ".join(unknown_licenses))

if any_check_failed:
    sys.exit(1)