From 1f1a68e94e014b751e6bb085ea9e9ae6a2bbf4f9 Mon Sep 17 00:00:00 2001 From: "P. J. McDermott" Date: Mon, 5 Feb 2024 22:08:55 -0500 Subject: [PATCH] Check for and fix copyrights.csv rows with wrong numbers of fields This should prevent accidental uses of commas in fields without quotes. It would have caught the previous issue of str.join() not quoting fields that contain commas. For now though, it found a different issue: three rows added in commit c6313453143c had duplicated MD5 fields. --- copyrights.csv | 6 +++--- update_copyrights | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/copyrights.csv b/copyrights.csv index 319b929449e..5bf665ce8d7 100644 --- a/copyrights.csv +++ b/copyrights.csv @@ -3066,9 +3066,9 @@ Date,File,License,Author - Real Name(other name);Real Name(other name);etc,Notes 2015/02/20,data/core/images/help/l10n/es/hpxp.png,GNU GPL v2+,unknown,,,e8dbef425934eff5c9f734aabe960214 2015/02/20,data/core/images/help/l10n/es/recruit.png,GNU GPL v2+,unknown,,,d7f9b5862bc9465c4e1bc2513a00263f 2015/02/20,data/core/images/help/l10n/es/tooltip.png,GNU GPL v2+,unknown,,,f3721f44c30dfdb6f9c6086be30835c5 -2023/12/16,data/core/images/help/l10n/fi/hpxp.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,bab0586e45fcad729ee1b2b76cb5177c,bab0586e45fcad729ee1b2b76cb5177c -2023/12/16,data/core/images/help/l10n/fi/recruit.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,1b4e518b8a1829ecbd867ab71e6c54fb,1b4e518b8a1829ecbd867ab71e6c54fb -2023/12/16,data/core/images/help/l10n/fi/tooltip.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,5c7299de7c5f8cb200a921b521f2cc1b,5c7299de7c5f8cb200a921b521f2cc1b +2023/12/16,data/core/images/help/l10n/fi/hpxp.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,bab0586e45fcad729ee1b2b76cb5177c +2023/12/16,data/core/images/help/l10n/fi/recruit.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,1b4e518b8a1829ecbd867ab71e6c54fb +2023/12/16,data/core/images/help/l10n/fi/tooltip.png,CC BY-SA 4.0,Jaakko Saarikko (Styxnix),screenshot,,5c7299de7c5f8cb200a921b521f2cc1b 2015/02/20,data/core/images/help/l10n/fr/hpxp.png,GNU GPL v2+,unknown,,,3a123d59308bbedb3d6ee617ff77f8d1 2015/02/20,data/core/images/help/l10n/fr/recruit.png,GNU GPL v2+,unknown,,,137b03b4cd2569e00d9f7e70d87d6be5 2015/02/20,data/core/images/help/l10n/fr/tooltip.png,GNU GPL v2+,unknown,,,0dd5ff8f1536ce9dee90c6a7f26a0862 diff --git a/update_copyrights b/update_copyrights index 9d791ec761d..9d7dfe834a3 100755 --- a/update_copyrights +++ b/update_copyrights @@ -66,7 +66,13 @@ with contextlib.suppress(FileNotFoundError): os.remove(options.output) csv_data = {} +# Too few fields +missing_fields = [] +# Too many fields, possibly due to an unquoted comma +extra_fields = [] +# New images added = [] +# Changed images changed = [] # Already mentioned in the CSV file, but lacking something in either the license or author fields incomplete = [] @@ -106,6 +112,10 @@ for root, _, files in os.walk(options.repo): if not file in csv_data: added.append(["", file, "", "", "", do_git(file), hash]) + elif len(csv_data[file]) < 7: + missing_fields.append(csv_data[file]) + elif len(csv_data[file]) > 7: + extra_fields.append(csv_data[file]) elif csv_data[file][5] != "": update.append(csv_data[file]) elif csv_data[file][6] != hash: @@ -120,13 +130,15 @@ for root, _, files in os.walk(options.repo): else: unchanged.append(csv_data[file]) +missing_fields.sort(key=itemgetter(1)) +extra_fields.sort(key=itemgetter(1)) added.sort(key=itemgetter(1)) changed.sort(key=itemgetter(1)) incomplete.sort(key=itemgetter(1)) update.sort(key=itemgetter(1)) unchanged.sort(key=itemgetter(1)) -final_output = added + changed + incomplete + update + unchanged +final_output = missing_fields + extra_fields + added + changed + incomplete + update + unchanged if options.output != "": with open(options.output, 'w') as f: @@ -142,8 +154,10 @@ if len(removed) > 0: any_check_failed = True print("There are "+str(len(removed))+" removed images") -if len(added) > 0 or len(changed) > 0 or len(incomplete) > 0 or len(update) > 0: +if len(missing_fields) > 0 or len(extra_fields) > 0 or len(added) > 0 or len(changed) > 0 or len(incomplete) > 0 or len(update) > 0: any_check_failed = True + print("There are "+str(len(missing_fields))+" rows with too few fields") + print("There are "+str(len(extra_fields))+" rows with too many fields, possibly due to an unquoted comma") print("There are "+str(len(added))+" new images") print("There are "+str(len(changed))+" changed images") print("There are "+str(len(incomplete))+" images that lack license or author information")