zotero/scripts/generate-isbn-ranges.py

#!/usr/bin/env python3

import urllib.request, sys, argparse, os, json, re
import xml.etree.ElementTree as ET

# Command line interface: one optional positional argument naming the output
# file. When it is omitted, `args.out_file` is None.
parser = argparse.ArgumentParser()
parser.add_argument(
	"out_file",
	nargs="?",
	help='File to output to.',
)
args = parser.parse_args()

# Fetch ranges
# See https://www.isbn-international.org/range_file_generation
url = 'https://www.isbn-international.org/export_rangemessage.xml'
print('Fetching XML file from ' + url, file=sys.stderr)
# Parse straight from the HTTP response; the context manager closes the
# connection as soon as parsing finishes (or fails) instead of leaving it
# to the garbage collector.
with urllib.request.urlopen(url) as response:
	rangesRoot = ET.parse(response)
print('Done', file=sys.stderr)

# Make sure we're always dealing with integers, so that nothing breaks in unexpected ways.
# ASCII digits only (\d would also accept other Unicode digits), and \Z rather
# than $ so that a value with a trailing newline is rejected as well.
intRE = re.compile(r'^[0-9]+\Z')

# Parse them into JSON
# uniqueRanges maps uccPrefix -> groupPrefix -> flat list of range bounds.
# sameRanges maps the '.'-joined bounds list -> every [uccPrefix, groupPrefix]
# pair that has exactly that list; the first pair is the one stored in uniqueRanges.
uniqueRanges = {}
sameRanges = {} # To reduce redundancy, we can alias same ranges
for group in rangesRoot.iter('Group'):
	# The prefix must be exactly two dash-separated integer strings. Check the
	# shape before unpacking so that a missing or malformed prefix is skipped
	# with a warning instead of aborting the whole run.
	prefixParts = (group.findtext('Prefix') or '').split('-')
	if len(prefixParts) != 2 or not all(intRE.match(part) for part in prefixParts):
		print("WARNING: Unexpected prefixes: " + " ".join(prefixParts), file=sys.stderr)
		continue
	uccPrefix, groupPrefix = prefixParts
	
	ranges = []
	for rule in group.iter('Rule'):
		length = int(rule.find('Length').text)
		if length <= 0: # 0 length means that the range has not been assigned yet
			continue
		
		# Same shape check as for the prefix: exactly two integer strings
		bounds = (rule.findtext('Range') or '').split('-')
		if len(bounds) != 2 or not all(intRE.match(bound) for bound in bounds):
			print("WARNING: Unexpected range: " + " ".join(bounds), file=sys.stderr)
			continue
		
		# Keep only the first `length` characters of each bound
		ranges.append(bounds[0][:length])
		ranges.append(bounds[1][:length])
	
	if not ranges:
		continue
	
	# In case this is out of order in the XML file
	# Sort ranges by string length first, then by numeric value
	# 0 9 00 09 100 0005
	# The length is compared as a number: a string key would order length 10
	# before length 2.
	ranges.sort(key=lambda x: (len(x), x))
	
	key = '.'.join(ranges)
	if key in sameRanges:
		# Identical to ranges already stored; only remember the alias
		sameRanges[key].append([uccPrefix, groupPrefix])
	else:
		uniqueRanges.setdefault(uccPrefix, {})[groupPrefix] = ranges
		sameRanges[key] = [[uccPrefix, groupPrefix]]


# Build the generated JavaScript source in `file`, starting with the banner
# and the opening of the function that wraps the ranges object.
file = (
	"/** THIS FILE WAS GENERATED AUTOMATICALLY **/\n"
	"\n"
	"/**\n"
	" * ISBN Registrant ranges from https://www.isbn-international.org/range_file_generation\n"
	"**/\n"
	"Zotero.ISBN = {};\n"
	"Zotero.ISBN.ranges = (function() {\n"
	"\tvar ranges = "
)

# Serialise the unique ranges with tab indentation and sorted keys
rangesJSON = json.dumps(uniqueRanges, indent="\t", sort_keys=True, separators=(',', ': '))
# Strip the whitespace json.dumps puts after "[", after each "," and before
# "]", so that every ranges array ends up on a single line
rangesJSON = re.sub(r'(?<= \[|\d"|",)\s+', '', rangesJSON)
# Indent every line after the first by one extra tab to sit inside the function body
file += rangesJSON.replace('\n', '\n\t')
file += ";\n\t\n\t"

# Groups whose ranges match an already stored group are not written out again;
# they are emitted as chained assignments that alias the stored array.
dupes = []
for aliases in sameRanges.values():
	if len(aliases) == 1:
		continue # Only one group has these ranges, nothing to alias
	
	# The first pair is the one that holds the data, so it has to be the
	# right-most operand of the chain. The remaining pairs are sorted so the
	# output stays stable and diffs stay small.
	owner = aliases[0]
	aliases[:] = sorted(aliases[1:], key='.'.join) + [owner]
	
	dupes.append(' = '.join("ranges['" + "']['".join(pair) + "']" for pair in aliases))

# Sort the statements as well, again to keep the output stable
dupes.sort()
file += ";\n\t".join(dupes) + ";"

file += "\n\t\n\treturn ranges;\n})();"


if args.out_file is not None:
	# Write to the file if one is provided
	print('Writing ranges to ' + args.out_file, file=sys.stderr)
	
	# The context manager flushes and closes the file even if the write
	# fails. The encoding is explicit so the result does not depend on the
	# platform's locale.
	with open(args.out_file, 'w', encoding='utf-8') as f:
		print(file, file=f)
else:
	# Print to stdout
	print(file)
Add zotero-build repo as `scripts` folder (minus obsolete 4.0 files) — 2023-04-23 08:37:35 +00:00
			`#!/usr/bin/env python3`

			`import urllib.request, sys, argparse, os, json, re`
			`import xml.etree.ElementTree as ET`

			`# Command line args`
			`parser = argparse.ArgumentParser()`
			`parser.add_argument("out_file", help='File to output to.', nargs="?")`
			`args = parser.parse_args();`

			`# Fetch ranges`
			`# See https://www.isbn-international.org/range_file_generation`
			`url = 'https://www.isbn-international.org/export_rangemessage.xml'`
			`print('Fetching XML file from ' + url, file=sys.stderr)`
			`rangesRoot = ET.parse(urllib.request.urlopen(url))`
			`print('Done', file=sys.stderr)`

			`# Make sure we're always dealing with integers, so that nothing breaks in unexpected ways`
			`intRE = re.compile(r'^\d+$');`

			`# Parse them into JSON`
			`uniqueRanges = {}`
			`sameRanges = {} # To reduce redundancy, we can alias same ranges`
			`for group in rangesRoot.iter('Group'):`
			`(uccPrefix, groupPrefix) = group.find('Prefix').text.split('-')`

			`if not intRE.match(uccPrefix) or not intRE.match(groupPrefix):`
			`print("WARNING: Unexpected prefixes: " + uccPrefix + " " + groupPrefix, file=sys.stderr)`
			`continue`

			`ranges = []`
			`for rule in group.iter('Rule'):`
			`length = int(rule.find('Length').text)`
			`if length <= 0: # 0 length means that the range has not been assigned yet`
			`continue`

			`range = rule.find('Range').text.split('-')`
			`if not intRE.match(range[0]) or not intRE.match(range[1]):`
			`print("WARNING: Unexpected range: " + range[0] + " " + range[1], file=sys.stderr)`
			`continue`

			`ranges.append(range[0][:length])`
			`ranges.append(range[1][:length])`

			`if len(ranges) == 0:`
			`continue`

			`# In case this is out of order in the XML file`
			`# Sort ranges by string length first, then by numeric value`
			`# 0 9 00 09 100 0005`
			`ranges.sort(key=lambda x: str(len(x)) + '-' + x)`

			`key = '.'.join(ranges)`
			`if key in sameRanges:`
			`sameRanges[key].append([uccPrefix, groupPrefix])`
			`else:`
			`if uccPrefix not in uniqueRanges:`
			`uniqueRanges[uccPrefix] = {}`

			`uniqueRanges[uccPrefix][groupPrefix] = ranges`
			`sameRanges[key] = [[uccPrefix, groupPrefix]]`


			`# Output to file as JavaScript`
			`file = """/** THIS FILE WAS GENERATED AUTOMATICALLY **/`

			`/**`
			`* ISBN Registrant ranges from https://www.isbn-international.org/range_file_generation`
			`**/`
			`Zotero.ISBN = {};`
			`Zotero.ISBN.ranges = (function() {`
			`var ranges = """`

			`rangesJSON = json.dumps(uniqueRanges, separators=(',', ': '), indent="\t", sort_keys=True)`
			`rangesJSON = re.sub(r'(?<= \[|\d"|",)\s+', '', rangesJSON) # Remove newlines in ranges array`
			`file += '\n\t'.join(rangesJSON.split('\n')) # Add extra indent`
			`file += ";\n\t\n\t"`

			`# For same ranges, don't duplicate data, just re-assign it`
			`dupes = []`
			`for _, ranges in sameRanges.items():`
			`if len(ranges) == 1:`
			`continue # No duplicates`

			`last = ranges.pop(0) # First range actually contains the value that needs to get assigned, so it needs to end up last`
			`ranges.sort(key=lambda r: '.'.join(r)) # Try to keep the list stable to keep the diff reasonable`
			`ranges.append(last)`

			`dupes.append(' = '.join(map(lambda r: "ranges['" + "']['".join(r) + "']", ranges)))`

			`#try to keeps this as stable as possible`
			`dupes.sort()`
			`file += ";\n\t".join(dupes) + ";"`

			`file += """`

			`return ranges;`
			`})();"""`


			`if args.out_file is not None:`
			`# Try printing to file if one is provided`
			`print('Writing ranges to ' + args.out_file, file=sys.stderr)`

			`f = open(args.out_file, 'w')`
			`print(file, file=f)`
			`else:`
			`# Print to stdout`
			`print(file)`