zotero/scripts/locale/localizer

335 lines
8.3 KiB
Text
Raw Normal View History

#!/usr/bin/env php
<?php
error_reporting(E_ALL);

/**
 * Script-wide error handler: print the error with its location and abort.
 *
 * PHP invokes a custom handler for every error, including ones silenced with
 * the @ operator or excluded by error_reporting(). Those are skipped here so
 * that deliberately suppressed calls (e.g. @mkdir on an existing directory)
 * do not terminate the script.
 *
 * @param int $errno Error level of the raised error
 * @param string $errstr Error message
 * @param string $errfile File in which the error was raised
 * @param int $errline Line on which the error was raised
 * @return bool false when the error is masked, handing it back to PHP's
 *     standard handler; otherwise this function does not return
 */
function error_handler($errno, $errstr, $errfile, $errline) {
    // Inside the handler, error_reporting() reflects the mask in effect for
    // the failing statement, so a suppressed error fails this bit test
    if (!(error_reporting() & $errno)) {
        return false;
    }
    echo "$errstr at $errfile:$errline\n";
    exit(1);
}

set_error_handler('error_handler');
// Configuration
$english_path = 'en-US-new'; // New English locale
$locale_path = 'locales'; // Source locales from run script
$content_locale_path = 'content-locales'; // Existing content locales from previous version
$output_dir = 'output/locale'; // Directory to save merged locales
$content_output_dir = 'output/content'; // Directory to save merged content locales
$use_content_input_dir = false;
$use_content_output_dir = true; // set to true for XPI, false for BZ
$localeCodeInOutputXML = true; // set to true for XPI, false for BZ
$locale_re = '/([a-z]{2})(\-[A-Z]{2})?/';
$locale_file_re = '/^[a-z].+\.(dtd|properties)$/';

// Zotero files
$english_files = array_filter(
    scandir($english_path),
    fn ($path) => preg_match($locale_file_re, $path)
);

// Mozilla files, stored with their 'mozilla/' prefix so they can be used as
// relative paths below
$english_mozilla_files = array_map(
    fn ($path) => 'mozilla/' . $path,
    array_filter(
        scandir($english_path . "/mozilla"),
        fn ($path) => preg_match($locale_file_re, $path)
    )
);

$all_english_files = array_merge($english_files, $english_mozilla_files);

// Make output directories for each locale. $locales is already filtered by
// $locale_re, so no second match is needed here.
$locales = array_filter(scandir($locale_path), fn ($path) => preg_match($locale_re, $path));
foreach ($locales as $locale) {
    $dir = $output_dir . '/' . $locale . '/zotero/';
    // Explicit check instead of @mkdir: the script's error handler is still
    // invoked for @-suppressed warnings, so an existing directory must not
    // reach mkdir() at all
    if (!is_dir($dir)) {
        mkdir($dir, 0775, true);
    }
}

// Make content output directory for CSL files
if ($use_content_output_dir && !is_dir("$content_output_dir/csl/")) {
    mkdir("$content_output_dir/csl/", 0775, true);
}

foreach ($all_english_files as $file) {
    echo "Processing $file\n";
    $extension = substr(strrchr($file, '.'), 1);

    foreach ($locales as $locale) {
        // Reduce the directory name to the locale code it contains
        preg_match($locale_re, $locale, $matches);
        if (!isset($matches[1])) {
            continue;
        }
        $locale = $matches[1];
        if (!empty($matches[2])) {
            $locale .= $matches[2];
        }

        $locale_source_file = "$locale_path/$locale/zotero/$file";
        $output_locale_dir = "$output_dir/$locale/zotero/";

        // Recursive, because the normalized locale code can differ from the
        // directory name created in the loop above
        if (!is_dir($output_locale_dir . '/mozilla')) {
            mkdir($output_locale_dir . '/mozilla', 0755, true);
        }

        // NOTE(review): $locale_file_re only admits .dtd and .properties names,
        // so no entry of $all_english_files can equal 'locales.xml' as the
        // script stands -- confirm whether CSL merging is still meant to run.
        if ($file == 'locales.xml') {
            // CSL locale files always carry a region; two-letter codes are
            // doubled (e.g. "fr" -> "fr-FR")
            $csl_locale = strlen($locale) == 2
                ? $locale . '-' . strtoupper($locale)
                : $locale;

            if ($use_content_input_dir) {
                $locale_source_file = "$content_locale_path/csl/locales-$csl_locale.xml";
            }
            if ($use_content_output_dir) {
                $output_locale_dir = "$content_output_dir/csl/";
            }
            $save_file = $localeCodeInOutputXML ? "locales-$csl_locale.xml" : "locales.xml";

            echo "Saving {$output_locale_dir}{$save_file}\n";
            $string = generate_csl_locale(
                "$english_path/$file",
                $locale_source_file,
                $locale
            );
        }
        else {
            echo "Saving {$output_locale_dir}{$file}\n";
            $save_file = $file;
            $string = generate_locale(
                $extension,
                "$english_path/$file",
                $locale_source_file,
                $locale,
                // Preserve whitespace in Mozilla files
                str_contains($file, 'mozilla')
            );
        }

        // We can't handle this file, so bail
        if (!$string) {
            throw new Exception("Error generating file");
        }
        file_put_contents($output_locale_dir . $save_file, $string);
    }
}
/**
 * Read a .dtd or .properties file into a map of key => regex match.
 *
 * Each value is the full preg match for one entry, so callers can reach the
 * captured whitespace as well as the string itself. For 'dtd' the groups are
 * 1: space after ENTITY, 2: key, 3: space after key, 4: string,
 * 5: space before >, 6: newlines. For 'properties' group 1 is the key,
 * 2 the value and 3 the trailing whitespace.
 *
 * @param string $type 'dtd' or 'properties'
 * @param string $file Path of the file to parse
 * @param string|null $locale Locale code; when given, a missing updates.dtd
 *     is tolerated for a fixed list of locales
 * @return array Matches keyed by entry name; empty for a tolerated missing file
 * @throws Exception if the file is missing (and not tolerated) or the type is
 *     unsupported
 */
function parse_strings($type, $file, $locale = null) {
    if (!file_exists($file)) {
        $message = "Source file $file doesn't exist";
        $tolerated = $locale
            && strpos($file, 'updates.dtd') !== false
            && in_array($locale, ['br', 'en-GB', 'gl-ES', 'hu-HU', 'mn-MN']);
        if (!$tolerated) {
            throw new Exception($message);
        }
        echo $message . "\n";
        return [];
    }

    // Pick the entry pattern and the capture group that holds the key
    if ($type == 'dtd') {
        $pattern = '/<!ENTITY(\s*)([^\s]*)(\s*)"([^"]*)"(\s*)> *(\n*)(\n|$)/s';
        $key_group = 2;
    }
    else if ($type == 'properties') {
        $pattern = '/([^\s]*)\s*= *([^\n]*)(\s*)(\n|$)/s';
        $key_group = 1;
    }
    else {
        throw new Exception("Unsupported extension .$type");
    }

    preg_match_all($pattern, file_get_contents($file), $entries, PREG_SET_ORDER);

    $by_key = array();
    foreach ($entries as $entry) {
        // Later entries with the same key replace earlier ones
        $by_key[$entry[$key_group]] = $entry;
    }
    return $by_key;
}
/**
 * Merge a localized .dtd or .properties file against its English reference.
 *
 * The set and order of keys comes from the English file. Each key takes the
 * locale's string when the locale defines a non-empty one, otherwise the
 * English string. Trailing newlines after each entry are always taken from
 * the English entry.
 *
 * @param string $type 'dtd' or 'properties'
 * @param string $english_file Path of the English reference file
 * @param string $locale_file Path of the locale's version of the file
 * @param string $locale Locale code, passed on to parse_strings()
 * @param bool $preserveWhitespace For DTD entries, reuse the spacing captured
 *     from the chosen entry instead of normalizing it to single spaces
 * @return string|false Merged file content ending in one newline, or false
 *     for an unsupported type
 * @throws Exception if the English file yields no entries; exceptions from
 *     parse_strings() propagate
 */
function generate_locale($type, $english_file, $locale_file, $locale, $preserveWhitespace = false) {
    $output = '';

    // Keep copyright block at top of Mozilla files. Only read the file when it
    // exists: parse_strings() below decides whether a missing locale file is
    // tolerated, and reading it here first would raise a warning before that
    // decision could be made.
    if (is_file($locale_file)) {
        preg_match('/^(<!--.+?-->|# This Source Code.+?2.0\/\.)\s+/s', file_get_contents($locale_file), $matches);
        if ($matches) {
            $output .= $matches[0];
        }
    }

    $english_pairs = parse_strings($type, $english_file);
    if (!$english_pairs) {
        throw new Exception("No English pairs!");
    }
    $locale_pairs = parse_strings($type, $locale_file, $locale);

    foreach ($english_pairs as $key => $english_val) {
        $locale_val = $locale_pairs[$key] ?? $english_val;

        switch ($type) {
            case 'dtd':
                // Don't replace string with space, only truly empty string, or else we mess up
                // zotero.merge.of in Estonian, which apparently doesn't exist.
                // Compared against '' rather than empty() so that "0" survives.
                if ($locale_val[4] === '') {
                    $locale_val = $english_val;
                }

                // Keep spacing between components
                if ($preserveWhitespace) {
                    $prefix = '<!ENTITY' . $locale_val[1];
                    $middle = $locale_val[3];
                    $string = '"' . $locale_val[4] . '"';
                    $suffix = $locale_val[5] . ">" . $english_val[6];
                }
                else {
                    $prefix = '<!ENTITY ';
                    $middle = " ";
                    $string = '"' . $locale_val[4];
                    $suffix = '">' . $english_val[6];
                }
                break;

            case 'properties':
                // If empty (or whitespace-only) value, use English
                if (trim($locale_val[2]) === '') {
                    $locale_val = $english_val;
                }
                $prefix = '';
                $middle = '=';
                $string = $locale_val[2];
                $suffix = $english_val[3];
                break;

            default:
                echo "Unsupported extension $type\n";
                return false;
        }

        $output .= $prefix . $key . $middle . $string . $suffix . "\n";
    }

    return trim($output) . "\n";
}
/**
 * Build a CSL locales XML document for one locale from the English terms.
 *
 * Every <term> of the English file is emitted, in English order. A term's
 * text comes from the locale file's term with the same name and form when one
 * exists, and from the English term otherwise.
 *
 * @param string $english_file Path of the English locales.xml
 * @param string $locale_file Path of the locale's locales XML file
 * @param string $locale Locale code; its first two characters become xml:lang
 * @return string Formatted XML, or the unmodified English file content when
 *     the locale file is missing or empty
 */
function generate_csl_locale($english_file, $locale_file, $locale) {
    $english_str = file_get_contents($english_file);
    $english_sxe = new SimpleXMLElement($english_str);

    // Check for the file before reading it, so that a missing locale file
    // reaches the English fallback instead of raising a warning
    $str = is_file($locale_file) ? file_get_contents($locale_file) : false;
    if (!$str) {
        echo "Locale version of locales.xml not found\n";
        return $english_str;
    }
    $locale_sxe = new SimpleXMLElement($str);
    $locale_sxe->registerXPathNamespace('csl', 'http://purl.org/net/xbiblio/csl');

    $xw = new XMLWriter();
    $xw->openMemory();
    $xw->startDocument('1.0', 'UTF-8');
    $xw->startElement('terms');
    $xw->writeAttribute('xmlns', 'http://purl.org/net/xbiblio/csl');
    $xw->startElement('locale');
    $xw->writeAttribute('xml:lang', substr($locale, 0, 2));

    foreach ($english_sxe->locale->term as $term) {
        $name = $term->attributes()->name;
        $form = $term->attributes()->form;

        // Look up the locale's term with the same name and form
        $query = $form
            ? "//csl:term[@name='$name' and @form='$form']"
            : "//csl:term[@name='$name' and not(@form)]";
        $found = $locale_sxe->xpath($query);
        // Fall back to the English term when the locale has no counterpart
        $locale_term = isset($found[0]) ? $found[0] : $term;

        $xw->startElement('term');
        $xw->writeAttribute('name', (string) $name);
        if ($form) {
            $xw->writeAttribute('form', (string) $form);
        }

        if (count($term->children()) > 0) {
            // Term with <single>/<multiple> children: take each from the
            // locale when present, otherwise from English
            $xw->startElement('single');
            $xw->text((string) ($locale_term->single ? $locale_term->single : $term->single));
            $xw->endElement();
            $xw->startElement('multiple');
            $xw->text((string) ($locale_term->multiple ? $locale_term->multiple : $term->multiple));
            $xw->endElement();
        }
        else {
            // If original had children and we don't any longer, use English
            if (count($locale_term->children()) > 0) {
                $xw->text((string) $term);
            }
            // Otherwise use the locale string
            else {
                $xw->text((string) $locale_term);
            }
        }

        $xw->endElement(); // </term>
    }

    $xw->endElement(); // </locale>
    $xw->endElement(); // </terms>

    // Round-trip through DOMDocument to get indented output
    $doc = new DOMDocument('1.0');
    $doc->formatOutput = true;
    $doc->loadXML($xw->outputMemory(true));
    return $doc->saveXML();
}
?>