Various fixes to locale processing script

- Properly handle regular vs. Mozilla locale files
- Avoid whitespace changes as much as possible
- Keep copyright block at top of all Mozilla files
- Bug fixes
This commit is contained in:
Dan Stillman 2023-05-10 06:20:06 -04:00
parent 08a508b750
commit 27ccff636b

View file

@ -17,7 +17,7 @@ $use_content_output_dir = true; // set to true for XPI, false for BZ
$localeCodeInOutputXML = true; // set to true for XPI, false for BZ $localeCodeInOutputXML = true; // set to true for XPI, false for BZ
$locale_re = '/([a-z]{2})(\-[A-Z]{2})?/'; $locale_re = '/([a-z]{2})(\-[A-Z]{2})?/';
$locale_file_re = '/^[a-z].+\.(dtd|props)$/'; $locale_file_re = '/^[a-z].+\.(dtd|properties)$/';
// Zotero files // Zotero files
$english_files = array_filter( $english_files = array_filter(
@ -110,7 +110,9 @@ foreach ($all_english_files as $file) {
$extension, $extension,
"$english_path/$file", "$english_path/$file",
$locale_source_file, $locale_source_file,
$locale $locale,
// Preserve whitespace in Mozilla files
str_contains($file, 'mozilla')
); );
} }
@ -147,12 +149,13 @@ function parse_strings($type, $file, $locale = null) {
// 2: key // 2: key
// 3: space after key // 3: space after key
// 4: string // 4: string
// 5: space after "> // 5: space before >
$regex = '|<!ENTITY(\s*)([^\s]*)(\s*)"([^"]*)"(\s*>\s*)|s'; // 6: newlines
$regex = '/<!ENTITY(\s*)([^\s]*)(\s*)"([^"]*)"(\s*)> *(\n*)(\n|$)/s';
break; break;
case 'properties': case 'properties':
$regex = '|^(?:#\s*)?([^\s]*)\s*=\s*(.*)$|'; $regex = '/([^\s]*)\s*= *([^\n]*)(\s*)(\n|$)/s';
break; break;
default: default:
@ -164,8 +167,11 @@ function parse_strings($type, $file, $locale = null) {
if ($type == 'dtd') { if ($type == 'dtd') {
$pairs[$match[2]] = $match; $pairs[$match[2]] = $match;
} }
else if ($type == 'properties') {
$pairs[$match[1]] = $match;
}
else { else {
$pairs[$match[1]] = $match[2]; throw new Exception("Unsupported");
} }
} }
@ -174,11 +180,11 @@ function parse_strings($type, $file, $locale = null) {
function generate_locale($type, $english_file, $locale_file, $locale) { function generate_locale($type, $english_file, $locale_file, $locale, $preserveWhitespace = false) {
$output = ''; $output = '';
// Keep copyright block at top of Mozilla files // Keep copyright block at top of Mozilla files
preg_match('/<!--.+?-->\s+/s', file_get_contents($locale_file), $matches); preg_match('/^(<!--.+?-->|# This Source Code.+?2.0\/\.)\s+/s', file_get_contents($locale_file), $matches);
if ($matches) { if ($matches) {
$output .= $matches[0]; $output .= $matches[0];
} }
@ -198,21 +204,42 @@ function generate_locale($type, $english_file, $locale_file, $locale) {
continue; continue;
} }
$source_val = empty($locale_pairs[$key]) ? $english_pairs[$key] : $locale_pairs[$key]; $english_val = $english_pairs[$key];
$locale_val = empty($locale_pairs[$key]) ? $english_val : $locale_pairs[$key];
switch ($type) { switch ($type) {
case 'dtd': case 'dtd':
$prefix = '<!ENTITY' . $source_val[1]; // Don't replace string with space, only truly empty string, or else we mess up
$middle = $source_val[3]; // zotero.merge.of in Estonian, which apparently doesn't exist
$string = '"' . $source_val[4] . '"'; if (empty($locale_val[4])) {
$suffix = $source_val[5]; $locale_val = $english_val;
}
// Keep spacing between components
if ($preserveWhitespace) {
$prefix = '<!ENTITY' . $locale_val[1];
$middle = $locale_val[3];
$string = '"' . $locale_val[4] . '"';
$suffix = $locale_val[5] . ">" . $english_val[6];
}
else {
$prefix = '<!ENTITY ';
$middle = " ";
$string = '"' . $locale_val[4];
$suffix = '">' . $english_val[6];
}
break; break;
case 'properties': case 'properties':
// If empty value, use English
if (empty(trim($locale_val[2]))) {
$locale_val = $english_val;
}
$prefix = ''; $prefix = '';
$middle = '='; $middle = '=';
$string = $source_val; $string = $locale_val[2];
$suffix = '\n'; $suffix = $english_val[3];
break; break;
default: default:
@ -220,10 +247,10 @@ function generate_locale($type, $english_file, $locale_file, $locale) {
return false; return false;
} }
$output .= $prefix . $key . $middle . $string . $suffix; $output .= $prefix . $key . $middle . $string . $suffix . "\n";
} }
return $output; return trim($output) . "\n";
} }