ayaports/backports/postgresql15/icu-collations-hack.patch

893 lines
18 KiB
Diff

From: Jakub Jirutka <jakub@jirutka.cz>
Date: Wed, 03 Aug 2022 20:40:33 +0200
Subject: [PATCH] Hack to generate usable ICU-based collations with
icu-data-en
This is a downstream patch for Alpine Linux, it should never be
upstreamed in this form!
When the PostgreSQL cluster is initialized (using initdb(1)) or the
DB administrator calls `pg_import_system_collations()` directly, this
function creates COLLATIONs in the system catalog (pg_collation).
There are two types: libc-based and ICU-based. The latter are created
based on *locales* (not collations) known to ICU, i.e. based on the ICU
data installed at the time.
collationcmds.c includes the following comment:
> We use uloc_countAvailable()/uloc_getAvailable() rather than
> ucol_countAvailable()/ucol_getAvailable(). The former returns a full
> set of language+region combinations, whereas the latter only returns
> language+region combinations if they are distinct from the language's
> base collation. So there might not be a de-DE or en-GB, which would be
> confusing.
There's a problem with this approach: locales and collations are two
different things. ICU data may include collation algorithms and data for
all or some languages, but not locales (language + country/region).
The collation data is small compared to locales. There are ~800 locales
(combinations of language, country and variants), but only 98 collations.
There's a mapping between collations and locales hidden somewhere in ICU
data.
Since full ICU data is very big (30 MiB), we have created a stripped down
variant with only English locale (package icu-data-en, 2.6 MiB). It also
includes a subset of 18 collations that cover hundreds of languages.
When the cluster is initialized or `pg_import_system_collations()` is
called directly and only icu-data-en (default) is installed, the user
ends up with only und, en and en_GB ICU-based COLLATIONs. The user can
create the missing COLLATIONs manually, but a) this is neither expected
nor reasonable behaviour, and b) it's not easy to find out which locales
have a collation available.
I couldn't find any way to list all language+country variants for a
given collation. The list can be constructed by iterating over all
locales, but this approach is useless when the locale data is not
available... I should also note that the reverse lookup (locale ->
collation) is not a problem for ICU even when the full locale data is
stripped.
So I ended up with a very ugly workaround: pre-generating a list of
collation -> locale mappings and embedding it in the collationcmds.c
source. Then we replace `uloc_countAvailable()`/`uloc_getAvailable()`
with `ucol_countAvailable()`/`ucol_getAvailable()` to iterate over
the collations instead of the locales, and look up the locales in the
pre-generated list.
This data is quite stable, there's a very low risk of getting outdated in
a way that would be a problem.
`icu_coll_locales` has been generated using the following code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unicode/ucol.h>
// Copy-pasted from collationcmds.c.
static char *get_icu_language_tag(const char *localename) {
char buf[ULOC_FULLNAME_CAPACITY];
UErrorCode status = U_ZERO_ERROR;
uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status);
if (U_FAILURE(status)) {
fprintf(stderr, "could not convert locale name \"%s\" to language tag: %s\n",
localename, u_errorName(status));
return strdup(localename);
}
return strdup(buf);
}
int main() {
UErrorCode status = U_ZERO_ERROR;
for (int i = 0; i < uloc_countAvailable(); i++) {
const char *locale = uloc_getAvailable(i);
UCollator *collator = ucol_open(locale, &status);
const char *actual_locale = ucol_getLocaleByType(collator, ULOC_ACTUAL_LOCALE, &status);
// Strip @.*
char *ptr = strchr(actual_locale, '@');
if (ptr != NULL) {
*ptr = '\0';
}
if (strcmp(actual_locale, "root") == 0) {
actual_locale = "";
}
if (strcmp(actual_locale, locale) != 0) {
printf("\"%s\", \"%s\",\n", actual_locale, get_icu_language_tag(locale));
}
ucol_close(collator);
}
return 0;
}
compiled and executed using:
gcc -o main main.c $(pkg-config --libs icu-uc icu-io) && ./main | sort | uniq
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -572,6 +572,715 @@
return result;
}
+
+/*
+ * XXX-Patched: Added a static mapping: collation name (parent, "" = ICU root) to locale (children), as flat pairs ended by NULL, NULL.
+ * I'm gonna burn in hell for this...
+ */
+static char* icu_coll_locales[] = {
+ "", "agq",
+ "", "agq-CM",
+ "", "ak",
+ "", "ak-GH",
+ "", "asa",
+ "", "asa-TZ",
+ "", "ast",
+ "", "ast-ES",
+ "", "bas",
+ "", "bas-CM",
+ "", "bem",
+ "", "bem-ZM",
+ "", "bez",
+ "", "bez-TZ",
+ "", "bm",
+ "", "bm-ML",
+ "", "brx",
+ "", "brx-IN",
+ "", "ca",
+ "", "ca-AD",
+ "", "ca-ES",
+ "", "ca-FR",
+ "", "ca-IT",
+ "", "ccp",
+ "", "ccp-BD",
+ "", "ccp-IN",
+ "", "ce",
+ "", "ce-RU",
+ "", "cgg",
+ "", "cgg-UG",
+ "", "ckb",
+ "", "ckb-IQ",
+ "", "ckb-IR",
+ "", "dav",
+ "", "dav-KE",
+ "", "de",
+ "", "de-AT",
+ "", "de-BE",
+ "", "de-CH",
+ "", "de-DE",
+ "", "de-IT",
+ "", "de-LI",
+ "", "de-LU",
+ "", "dje",
+ "", "dje-NE",
+ "", "doi",
+ "", "doi-IN",
+ "", "dua",
+ "", "dua-CM",
+ "", "dyo",
+ "", "dyo-SN",
+ "", "dz",
+ "", "dz-BT",
+ "", "ebu",
+ "", "ebu-KE",
+ "", "en",
+ "", "en-001",
+ "", "en-150",
+ "", "en-AE",
+ "", "en-AG",
+ "", "en-AI",
+ "", "en-AS",
+ "", "en-AT",
+ "", "en-AU",
+ "", "en-BB",
+ "", "en-BE",
+ "", "en-BI",
+ "", "en-BM",
+ "", "en-BS",
+ "", "en-BW",
+ "", "en-BZ",
+ "", "en-CA",
+ "", "en-CC",
+ "", "en-CH",
+ "", "en-CK",
+ "", "en-CM",
+ "", "en-CX",
+ "", "en-CY",
+ "", "en-DE",
+ "", "en-DG",
+ "", "en-DK",
+ "", "en-DM",
+ "", "en-ER",
+ "", "en-FI",
+ "", "en-FJ",
+ "", "en-FK",
+ "", "en-FM",
+ "", "en-GB",
+ "", "en-GD",
+ "", "en-GG",
+ "", "en-GH",
+ "", "en-GI",
+ "", "en-GM",
+ "", "en-GU",
+ "", "en-GY",
+ "", "en-HK",
+ "", "en-IE",
+ "", "en-IL",
+ "", "en-IM",
+ "", "en-IN",
+ "", "en-IO",
+ "", "en-JE",
+ "", "en-JM",
+ "", "en-KE",
+ "", "en-KI",
+ "", "en-KN",
+ "", "en-KY",
+ "", "en-LC",
+ "", "en-LR",
+ "", "en-LS",
+ "", "en-MG",
+ "", "en-MH",
+ "", "en-MO",
+ "", "en-MP",
+ "", "en-MS",
+ "", "en-MT",
+ "", "en-MU",
+ "", "en-MV",
+ "", "en-MW",
+ "", "en-MY",
+ "", "en-NA",
+ "", "en-NF",
+ "", "en-NG",
+ "", "en-NL",
+ "", "en-NR",
+ "", "en-NU",
+ "", "en-NZ",
+ "", "en-PG",
+ "", "en-PH",
+ "", "en-PK",
+ "", "en-PN",
+ "", "en-PR",
+ "", "en-PW",
+ "", "en-RW",
+ "", "en-SB",
+ "", "en-SC",
+ "", "en-SD",
+ "", "en-SE",
+ "", "en-SG",
+ "", "en-SH",
+ "", "en-SI",
+ "", "en-SL",
+ "", "en-SS",
+ "", "en-SX",
+ "", "en-SZ",
+ "", "en-TC",
+ "", "en-TK",
+ "", "en-TO",
+ "", "en-TT",
+ "", "en-TV",
+ "", "en-TZ",
+ "", "en-UG",
+ "", "en-UM",
+ "", "en-US",
+ "", "en-VC",
+ "", "en-VG",
+ "", "en-VI",
+ "", "en-VU",
+ "", "en-WS",
+ "", "en-ZA",
+ "", "en-ZM",
+ "", "en-ZW",
+ "", "eu",
+ "", "eu-ES",
+ "", "ewo",
+ "", "ewo-CM",
+ "", "ff",
+ "", "ff-Latn",
+ "", "ff-Latn-BF",
+ "", "ff-Latn-CM",
+ "", "ff-Latn-GH",
+ "", "ff-Latn-GM",
+ "", "ff-Latn-GN",
+ "", "ff-Latn-GW",
+ "", "ff-Latn-LR",
+ "", "ff-Latn-MR",
+ "", "ff-Latn-NE",
+ "", "ff-Latn-NG",
+ "", "ff-Latn-SL",
+ "", "ff-Latn-SN",
+ "", "fr",
+ "", "fr-BE",
+ "", "fr-BF",
+ "", "fr-BI",
+ "", "fr-BJ",
+ "", "fr-BL",
+ "", "fr-CD",
+ "", "fr-CF",
+ "", "fr-CG",
+ "", "fr-CH",
+ "", "fr-CI",
+ "", "fr-CM",
+ "", "fr-DJ",
+ "", "fr-DZ",
+ "", "fr-FR",
+ "", "fr-GA",
+ "", "fr-GF",
+ "", "fr-GN",
+ "", "fr-GP",
+ "", "fr-GQ",
+ "", "fr-HT",
+ "", "fr-KM",
+ "", "fr-LU",
+ "", "fr-MA",
+ "", "fr-MC",
+ "", "fr-MF",
+ "", "fr-MG",
+ "", "fr-ML",
+ "", "fr-MQ",
+ "", "fr-MR",
+ "", "fr-MU",
+ "", "fr-NC",
+ "", "fr-NE",
+ "", "fr-PF",
+ "", "fr-PM",
+ "", "fr-RE",
+ "", "fr-RW",
+ "", "fr-SC",
+ "", "fr-SN",
+ "", "fr-SY",
+ "", "fr-TD",
+ "", "fr-TG",
+ "", "fr-TN",
+ "", "fr-VU",
+ "", "fr-WF",
+ "", "fr-YT",
+ "", "fur",
+ "", "fur-IT",
+ "", "fy",
+ "", "fy-NL",
+ "", "ga",
+ "", "ga-GB",
+ "", "ga-IE",
+ "", "gd",
+ "", "gd-GB",
+ "", "gsw",
+ "", "gsw-CH",
+ "", "gsw-FR",
+ "", "gsw-LI",
+ "", "guz",
+ "", "guz-KE",
+ "", "gv",
+ "", "gv-IM",
+ "", "ia",
+ "", "ia-001",
+ "", "id",
+ "", "id-ID",
+ "", "ii",
+ "", "ii-CN",
+ "", "it",
+ "", "it-CH",
+ "", "it-IT",
+ "", "it-SM",
+ "", "it-VA",
+ "", "jgo",
+ "", "jgo-CM",
+ "", "jmc",
+ "", "jmc-TZ",
+ "", "jv",
+ "", "jv-ID",
+ "", "kab",
+ "", "kab-DZ",
+ "", "kam",
+ "", "kam-KE",
+ "", "kde",
+ "", "kde-TZ",
+ "", "kea",
+ "", "kea-CV",
+ "", "kgp",
+ "", "kgp-BR",
+ "", "khq",
+ "", "khq-ML",
+ "", "ki",
+ "", "ki-KE",
+ "", "kkj",
+ "", "kkj-CM",
+ "", "kln",
+ "", "kln-KE",
+ "", "ks",
+ "", "ks-Arab",
+ "", "ks-Arab-IN",
+ "", "ks-Deva",
+ "", "ks-Deva-IN",
+ "", "ksb",
+ "", "ksb-TZ",
+ "", "ksf",
+ "", "ksf-CM",
+ "", "ksh",
+ "", "ksh-DE",
+ "", "kw",
+ "", "kw-GB",
+ "", "lag",
+ "", "lag-TZ",
+ "", "lb",
+ "", "lb-LU",
+ "", "lg",
+ "", "lg-UG",
+ "", "lrc",
+ "", "lrc-IQ",
+ "", "lrc-IR",
+ "", "lu",
+ "", "lu-CD",
+ "", "luo",
+ "", "luo-KE",
+ "", "luy",
+ "", "luy-KE",
+ "", "mai",
+ "", "mai-IN",
+ "", "mas",
+ "", "mas-KE",
+ "", "mas-TZ",
+ "", "mer",
+ "", "mer-KE",
+ "", "mfe",
+ "", "mfe-MU",
+ "", "mg",
+ "", "mg-MG",
+ "", "mgh",
+ "", "mgh-MZ",
+ "", "mgo",
+ "", "mgo-CM",
+ "", "mi",
+ "", "mi-NZ",
+ "", "mni",
+ "", "mni-Beng",
+ "", "mni-Beng-IN",
+ "", "ms",
+ "", "ms-BN",
+ "", "ms-ID",
+ "", "ms-MY",
+ "", "ms-SG",
+ "", "mua",
+ "", "mua-CM",
+ "", "mzn",
+ "", "mzn-IR",
+ "", "naq",
+ "", "naq-NA",
+ "", "nd",
+ "", "nd-ZW",
+ "", "nl",
+ "", "nl-AW",
+ "", "nl-BE",
+ "", "nl-BQ",
+ "", "nl-CW",
+ "", "nl-NL",
+ "", "nl-SR",
+ "", "nl-SX",
+ "", "nmg",
+ "", "nmg-CM",
+ "", "nnh",
+ "", "nnh-CM",
+ "", "nus",
+ "", "nus-SS",
+ "", "nyn",
+ "", "nyn-UG",
+ "", "os",
+ "", "os-GE",
+ "", "os-RU",
+ "", "pcm",
+ "", "pcm-NG",
+ "", "pt",
+ "", "pt-AO",
+ "", "pt-BR",
+ "", "pt-CH",
+ "", "pt-CV",
+ "", "pt-GQ",
+ "", "pt-GW",
+ "", "pt-LU",
+ "", "pt-MO",
+ "", "pt-MZ",
+ "", "pt-PT",
+ "", "pt-ST",
+ "", "pt-TL",
+ "", "qu",
+ "", "qu-BO",
+ "", "qu-EC",
+ "", "qu-PE",
+ "", "rm",
+ "", "rm-CH",
+ "", "rn",
+ "", "rn-BI",
+ "", "rof",
+ "", "rof-TZ",
+ "", "rw",
+ "", "rw-RW",
+ "", "rwk",
+ "", "rwk-TZ",
+ "", "sa",
+ "", "sa-IN",
+ "", "sah",
+ "", "sah-RU",
+ "", "saq",
+ "", "saq-KE",
+ "", "sat",
+ "", "sat-Olck",
+ "", "sat-Olck-IN",
+ "", "sbp",
+ "", "sbp-TZ",
+ "", "sc",
+ "", "sc-IT",
+ "", "sd",
+ "", "sd-Arab",
+ "", "sd-Arab-PK",
+ "", "sd-Deva",
+ "", "sd-Deva-IN",
+ "", "seh",
+ "", "seh-MZ",
+ "", "ses",
+ "", "ses-ML",
+ "", "sg",
+ "", "sg-CF",
+ "", "shi",
+ "", "shi-Latn",
+ "", "shi-Latn-MA",
+ "", "shi-Tfng",
+ "", "shi-Tfng-MA",
+ "", "sn",
+ "", "sn-ZW",
+ "", "so",
+ "", "so-DJ",
+ "", "so-ET",
+ "", "so-KE",
+ "", "so-SO",
+ "", "su",
+ "", "su-Latn",
+ "", "su-Latn-ID",
+ "", "sw",
+ "", "sw-CD",
+ "", "sw-KE",
+ "", "sw-TZ",
+ "", "sw-UG",
+ "", "teo",
+ "", "teo-KE",
+ "", "teo-UG",
+ "", "tg",
+ "", "tg-TJ",
+ "", "ti",
+ "", "ti-ER",
+ "", "ti-ET",
+ "", "tt",
+ "", "tt-RU",
+ "", "twq",
+ "", "twq-NE",
+ "", "tzm",
+ "", "tzm-MA",
+ "", "vai",
+ "", "vai-Latn",
+ "", "vai-Latn-LR",
+ "", "vai-Vaii",
+ "", "vai-Vaii-LR",
+ "", "vun",
+ "", "vun-TZ",
+ "", "wae",
+ "", "wae-CH",
+ "", "xh",
+ "", "xh-ZA",
+ "", "xog",
+ "", "xog-UG",
+ "", "yav",
+ "", "yav-CM",
+ "", "yrl",
+ "", "yrl-BR",
+ "", "yrl-CO",
+ "", "yrl-VE",
+ "", "zgh",
+ "", "zgh-MA",
+ "", "zu",
+ "", "zu-ZA",
+ "af", "af-NA",
+ "af", "af-ZA",
+ "am", "am-ET",
+ "ar", "ar-001",
+ "ar", "ar-AE",
+ "ar", "ar-BH",
+ "ar", "ar-DJ",
+ "ar", "ar-DZ",
+ "ar", "ar-EG",
+ "ar", "ar-EH",
+ "ar", "ar-ER",
+ "ar", "ar-IL",
+ "ar", "ar-IQ",
+ "ar", "ar-JO",
+ "ar", "ar-KM",
+ "ar", "ar-KW",
+ "ar", "ar-LB",
+ "ar", "ar-LY",
+ "ar", "ar-MA",
+ "ar", "ar-MR",
+ "ar", "ar-OM",
+ "ar", "ar-PS",
+ "ar", "ar-QA",
+ "ar", "ar-SA",
+ "ar", "ar-SD",
+ "ar", "ar-SO",
+ "ar", "ar-SS",
+ "ar", "ar-SY",
+ "ar", "ar-TD",
+ "ar", "ar-TN",
+ "ar", "ar-YE",
+ "as", "as-IN",
+ "az", "az-Cyrl",
+ "az", "az-Cyrl-AZ",
+ "az", "az-Latn",
+ "az", "az-Latn-AZ",
+ "be", "be-BY",
+ "bg", "bg-BG",
+ "bn", "bn-BD",
+ "bn", "bn-IN",
+ "bo", "bo-CN",
+ "bo", "bo-IN",
+ "br", "br-FR",
+ "bs", "bs-Latn",
+ "bs", "bs-Latn-BA",
+ "bs_Cyrl", "bs-Cyrl-BA",
+ "ceb", "ceb-PH",
+ "chr", "chr-US",
+ "cs", "cs-CZ",
+ "cy", "cy-GB",
+ "da", "da-DK",
+ "da", "da-GL",
+ "dsb", "dsb-DE",
+ "ee", "ee-GH",
+ "ee", "ee-TG",
+ "el", "el-CY",
+ "el", "el-GR",
+ "eo", "eo-001",
+ "es", "es-419",
+ "es", "es-AR",
+ "es", "es-BO",
+ "es", "es-BR",
+ "es", "es-BZ",
+ "es", "es-CL",
+ "es", "es-CO",
+ "es", "es-CR",
+ "es", "es-CU",
+ "es", "es-DO",
+ "es", "es-EA",
+ "es", "es-EC",
+ "es", "es-ES",
+ "es", "es-GQ",
+ "es", "es-GT",
+ "es", "es-HN",
+ "es", "es-IC",
+ "es", "es-MX",
+ "es", "es-NI",
+ "es", "es-PA",
+ "es", "es-PE",
+ "es", "es-PH",
+ "es", "es-PR",
+ "es", "es-PY",
+ "es", "es-SV",
+ "es", "es-US",
+ "es", "es-UY",
+ "es", "es-VE",
+ "et", "et-EE",
+ "fa", "fa-IR",
+ "ff_Adlm", "ff-Adlm-BF",
+ "ff_Adlm", "ff-Adlm-CM",
+ "ff_Adlm", "ff-Adlm-GH",
+ "ff_Adlm", "ff-Adlm-GM",
+ "ff_Adlm", "ff-Adlm-GN",
+ "ff_Adlm", "ff-Adlm-GW",
+ "ff_Adlm", "ff-Adlm-LR",
+ "ff_Adlm", "ff-Adlm-MR",
+ "ff_Adlm", "ff-Adlm-NE",
+ "ff_Adlm", "ff-Adlm-NG",
+ "ff_Adlm", "ff-Adlm-SL",
+ "ff_Adlm", "ff-Adlm-SN",
+ "fi", "fi-FI",
+ "fil", "fil-PH",
+ "fo", "fo-DK",
+ "fo", "fo-FO",
+ "gl", "gl-ES",
+ "gu", "gu-IN",
+ "ha", "ha-GH",
+ "ha", "ha-NE",
+ "ha", "ha-NG",
+ "haw", "haw-US",
+ "he", "he-IL",
+ "hi", "hi-IN",
+ "hi", "hi-Latn",
+ "hi", "hi-Latn-IN",
+ "hr", "hr-BA",
+ "hr", "hr-HR",
+ "hsb", "hsb-DE",
+ "hu", "hu-HU",
+ "hy", "hy-AM",
+ "ig", "ig-NG",
+ "is", "is-IS",
+ "ja", "ja-JP",
+ "ka", "ka-GE",
+ "kk", "kk-KZ",
+ "kl", "kl-GL",
+ "km", "km-KH",
+ "kn", "kn-IN",
+ "ko", "ko-KP",
+ "ko", "ko-KR",
+ "kok", "kok-IN",
+ "ku", "ku-TR",
+ "ky", "ky-KG",
+ "lkt", "lkt-US",
+ "ln", "ln-AO",
+ "ln", "ln-CD",
+ "ln", "ln-CF",
+ "ln", "ln-CG",
+ "lo", "lo-LA",
+ "lt", "lt-LT",
+ "lv", "lv-LV",
+ "mk", "mk-MK",
+ "ml", "ml-IN",
+ "mn", "mn-MN",
+ "mr", "mr-IN",
+ "mt", "mt-MT",
+ "my", "my-MM",
+ "ne", "ne-IN",
+ "ne", "ne-NP",
+ "no", "nb",
+ "no", "nb-NO",
+ "no", "nb-SJ",
+ "no", "nn",
+ "no", "nn-NO",
+ "om", "om-ET",
+ "om", "om-KE",
+ "or", "or-IN",
+ "pa", "pa-Arab",
+ "pa", "pa-Arab-PK",
+ "pa", "pa-Guru",
+ "pa", "pa-Guru-IN",
+ "pl", "pl-PL",
+ "ps", "ps-AF",
+ "ps", "ps-PK",
+ "ro", "ro-MD",
+ "ro", "ro-RO",
+ "ru", "ru-BY",
+ "ru", "ru-KG",
+ "ru", "ru-KZ",
+ "ru", "ru-MD",
+ "ru", "ru-RU",
+ "ru", "ru-UA",
+ "se", "se-FI",
+ "se", "se-NO",
+ "se", "se-SE",
+ "si", "si-LK",
+ "sk", "sk-SK",
+ "sl", "sl-SI",
+ "smn", "smn-FI",
+ "sq", "sq-AL",
+ "sq", "sq-MK",
+ "sq", "sq-XK",
+ "sr", "sr-Cyrl",
+ "sr", "sr-Cyrl-BA",
+ "sr", "sr-Cyrl-ME",
+ "sr", "sr-Cyrl-RS",
+ "sr", "sr-Cyrl-XK",
+ "sr_Latn", "sr-Latn-BA",
+ "sr_Latn", "sr-Latn-ME",
+ "sr_Latn", "sr-Latn-RS",
+ "sr_Latn", "sr-Latn-XK",
+ "sv", "sv-AX",
+ "sv", "sv-FI",
+ "sv", "sv-SE",
+ "ta", "ta-IN",
+ "ta", "ta-LK",
+ "ta", "ta-MY",
+ "ta", "ta-SG",
+ "te", "te-IN",
+ "th", "th-TH",
+ "tk", "tk-TM",
+ "to", "to-TO",
+ "tr", "tr-CY",
+ "tr", "tr-TR",
+ "ug", "ug-CN",
+ "uk", "uk-UA",
+ "ur", "ur-IN",
+ "ur", "ur-PK",
+ "uz", "uz-Arab",
+ "uz", "uz-Arab-AF",
+ "uz", "uz-Cyrl",
+ "uz", "uz-Cyrl-UZ",
+ "uz", "uz-Latn",
+ "uz", "uz-Latn-UZ",
+ "vi", "vi-VN",
+ "wo", "wo-SN",
+ "yi", "yi-001",
+ "yo", "yo-BJ",
+ "yo", "yo-NG",
+ "zh", "yue",
+ "zh", "yue-Hans",
+ "zh", "yue-Hans-CN",
+ "zh", "yue-Hant",
+ "zh", "yue-Hant-HK",
+ "zh", "zh-Hans",
+ "zh", "zh-Hans-CN",
+ "zh", "zh-Hans-HK",
+ "zh", "zh-Hans-MO",
+ "zh", "zh-Hans-SG",
+ "zh", "zh-Hant",
+ "zh", "zh-Hant-HK",
+ "zh", "zh-Hant-MO",
+ "zh", "zh-Hant-TW",
+ NULL, NULL,
+};
+
#endif /* USE_ICU */
@@ -772,18 +1481,19 @@
* Start the loop at -1 to sneak in the root locale without too much
* code duplication.
*/
- for (i = -1; i < uloc_countAvailable(); i++)
+ for (i = -1; i < ucol_countAvailable(); i++) /* XXX-Patched: changed from uloc_countAvailable() */
{
const char *name;
char *langtag;
char *icucomment;
const char *iculocstr;
Oid collid;
+ char **ptr; /* XXX-Patched: added */
if (i == -1)
name = ""; /* ICU root locale */
else
- name = uloc_getAvailable(i);
+ name = ucol_getAvailable(i); /* XXX-Patched: changed from uloc_getAvailable() */
langtag = get_icu_language_tag(name);
iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
@@ -812,6 +1523,44 @@
CreateComments(collid, CollationRelationId, 0,
icucomment);
}
+
+ /*
+ * XXX-Patched: The following block is added to create collations also for derived
+ * locales (combination of language+country/region).
+ * It's terribly inefficient, but in the big picture, it doesn't matter that much
+ * (it's typically called only once in the life of the cluster).
+ */
+ for (ptr = icu_coll_locales; *ptr != NULL; ptr++)
+ {
+ /*
+ * icu_coll_locales is a flat array of pairs: collation name, then locale (langtag).
+ * *ptr++ yields the collation name for strcmp() and leaves ptr on the pair's langtag;
+ * the loop's own ptr++ then advances to the next pair (or to the NULL terminator).
+ */
+ if (strcmp(*ptr++, name) == 0) {
+ const char *langtag;
+
+ langtag = pstrdup(*ptr);
+ collid = CollationCreate(psprintf("%s-x-icu", langtag),
+ nspid, GetUserId(),
+ COLLPROVIDER_ICU, true, -1,
+ NULL, NULL, langtag,
+ get_collation_actual_version(COLLPROVIDER_ICU, langtag),
+ true, true);
+
+ if (OidIsValid(collid))
+ {
+ ncreated++;
+
+ CommandCounterIncrement();
+
+ icucomment = get_icu_locale_comment(langtag);
+ if (icucomment)
+ CreateComments(collid, CollationRelationId, 0,
+ icucomment);
+ }
+ }
+ }
}
}
#endif /* USE_ICU */