Commit bdb839cb authored by Peter Eisentraut

Update unicode.org URLs

Use https, consistent host name, remove references to ftp.  Also
update the URLs for CLDR, which has moved from Trac to GitHub.
parent 9abb2bfc
@@ -24,9 +24,9 @@
 # Latin-ASCII.xml, the latest data sets released can be browsed directly
 # via [3]. Note that this script is compatible with at least release 29.
 #
-# [1] http://unicode.org/Public/8.0.0/ucd/UnicodeData.txt
-# [2] http://unicode.org/cldr/trac/export/14746/tags/release-34/common/transforms/Latin-ASCII.xml
-# [3] https://unicode.org/cldr/trac/browser/tags
+# [1] https://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt
+# [2] https://raw.githubusercontent.com/unicode-org/cldr/release-34/common/transforms/Latin-ASCII.xml
+# [3] https://github.com/unicode-org/cldr/tags
 
 # BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped
 # The approach is to be Python3 compatible with Python2 "backports".
@@ -113,7 +113,7 @@ def is_mark(codepoint):
 
 def is_letter_with_marks(codepoint, table):
     """Returns true for letters combined with one or more marks."""
-    # See http://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
+    # See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
 
     # Letter may have no combining characters, in which case it has
     # no marks.
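
For intuition about what the script treats as a "letter with marks": under NFD such a character decomposes into a base letter followed by combining marks. A minimal approximation using only Python's standard unicodedata module (not the script's own table-driven helper, which walks its precomputed table) looks like this:

    import unicodedata

    def is_letter_with_marks_approx(char):
        # NFD splits a precomposed character into base + combining marks.
        decomposed = unicodedata.normalize("NFD", char)
        if len(decomposed) < 2:
            return False
        base, marks = decomposed[0], decomposed[1:]
        # General categories: L* = letters, M* = marks (see the TR44 link above).
        return (unicodedata.category(base).startswith("L")
                and all(unicodedata.category(m).startswith("M") for m in marks))

    print(is_letter_with_marks_approx("\u00e9"))  # True: e + COMBINING ACUTE ACCENT
    print(is_letter_with_marks_approx("e"))       # False: no marks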
@@ -226,7 +226,7 @@ def special_cases():
     return charactersSet
 
 def main(args):
-    # http://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
+    # https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
     decomposition_type_pattern = re.compile(" *<[^>]*> *")
 
     table = {}
@@ -243,7 +243,7 @@ def main(args):
         for line in unicodeDataFile:
             fields = line.split(";")
             if len(fields) > 5:
-                # http://www.unicode.org/reports/tr44/tr44-14.html#UnicodeData.txt
+                # https://www.unicode.org/reports/tr44/tr44-14.html#UnicodeData.txt
                 general_category = fields[2]
                 decomposition = fields[5]
                 decomposition = re.sub(decomposition_type_pattern, ' ', decomposition)
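
To make the UnicodeData.txt layout concrete, here is a hedged, self-contained rendering of the parsing step above, using the real record for U+00C5; field 2 is the general category and field 5 the decomposition mapping, where a leading "<tag>" (stripped by the regex) marks a compatibility rather than canonical decomposition:

    import re

    decomposition_type_pattern = re.compile(" *<[^>]*> *")

    # Canonical decomposition: U+00C5 decomposes to U+0041 U+030A.
    line = "00C5;LATIN CAPITAL LETTER A WITH RING ABOVE;Lu;0;L;0041 030A;;;;N;LATIN CAPITAL LETTER A RING;;;00E5;"
    fields = line.split(";")
    print(fields[2])                                                   # Lu
    print(re.sub(decomposition_type_pattern, ' ', fields[5]).split())  # ['0041', '030A']

    # A compatibility mapping carries a "<tag>" that the regex strips:
    print(re.sub(decomposition_type_pattern, ' ', "<fraction> 0031 2044 0032").split())
    # ['0031', '2044', '0032']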
@@ -281,8 +281,8 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')
-    parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt. See <http://unicode.org/Public/8.0.0/ucd/UnicodeData.txt>.", type=str, required=True, dest='unicodeDataFilePath')
-    parser.add_argument("--latin-ascii-file", help="Path to XML file from Unicode Common Locale Data Repository (CLDR) corresponding to Latin-ASCII transliterator (Latin-ASCII.xml). See <http://unicode.org/cldr/trac/export/12304/tags/release-28/common/transforms/Latin-ASCII.xml>.", type=str, dest='latinAsciiFilePath')
+    parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')
+    parser.add_argument("--latin-ascii-file", help="Path to XML file from Unicode Common Locale Data Repository (CLDR) corresponding to Latin-ASCII transliterator (Latin-ASCII.xml).", type=str, dest='latinAsciiFilePath')
     parser.add_argument("--no-ligatures-expansion", help="Do not expand ligatures and do not use Unicode CLDR Latin-ASCII transliterator. By default, this option is not enabled and \"--latin-ascii-file\" argument is required. If this option is enabled, \"--latin-ascii-file\" argument is optional and ignored.", action="store_true", dest='noLigaturesExpansion')
     args = parser.parse_args()
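
A hypothetical invocation consistent with the arguments declared above (file names stand for locally downloaded copies of the data sets):

    python generate_unaccent_rules.py --unicode-data-file UnicodeData.txt \
        --latin-ascii-file Latin-ASCII.xml > unaccent.rules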
@@ -728,7 +728,7 @@
    <term><acronym>UTF</acronym></term>
    <listitem>
     <para>
-     <ulink url="http://www.unicode.org/">Unicode Transformation
+     <ulink url="https://www.unicode.org/">Unicode Transformation
      Format</ulink>
     </para>
    </listitem>
@@ -832,12 +832,12 @@ CREATE COLLATION german (provider = libc, locale = 'de_DE');
    </varlistentry>
   </variablelist>
 
-  See <ulink url="http://unicode.org/reports/tr35/tr35-collation.html">Unicode
+  See <ulink url="https://www.unicode.org/reports/tr35/tr35-collation.html">Unicode
   Technical Standard #35</ulink>
   and <ulink url="https://tools.ietf.org/html/bcp47">BCP 47</ulink> for
   details. The list of possible collation types (<literal>co</literal>
   subtag) can be found in
-  the <ulink url="http://www.unicode.org/repos/cldr/trunk/common/bcp47/collation.xml">CLDR
+  the <ulink url="https://github.com/unicode-org/cldr/blob/master/common/bcp47/collation.xml">CLDR
   repository</ulink>.
   The <ulink url="https://ssl.icu-project.org/icu-bin/locexp">ICU Locale
   Explorer</ulink> can be used to check the details of a particular locale
@@ -900,7 +900,7 @@ CREATE COLLATION french FROM "fr-x-icu";
    different Unicode normal forms. It is up to the collation provider to
    actually implement such insensitive comparisons; the deterministic flag
    only determines whether ties are to be broken using bytewise comparison.
-   See also <ulink url="https://unicode.org/reports/tr10">Unicode Technical
+   See also <ulink url="https://www.unicode.org/reports/tr10">Unicode Technical
    Standard 10</ulink> for more information on the terminology.
   </para>
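
As a plain-Python illustration (outside PostgreSQL) of the normal-form point above: canonically equivalent spellings of the same text differ bytewise, which is exactly the kind of tie a deterministic collation resolves by byte comparison:

    import unicodedata

    s = "re\u0301sume\u0301"                     # "résumé", decomposed (NFD)
    t = unicodedata.normalize("NFC", s)          # same text, precomposed (NFC)
    print(s == t)                                # False: bytewise different
    print(unicodedata.normalize("NFD", t) == s)  # True: canonically equivalent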
@@ -1926,7 +1926,7 @@ RESET client_encoding;
   </varlistentry>
 
   <varlistentry>
-   <term><ulink url="http://www.unicode.org/"></ulink></term>
+   <term><ulink url="https://www.unicode.org/"></ulink></term>
 
    <listitem>
     <para>
@@ -119,7 +119,7 @@ DOWNLOAD = wget -O $@ --no-use-server-timestamps
 #DOWNLOAD = curl -o $@
 
 BIG5.TXT CNS11643.TXT:
-	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/$(@F)
+	$(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/$(@F)
 
 euc-jis-2004-std.txt sjis-0213-2004-std.txt:
 	$(DOWNLOAD) http://x0213.org/codetable/$(@F)
@@ -131,19 +131,19 @@ GB2312.TXT:
 	$(DOWNLOAD) 'http://trac.greenstone.org/browser/trunk/gsdl/unicode/MAPPINGS/EASTASIA/GB/GB2312.TXT?rev=1842&format=txt'
 
 JIS0212.TXT:
-	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/$(@F)
+	$(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/$(@F)
 
 JOHAB.TXT KSX1001.TXT:
-	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/$(@F)
+	$(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/$(@F)
 
 KOI8-R.TXT KOI8-U.TXT:
-	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/$(@F)
+	$(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/$(@F)
 
 $(ISO8859TEXTS):
-	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/ISO8859/$(@F)
+	$(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/ISO8859/$(@F)
 
 $(filter-out CP8%,$(WINTEXTS)) CP932.TXT CP950.TXT:
-	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/$(@F)
+	$(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/$(@F)
 
 $(filter CP8%,$(WINTEXTS)):
-	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/$(@F)
+	$(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/$(@F)
@@ -8,8 +8,8 @@
 # map files provided by Unicode organization.
 # Unfortunately it is prohibited by the organization
 # to distribute the map files. So if you try to use this script,
-# you have to obtain the map files from the organization's ftp site.
-# ftp://www.unicode.org/Public/MAPPINGS/
+# you have to obtain the map files from the organization's download site.
+# https://www.unicode.org/Public/MAPPINGS/
 #
 # Our "big5" comes from BIG5.TXT, with the addition of the characters
 # in the range 0xf9d6-0xf9dc from CP950.TXT.
@@ -8,8 +8,8 @@
 # map files provided by Unicode organization.
 # Unfortunately it is prohibited by the organization
 # to distribute the map files. So if you try to use this script,
-# you have to obtain the map files from the organization's ftp site.
-# ftp://www.unicode.org/Public/MAPPINGS/
+# you have to obtain the map files from the organization's download site.
+# https://www.unicode.org/Public/MAPPINGS/
 # We assume the file include three tab-separated columns:
 #  JOHAB code in hex
 #  UCS-2 code in hex
@@ -8,8 +8,8 @@
 # map files provided by Unicode organization.
 # Unfortunately it is prohibited by the organization
 # to distribute the map files. So if you try to use this script,
-# you have to obtain the map files from the organization's ftp site.
-# ftp://www.unicode.org/Public/MAPPINGS/
+# you have to obtain the map files from the organization's download site.
+# https://www.unicode.org/Public/MAPPINGS/
 # We assume the file include three tab-separated columns:
 #  source character set code in hex
 #  UCS-2 code in hex
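
The map files these script headers describe share a simple shape: a hex source code point, a hex Unicode code point, and an optional trailing comment. The real conversion scripts are Perl; the following is only a hypothetical Python sketch of reading such a file, under the stated column assumptions:

    def parse_mapping(path):
        """Read a consortium map file into {source code point: Unicode code point}."""
        mapping = {}
        with open(path, encoding="ascii", errors="replace") as f:
            for line in f:
                line = line.split("#", 1)[0].strip()  # drop trailing comments
                parts = line.split()
                if len(parts) < 2:                    # skip blanks/unmapped entries
                    continue
                mapping[int(parts[0], 16)] = int(parts[1], 16)
        return mapping

    # Hypothetical usage mirroring the BIG5/CP950 merge described further up:
    # big5 = parse_mapping("BIG5.TXT")
    # big5.update({k: v for k, v in parse_mapping("CP950.TXT").items()
    #              if 0xf9d6 <= k <= 0xf9dc})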
@@ -23,7 +23,7 @@ DOWNLOAD = wget -O $@ --no-use-server-timestamps
 # These files are part of the Unicode Character Database. Download
 # them on demand.
 UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt:
-	$(DOWNLOAD) http://unicode.org/Public/UNIDATA/$(@F)
+	$(DOWNLOAD) https://www.unicode.org/Public/UNIDATA/$(@F)
 
 # Generation of conversion tables used for string normalization with
 # UTF-8 strings.
@@ -3,7 +3,7 @@
  * Normalize a Unicode string to NFKC form
  *
  * This implements Unicode normalization, per the documentation at
- * http://www.unicode.org/reports/tr15/.
+ * https://www.unicode.org/reports/tr15/.
  *
  * Portions Copyright (c) 2017-2019, PostgreSQL Global Development Group
  *
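
As a quick cross-check of the operation this file implements, Python's standard library exposes the same UAX #15 normalization:

    import unicodedata

    print(unicodedata.normalize("NFKC", "\ufb01"))   # 'fi': ligature U+FB01 folded
    print(unicodedata.normalize("NFKC", "e\u0301"))  # 'é': recomposed to U+00E9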
@@ -109,7 +109,7 @@ get_decomposed_size(pg_wchar code)
 	/*
 	 * Fast path for Hangul characters not stored in tables to save memory as
 	 * decomposition is algorithmic. See
-	 * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
+	 * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
 	 * the matter.
 	 */
 	if (code >= SBASE && code < SBASE + SCOUNT)
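
The algorithmic Hangul decomposition this fast path relies on comes straight from the arithmetic in UAX #15 (annex 10 of the tr15-18 revision cited above). A hedged Python transcription of the standard's formula, not the C function itself:

    # Constants per the Unicode standard's Hangul syllable arithmetic.
    SBASE, LBASE, VBASE, TBASE = 0xAC00, 0x1100, 0x1161, 0x11A7
    LCOUNT, VCOUNT, TCOUNT = 19, 21, 28
    NCOUNT = VCOUNT * TCOUNT   # 588
    SCOUNT = LCOUNT * NCOUNT   # 11172

    def decompose_hangul(code):
        s_index = code - SBASE
        if not (0 <= s_index < SCOUNT):
            return [code]                       # not a precomposed Hangul syllable
        leading = LBASE + s_index // NCOUNT     # leading consonant (L)
        vowel = VBASE + (s_index % NCOUNT) // TCOUNT
        trailing = TBASE + s_index % TCOUNT     # trailing consonant, if any
        return [leading, vowel] + ([trailing] if trailing != TBASE else [])

    print([hex(c) for c in decompose_hangul(0xAC01)])  # ['0x1100', '0x1161', '0x11a8']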
@@ -234,7 +234,7 @@ decompose_code(pg_wchar code, pg_wchar **result, int *current)
 	/*
 	 * Fast path for Hangul characters not stored in tables to save memory as
 	 * decomposition is algorithmic. See
-	 * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
+	 * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
 	 * the matter.
 	 */
 	if (code >= SBASE && code < SBASE + SCOUNT)
@@ -362,7 +362,7 @@ unicode_normalize_kc(const pg_wchar *input)
 			continue;
 
 		/*
-		 * Per Unicode (http://unicode.org/reports/tr15/tr15-18.html) annex 4,
+		 * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html) annex 4,
 		 * a sequence of two adjacent characters in a string is an
 		 * exchangeable pair if the combining class (from the Unicode
 		 * Character Database) for the first character is greater than the
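
The comment above, truncated by the hunk boundary, is describing UAX #15's canonical-ordering step: two adjacent characters form an exchangeable pair when the first has a higher combining class than the second and the second is not a starter (combining class 0); such pairs are swapped until the combining marks are in canonical order. A hedged Python sketch of that reordering (not the C implementation):

    import unicodedata

    def canonical_order(text):
        chars = list(text)
        for i in range(len(chars) - 1, 0, -1):        # simple bubble passes
            for j in range(i):
                ccc1 = unicodedata.combining(chars[j])
                ccc2 = unicodedata.combining(chars[j + 1])
                if ccc2 != 0 and ccc1 > ccc2:         # exchangeable pair: swap
                    chars[j], chars[j + 1] = chars[j + 1], chars[j]
        return "".join(chars)

    # e + COMBINING ACUTE (ccc 230) + COMBINING CEDILLA (ccc 202):
    # the cedilla must sort before the acute accent.
    print([hex(ord(c)) for c in canonical_order("e\u0301\u0327")])
    # ['0x65', '0x327', '0x301']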