This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] sort diacritics left-to-right except in fr_CA locale


On Dec 17, 2014, Roland McGrath <roland@hack.frob.com> wrote:

>> Noted, thanks.  Any other comments on the patch, before I post a revised
>> version mentioning the yet-to-be-filed bug report?

> I am pretty useless in that area of the code, sorry.

Ping? (as in, anyone else? :-)

Here's a revised patch that adds a reference to the newly-filed bug
report.

for  ChangeLog

	[BZ #17750]
	* localedata/Makefile (test-input): Add fr_CA.UTF-8.
	(LOCALES): Likewise.
	* localedata/fr_CA.in: Copied and adjusted from...
	* localedata/fr_FR.in: ... this.  Adjusted too.
	* localedata/locales/de_DE (DIACRIT_FORWARD): Do not define.
	* localedata/locales/lb_LU (DIACRIT_FORWARD): Likewise.
	* localedata/locales/fr_CA (DIACRIT_BACKWARD): Define.
	* localedata/locales/iso14651_t1_common (DIACRIT_FORWARD):
	Make it the new default, overridable with DIACRIT_BACKWARD.
	* NEWS: Note behavior change.
---
 NEWS                                  |   11 +++-
 localedata/Makefile                   |    4 +
 localedata/fr_CA.in                   |   96 +++++++++++++++++++++++++++++++++
 localedata/fr_FR.in                   |   22 ++++----
 localedata/locales/de_DE              |    2 -
 localedata/locales/fr_CA              |    2 +
 localedata/locales/iso14651_t1_common |    6 +-
 localedata/locales/lb_LU              |    2 -
 8 files changed, 124 insertions(+), 21 deletions(-)
 create mode 100644 localedata/fr_CA.in

diff --git a/NEWS b/NEWS
index 0d481c2..0e267eb 100644
--- a/NEWS
+++ b/NEWS
@@ -15,7 +15,7 @@ Version 2.21
   17522, 17555, 17570, 17571, 17572, 17573, 17574, 17581, 17582, 17583,
   17584, 17585, 17589, 17594, 17601, 17608, 17616, 17625, 17630, 17633,
   17634, 17647, 17653, 17657, 17664, 17665, 17668, 17682, 17717, 17719,
-  17722, 17724, 17725, 17733, 17744, 17745, 17746, 17747.
+  17722, 17724, 17725, 17733, 17744, 17745, 17746, 17747, 17750.
 
 * CVE-2104-7817 The wordexp function could ignore the WRDE_NOCMD flag
   under certain input conditions resulting in the execution of a shell for
@@ -46,6 +46,15 @@ Version 2.21
 
 * Merged gettext 0.19.3 into the intl subdirectory.  This fixes building
   with newer versions of bison.
+
+* Collation (sorting) general rules regarding diacritics have been fixed to
+  match those in Unicode CLDR, namely, whether diacritic tie-breaking takes
+  place in a forward or backward pass over the strings or wstrings.  The
+  only locale that sorts diacritics with a backward pass is now fr_CA; it
+  already sorted "cote < côte < coté < côté" before.  All other locales now
+  use a forward pass, so that they sort "cote < coté < côte < côté", which
+  only de_DE and lb_LU did before.  (Bugzilla #17750)
+

 Version 2.20
 
diff --git a/localedata/Makefile b/localedata/Makefile
index 0826b36..4fc523e 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -37,7 +37,7 @@ test-srcs := collate-test xfrm-test tst-fmon tst-rpmatch tst-trans \
 	     tst-ctype tst-langinfo tst-langinfo-static tst-numeric
 test-input := de_DE.ISO-8859-1 en_US.ISO-8859-1 da_DK.ISO-8859-1 \
 	      hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 tr_TR.UTF-8 fr_FR.UTF-8 \
-	      si_LK.UTF-8
+	      si_LK.UTF-8 fr_CA.UTF-8
 test-input-data = $(addsuffix .in, $(basename $(test-input)))
 test-output := $(foreach s, .out .xout, \
 			 $(addsuffix $s, $(basename $(test-input))))
@@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
 	   hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
 	   nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \
 	   zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \
-	   tr_TR.ISO-8859-9 en_GB.UTF-8
+	   tr_TR.ISO-8859-9 en_GB.UTF-8 fr_CA.UTF-8
 LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
 CHARMAPS := $(shell echo "$(LOCALES)" | \
 		    sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/localedata/fr_CA.in b/localedata/fr_CA.in
new file mode 100644
index 0000000..1c05d69
--- /dev/null
+++ b/localedata/fr_CA.in
@@ -0,0 +1,96 @@
+@@@@@
+0000
+9999
+Aalborg
+aide
+aÃeul
+air
+@@@air
+air@@@
+Ãlborg
+aoÃt
+bohÃme
+BohÃme
+BohÃmien
+caennais
+cÃsium
+ÃÃ et lÃ
+C.A.F.
+Canon
+caÃon
+casanier
+cÃlibat
+colon
+cÃlon
+COOP
+CO-OP
+coop
+co-op
+Copenhagen
+COTE
+cote
+CÃTE
+cÃte
+COTÃ
+cotÃ
+CÃTÃ
+cÃtÃ
+du
+dÃ
+ÃlÃve
+ÃlevÃ
+gÃne
+gÃne
+gÃnÃ
+GrÃÃe
+Grossist
+haie
+haÃe
+Ãle
+Ãle d'OrlÃans
+lame
+l'Ãme
+lamÃ
+les
+LÃS
+lÃse
+lÃsÃ
+L'HaÃ-les-Roses
+MÃCON
+maÃon
+McArthur
+Mc Arthur
+Mc Mahon
+MODÃLE
+modelÃ
+NOÃL
+NoÃl
+notre
+nÃtre
+ode
+Åil
+ou
+OÃ
+ovoÃde
+pÃche
+pÃche
+PÃCHÃ
+pÃchÃ
+pÃchÃ
+pÃcher
+pÃcher
+pechÃre
+pÃchÃre
+relÃve
+relevÃ
+resume
+resumÃ
+rÃsumÃ
+rÃvÃle
+rÃvÃlÃ
+vice-president
+vice-prÃsident
+vice-president's offices
+vice-presidents' offices
+VICE-VERSA
+vice versa
diff --git a/localedata/fr_FR.in b/localedata/fr_FR.in
index dd5c533..070eb4dc 100644
--- a/localedata/fr_FR.in
+++ b/localedata/fr_FR.in
@@ -29,16 +29,16 @@ CO-OP
 Copenhagen
 cote
 COTE
-cÃte
-CÃTE
 cotÃ
 COTÃ
+cÃte
+CÃTE
 cÃtÃ
 CÃTÃ
 du
 dÃ
-ÃlÃve
 ÃlevÃ
+ÃlÃve
 gÃne
 gÃne
 gÃnÃ
@@ -49,20 +49,20 @@ haÃe
 Ãle
 Ãle d'OrlÃans
 lame
-l'Ãme
 lamÃ
+l'Ãme
 les
 LÃS
-lÃse
 lÃsÃ
+lÃse
 L'HaÃ-les-Roses
-MÃCON
 maÃon
+MÃCON
 McArthur
 Mc Arthur
 Mc Mahon
-MODÃLE
 modelÃ
+MODÃLE
 NoÃl
 NOÃL
 notre
@@ -72,22 +72,22 @@ ode
 ou
 OÃ
 ovoÃde
-pÃche
-pÃche
 pÃchÃ
 PÃCHÃ
+pÃche
+pÃche
 pÃchÃ
 pÃcher
 pÃcher
 pechÃre
 pÃchÃre
-relÃve
 relevÃ
+relÃve
 resume
 resumÃ
 rÃsumÃ
-rÃvÃle
 rÃvÃlÃ
+rÃvÃle
 vice-president
 vice-prÃsident
 vice-president's offices
diff --git a/localedata/locales/de_DE b/localedata/locales/de_DE
index e2704a7..2c3510a 100644
--- a/localedata/locales/de_DE
+++ b/localedata/locales/de_DE
@@ -76,8 +76,6 @@ END LC_CTYPE
 
 LC_COLLATE
 
-define DIACRIT_FORWARD
-
 % Copy the template from ISO/IEC 14651
 copy "iso14651_t1"
 
diff --git a/localedata/locales/fr_CA b/localedata/locales/fr_CA
index 5e2c5a1..878539b 100644
--- a/localedata/locales/fr_CA
+++ b/localedata/locales/fr_CA
@@ -51,6 +51,8 @@ copy "fr_FR"
 END LC_CTYPE
 
 LC_COLLATE
+define DIACRIT_BACKWARD
+
 copy "en_CA"
 END LC_COLLATE
 
diff --git a/localedata/locales/iso14651_t1_common b/localedata/locales/iso14651_t1_common
index e0c3eaa..1fc214f 100644
--- a/localedata/locales/iso14651_t1_common
+++ b/localedata/locales/iso14651_t1_common
@@ -5060,10 +5060,10 @@ order_start <SPECIAL>;forward;backward;forward;forward,position
 <U009E> IGNORE;IGNORE;IGNORE;<U009E>
 <U009F> IGNORE;IGNORE;IGNORE;<U009F>
 
-ifdef DIACRIT_FORWARD
-order_start <LATIN>;forward;forward;forward;forward,position
-else
+ifdef DIACRIT_BACKWARD
 order_start <LATIN>;forward;backward;forward;forward,position
+else
+order_start <LATIN>;forward;forward;forward;forward,position
 endif
 #
 <U00A0> <U0020>;<BAS>;<MIN>;IGNORE # 170<NBSP>
diff --git a/localedata/locales/lb_LU b/localedata/locales/lb_LU
index a74e162..c8616fd 100644
--- a/localedata/locales/lb_LU
+++ b/localedata/locales/lb_LU
@@ -77,8 +77,6 @@ END LC_CTYPE
 
 LC_COLLATE
 
-define DIACRIT_FORWARD
-
 % Copy the template from ISO/IEC 14651
 copy "iso14651_t1"
 


-- 
Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]