This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] sort diacritics left-to-right except in fr_CA locale


This fixes a long-standing collation bug in glibc, affecting all locales
but de_DE, lb_LU and fr_CA.  This led me to write a separate NEWS entry
for this bug; do we want a bug report in the database regardless?

Tested on x86_64-linux-gnu.  Ok to install?


for  ChangeLog

	* localedata/Makefile (test-input): Add fr_CA.UTF-8.
	(LOCALES): Likewise.
	* localedata/fr_CA.in: Copied and adjusted from...
	* localedata/fr_FR.in: ... this.  Adjusted too.
	* localedata/locales/de_DE (DIACRIT_FORWARD): Do not define.
	* localedata/locales/lb_LU (DIACRIT_FORWARD): Likewise.
	* localedata/locales/fr_CA (DIACRIT_BACKWARD): Define.
	* localedata/locales/iso14651_t1_common (DIACRIT_FORWARD):
	Make it the new default, overridable with DIACRIT_BACKWARD.
	* NEWS: Note behavior change.
---
 NEWS                                  |    9 +++
 localedata/Makefile                   |    4 +
 localedata/fr_CA.in                   |   96 +++++++++++++++++++++++++++++++++
 localedata/fr_FR.in                   |   22 ++++----
 localedata/locales/de_DE              |    2 -
 localedata/locales/fr_CA              |    2 +
 localedata/locales/iso14651_t1_common |    6 +-
 localedata/locales/lb_LU              |    2 -
 8 files changed, 123 insertions(+), 20 deletions(-)
 create mode 100644 localedata/fr_CA.in

diff --git a/NEWS b/NEWS
index a324c10..1a78cda 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,15 @@ Version 2.21
 
 * Merged gettext 0.19.3 into the intl subdirectory.  This fixes building
   with newer versions of bison.
+
+* Collation (sorting) general rules regarding diacritics have been fixed to
+  match those in Unicode CLDR, namely, whether diacritic tie-breaking takes
+  place in a forward or backward pass over the strings or wstrings.  The
+  only locale that sorts diacritics with a backward pass is now fr_CA; it
+  already sorted cote < côte < coté < côté before.  All other locales now
+  use a forward pass, so that they sort cote < coté < côte < côté, which
+  only de_DE and lb_LU did before.
+

 Version 2.20
 
diff --git a/localedata/Makefile b/localedata/Makefile
index 0826b36..4fc523e 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -37,7 +37,7 @@ test-srcs := collate-test xfrm-test tst-fmon tst-rpmatch tst-trans \
 	     tst-ctype tst-langinfo tst-langinfo-static tst-numeric
 test-input := de_DE.ISO-8859-1 en_US.ISO-8859-1 da_DK.ISO-8859-1 \
 	      hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 tr_TR.UTF-8 fr_FR.UTF-8 \
-	      si_LK.UTF-8
+	      si_LK.UTF-8 fr_CA.UTF-8
 test-input-data = $(addsuffix .in, $(basename $(test-input)))
 test-output := $(foreach s, .out .xout, \
 			 $(addsuffix $s, $(basename $(test-input))))
@@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
 	   hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
 	   nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \
 	   zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \
-	   tr_TR.ISO-8859-9 en_GB.UTF-8
+	   tr_TR.ISO-8859-9 en_GB.UTF-8 fr_CA.UTF-8
 LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
 CHARMAPS := $(shell echo "$(LOCALES)" | \
 		    sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/localedata/fr_CA.in b/localedata/fr_CA.in
new file mode 100644
index 0000000..1c05d69
--- /dev/null
+++ b/localedata/fr_CA.in
@@ -0,0 +1,96 @@
+@@@@@
+0000
+9999
+Aalborg
+aide
+aÃeul
+air
+@@@air
+air@@@
+Ãlborg
+aoÃt
+bohÃme
+BohÃme
+BohÃmien
+caennais
+cÃsium
+ÃÃ et lÃ
+C.A.F.
+Canon
+caÃon
+casanier
+cÃlibat
+colon
+cÃlon
+COOP
+CO-OP
+coop
+co-op
+Copenhagen
+COTE
+cote
+CÃTE
+cÃte
+COTÃ
+cotÃ
+CÃTÃ
+cÃtÃ
+du
+dÃ
+ÃlÃve
+ÃlevÃ
+gÃne
+gÃne
+gÃnÃ
+GrÃÃe
+Grossist
+haie
+haÃe
+Ãle
+Ãle d'OrlÃans
+lame
+l'Ãme
+lamÃ
+les
+LÃS
+lÃse
+lÃsÃ
+L'HaÃ-les-Roses
+MÃCON
+maÃon
+McArthur
+Mc Arthur
+Mc Mahon
+MODÃLE
+modelÃ
+NOÃL
+NoÃl
+notre
+nÃtre
+ode
+Åil
+ou
+OÃ
+ovoÃde
+pÃche
+pÃche
+PÃCHÃ
+pÃchÃ
+pÃchÃ
+pÃcher
+pÃcher
+pechÃre
+pÃchÃre
+relÃve
+relevÃ
+resume
+resumÃ
+rÃsumÃ
+rÃvÃle
+rÃvÃlÃ
+vice-president
+vice-prÃsident
+vice-president's offices
+vice-presidents' offices
+VICE-VERSA
+vice versa
diff --git a/localedata/fr_FR.in b/localedata/fr_FR.in
index dd5c533..070eb4dc 100644
--- a/localedata/fr_FR.in
+++ b/localedata/fr_FR.in
@@ -29,16 +29,16 @@ CO-OP
 Copenhagen
 cote
 COTE
-cÃte
-CÃTE
 cotÃ
 COTÃ
+cÃte
+CÃTE
 cÃtÃ
 CÃTÃ
 du
 dÃ
-ÃlÃve
 ÃlevÃ
+ÃlÃve
 gÃne
 gÃne
 gÃnÃ
@@ -49,20 +49,20 @@ haÃe
 Ãle
 Ãle d'OrlÃans
 lame
-l'Ãme
 lamÃ
+l'Ãme
 les
 LÃS
-lÃse
 lÃsÃ
+lÃse
 L'HaÃ-les-Roses
-MÃCON
 maÃon
+MÃCON
 McArthur
 Mc Arthur
 Mc Mahon
-MODÃLE
 modelÃ
+MODÃLE
 NoÃl
 NOÃL
 notre
@@ -72,22 +72,22 @@ ode
 ou
 OÃ
 ovoÃde
-pÃche
-pÃche
 pÃchÃ
 PÃCHÃ
+pÃche
+pÃche
 pÃchÃ
 pÃcher
 pÃcher
 pechÃre
 pÃchÃre
-relÃve
 relevÃ
+relÃve
 resume
 resumÃ
 rÃsumÃ
-rÃvÃle
 rÃvÃlÃ
+rÃvÃle
 vice-president
 vice-prÃsident
 vice-president's offices
diff --git a/localedata/locales/de_DE b/localedata/locales/de_DE
index e2704a7..2c3510a 100644
--- a/localedata/locales/de_DE
+++ b/localedata/locales/de_DE
@@ -76,8 +76,6 @@ END LC_CTYPE
 
 LC_COLLATE
 
-define DIACRIT_FORWARD
-
 % Copy the template from ISO/IEC 14651
 copy "iso14651_t1"
 
diff --git a/localedata/locales/fr_CA b/localedata/locales/fr_CA
index 5e2c5a1..878539b 100644
--- a/localedata/locales/fr_CA
+++ b/localedata/locales/fr_CA
@@ -51,6 +51,8 @@ copy "fr_FR"
 END LC_CTYPE
 
 LC_COLLATE
+define DIACRIT_BACKWARD
+
 copy "en_CA"
 END LC_COLLATE
 
diff --git a/localedata/locales/iso14651_t1_common b/localedata/locales/iso14651_t1_common
index e0c3eaa..1fc214f 100644
--- a/localedata/locales/iso14651_t1_common
+++ b/localedata/locales/iso14651_t1_common
@@ -5060,10 +5060,10 @@ order_start <SPECIAL>;forward;backward;forward;forward,position
 <U009E> IGNORE;IGNORE;IGNORE;<U009E>
 <U009F> IGNORE;IGNORE;IGNORE;<U009F>
 
-ifdef DIACRIT_FORWARD
-order_start <LATIN>;forward;forward;forward;forward,position
-else
+ifdef DIACRIT_BACKWARD
 order_start <LATIN>;forward;backward;forward;forward,position
+else
+order_start <LATIN>;forward;forward;forward;forward,position
 endif
 #
 <U00A0> <U0020>;<BAS>;<MIN>;IGNORE # 170<NBSP>
diff --git a/localedata/locales/lb_LU b/localedata/locales/lb_LU
index a74e162..c8616fd 100644
--- a/localedata/locales/lb_LU
+++ b/localedata/locales/lb_LU
@@ -77,8 +77,6 @@ END LC_CTYPE
 
 LC_COLLATE
 
-define DIACRIT_FORWARD
-
 % Copy the template from ISO/IEC 14651
 copy "iso14651_t1"
 

-- 
Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]