From 5dae46a14998243dc121cc0a8f72176cae6e43c8 Mon Sep 17 00:00:00 2001 From: behdad Date: Mon, 21 Jun 2004 21:15:31 +0000 Subject: [PATCH] Reimplemented Arabic joining. Hopefully it's conforming to the standard now, with the exception that we assume "level run" instead of "directional run", which is a proposed change to be applied for Unicode 4.1. --- .indent.pro | 2 +- lib/fribidi-bidi.h | 17 +++-- lib/fribidi-joining-types.h | 18 +++-- lib/fribidi-joining.c | 133 +++++++++++++++++++++--------------- lib/fribidi-joining.h | 15 ++-- lib/fribidi-unicode.h | 14 ++-- lib/fribidi.c | 10 +-- lib/fribidi.h | 8 +-- 8 files changed, 124 insertions(+), 93 deletions(-) diff --git a/.indent.pro b/.indent.pro index 08eb245..4359aac 100644 --- a/.indent.pro +++ b/.indent.pro @@ -8,7 +8,7 @@ -T FriBidiCharType -T FriBidiParType -T FriBidiJoiningType --T FriBidiArabicProps +-T FriBidiArabicProp -T FriBidiCharSet -T FriBidiCharSetHandler -T FriBidiMemChunk diff --git a/lib/fribidi-bidi.h b/lib/fribidi-bidi.h index 99596bd..53469f2 100644 --- a/lib/fribidi-bidi.h +++ b/lib/fribidi-bidi.h @@ -1,10 +1,10 @@ /* FriBidi * fribidi-bidi.h - bidirectional algorithm * - * $Id: fribidi-bidi.h,v 1.13 2004-06-21 18:49:23 behdad Exp $ + * $Id: fribidi-bidi.h,v 1.14 2004-06-21 21:15:31 behdad Exp $ * $Author: behdad $ - * $Date: 2004-06-21 18:49:23 $ - * $Revision: 1.13 $ + * $Date: 2004-06-21 21:15:31 $ + * $Revision: 1.14 $ * $Source: /home/behdad/src/fdo/fribidi/togit/git/../fribidi/fribidi2/lib/fribidi-bidi.h,v $ * * Authors: @@ -63,7 +63,8 @@ * only LTR, RTL, or ON.
*/ FRIBIDI_ENTRY FriBidiParType fribidi_get_par_direction ( - const FriBidiCharType *bidi_types, /* input bidi types */ + const FriBidiCharType *bidi_types, /* input list of bidi types as returned by + fribidi_get_bidi_types() */ const FriBidiStrIndex len /* input string length */ ); @@ -85,7 +86,8 @@ FRIBIDI_ENTRY FriBidiParType fribidi_get_par_direction ( */ FRIBIDI_ENTRY FriBidiLevel fribidi_get_par_embedding_levels ( - const FriBidiCharType *bidi_types, /* input bidi types */ + const FriBidiCharType *bidi_types, /* input list of bidi types as returned by + fribidi_get_bidi_types() */ const FriBidiStrIndex len, /* input string length of the paragraph */ FriBidiParType *pbase_dir, /* requested and resolved paragraph * base direction */ @@ -118,12 +120,13 @@ fribidi_get_par_embedding_levels ( * occured (memory allocation failure most probably). */ FRIBIDI_ENTRY FriBidiLevel fribidi_reorder_line ( - const FriBidiCharType *bidi_types, /* input bidi types */ + const FriBidiCharType *bidi_types, /* input list of bidi types as returned by + fribidi_get_bidi_types() */ const FriBidiStrIndex len, /* input length of the line */ const FriBidiStrIndex off, /* input offset of the beginning of the line in the paragraph */ const FriBidiParType base_dir, /* resolved paragraph base direction */ - FriBidiLevel *embedding_levels, /* list of embedding levels, + FriBidiLevel *embedding_levels, /* input list of embedding levels, as returned by fribidi_get_par_embedding_levels */ FriBidiChar *visual_str, /* visual string to reorder */ diff --git a/lib/fribidi-joining-types.h b/lib/fribidi-joining-types.h index 9935d9f..e1edb6b 100644 --- a/lib/fribidi-joining-types.h +++ b/lib/fribidi-joining-types.h @@ -1,10 +1,10 @@ /* FriBidi * fribidi-joining-types.h - character joining types * - * $Id: fribidi-joining-types.h,v 1.3 2004-06-15 11:52:02 behdad Exp $ + * $Id: fribidi-joining-types.h,v 1.4 2004-06-21 21:15:31 behdad Exp $ * $Author: behdad $ - * $Date: 2004-06-15 11:52:02 $ - * 
$Revision: 1.3 $ + * $Date: 2004-06-21 21:15:31 $ + * $Revision: 1.4 $ * $Source: /home/behdad/src/fdo/fribidi/togit/git/../fribidi/fribidi2/lib/fribidi-joining-types.h,v $ * * Author: @@ -98,12 +98,12 @@ typedef enum _FriBidiJoiningTypeEnum FriBidiJoiningType; typedef fribidi_uint8 FriBidiJoiningType; #endif /* !__FRIBIDI_DOC */ -/* FriBidiArabicProps is essentially the same type as FriBidiJoiningType, but +/* FriBidiArabicProp is essentially the same type as FriBidiJoiningType, but * not limited to the few values returned by fribidi_get_joining_type. */ -typedef fribidi_uint8 FriBidiArabicProps; +typedef fribidi_uint8 FriBidiArabicProp; /* - * The equivalent of JoiningType values for ArabicProps + * The equivalent of JoiningType values for ArabicProp */ /* Primary Arabic Joining Classes (Table 8-2) */ @@ -184,6 +184,12 @@ typedef fribidi_uint8 FriBidiArabicProps; #define FRIBIDI_IS_JOIN_SKIPPED(p) \ ((p) & (FRIBIDI_MASK_TRANSPARENT | FRIBIDI_MASK_IGNORED)) +/* Is base that will be shaped: R, D, L? */ +#define FRIBIDI_IS_JOIN_BASE_SHAPES(p) \ + ( FRIBIDI_MASK_ARAB_SHAPES == ( (p) & \ + ( FRIBIDI_MASK_TRANSPARENT | FRIBIDI_MASK_IGNORED \ + | FRIBIDI_MASK_ARAB_SHAPES ) ) ) + #define FRIBIDI_JOINS_PRECEDING_MASK(level) \ (FRIBIDI_LEVEL_IS_RTL (level) ? 
FRIBIDI_MASK_JOINS_RIGHT \ : FRIBIDI_MASK_JOINS_LEFT) diff --git a/lib/fribidi-joining.c b/lib/fribidi-joining.c index fea9ba8..a9ac5d2 100644 --- a/lib/fribidi-joining.c +++ b/lib/fribidi-joining.c @@ -1,10 +1,10 @@ /* FriBidi * fribidi-joining.h - Arabic joining algorithm * - * $Id: fribidi-joining.c,v 1.3 2004-06-21 18:49:23 behdad Exp $ + * $Id: fribidi-joining.c,v 1.4 2004-06-21 21:15:31 behdad Exp $ * $Author: behdad $ - * $Date: 2004-06-21 18:49:23 $ - * $Revision: 1.3 $ + * $Date: 2004-06-21 21:15:31 $ + * $Revision: 1.4 $ * $Source: /home/behdad/src/fdo/fribidi/togit/git/../fribidi/fribidi2/lib/fribidi-joining.c,v $ * * Authors: @@ -40,6 +40,7 @@ #include "mem.h" #include "env.h" +#include "bidi-types.h" #include "joining-types.h" #if DEBUG @@ -69,23 +70,22 @@ print_joining_types ( } #endif /* DEBUG */ +#define FRIBIDI_CONSISTENT_LEVEL(i) \ + (FRIBIDI_IS_EXPLICIT_OR_BN (bidi_types[(i)]) \ + ? FRIBIDI_SENTINEL \ + : embedding_levels[(i)]) -#if FRIBIDI_JOIN_WITHIN_RUN_LEVEL -/* Join within same level run (to be proposed for inclusion in Unicode 4.1) */ -# define FRIBIDI_JOINING_RUN(l) (l) -#else /* !FRIBIDI_JOIN_WITHIN_RUN_LEVEL */ -/* Join within same directional run (current rule in Unicode 4.0.1) */ -# define FRIBIDI_JOINING_RUN(l) FRIBIDI_LEVEL_IS_RTL(l) -#endif /* !FRIBIDI_JOIN_WITHIN_RUN_LEVEL */ - +#define FRIBIDI_LEVELS_MATCH(i, j) \ + ((i) == (j) || (i) == FRIBIDI_SENTINEL || (j) == FRIBIDI_SENTINEL) FRIBIDI_ENTRY void fribidi_join_arabic ( /* input */ - const FriBidiLevel *embedding_levels, + const FriBidiCharType *bidi_types, const FriBidiStrIndex len, + const FriBidiLevel *embedding_levels, /* input and output */ - FriBidiArabicProps *ar_props + FriBidiArabicProp *ar_props ) { if UNLIKELY @@ -93,6 +93,7 @@ fribidi_join_arabic ( DBG ("in fribidi_join_arabic"); + fribidi_assert (bidi_types); fribidi_assert (embedding_levels); fribidi_assert (ar_props); @@ -104,52 +105,76 @@ fribidi_join_arabic ( } # endif /* DEBUG */ + /* The joining algorithm 
turned out very very dirty :(. That's what happens + * when you follow the standard which has never been implemented closely + * before. We assume "level run" instead of "directional run", which is a + * proposed update to be considered for Unicode 4.1. */ + /* 8.2 Arabic - Cursive Joining */ DBG ("Arabic cursive joining"); { - register FriBidiStrIndex i = 0; + /* The following do not need to be initialized as long as joins is + * initialized to false. We just do to turn off compiler warnings. */ + register FriBidiStrIndex saved = 0; + register FriBidiLevel saved_level = FRIBIDI_SENTINEL; + register fribidi_boolean saved_shapes = false; + register FriBidiArabicProp saved_joins_following_mask = 0; + + register fribidi_boolean joins = false; + register FriBidiStrIndex i; for (i = 0; i < len; i++) - { - register FriBidiStrIndex saved = i; - register const FriBidiLevel direction = - FRIBIDI_LEVEL_IS_RTL (embedding_levels[i]); - register const FriBidiArabicProps joins_preceding_mask = - FRIBIDI_JOINS_PRECEDING_MASK (direction); - register const FriBidiArabicProps joins_following_mask = - FRIBIDI_JOINS_FOLLOWING_MASK (direction); - register fribidi_boolean joins = false; - - /* Sweep over directional runs */ - for (; - i < len - && FRIBIDI_LEVEL_IS_RTL (embedding_levels[i]) == direction; i++) - { - /* R1. Transparent chars are skipped (and so do iGnored chars) */ - if (FRIBIDI_IS_JOIN_SKIPPED (ar_props[i])) - continue; - - /* R2..R7. */ - if (!joins) - FRIBIDI_UNSET_BITS (ar_props[i], joins_preceding_mask); - else if (!FRIBIDI_TEST_BITS (ar_props[i], joins_preceding_mask)) - FRIBIDI_UNSET_BITS (ar_props[saved], joins_following_mask); - else - { - /* This is a FriBidi extension: we set joining properties - * for skipped characters in between. 
*/ - for (saved++; saved < i; saved++) - FRIBIDI_SET_BITS (ar_props[saved], - joins_preceding_mask | - joins_following_mask); - } - - joins = FRIBIDI_TEST_BITS (ar_props[i], joins_following_mask); - saved = i; - } - FRIBIDI_UNSET_BITS (ar_props[saved], joins_following_mask); - i--; - } + if (!FRIBIDI_IS_JOINING_TYPE_G (ar_props[i])) + { + register fribidi_boolean disjoin = false; + register fribidi_boolean shapes = FRIBIDI_ARAB_SHAPES (ar_props[i]); + register FriBidiLevel level = FRIBIDI_CONSISTENT_LEVEL (i); + + if (joins && !FRIBIDI_LEVELS_MATCH (saved_level, level)) + { + disjoin = true; + joins = false; + } + + if (!FRIBIDI_IS_JOIN_SKIPPED (ar_props[i])) + { + register const FriBidiArabicProp joins_preceding_mask = + FRIBIDI_JOINS_PRECEDING_MASK (level); + + if (!joins) + { + if (shapes) + FRIBIDI_UNSET_BITS (ar_props[i], joins_preceding_mask); + } + else if (!FRIBIDI_TEST_BITS (ar_props[i], joins_preceding_mask)) + disjoin = true; + } + + if (disjoin && saved_shapes) + FRIBIDI_UNSET_BITS (ar_props[saved], saved_joins_following_mask); + + if (!FRIBIDI_IS_JOIN_SKIPPED (ar_props[i])) + { + saved = i; + saved_level = level; + saved_shapes = shapes; + saved_joins_following_mask = + FRIBIDI_JOINS_FOLLOWING_MASK (level); + joins = + FRIBIDI_TEST_BITS (ar_props[i], saved_joins_following_mask); + } + } + if (joins && saved_shapes) + FRIBIDI_UNSET_BITS (ar_props[saved], saved_joins_following_mask); + + /* if joining on transparents then... */ + /* This is a FriBidi extension: we set joining properties + * for skipped characters in between. 
+ for (saved++; saved < i; saved++) + FRIBIDI_SET_BITS (ar_props[saved], + joins_preceding_mask | + joins_following_mask); + */ } # if DEBUG diff --git a/lib/fribidi-joining.h b/lib/fribidi-joining.h index a341d32..09b123c 100644 --- a/lib/fribidi-joining.h +++ b/lib/fribidi-joining.h @@ -1,10 +1,10 @@ /* FriBidi * fribidi-joining.h - Arabic joining algorithm * - * $Id: fribidi-joining.h,v 1.2 2004-06-15 11:52:02 behdad Exp $ + * $Id: fribidi-joining.h,v 1.3 2004-06-21 21:15:31 behdad Exp $ * $Author: behdad $ - * $Date: 2004-06-15 11:52:02 $ - * $Revision: 1.2 $ + * $Date: 2004-06-21 21:15:31 $ + * $Revision: 1.3 $ * $Source: /home/behdad/src/fdo/fribidi/togit/git/../fribidi/fribidi2/lib/fribidi-joining.h,v $ * * Authors: @@ -46,7 +46,7 @@ * * This function does the Arabic joining algorithm. Means, given Arabic * joining types of the characters in ar_props (don't worry, - * FriBidiJoiningType can be casted to FriBidiArabicProps automagically), this + * FriBidiJoiningType can be casted to FriBidiArabicProp automagically), this * function modifies this properties to grasp the effect of neighboring * characters. You probably need this information later to do Arabic shaping. * @@ -61,11 +61,14 @@ * Arabic properties computed by this function. 
*/ FRIBIDI_ENTRY void fribidi_join_arabic ( + const FriBidiCharType *bidi_types, /* input list of bidi types as + returned by + fribidi_get_bidi_types() */ + const FriBidiStrIndex len, /* input string length */ const FriBidiLevel *embedding_levels, /* input list of embedding levels, as returned by fribidi_get_par_embedding_levels */ - const FriBidiStrIndex len, /* input string length */ - FriBidiArabicProps *ar_props /* Arabic properties to analyze, initilized by + FriBidiArabicProp *ar_props /* Arabic properties to analyze, initilized by joining types, as returned by fribidi_get_joining_types */ ); diff --git a/lib/fribidi-unicode.h b/lib/fribidi-unicode.h index e77da2e..0cb2a4f 100644 --- a/lib/fribidi-unicode.h +++ b/lib/fribidi-unicode.h @@ -1,10 +1,10 @@ /* FriBidi * fribidi-unicode.h - general Unicode definitions * - * $Id: fribidi-unicode.h,v 1.4 2004-06-21 18:49:23 behdad Exp $ + * $Id: fribidi-unicode.h,v 1.5 2004-06-21 21:15:31 behdad Exp $ * $Author: behdad $ - * $Date: 2004-06-21 18:49:23 $ - * $Revision: 1.4 $ + * $Date: 2004-06-21 21:15:31 $ + * $Revision: 1.5 $ * $Source: /home/behdad/src/fdo/fribidi/togit/git/../fribidi/fribidi2/lib/fribidi-unicode.h,v $ * * Author: @@ -66,13 +66,7 @@ extern const char *fribidi_unicode_version; #define FRIBIDI_BIDI_MAX_RESOLVED_LEVELS 63 -/* Unicode Arabic joining/shaping definitions: */ - -/* Unicode 4.0.1: join within "directional run", not "level run". 
*/ -#undef FRIBIDI_JOIN_WITHIN_RUN_LEVEL - - -/* A few Unicode characters */ +/* A few Unicode characters: */ /* Bidirectional marks */ #define FRIBIDI_CHAR_LRM 0x200E diff --git a/lib/fribidi.c b/lib/fribidi.c index 4b92dca..c505119 100644 --- a/lib/fribidi.c +++ b/lib/fribidi.c @@ -1,10 +1,10 @@ /* FriBidi * fribidi.c - Unicode bidirectional and Arabic joining/shaping algorithms * - * $Id: fribidi.c,v 1.14 2004-06-21 18:49:23 behdad Exp $ + * $Id: fribidi.c,v 1.15 2004-06-21 21:15:31 behdad Exp $ * $Author: behdad $ - * $Date: 2004-06-21 18:49:23 $ - * $Revision: 1.14 $ + * $Date: 2004-06-21 21:15:31 $ + * $Revision: 1.15 $ * $Source: /home/behdad/src/fdo/fribidi/togit/git/../fribidi/fribidi2/lib/fribidi.c,v $ * * Authors: @@ -145,7 +145,7 @@ fribidi_log2vis ( fribidi_boolean private_V_to_L = false; fribidi_boolean private_embedding_levels = false; fribidi_boolean status = false; - FriBidiArabicProps *ar_props = NULL; + FriBidiArabicProp *ar_props = NULL; FriBidiCharType *bidi_types = NULL; if UNLIKELY @@ -190,7 +190,7 @@ fribidi_log2vis ( /* Arabic joining */ ar_props = fribidi_malloc (len * sizeof ar_props[0]); fribidi_get_joining_types (str, len, ar_props); - fribidi_join_arabic (embedding_levels, len, ar_props); + fribidi_join_arabic (bidi_types, len, embedding_levels, ar_props); #endif /* !FRIBIDI_NO_ARABIC */ fribidi_shape (embedding_levels, len, visual_str); diff --git a/lib/fribidi.h b/lib/fribidi.h index cbdfbec..13fdbf2 100644 --- a/lib/fribidi.h +++ b/lib/fribidi.h @@ -1,10 +1,10 @@ /* FriBidi * fribidi.h - Unicode bidirectional and Arabic joining/shaping algorithms * - * $Id: fribidi.h,v 1.7 2004-06-15 11:52:02 behdad Exp $ + * $Id: fribidi.h,v 1.8 2004-06-21 21:15:31 behdad Exp $ * $Author: behdad $ - * $Date: 2004-06-15 11:52:02 $ - * $Revision: 1.7 $ + * $Date: 2004-06-21 21:15:31 $ + * $Revision: 1.8 $ * $Source: /home/behdad/src/fdo/fribidi/togit/git/../fribidi/fribidi2/lib/fribidi.h,v $ * * Author: @@ -42,7 +42,7 @@ #include 
"fribidi-joining-types.h" #include "fribidi-joining.h" #else -typedef void FriBidiJoiningType typedef void FriBidiArabicProps +typedef void FriBidiJoiningType typedef void FriBidiArabicProp #endif /* !FRIBIDI_NO_ARABIC */ #if FRIBIDI_CHARSETS # include "fribidi-char-sets.h" -- 2.40.0