]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/classes/core/src/com/ibm/icu/impl/CharacterIteration.java
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / main / classes / core / src / com / ibm / icu / impl / CharacterIteration.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 2013, International Business Machines Corporation and         *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.impl;
8
9 import java.text.CharacterIterator;
10
11 import com.ibm.icu.text.UTF16;
12
/**
 * Static helpers that read 32-bit code points through a 16-bit
 * {@link CharacterIterator}, combining UTF-16 surrogate pairs on the fly.
 * Unpaired surrogates are returned as-is.
 */
public final class CharacterIteration {
    // disallow instantiation
    private CharacterIteration() { }

    /**
     * 32-bit value returned when an iterator has run out of range.
     * It is positive so that the fast case (not end, not surrogate) can be
     * checked with a single test.
     */
    public static final int DONE32 = 0x7fffffff;

    /**
     * Move the iterator forward to the next code point, and return that code point,
     *   leaving the iterator positioned at the char returned.
     *   For supplementary chars, the iterator is left positioned at the lead surrogate.
     * @param ci  The character iterator
     * @return    The next code point, or {@link #DONE32} when the iterator has
     *            moved past the end of its range.
     */
    public static int next32(CharacterIterator ci) {
        // If the current position is at a surrogate pair, move to the trail surrogate,
        //   which leaves it in position for the underlying iterator's next() to work.
        int c = ci.current();
        if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE) {
            c = ci.next();
            if (c < Character.MIN_LOW_SURROGATE || c > Character.MAX_LOW_SURROGATE) {
                // Unpaired lead surrogate: undo the speculative step.
                ci.previous();
            }
        }

        // For BMP chars, this next() is the real deal.
        c = ci.next();

        // If we might have a lead surrogate, we need to peek ahead to get the trail
        //  even though we don't want to really be positioned there.
        //  This test also catches CharacterIterator.DONE (0xFFFF).
        if (c >= Character.MIN_HIGH_SURROGATE) {
            c = nextTrail32(ci, c);
        }

        if (c >= Character.MIN_SUPPLEMENTARY_CODE_POINT && c != DONE32) {
            // We got a supplementary char.  Back the iterator up to the position
            // of the lead surrogate.
            ci.previous();
        }
        return c;
    }

    /**
     * Out-of-line portion of the in-line next32 code.
     * The call site does an initial ci.next() and calls this function
     * if the 16-bit value it gets is &gt;= the minimum lead surrogate value.
     *
     * NOTE: when a surrogate pair is combined, the underlying char iterator is
     *       left positioned in the middle of the pair (on the trail surrogate).
     *       ci.next() will work correctly from there, but ci.getIndex() will be
     *       wrong, and needs adjustment.
     *
     * @param ci    The character iterator, positioned on the char passed as {@code lead}
     * @param lead  The 16-bit value just returned by ci.next()
     * @return      {@link #DONE32} if {@code lead} is CharacterIterator.DONE and the
     *              iterator is at or past its end index; the combined supplementary
     *              code point if {@code lead} is followed by a trail surrogate;
     *              otherwise {@code lead} unchanged.
     */
    public static int nextTrail32(CharacterIterator ci, int lead) {
        // DONE (0xFFFF) is only an end marker when the index really is at the end;
        // otherwise it is an ordinary U+FFFF in the text.
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        int retVal = lead;
        if (lead <= Character.MAX_HIGH_SURROGATE) {
            char cTrail = ci.next();
            if (Character.isLowSurrogate(cTrail)) {
                retVal = Character.toCodePoint((char) lead, cTrail);
            } else {
                // Unpaired lead surrogate: restore the position and return it as-is.
                ci.previous();
            }
        }
        return retVal;
    }

    /**
     * Move the iterator backward by one code point and return that code point.
     * For a supplementary char the iterator is left positioned at the lead surrogate.
     *
     * @param ci  The character iterator
     * @return    The previous code point, or {@link #DONE32} if the iterator is
     *            already at or before its begin index.
     */
    public static int previous32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return DONE32;
        }
        char trail = ci.previous();
        int retVal = trail;
        if (Character.isLowSurrogate(trail) && ci.getIndex() > ci.getBeginIndex()) {
            char lead = ci.previous();
            if (Character.isHighSurrogate(lead)) {
                retVal = Character.toCodePoint(lead, trail);
            } else {
                // Unpaired trail surrogate: step back onto it.
                ci.next();
            }
        }
        return retVal;
    }

    /**
     * Return the code point at the iterator's current position.
     * The iterator's position is the same on return as on entry.
     *
     * @param ci  The character iterator
     * @return    The current code point, or {@link #DONE32} if the current char is
     *            CharacterIterator.DONE and the index is at or past the end index.
     */
    public static int current32(CharacterIterator ci) {
        char lead = ci.current();
        int retVal = lead;
        if (retVal < Character.MIN_HIGH_SURROGATE) {
            // Fast path: below the surrogate range, so neither a surrogate nor DONE.
            return retVal;
        }
        if (Character.isHighSurrogate(lead)) {
            // Peek at the following char, then restore the position.
            char trail = ci.next();
            ci.previous();
            if (Character.isLowSurrogate(trail)) {
                retVal = Character.toCodePoint(lead, trail);
            }
        } else if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            retVal = DONE32;
        }
        return retVal;
    }
}