Project

Profile

Help

Revision 91431e0e

Added by Michael Kay over 12 years ago

Fix bug 2836862 The rules= parameter of collation URI doesn't work

View differences:

latest9.1/bj/net/sf/saxon/sort/StandardCollationURIResolver.java
1 1
package net.sf.saxon.sort;
2 2
import net.sf.saxon.Configuration;
3
import net.sf.saxon.om.FastStringBuffer;
3 4
import net.sf.saxon.trans.XPathException;
4 5

  
5 6
import javax.xml.transform.TransformerException;
......
7 8
import java.net.URISyntaxException;
8 9
import java.util.Properties;
9 10
import java.util.StringTokenizer;
11
import java.nio.ByteBuffer;
12
import java.nio.charset.Charset;
10 13

  
11 14
/**
12 15
 * StandardCollationURIResolver allows a Collation to be created given
......
50 53
                    throw new XPathException(err);
51 54
                }
52 55
                Properties props = new Properties();
53
                String query = uuri.getQuery();
56
                String query = uuri.getRawQuery();
54 57
                StringTokenizer queryTokenizer = new StringTokenizer(query, ";&");
55 58
                while (queryTokenizer.hasMoreElements()) {
56 59
                    String param = queryTokenizer.nextToken();
57 60
                    int eq = param.indexOf('=');
58 61
                    if (eq > 0 && eq < param.length()-1) {
59 62
                        String kw = param.substring(0, eq);
60
                        String val = param.substring(eq + 1);
63
                        String val = decode(param.substring(eq + 1));
61 64
                        props.setProperty(kw, val);
62 65
                    }
63 66
                }
......
75 78
        }
76 79
    }
77 80

  
81
    public static String decode(String s) {
82
        // Evaluates all escapes in s, applying UTF-8 decoding if needed.  Assumes
83
        // that escapes are well-formed syntactically, i.e., of the form %XX.  If a
84
        // sequence of escaped octets is not valid UTF-8 then the erroneous octets
85
        // are replaced with '\uFFFD'.
86
        // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
87
        //            with a scope_id
88
        //
89

  
90
        if (s == null) {
91
            return s;
92
        }
93
        int n = s.length();
94
        if (n == 0) {
95
            return s;
96
        }
97
        if (s.indexOf('%') < 0) {
98
            return s;
99
        }
100

  
101
        FastStringBuffer sb = new FastStringBuffer(n);
102
        ByteBuffer bb = ByteBuffer.allocate(n);
103
        Charset utf8 = Charset.forName("UTF-8");
104

  
105
        // This is not horribly efficient, but it will do for now
106
        char c = s.charAt(0);
107
        boolean betweenBrackets = false;
108

  
109
        for (int i = 0; i < n;) {
110
            assert c == s.charAt(i);    // Loop invariant
111
            if (c == '[') {
112
                betweenBrackets = true;
113
            } else if (betweenBrackets && c == ']') {
114
                betweenBrackets = false;
115
            }
116
            if (c != '%' || betweenBrackets) {
117
                sb.append(c);
118
                if (++i >= n) {
119
                    break;
120
                }
121
                c = s.charAt(i);
122
                continue;
123
            }
124
            bb.clear();
125
            for (; ;) {
126
                assert (n - i >= 2);
127
                bb.put(hex(s.charAt(++i), s.charAt(++i)));
128
                if (++i >= n) {
129
                    break;
130
                }
131
                c = s.charAt(i);
132
                if (c != '%') {
133
                    break;
134
                }
135
            }
136
            bb.flip();
137
            sb.append(utf8.decode(bb));
138
        }
139

  
140
        return sb.toString();
141
    }
142

  
143
    private static byte hex(char high, char low) {
144
        return (byte)((hexToDec(high)<<4) | hexToDec(low));
145
    }
146

  
147
    private static int hexToDec(char c) {
148
        if (c >= '0' && c <= '9') {
149
            return c - '0';
150
        } else if (c >= 'a' && c <= 'f') {
151
            return c - 'a' + 10;
152
        } else if (c >= 'A' && c <= 'F') {
153
            return c - 'A' + 10;
154
        } else {
155
            return 0;
156
        }
157
    }
158

  
78 159

  
79 160
}
80 161

  

Also available in: Unified diff