1
2
3
4
5
6
7
8
9
10 package org.jenkinsci.plugins.darcs;
11
12 import java.io.File;
13 import java.io.FileInputStream;
14 import java.io.IOException;
15 import java.io.InputStream;
16 import java.nio.ByteBuffer;
17 import java.nio.CharBuffer;
18 import java.nio.charset.Charset;
19 import java.nio.charset.CharsetDecoder;
20 import java.nio.charset.CoderResult;
21 import java.nio.charset.CodingErrorAction;
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.List;
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/**
 * Decodes a raw XML byte stream into a string, tolerating different character encodings
 * inside the text content of {@code <name>} and {@code <comment>} elements.
 *
 * <p>The input is cut into chunks: markup outside those elements, and the content of each
 * {@code <name>}/{@code <comment>} element. Every chunk is decoded on its own, trying strict
 * UTF-8 first, then the charsets in {@link #ADDL_CHARSETS}, and finally UTF-8 with malformed
 * input replaced, so that one badly encoded element does not affect its neighbours.
 *
 * <p>NOTE(review): presumably needed because darcs emits patch names and comments in whatever
 * encoding the committer used -- confirm against the darcs documentation.
 *
 * <p>Instances reuse stateful {@link CharsetDecoder}s and must not be shared between threads.
 */
class DarcsXmlSanitizer {

    /** Charset used to encode the (pure ASCII) tag patterns independently of the platform default. */
    private static final Charset ASCII = Charset.forName("US-ASCII");
    /** Opening tag of a name element. */
    private static final byte[] NAME_OPEN = "<name>".getBytes(ASCII);
    /** Closing tag of a name element. */
    private static final byte[] NAME_CLOSE = "</name>".getBytes(ASCII);
    /** Opening tag of a comment element. */
    private static final byte[] COMMENT_OPEN = "<comment>".getBytes(ASCII);
    /** Closing tag of a comment element. */
    private static final byte[] COMMENT_CLOSE = "</comment>".getBytes(ASCII);

    /** Charsets tried, in this order, after strict UTF-8 has failed. */
    private static final List<String> ADDL_CHARSETS = Arrays.asList("ISO-8859-1", "UTF-16");
    /** Decoders in the order they are tried for every chunk. */
    private final List<CharsetDecoder> decoders = new ArrayList<CharsetDecoder>();

    /**
     * Position of the scanner relative to the elements that get special treatment.
     */
    private enum State {
        /** Outside of any name or comment element. */
        OUTSIDE,
        /** Inside the content of a name element. */
        IN_NAME,
        /** Inside the content of a comment element. */
        IN_COMMENT
    }

    /**
     * Sets up the decoder chain: strict UTF-8, the additional charsets, and as a last resort
     * UTF-8 that replaces malformed or unmappable input instead of reporting an error.
     */
    public DarcsXmlSanitizer() {
        decoders.add(Charset.forName("UTF-8").newDecoder());

        for (final String cs : ADDL_CHARSETS) {
            decoders.add(Charset.forName(cs).newDecoder());
        }

        // Last resort: never reports an error, so every chunk ends up decoded by some decoder.
        decoders.add(Charset.forName("UTF-8").newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE));
    }

    /**
     * Finds the first occurrence of {@code pattern} in {@code data} at or after {@code start}
     * using Knuth-Morris-Pratt matching.
     *
     * @param data bytes to search in
     * @param start index at which the search begins
     * @param pattern non-empty byte sequence to look for
     * @return index of the first byte of the match, or -1 if there is none
     */
    private static int positionBeforeNext(final byte[] data, final int start, final byte[] pattern) {
        if (0 == data.length || start >= data.length) {
            return -1;
        }

        final int[] failure = computeFailure(pattern);
        int matched = 0;

        for (int i = start; i < data.length; i++) {
            // On a mismatch fall back to the longest prefix that is still a suffix of the match.
            while (matched > 0 && pattern[matched] != data[i]) {
                matched = failure[matched - 1];
            }
            if (pattern[matched] == data[i]) {
                matched++;
            }
            if (matched == pattern.length) {
                return i - pattern.length + 1;
            }
        }

        return -1;
    }

    /**
     * Like {@link #positionBeforeNext(byte[], int, byte[])} but returns the index directly
     * behind the match.
     *
     * @param data bytes to search in
     * @param start index at which the search begins
     * @param pattern non-empty byte sequence to look for
     * @return index of the first byte after the match, or -1 if there is none
     */
    private static int positionAfterNext(final byte[] data, final int start, final byte[] pattern) {
        final int pos = positionBeforeNext(data, start, pattern);

        return -1 == pos ? -1 : pos + pattern.length;
    }

    /**
     * Computes the Knuth-Morris-Pratt failure table of a pattern.
     *
     * @param pattern byte sequence to analyse
     * @return for every index the length of the longest proper prefix of the pattern that is
     *         also a suffix of the pattern up to and including that index
     */
    private static int[] computeFailure(final byte[] pattern) {
        final int[] failure = new int[pattern.length];
        int j = 0;

        for (int i = 1; i < pattern.length; i++) {
            while (j > 0 && pattern[j] != pattern[i]) {
                j = failure[j - 1];
            }

            if (pattern[j] == pattern[i]) {
                j++;
            }

            failure[i] = j;
        }

        return failure;
    }

    /**
     * Returns where the content of the current element ends.
     *
     * @param input bytes being sanitized
     * @param from index at which the element content starts
     * @param closingTag closing tag of the element
     * @return index of the closing tag, or {@code input.length} if the element is never closed
     */
    private static int endOfContent(final byte[] input, final int from, final byte[] closingTag) {
        final int pos = positionBeforeNext(input, from, closingTag);

        // An unterminated element extends to the end of the input instead of yielding -1,
        // which would turn into a negative chunk length.
        return -1 == pos ? input.length : pos;
    }

    /**
     * Decodes one chunk with the first decoder that accepts it and appends the result.
     *
     * @param in chunk to decode; rewound before every attempt
     * @param out receives the decoded characters; partial output of a failed attempt is discarded
     */
    private void decodeChunk(final ByteBuffer in, final CharBuffer out) {
        in.mark();
        out.mark();

        for (final CharsetDecoder dec : decoders) {
            dec.reset();
            final CoderResult result = dec.decode(in, out, true);

            if (result.isError()) {
                // Undo whatever this decoder consumed and produced before trying the next one.
                in.reset();
                out.reset();
            } else {
                dec.flush(out);
                return;
            }
        }
    }

    /**
     * Decodes the given bytes chunk by chunk, see the class description.
     *
     * @param input raw bytes, must not be {@code null}
     * @return the decoded text
     */
    public String cleanse(final byte[] input) {
        // No decoder in the chain produces more chars than it consumes bytes.
        final CharBuffer cb = CharBuffer.allocate(input.length);
        State state = State.OUTSIDE;
        int currentPosition = 0;
        int nextPosition = 0;

        while (currentPosition < input.length) {
            switch (state) {
                case OUTSIDE:
                    final int nextName = positionAfterNext(input, currentPosition, NAME_OPEN);
                    final int nextComment = positionAfterNext(input, currentPosition, COMMENT_OPEN);

                    if (-1 != nextName && (-1 == nextComment || nextName < nextComment)) {
                        // A name comes first, or there is a name and no further comment at all.
                        nextPosition = nextName;
                        state = State.IN_NAME;
                    } else if (-1 != nextComment) {
                        nextPosition = nextComment;
                        state = State.IN_COMMENT;
                    } else {
                        // No special element left: the rest of the input is one chunk.
                        nextPosition = input.length;
                    }
                    break;
                case IN_NAME:
                    nextPosition = endOfContent(input, currentPosition, NAME_CLOSE);
                    state = State.OUTSIDE;
                    break;
                case IN_COMMENT:
                    nextPosition = endOfContent(input, currentPosition, COMMENT_CLOSE);
                    state = State.OUTSIDE;
                    break;
                default:
                    throw new IllegalStateException(String.format("Illegal state %s!", state));
            }

            decodeChunk(ByteBuffer.wrap(input, currentPosition, nextPosition - currentPosition), cb);
            currentPosition = nextPosition;
        }

        cb.flip();
        return cb.toString();
    }

    /**
     * Reads the whole file and decodes it with {@link #cleanse(byte[])}.
     *
     * @param file file to read
     * @return the decoded text
     * @throws IOException if the file is too large or cannot be read completely
     */
    public String cleanse(final File file) throws IOException {
        return cleanse(readFile(file));
    }

    /**
     * Reads the complete content of a file into memory.
     *
     * @param file file to read
     * @return the bytes of the file
     * @throws IOException if the file is larger than {@link Integer#MAX_VALUE} bytes or fewer
     *         bytes than its reported length could be read
     */
    private byte[] readFile(final File file) throws IOException {
        final long length = file.length();

        // A Java array cannot hold more than Integer.MAX_VALUE elements.
        if (length > Integer.MAX_VALUE) {
            throw new IOException("File is too large " + file.getName());
        }

        final byte[] bytes = new byte[(int) length];
        int offset = 0;
        InputStream is = null;

        try {
            is = new FileInputStream(file);

            // read() may return fewer bytes than requested, so loop until full or end of stream.
            int numRead = 0;
            while (offset < bytes.length
                    && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }
        } finally {
            if (null != is) {
                is.close();
            }
        }

        if (offset < bytes.length) {
            throw new IOException("Could not completely read file " + file.getName());
        }

        return bytes;
    }
}