/*
 * Copyright 2007 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.zxing.qrcode.decoder;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import com.google.zxing.DecodeHintType;
import com.google.zxing.FormatException;
import com.google.zxing.common.BitSource;
import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.DecoderResult;
import com.google.zxing.common.StringUtils;

33 * <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
34 * in one QR Code. This class decodes the bits back into text.</p>
36 * <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
40 final class DecodedBitStreamParser {
43 * See ISO 18004:2006, 6.4.4 Table 5
45 private static final char[] ALPHANUMERIC_CHARS = {
46 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
47 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
48 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
49 ' ', '$', '%', '*', '+', '-', '.', '/', ':'
51 private static final int GB2312_SUBSET = 1;
53 private DecodedBitStreamParser() {
56 static DecoderResult decode(byte[] bytes,
58 ErrorCorrectionLevel ecLevel,
59 Map<DecodeHintType,?> hints) throws FormatException {
60 BitSource bits = new BitSource(bytes);
61 StringBuilder result = new StringBuilder(50);
62 List<byte[]> byteSegments = new ArrayList<byte[]>(1);
64 CharacterSetECI currentCharacterSetECI = null;
65 boolean fc1InEffect = false;
68 // While still another segment to read...
69 if (bits.available() < 4) {
70 // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
71 mode = Mode.TERMINATOR;
73 mode = Mode.forBits(bits.readBits(4)); // mode is encoded by 4 bits
75 if (mode != Mode.TERMINATOR) {
76 if (mode == Mode.FNC1_FIRST_POSITION || mode == Mode.FNC1_SECOND_POSITION) {
77 // We do little with FNC1 except alter the parsed result a bit according to the spec
79 } else if (mode == Mode.STRUCTURED_APPEND) {
80 if (bits.available() < 16) {
81 throw FormatException.getFormatInstance();
83 // not really supported; all we do is ignore it
84 // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
86 } else if (mode == Mode.ECI) {
87 // Count doesn't apply to ECI
88 int value = parseECIValue(bits);
89 currentCharacterSetECI = CharacterSetECI.getCharacterSetECIByValue(value);
90 if (currentCharacterSetECI == null) {
91 throw FormatException.getFormatInstance();
94 // First handle Hanzi mode which does not start with character count
95 if (mode == Mode.HANZI) {
96 //chinese mode contains a sub set indicator right after mode indicator
97 int subset = bits.readBits(4);
98 int countHanzi = bits.readBits(mode.getCharacterCountBits(version));
99 if (subset == GB2312_SUBSET) {
100 decodeHanziSegment(bits, result, countHanzi);
103 // "Normal" QR code modes:
104 // How many characters will follow, encoded in this mode?
105 int count = bits.readBits(mode.getCharacterCountBits(version));
106 if (mode == Mode.NUMERIC) {
107 decodeNumericSegment(bits, result, count);
108 } else if (mode == Mode.ALPHANUMERIC) {
109 decodeAlphanumericSegment(bits, result, count, fc1InEffect);
110 } else if (mode == Mode.BYTE) {
111 decodeByteSegment(bits, result, count, currentCharacterSetECI, byteSegments, hints);
112 } else if (mode == Mode.KANJI) {
113 decodeKanjiSegment(bits, result, count);
115 throw FormatException.getFormatInstance();
120 } while (mode != Mode.TERMINATOR);
121 } catch (IllegalArgumentException iae) {
122 // from readBits() calls
123 throw FormatException.getFormatInstance();
126 return new DecoderResult(bytes,
128 byteSegments.isEmpty() ? null : byteSegments,
129 ecLevel == null ? null : ecLevel.toString());
133 * See specification GBT 18284-2000
135 private static void decodeHanziSegment(BitSource bits,
136 StringBuilder result,
137 int count) throws FormatException {
138 // Don't crash trying to read more bits than we have available.
139 if (count * 13 > bits.available()) {
140 throw FormatException.getFormatInstance();
143 // Each character will require 2 bytes. Read the characters as 2-byte pairs
144 // and decode as GB2312 afterwards
145 byte[] buffer = new byte[2 * count];
148 // Each 13 bits encodes a 2-byte character
149 int twoBytes = bits.readBits(13);
150 int assembledTwoBytes = ((twoBytes / 0x060) << 8) | (twoBytes % 0x060);
151 if (assembledTwoBytes < 0x003BF) {
152 // In the 0xA1A1 to 0xAAFE range
153 assembledTwoBytes += 0x0A1A1;
155 // In the 0xB0A1 to 0xFAFE range
156 assembledTwoBytes += 0x0A6A1;
158 buffer[offset] = (byte) ((assembledTwoBytes >> 8) & 0xFF);
159 buffer[offset + 1] = (byte) (assembledTwoBytes & 0xFF);
165 result.append(new String(buffer, StringUtils.GB2312));
166 } catch (UnsupportedEncodingException ignored) {
167 throw FormatException.getFormatInstance();
171 private static void decodeKanjiSegment(BitSource bits,
172 StringBuilder result,
173 int count) throws FormatException {
174 // Don't crash trying to read more bits than we have available.
175 if (count * 13 > bits.available()) {
176 throw FormatException.getFormatInstance();
179 // Each character will require 2 bytes. Read the characters as 2-byte pairs
180 // and decode as Shift_JIS afterwards
181 byte[] buffer = new byte[2 * count];
184 // Each 13 bits encodes a 2-byte character
185 int twoBytes = bits.readBits(13);
186 int assembledTwoBytes = ((twoBytes / 0x0C0) << 8) | (twoBytes % 0x0C0);
187 if (assembledTwoBytes < 0x01F00) {
188 // In the 0x8140 to 0x9FFC range
189 assembledTwoBytes += 0x08140;
191 // In the 0xE040 to 0xEBBF range
192 assembledTwoBytes += 0x0C140;
194 buffer[offset] = (byte) (assembledTwoBytes >> 8);
195 buffer[offset + 1] = (byte) assembledTwoBytes;
199 // Shift_JIS may not be supported in some environments:
201 result.append(new String(buffer, StringUtils.SHIFT_JIS));
202 } catch (UnsupportedEncodingException ignored) {
203 throw FormatException.getFormatInstance();
207 private static void decodeByteSegment(BitSource bits,
208 StringBuilder result,
210 CharacterSetECI currentCharacterSetECI,
211 Collection<byte[]> byteSegments,
212 Map<DecodeHintType,?> hints) throws FormatException {
213 // Don't crash trying to read more bits than we have available.
214 if (count << 3 > bits.available()) {
215 throw FormatException.getFormatInstance();
218 byte[] readBytes = new byte[count];
219 for (int i = 0; i < count; i++) {
220 readBytes[i] = (byte) bits.readBits(8);
223 if (currentCharacterSetECI == null) {
224 // The spec isn't clear on this mode; see
225 // section 6.4.5: t does not say which encoding to assuming
226 // upon decoding. I have seen ISO-8859-1 used as well as
227 // Shift_JIS -- without anything like an ECI designator to
229 encoding = StringUtils.guessEncoding(readBytes, hints);
231 encoding = currentCharacterSetECI.name();
234 result.append(new String(readBytes, encoding));
235 } catch (UnsupportedEncodingException ignored) {
236 throw FormatException.getFormatInstance();
238 byteSegments.add(readBytes);
241 private static char toAlphaNumericChar(int value) throws FormatException {
242 if (value >= ALPHANUMERIC_CHARS.length) {
243 throw FormatException.getFormatInstance();
245 return ALPHANUMERIC_CHARS[value];
248 private static void decodeAlphanumericSegment(BitSource bits,
249 StringBuilder result,
251 boolean fc1InEffect) throws FormatException {
252 // Read two characters at a time
253 int start = result.length();
255 if (bits.available() < 11) {
256 throw FormatException.getFormatInstance();
258 int nextTwoCharsBits = bits.readBits(11);
259 result.append(toAlphaNumericChar(nextTwoCharsBits / 45));
260 result.append(toAlphaNumericChar(nextTwoCharsBits % 45));
264 // special case: one character left
265 if (bits.available() < 6) {
266 throw FormatException.getFormatInstance();
268 result.append(toAlphaNumericChar(bits.readBits(6)));
270 // See section 6.4.8.1, 6.4.8.2
272 // We need to massage the result a bit if in an FNC1 mode:
273 for (int i = start; i < result.length(); i++) {
274 if (result.charAt(i) == '%') {
275 if (i < result.length() - 1 && result.charAt(i + 1) == '%') {
276 // %% is rendered as %
277 result.deleteCharAt(i + 1);
279 // In alpha mode, % should be converted to FNC1 separator 0x1D
280 result.setCharAt(i, (char) 0x1D);
287 private static void decodeNumericSegment(BitSource bits,
288 StringBuilder result,
289 int count) throws FormatException {
290 // Read three digits at a time
292 // Each 10 bits encodes three digits
293 if (bits.available() < 10) {
294 throw FormatException.getFormatInstance();
296 int threeDigitsBits = bits.readBits(10);
297 if (threeDigitsBits >= 1000) {
298 throw FormatException.getFormatInstance();
300 result.append(toAlphaNumericChar(threeDigitsBits / 100));
301 result.append(toAlphaNumericChar((threeDigitsBits / 10) % 10));
302 result.append(toAlphaNumericChar(threeDigitsBits % 10));
306 // Two digits left over to read, encoded in 7 bits
307 if (bits.available() < 7) {
308 throw FormatException.getFormatInstance();
310 int twoDigitsBits = bits.readBits(7);
311 if (twoDigitsBits >= 100) {
312 throw FormatException.getFormatInstance();
314 result.append(toAlphaNumericChar(twoDigitsBits / 10));
315 result.append(toAlphaNumericChar(twoDigitsBits % 10));
316 } else if (count == 1) {
317 // One digit left over to read
318 if (bits.available() < 4) {
319 throw FormatException.getFormatInstance();
321 int digitBits = bits.readBits(4);
322 if (digitBits >= 10) {
323 throw FormatException.getFormatInstance();
325 result.append(toAlphaNumericChar(digitBits));
329 private static int parseECIValue(BitSource bits) throws FormatException {
330 int firstByte = bits.readBits(8);
331 if ((firstByte & 0x80) == 0) {
333 return firstByte & 0x7F;
335 if ((firstByte & 0xC0) == 0x80) {
337 int secondByte = bits.readBits(8);
338 return ((firstByte & 0x3F) << 8) | secondByte;
340 if ((firstByte & 0xE0) == 0xC0) {
342 int secondThirdBytes = bits.readBits(16);
343 return ((firstByte & 0x1F) << 16) | secondThirdBytes;
345 throw FormatException.getFormatInstance();