2 **************************************************************************
3 * Copyright (C) 2005-2010, International Business Machines Corporation *
4 * and others. All Rights Reserved. *
5 **************************************************************************
9 package com.ibm.icu.dev.demo.charsetdet;
12 import java.awt.event.ActionEvent;
13 import java.awt.event.ActionListener;
14 import java.awt.event.KeyEvent;
15 import java.awt.event.WindowAdapter;
16 import java.awt.event.WindowEvent;
17 import java.io.BufferedInputStream;
19 import java.io.FileInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.InputStreamReader;
24 import java.nio.ByteBuffer;
25 import java.nio.charset.Charset;
26 import java.security.AccessControlException;
28 import javax.swing.JFileChooser;
29 import javax.swing.JFrame;
30 import javax.swing.JMenu;
31 import javax.swing.JMenuBar;
32 import javax.swing.JMenuItem;
33 import javax.swing.JOptionPane;
34 import javax.swing.JScrollPane;
35 import javax.swing.JTextPane;
36 import javax.swing.KeyStroke;
38 import com.ibm.icu.charset.CharsetICU;
39 import com.ibm.icu.dev.demo.impl.DemoApplet;
40 import com.ibm.icu.text.CharsetDetector;
41 import com.ibm.icu.text.CharsetMatch;
44 * This simple application demonstrates how to use the CharsetDetector API. It
45 * opens a file or web page, detects the encoding, and then displays it using that
48 public class DetectingViewer extends JFrame implements ActionListener
// Serialization version identifier for this class.
54 private static final long serialVersionUID = -2307065724464747775L;
// Text pane in which show() places the decoded document text.
55 private JTextPane text;
// File chooser used by doOpenFile(); the constructor sets it to null when the
// chooser cannot be created because of an AccessControlException.
56 private JFileChooser fileChooser;
59 * @throws java.awt.HeadlessException
61 public DetectingViewer()
// Report the new frame to DemoApplet; doQuit() makes the matching
// demoFrameClosed() call.
64 DemoApplet.demoFrameOpened();
// Creating the chooser may throw AccessControlException. In that case the
// viewer keeps going with fileChooser == null, and makeMenus() disables the
// "Open File..." item.
67 fileChooser = new JFileChooser();
68 } catch (AccessControlException ace) {
69 System.err.println("no file chooser - access control exception. Continuing without file browsing. "+ace.toString());
70 fileChooser = null; //
73 // setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
// makeMenus() reads fileChooser, so it must run after the chooser setup above.
76 setJMenuBar(makeMenus());
// The text pane shows the decoded document as plain text.
77 text = new JTextPane();
78 text.setContentType("text/plain");
80 text.setSize(800, 800);
// NOTE(review): the font family is hard-coded; confirm what is displayed on
// systems where "Arial Unicode MS" is not installed.
82 Font font = new Font("Arial Unicode MS", Font.PLAIN, 24);
// The text pane is placed inside a scroll pane, which becomes the frame content.
85 JScrollPane scrollPane = new JScrollPane(text);
87 getContentPane().add(scrollPane);
// Handler invoked when the user closes the window.
92 public void windowClosing(WindowEvent e) {
/**
 * Handles the action events of the menu items this object listens to. The
 * action command of the event is compared with the known menu labels to
 * select the operation.
 *
 * @param event the action event to dispatch
 */
103 public void actionPerformed(ActionEvent event)
105 String cmd = event.getActionCommand();
// NOTE(review): the menu items visible in makeMenus() are "Open File...",
// "Open URL..." and "Quit"; confirm whether any item still sends "New...".
107 if (cmd.equals("New...")) {
109 } else if (cmd.equals("Open File...")) {
111 } else if (cmd.equals("Open URL...")) {
113 } else if (cmd.equals("Quit")) {
118 public static void main(String[] args)
120 new DetectingViewer();
/**
 * Shows an error message dialog with this window as its parent.
 *
 * @param title the dialog title
 * @param msg the message text shown in the dialog
 */
123 private void errorDialog(String title, String msg)
125 JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE);
/**
 * Opens a file for reading and wraps it in a BufferedInputStream. If the
 * file cannot be opened, the exception message is shown with errorDialog().
 * NOTE(review): doOpenFile() tests the result against null, so presumably
 * null is returned after a failure - confirm.
 *
 * @param file the file to open
 * @return a buffered stream over the file
 */
128 private BufferedInputStream openFile(File file)
130 FileInputStream fileStream = null;
133 fileStream = new FileInputStream(file);
134 } catch (Exception e) {
135 errorDialog("Error Opening File", e.getMessage());
139 return new BufferedInputStream(fileStream);
142 // private void openFile(String directory, String filename)
144 // openFile(new File(directory, filename));
/**
 * Opens a stream on the given URL and wraps it in a BufferedInputStream. If
 * the URL cannot be parsed or opened, the exception message is shown with
 * errorDialog().
 * NOTE(review): doOpenURL() tests the result against null, so presumably
 * null is returned after a failure - confirm.
 *
 * @param url the URL to open, as text
 * @return a buffered stream over the URL content
 */
148 private BufferedInputStream openURL(String url)
150 InputStream s = null;
153 URL aURL = new URL(url);
154 s = aURL.openStream();
155 } catch (Exception e) {
156 errorDialog("Error Opening URL", e.getMessage());
160 return new BufferedInputStream(s);
/**
 * Builds the display name of a match: the charset name followed by the
 * language in parentheses.
 *
 * @param match the match to describe
 * @return text of the form "name (language)"
 */
163 private String encodingName(CharsetMatch match)
165 return match.getName() + " (" + match.getLanguage() + ")";
/**
 * Creates one menu item per detected match, labelled with the encoding name,
 * the language and the confidence value.
 *
 * @param matches the matches to list
 */
168 private void setMatchMenu(CharsetMatch[] matches)
// Index 1 is the second menu on the menu bar.
// NOTE(review): presumably this is the "Detected Encodings" menu created in
// makeMenus() - confirm.
170 JMenu menu = getJMenuBar().getMenu(1);
175 for (int i = 0; i < matches.length; i += 1) {
176 CharsetMatch match = matches[i];
// Label: "name (language) confidence".
178 menuItem = new JMenuItem(encodingName(match) + " " + match.getConfidence());
// Lower-case ASCII bytes of the tag names "script" and "style"; filter()
// passes them to openTag() and closedTag().
184 private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'};
185 private byte[] styleTag = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'};
// Size of the byte buffer allocated by filter(); filter() also uses it as the
// read limit of the mark it sets on the stream.
186 private static int BUFFER_SIZE = 100000;
/**
 * Compares the bytes of buffer starting at offset with the bytes of tag.
 * The comparison is byte-for-byte and therefore case-sensitive. No more than
 * length - offset bytes are examined, so no index at or beyond length is read.
 * NOTE(review): presumably true is returned only when every byte of tag was
 * matched - confirm.
 *
 * @param buffer the bytes being scanned
 * @param offset index in buffer of the first byte to compare
 * @param length end index (exclusive) of the usable bytes in buffer
 * @param tag the tag name bytes to look for
 */
188 private boolean openTag(byte[] buffer, int offset, int length, byte[] tag)
190 int tagLen = tag.length;
// Number of usable bytes left in buffer from offset onwards.
191 int bufRem = length - offset;
194 for (b = 0; b < tagLen && b < bufRem; b += 1) {
195 if (buffer[b + offset] != tag[b]) {
203 private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag)
205 if (buffer[offset] != (byte) '/') {
209 return openTag(buffer, offset + 1, length, tag);
/**
 * Reads up to BUFFER_SIZE bytes from the stream, scans them for markup and
 * returns the bytes that were kept, copied into an array of exactly that
 * size. doOpenURL() runs charset detection on the result.
 *
 * @param in the stream to read; a mark is set on it before reading
 * @return a new array holding the first "out" bytes of the work buffer
 */
212 private byte[] filter(InputStream in)
214 byte[] buffer = new byte[BUFFER_SIZE];
215 int bytesRemaining = BUFFER_SIZE;
// Mark the current stream position, with BUFFER_SIZE as the read limit.
218 in.mark(BUFFER_SIZE);
// Keep reading until the buffer is full or read() reports no more bytes.
221 while (bytesRemaining > 0) {
222 int bytesRead = in.read(buffer, bufLen, bytesRemaining);
224 if (bytesRead <= 0) {
229 bytesRemaining -= bytesRead;
// NOTE(review): every exception raised while reading is caught here and the
// TODO below is still open; confirm whether the failure should be reported.
231 } catch (Exception e) {
232 // TODO: error handling?
// NOTE(review): presumably inTag means "between '<' and '>'" and skip means
// "inside a script or style element" - confirm.
236 boolean inTag = false;
237 boolean skip = false;
// Scan the bytes that were read, one at a time.
240 for (int i = 0; i < bufLen; i += 1) {
243 if (b == (byte) '<') {
// Check whether the '<' starts an opening or a closing script/style tag.
246 if (openTag(buffer, i + 1, bufLen, scriptTag) ||
247 openTag(buffer, i + 1, bufLen, styleTag)) {
249 } else if (closedTag(buffer, i + 1, bufLen, scriptTag) ||
250 closedTag(buffer, i + 1, bufLen, styleTag)) {
253 } else if (b == (byte) '>') {
// Reached only for bytes that are neither '<' nor '>' while both flags are false.
255 } else if (! (inTag || skip)) {
// The kept bytes occupy the first "out" positions of buffer; copy them into
// an array of exactly that length.
260 byte[] filtered = new byte[out];
262 System.arraycopy(buffer, 0, filtered, 0, out);
/**
 * Runs charset detection with a new CharsetDetector.
 *
 * @param bytes the bytes to examine
 * @return the matches reported by CharsetDetector.detectAll()
 */
266 private CharsetMatch[] detect(byte[] bytes)
268 CharsetDetector det = new CharsetDetector();
272 return det.detectAll();
/**
 * Runs charset detection on a stream with a new CharsetDetector.
 * NOTE(review): every exception is caught and the TODO below is still open;
 * show() accepts a null match array, so presumably null is returned after a
 * failure - confirm.
 *
 * @param inputStream the stream handed to CharsetDetector.setText()
 * @return the matches reported by CharsetDetector.detectAll()
 */
275 private CharsetMatch[] detect(BufferedInputStream inputStream)
277 CharsetDetector det = new CharsetDetector();
280 det.setText(inputStream);
282 return det.detectAll();
283 } catch (Exception e) {
284 // TODO: error message?
289 private void show(InputStream inputStream, CharsetMatch[] matches, String title)
291 InputStreamReader isr;
292 char[] buffer = new char[1024];
295 if (matches == null || matches.length == 0) {
296 errorDialog("Match Error", "No matches!");
301 StringBuffer sb = new StringBuffer();
302 String encoding = matches[0].getName();
306 if (encoding.startsWith("UTF-32")) {
307 byte[] bytes = new byte[1024];
310 Charset utf32 = CharsetICU.forNameICU(encoding);
312 while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) {
313 offset = bytesRead % 4;
314 chBytes = bytesRead - offset;
316 sb.append(utf32.decode(ByteBuffer.wrap(bytes)).toString());
319 for (int i = 0; i < offset; i += 1) {
320 bytes[i] = bytes[chBytes + i];
325 isr = new InputStreamReader(inputStream, encoding);
327 while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
328 sb.append(buffer, 0, bytesRead);
334 this.setTitle(title + " - " + encodingName(matches[0]));
336 setMatchMenu(matches);
337 text.setText(sb.toString());
338 } catch (IOException e) {
339 errorDialog("IO Error", e.getMessage());
340 } catch (Exception e) {
341 errorDialog("Internal Error", e.getMessage());
347 // open a new window...
/**
 * Lets the user choose a file, runs charset detection on it and displays it.
 * Nothing happens unless the user approves the chooser dialog and the file
 * can be opened.
 */
350 private void doOpenFile()
// NOTE(review): fileChooser is null when the constructor could not create it.
// makeMenus() disables "Open File..." in that case, which presumably keeps
// this method from being reached - confirm.
352 int retVal = fileChooser.showOpenDialog(this);
354 if (retVal == JFileChooser.APPROVE_OPTION) {
355 File file = fileChooser.getSelectedFile();
356 BufferedInputStream inputStream = openFile(file);
358 if (inputStream != null) {
// Detection reads from the same stream that show() displays afterwards.
359 CharsetMatch[] matches = detect(inputStream);
// The file name becomes the title prefix of the window.
361 show(inputStream, matches, file.getName());
/**
 * Asks the user for a URL, runs charset detection on its content and
 * displays it. Nothing happens for a cancelled or empty input, or when the
 * URL cannot be opened.
 */
366 private void doOpenURL()
368 String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE,
371 if (url != null && url.length() > 0) {
372 BufferedInputStream inputStream = openURL(url);
374 if (inputStream != null) {
// Detection runs on the bytes returned by filter(), not on the raw stream;
// the stream itself is what show() displays.
375 byte[] filtered = filter(inputStream);
376 CharsetMatch[] matches = detect(filtered);
378 show(inputStream, matches, url);
/**
 * Reports this frame as closed to DemoApplet (the counterpart of the
 * demoFrameOpened() call in the constructor) and hides the window.
 */
383 private void doQuit()
385 DemoApplet.demoFrameClosed();
386 this.setVisible(false);
390 private JMenuBar makeMenus()
392 JMenu menu = new JMenu("File");
395 mi = new JMenuItem("Open File...");
396 mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK)));
397 mi.addActionListener(this);
399 if(fileChooser == null) {
400 mi.setEnabled(false); // no file chooser.
403 mi = new JMenuItem("Open URL...");
404 mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK)));
405 mi.addActionListener(this);
408 mi = new JMenuItem("Quit");
409 mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK)));
410 mi.addActionListener(this);
413 JMenuBar mbar = new JMenuBar();
416 menu = new JMenu("Detected Encodings");