2 **************************************************************************
\r
3 * Copyright (C) 2005-2010, International Business Machines Corporation *
\r
4 * and others. All Rights Reserved. *
\r
5 **************************************************************************
\r
9 package com.ibm.icu.dev.demo.charsetdet;
\r
11 import java.awt.Font;
\r
12 import java.awt.event.ActionEvent;
\r
13 import java.awt.event.ActionListener;
\r
14 import java.awt.event.KeyEvent;
\r
15 import java.awt.event.WindowAdapter;
\r
16 import java.awt.event.WindowEvent;
\r
17 import java.io.BufferedInputStream;
\r
18 import java.io.File;
\r
19 import java.io.FileInputStream;
\r
20 import java.io.IOException;
\r
21 import java.io.InputStream;
\r
22 import java.io.InputStreamReader;
\r
23 import java.net.URL;
\r
24 import java.nio.ByteBuffer;
\r
25 import java.nio.charset.Charset;
\r
26 import java.security.AccessControlException;
\r
28 import javax.swing.JFileChooser;
\r
29 import javax.swing.JFrame;
\r
30 import javax.swing.JMenu;
\r
31 import javax.swing.JMenuBar;
\r
32 import javax.swing.JMenuItem;
\r
33 import javax.swing.JOptionPane;
\r
34 import javax.swing.JScrollPane;
\r
35 import javax.swing.JTextPane;
\r
36 import javax.swing.KeyStroke;
\r
38 import com.ibm.icu.charset.CharsetICU;
\r
39 import com.ibm.icu.dev.demo.impl.DemoApplet;
\r
40 import com.ibm.icu.text.CharsetDetector;
\r
41 import com.ibm.icu.text.CharsetMatch;
\r
44 * This simple application demonstrates how to use the CharsetDetector API. It
\r
45 * opens a file or web page, detects the encoding, and then displays it using that encoding.
\r
48 public class DetectingViewer extends JFrame implements ActionListener
\r
54 private static final long serialVersionUID = -2307065724464747775L;
\r
55 private JTextPane text;
\r
56 private JFileChooser fileChooser;
\r
59 * @throws java.awt.HeadlessException
\r
61 public DetectingViewer()
\r
64 DemoApplet.demoFrameOpened();
\r
67 fileChooser = new JFileChooser();
\r
68 } catch (AccessControlException ace) {
\r
69 System.err.println("no file chooser - access control exception. Continuing without file browsing. "+ace.toString());
\r
70 fileChooser = null; //
\r
73 // setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
\r
76 setJMenuBar(makeMenus());
\r
77 text = new JTextPane();
\r
78 text.setContentType("text/plain");
\r
80 text.setSize(800, 800);
\r
82 Font font = new Font("Arial Unicode MS", Font.PLAIN, 24);
\r
85 JScrollPane scrollPane = new JScrollPane(text);
\r
87 getContentPane().add(scrollPane);
\r
91 new WindowAdapter() {
\r
92 public void windowClosing(WindowEvent e) {
\r
93 // setVisible(false);
\r
103 public void actionPerformed(ActionEvent event)
\r
105 String cmd = event.getActionCommand();
\r
107 if (cmd.equals("New...")) {
\r
109 } else if (cmd.equals("Open File...")) {
\r
111 } else if (cmd.equals("Open URL...")) {
\r
113 } else if (cmd.equals("Quit")) {
\r
118 public static void main(String[] args)
\r
120 new DetectingViewer();
\r
123 private void errorDialog(String title, String msg)
\r
125 JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE);
\r
128 private BufferedInputStream openFile(File file)
\r
130 FileInputStream fileStream = null;
\r
133 fileStream = new FileInputStream(file);
\r
134 } catch (Exception e) {
\r
135 errorDialog("Error Opening File", e.getMessage());
\r
139 return new BufferedInputStream(fileStream);
\r
142 // private void openFile(String directory, String filename)
\r
144 // openFile(new File(directory, filename));
\r
148 private BufferedInputStream openURL(String url)
\r
150 InputStream s = null;
\r
153 URL aURL = new URL(url);
\r
154 s = aURL.openStream();
\r
155 } catch (Exception e) {
\r
156 errorDialog("Error Opening URL", e.getMessage());
\r
160 return new BufferedInputStream(s);
\r
163 private String encodingName(CharsetMatch match)
\r
165 return match.getName() + " (" + match.getLanguage() + ")";
\r
168 private void setMatchMenu(CharsetMatch[] matches)
\r
170 JMenu menu = getJMenuBar().getMenu(1);
\r
171 JMenuItem menuItem;
\r
175 for (int i = 0; i < matches.length; i += 1) {
\r
176 CharsetMatch match = matches[i];
\r
178 menuItem = new JMenuItem(encodingName(match) + " " + match.getConfidence());
\r
180 menu.add(menuItem);
\r
184 private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'};
\r
185 private byte[] styleTag = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'};
\r
186 private static int BUFFER_SIZE = 100000;
\r
188 private boolean openTag(byte[] buffer, int offset, int length, byte[] tag)
\r
190 int tagLen = tag.length;
\r
191 int bufRem = length - offset;
\r
194 for (b = 0; b < tagLen && b < bufRem; b += 1) {
\r
195 if (buffer[b + offset] != tag[b]) {
\r
200 return b == tagLen;
\r
203 private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag)
\r
205 if (buffer[offset] != (byte) '/') {
\r
209 return openTag(buffer, offset + 1, length, tag);
\r
212 private byte[] filter(InputStream in)
\r
214 byte[] buffer = new byte[BUFFER_SIZE];
\r
215 int bytesRemaining = BUFFER_SIZE;
\r
218 in.mark(BUFFER_SIZE);
\r
221 while (bytesRemaining > 0) {
\r
222 int bytesRead = in.read(buffer, bufLen, bytesRemaining);
\r
224 if (bytesRead <= 0) {
\r
228 bufLen += bytesRead;
\r
229 bytesRemaining -= bytesRead;
\r
231 } catch (Exception e) {
\r
232 // TODO: error handling?
\r
236 boolean inTag = false;
\r
237 boolean skip = false;
\r
240 for (int i = 0; i < bufLen; i += 1) {
\r
241 byte b = buffer[i];
\r
243 if (b == (byte) '<') {
\r
246 if (openTag(buffer, i + 1, bufLen, scriptTag) ||
\r
247 openTag(buffer, i + 1, bufLen, styleTag)) {
\r
249 } else if (closedTag(buffer, i + 1, bufLen, scriptTag) ||
\r
250 closedTag(buffer, i + 1, bufLen, styleTag)) {
\r
253 } else if (b == (byte) '>') {
\r
255 } else if (! (inTag || skip)) {
\r
260 byte[] filtered = new byte[out];
\r
262 System.arraycopy(buffer, 0, filtered, 0, out);
\r
266 private CharsetMatch[] detect(byte[] bytes)
\r
268 CharsetDetector det = new CharsetDetector();
\r
270 det.setText(bytes);
\r
272 return det.detectAll();
\r
275 private CharsetMatch[] detect(BufferedInputStream inputStream)
\r
277 CharsetDetector det = new CharsetDetector();
\r
280 det.setText(inputStream);
\r
282 return det.detectAll();
\r
283 } catch (Exception e) {
\r
284 // TODO: error message?
\r
289 private void show(InputStream inputStream, CharsetMatch[] matches, String title)
\r
291 InputStreamReader isr;
\r
292 char[] buffer = new char[1024];
\r
295 if (matches == null || matches.length == 0) {
\r
296 errorDialog("Match Error", "No matches!");
\r
301 StringBuffer sb = new StringBuffer();
\r
302 String encoding = matches[0].getName();
\r
304 inputStream.reset();
\r
306 if (encoding.startsWith("UTF-32")) {
\r
307 byte[] bytes = new byte[1024];
\r
310 Charset utf32 = CharsetICU.forNameICU(encoding);
\r
312 while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) {
\r
313 offset = bytesRead % 4;
\r
314 chBytes = bytesRead - offset;
\r
316 sb.append(utf32.decode(ByteBuffer.wrap(bytes)).toString());
\r
319 for (int i = 0; i < offset; i += 1) {
\r
320 bytes[i] = bytes[chBytes + i];
\r
325 isr = new InputStreamReader(inputStream, encoding);
\r
327 while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
\r
328 sb.append(buffer, 0, bytesRead);
\r
334 this.setTitle(title + " - " + encodingName(matches[0]));
\r
336 setMatchMenu(matches);
\r
337 text.setText(sb.toString());
\r
338 } catch (IOException e) {
\r
339 errorDialog("IO Error", e.getMessage());
\r
340 } catch (Exception e) {
\r
341 errorDialog("Internal Error", e.getMessage());
\r
345 private void doNew()
\r
347 // open a new window...
\r
350 private void doOpenFile()
\r
352 int retVal = fileChooser.showOpenDialog(this);
\r
354 if (retVal == JFileChooser.APPROVE_OPTION) {
\r
355 File file = fileChooser.getSelectedFile();
\r
356 BufferedInputStream inputStream = openFile(file);
\r
358 if (inputStream != null) {
\r
359 CharsetMatch[] matches = detect(inputStream);
\r
361 show(inputStream, matches, file.getName());
\r
366 private void doOpenURL()
\r
368 String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE,
\r
371 if (url != null && url.length() > 0) {
\r
372 BufferedInputStream inputStream = openURL(url);
\r
374 if (inputStream != null) {
\r
375 byte[] filtered = filter(inputStream);
\r
376 CharsetMatch[] matches = detect(filtered);
\r
378 show(inputStream, matches, url);
\r
383 private void doQuit()
\r
385 DemoApplet.demoFrameClosed();
\r
386 this.setVisible(false);
\r
390 private JMenuBar makeMenus()
\r
392 JMenu menu = new JMenu("File");
\r
395 mi = new JMenuItem("Open File...");
\r
396 mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK)));
\r
397 mi.addActionListener(this);
\r
399 if(fileChooser == null) {
\r
400 mi.setEnabled(false); // no file chooser.
\r
403 mi = new JMenuItem("Open URL...");
\r
404 mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK)));
\r
405 mi.addActionListener(this);
\r
408 mi = new JMenuItem("Quit");
\r
409 mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK)));
\r
410 mi.addActionListener(this);
\r
413 JMenuBar mbar = new JMenuBar();
\r
416 menu = new JMenu("Detected Encodings");
\r