1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 **************************************************************************
5 * Copyright (C) 2005-2010, International Business Machines Corporation   *
6 * and others. All Rights Reserved.                                       *
7 **************************************************************************
8 *
9 */
10
11package com.ibm.icu.dev.demo.charsetdet;
12
13import java.awt.Font;
14import java.awt.event.ActionEvent;
15import java.awt.event.ActionListener;
16import java.awt.event.KeyEvent;
17import java.awt.event.WindowAdapter;
18import java.awt.event.WindowEvent;
19import java.io.BufferedInputStream;
20import java.io.File;
21import java.io.FileInputStream;
22import java.io.IOException;
23import java.io.InputStream;
24import java.io.InputStreamReader;
25import java.net.URL;
26import java.nio.ByteBuffer;
27import java.nio.charset.Charset;
28import java.security.AccessControlException;
29
30import javax.swing.JFileChooser;
31import javax.swing.JFrame;
32import javax.swing.JMenu;
33import javax.swing.JMenuBar;
34import javax.swing.JMenuItem;
35import javax.swing.JOptionPane;
36import javax.swing.JScrollPane;
37import javax.swing.JTextPane;
38import javax.swing.KeyStroke;
39
40import com.ibm.icu.charset.CharsetICU;
41import com.ibm.icu.dev.demo.impl.DemoApplet;
42import com.ibm.icu.text.CharsetDetector;
43import com.ibm.icu.text.CharsetMatch;
44
45/**
46 * This simple application demonstrates how to use the CharsetDetector API. It
47 * opens a file or web page, detects the encoding, and then displays it using that
48 * encoding.
49 */
50public class DetectingViewer extends JFrame implements ActionListener
51{
52
53    /**
54     * For serialization
55     */
56    private static final long serialVersionUID = -2307065724464747775L;
57    private JTextPane text;
58    private JFileChooser fileChooser;
59
60    /**
61     * @throws java.awt.HeadlessException
62     */
63    public DetectingViewer()
64    {
65        super();
66        DemoApplet.demoFrameOpened();
67
68        try {
69            fileChooser = new JFileChooser();
70        } catch (AccessControlException ace) {
71            System.err.println("no file chooser - access control exception. Continuing without file browsing. "+ace.toString());
72            fileChooser = null; //
73        }
74
75//        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
76        setSize(800, 800);
77
78        setJMenuBar(makeMenus());
79        text = new JTextPane();
80        text.setContentType("text/plain");
81        text.setText("");
82        text.setSize(800, 800);
83
84        Font font = new Font("Arial Unicode MS", Font.PLAIN, 24);
85        text.setFont(font);
86
87        JScrollPane scrollPane = new JScrollPane(text);
88
89        getContentPane().add(scrollPane);
90        setVisible(true);
91
92        addWindowListener(
93                new WindowAdapter() {
94                    public void windowClosing(WindowEvent e) {
95//                        setVisible(false);
96//                        dispose();
97
98                          doQuit();
99                    }
100                } );
101
102
103    }
104
105    public void actionPerformed(ActionEvent event)
106    {
107        String cmd = event.getActionCommand();
108
109        if (cmd.equals("New...")) {
110           doNew();
111        } else if (cmd.equals("Open File...")) {
112           doOpenFile();
113        } else if (cmd.equals("Open URL...")) {
114            doOpenURL();
115        } else if (cmd.equals("Quit")) {
116           doQuit();
117        }
118    }
119
120    public static void main(String[] args)
121    {
122        new DetectingViewer();
123    }
124
125    private void errorDialog(String title, String msg)
126    {
127        JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE);
128    }
129
130    private BufferedInputStream openFile(File file)
131    {
132        FileInputStream fileStream = null;
133
134        try {
135            fileStream = new FileInputStream(file);
136        } catch (Exception e) {
137            errorDialog("Error Opening File", e.getMessage());
138            return null;
139        }
140
141        return new BufferedInputStream(fileStream);
142    }
143
144//    private void openFile(String directory, String filename)
145//    {
146//        openFile(new File(directory, filename));
147//    }
148
149
150    private BufferedInputStream openURL(String url)
151    {
152        InputStream s = null;
153
154        try {
155            URL aURL = new URL(url);
156            s = aURL.openStream();
157        } catch (Exception e) {
158            errorDialog("Error Opening URL", e.getMessage());
159            return null;
160        }
161
162        return new BufferedInputStream(s);
163    }
164
165    private String encodingName(CharsetMatch match)
166    {
167        return match.getName() + " (" + match.getLanguage() + ")";
168    }
169
170    private void setMatchMenu(CharsetMatch[] matches)
171    {
172        JMenu menu = getJMenuBar().getMenu(1);
173        JMenuItem menuItem;
174
175        menu.removeAll();
176
177        for (int i = 0; i < matches.length; i += 1) {
178            CharsetMatch match = matches[i];
179
180            menuItem = new JMenuItem(encodingName(match) + " " + match.getConfidence());
181
182            menu.add(menuItem);
183        }
184    }
185
186    private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'};
187    private byte[] styleTag  = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'};
188    private static int BUFFER_SIZE = 100000;
189
190    private boolean openTag(byte[] buffer, int offset, int length, byte[] tag)
191    {
192        int tagLen = tag.length;
193        int bufRem = length - offset;
194        int b;
195
196        for (b = 0; b < tagLen && b < bufRem; b += 1) {
197            if (buffer[b + offset] != tag[b]) {
198                return false;
199            }
200        }
201
202        return b == tagLen;
203    }
204
205    private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag)
206    {
207        if (buffer[offset] != (byte) '/') {
208            return false;
209        }
210
211        return openTag(buffer, offset + 1, length, tag);
212    }
213
214    private byte[] filter(InputStream in)
215    {
216        byte[] buffer = new byte[BUFFER_SIZE];
217        int bytesRemaining = BUFFER_SIZE;
218        int bufLen = 0;
219
220        in.mark(BUFFER_SIZE);
221
222        try {
223            while (bytesRemaining > 0) {
224                int bytesRead = in.read(buffer, bufLen, bytesRemaining);
225
226                if (bytesRead <= 0) {
227                    break;
228                }
229
230                bufLen += bytesRead;
231                bytesRemaining -= bytesRead;
232            }
233        } catch (Exception e) {
234            // TODO: error handling?
235            return null;
236        }
237
238        boolean inTag = false;
239        boolean skip  = false;
240        int out = 0;
241
242        for (int i = 0; i < bufLen; i += 1) {
243            byte b = buffer[i];
244
245            if (b == (byte) '<') {
246                inTag = true;
247
248                if (openTag(buffer, i + 1, bufLen, scriptTag) ||
249                    openTag(buffer, i + 1, bufLen, styleTag)) {
250                    skip = true;
251                } else if (closedTag(buffer, i + 1, bufLen, scriptTag) ||
252                           closedTag(buffer, i + 1, bufLen, styleTag)) {
253                    skip = false;
254                }
255            } else if (b == (byte) '>') {
256                inTag = false;
257            } else if (! (inTag || skip)) {
258                buffer[out++] = b;
259            }
260        }
261
262        byte[] filtered = new byte[out];
263
264        System.arraycopy(buffer, 0, filtered, 0, out);
265        return filtered;
266    }
267
268    private CharsetMatch[] detect(byte[] bytes)
269    {
270        CharsetDetector det = new CharsetDetector();
271
272        det.setText(bytes);
273
274        return det.detectAll();
275    }
276
277    private CharsetMatch[] detect(BufferedInputStream inputStream)
278    {
279        CharsetDetector det    = new CharsetDetector();
280
281        try {
282            det.setText(inputStream);
283
284            return det.detectAll();
285        } catch (Exception e) {
286            // TODO: error message?
287            return null;
288        }
289    }
290
291    private void show(InputStream inputStream, CharsetMatch[] matches, String title)
292    {
293        InputStreamReader isr;
294        char[] buffer = new char[1024];
295        int bytesRead = 0;
296
297        if (matches == null || matches.length == 0) {
298            errorDialog("Match Error", "No matches!");
299            return;
300        }
301
302        try {
303            StringBuffer sb = new StringBuffer();
304            String encoding = matches[0].getName();
305
306            inputStream.reset();
307
308            if (encoding.startsWith("UTF-32")) {
309                byte[] bytes = new byte[1024];
310                int offset = 0;
311                int chBytes = 0;
312                Charset utf32 = CharsetICU.forNameICU(encoding);
313
314                while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) {
315                    offset  = bytesRead % 4;
316                    chBytes = bytesRead - offset;
317
318                    sb.append(utf32.decode(ByteBuffer.wrap(bytes)).toString());
319
320                    if (offset != 0) {
321                        for (int i = 0; i < offset; i += 1) {
322                            bytes[i] = bytes[chBytes + i];
323                        }
324                    }
325                }
326            } else {
327                isr = new InputStreamReader(inputStream, encoding);
328
329                while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
330                    sb.append(buffer, 0, bytesRead);
331                }
332
333                isr.close();
334            }
335
336            this.setTitle(title + " - " + encodingName(matches[0]));
337
338            setMatchMenu(matches);
339            text.setText(sb.toString());
340        } catch (IOException e) {
341            errorDialog("IO Error", e.getMessage());
342        } catch (Exception e) {
343            errorDialog("Internal Error", e.getMessage());
344        }
345    }
346
347    private void doNew()
348    {
349        // open a new window...
350    }
351
352    private void doOpenFile()
353    {
354        int retVal = fileChooser.showOpenDialog(this);
355
356        if (retVal == JFileChooser.APPROVE_OPTION) {
357            File file = fileChooser.getSelectedFile();
358            BufferedInputStream inputStream = openFile(file);
359
360            if (inputStream != null) {
361                CharsetMatch[] matches = detect(inputStream);
362
363                show(inputStream, matches, file.getName());
364            }
365        }
366    }
367
368    private void doOpenURL()
369    {
370        String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE,
371                null, null, null);
372
373        if (url != null && url.length() > 0) {
374            BufferedInputStream inputStream = openURL(url);
375
376            if (inputStream != null) {
377                byte[] filtered = filter(inputStream);
378                CharsetMatch[] matches = detect(filtered);
379
380                show(inputStream, matches, url);
381            }
382        }
383}
384
385    private void doQuit()
386    {
387        DemoApplet.demoFrameClosed();
388        this.setVisible(false);
389        this.dispose();
390    }
391
392    private JMenuBar makeMenus()
393    {
394        JMenu menu = new JMenu("File");
395        JMenuItem mi;
396
397        mi = new JMenuItem("Open File...");
398        mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK)));
399        mi.addActionListener(this);
400        menu.add(mi);
401        if(fileChooser == null) {
402            mi.setEnabled(false); // no file chooser.
403        }
404
405        mi = new JMenuItem("Open URL...");
406        mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK)));
407        mi.addActionListener(this);
408        menu.add(mi);
409
410        mi = new JMenuItem("Quit");
411        mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK)));
412        mi.addActionListener(this);
413        menu.add(mi);
414
415        JMenuBar mbar = new JMenuBar();
416        mbar.add(menu);
417
418        menu = new JMenu("Detected Encodings");
419        mbar.add(menu);
420
421        return mbar;
422    }
423}
424