2 ***********************************************************************
3 * Copyright (C) 2005-2006, International Business Machines *
4 * Corporation and others. All Rights Reserved. *
5 ***********************************************************************
9 package com.ibm.icu.dev.tool.charsetdet.sbcs;
14 * TODO To change the template for this generated type comment go to
15 * Window - Preferences - Java - Code Style - Code Templates
// Implements NGramParser.NGramParserClient and hands itself to the NGramParser
// it creates in its constructor. check() prints a histogram of hit percentages
// for a data file.
17 public class Checker implements NGramParser.NGramParserClient
// N-gram list consulted by handleNGram() through ngrams.get(key).
19 private NGramList ngrams;
// Running counters, zeroed together by resetCounts(); check() reports
// totalHits / totalNGrams as a percentage.
20 private int totalNGrams;
21 private int totalHits;
// Set in the constructor from the data file: language is the file name up to
// its first '.', encoding is dataFile.getEncoding(). Both are printed by check().
23 private String language;
24 private String encoding;
// Allocated with 100 slots in the constructor; check() prints histogram[ph]
// for each ph from minHist to maxHist.
26 private int[] histogram;
// Length, in chars, of the read buffer below.
28 private static final int BUFFER_SIZE = 1024;
// Filled by nextBuffer() and read one char at a time by nextChar().
30 private char[] buffer;
// Created in the constructor with this object as its client.
34 private NGramParser parser;
37 * TODO This should take cumulative percent and the name...
// Builds a checker from an n-gram list and the data file it was built from.
// list     - the n-gram list; its assignment to the ngrams field is not visible
//            in this extract (presumably "ngrams = list" - TODO confirm).
// dataFile - passed to ngrams.setMapper(), and the source of the language name
//            and encoding stored on this object.
39 public Checker(NGramList list, InputFile dataFile)
42 ngrams.setMapper(dataFile);
// Language is the data file's name up to its first '.' (see languageName()).
44 language = languageName(dataFile.getFilename());
45 encoding = dataFile.getEncoding();
47 buffer = new char[BUFFER_SIZE];
// This object is the parser's client.
48 parser = new NGramParser(this);
// 100 slots, i.e. valid indices 0..99.
51 histogram = new int[100];
// Looks up the given n-gram key in the n-gram list.
// key - the n-gram text to look up.
// What happens with the lookup result is not visible in this extract
// (presumably totalNGrams / totalHits are updated here - TODO confirm).
55 public void handleNGram(String key)
57 NGramList.NGram ngram = ngrams.get(key);
// Reference counting on the matched n-gram is currently disabled.
63 //ngram.incrementRefCount();
// Resets both running counters (totalNGrams and totalHits) to zero.
67 private void resetCounts()
70 totalNGrams = totalHits = 0;
// Walks indices 0..99, matching the 100 slots the constructor allocates for
// histogram. The loop body is not visible in this extract (presumably it
// zeroes histogram[i] - TODO confirm).
73 private void resetHistogram()
75 for(int i = 0; i < 100; i += 1) {
// Reports an exception on System.err as "ioError: " followed by e.toString().
// e - the exception to report; the "ioError" prefix is printed for any
//     Exception type, not only I/O failures.
81 private static void exceptionError(Exception e)
83 System.err.println("ioError: " + e.toString());
// Derives a language name from a file name: the text before the first '.'.
// filename - the file name to split.
// Returns the prefix of filename up to, but not including, its first '.'.
// NOTE(review): if filename contains no '.', indexOf returns -1 and substring(0, -1)
// throws StringIndexOutOfBoundsException - confirm callers always pass a name
// with an extension.
86 private static String languageName(String filename)
88 return filename.substring(0, filename.indexOf('.'));
// Reads the next chunk of inputFile into buffer.
// inputFile - the file to read from.
// Returns a boolean that check() uses as its loop condition (presumably false
// at end of input or after a read error - TODO confirm; the return statements
// are not visible in this extract).
91 private boolean nextBuffer(InputFile inputFile)
// The value returned by read() is kept in bufMax, the bound nextChar() checks
// bufIndex against.
94 bufMax = inputFile.read(buffer);
// Any exception from the read is caught here; the handler body is not visible
// (presumably it calls exceptionError(e) - TODO confirm).
95 } catch (Exception e) {
// Takes no arguments and returns nothing; the body is not visible in this extract.
// NOTE(review): presumably resets the counters and runs the parser over the
// current buffer contents - confirm against the full file.
107 private void parseBuffer()
// Returns the next char from buffer and advances bufIndex by one.
114 public char nextChar()
// Taken once every char up to bufMax has been handed out; the branch body is
// not visible here (presumably it returns an end-of-input marker - TODO confirm).
116 if (bufIndex >= bufMax) {
120 return buffer[bufIndex++];
// Accessor returning a String; the body is not visible in this extract
// (presumably it returns the language field - TODO confirm).
123 public String getLanguage()
// Forwards the given file to the n-gram list's setMapper().
// file - the input file passed on to ngrams.setMapper().
128 public void setMapper(InputFile file)
130 ngrams.setMapper(file);
// Checks a caller-supplied buffer; the body is not visible in this extract.
// theBuffer - the chars to check.
// charCount - presumably the number of valid chars in theBuffer - TODO confirm.
// Returns an int whose meaning is not visible here (possibly the hit
// percentage - confirm against the full file).
133 public int checkBuffer(char[] theBuffer, int charCount)
// Runs this checker over dataFile and prints the result to System.out: a header
// line naming the stats and data languages/encodings, then one "ph<TAB>count"
// line per integer percentage from minHist to maxHist, then a blank line.
// dataFile - the file to read; its name and encoding appear in the header.
143 public void check(InputFile dataFile)
// Sentinels just outside the 0..100 percentage range. The code that narrows
// them to the observed values is not visible in this extract.
145 int minHist = 101, maxHist = -1;
149 String dataFilename = dataFile.getFilename();
150 String fileEncoding = dataFile.getEncoding();
// Header: this checker's language(encoding), then the data file's.
152 System.out.println(language + "(" + encoding + ") stats, " + languageName(dataFilename) + "(" + fileEncoding + ") data:");
// One pass per buffer read from the data file.
157 while (nextBuffer(dataFile)) {
// Cast first so the division is done in floating point, not integer arithmetic.
// NOTE(review): totalNGrams == 0 makes this NaN or Infinity - confirm it cannot be 0 here.
160 double percentHits = (double) totalHits / totalNGrams * 100.0;
// Truncates toward zero.
// NOTE(review): ph is 100 when every n-gram hits, but the constructor allocates
// only 100 histogram slots (0..99) - confirm the indexing code not shown here
// guards against that.
161 int ph = (int) percentHits;
// Prints only the observed range; if no buffer was read the sentinels leave
// minHist > maxHist and nothing is printed.
174 for(int ph = minHist; ph <= maxHist; ph += 1) {
175 System.out.println(ph + "\t" + histogram[ph]);
178 System.out.println();