한글 데이터는 아래에서 받을 수 있습니다.
https://github.com/tesseract-ocr/tessdata/blob/master/kor.traineddata
기본 sample은 eng만 사용할 수 있었기 때문에, 여러 언어를 사용하는 방법에 대해서 살펴보았습니다.
여러 언어를 사용하는 방법
http://stackoverflow.com/questions/16508796/how-can-i-use-multiple-language-support-on-android-with-tesseract
언어 코드를 더하기(+) 기호로 연결해 주면 되네요. ^^;
baseApi.init(dataPath, "eng+kor");
수정된 소스입니다.
patch 형태
diff -r D:\work\android\ocr\tesseract2\SimpleAndroidOCRActivity_bak.java D:\work\android\ocr\tesseract2\Simple-Android-OCR-master\simpleAndroidOCR\src\main\java\com\datumdroid\android\ocr\simple\SimpleAndroidOCRActivity.java
37c37
< public static final String lang = "eng";
---
> public static final String lang = "eng+kor";
68c68,82
<
---
>
> String lang_one[] = lang.split("\\+");
> for(String one : lang_one) {
> dataCopy(one);
> }
>
> // _image = (ImageView) findViewById(R.id.image);
> _field = (EditText) findViewById(R.id.field);
> _button = (Button) findViewById(R.id.button);
> _button.setOnClickListener(new ButtonClickHandler());
>
> _path = DATA_PATH + "/ocr.jpg";
> }
>
> private void dataCopy(String one) {
73c87,88
< if (!(new File(DATA_PATH + "tessdata/" + lang + ".traineddata")).exists()) {
---
>
> if (!(new File(DATA_PATH + "tessdata/" + one + ".traineddata")).exists()) {
77c92
< InputStream in = assetManager.open("tessdata/" + lang + ".traineddata");
---
> InputStream in = assetManager.open("tessdata/" + one + ".traineddata");
80c95
< + "tessdata/" + lang + ".traineddata");
---
> + "tessdata/" + one + ".traineddata");
92,93c107,108
<
< Log.v(TAG, "Copied " + lang + " traineddata");
---
>
> Log.v(TAG, "Copied " + one + " traineddata");
95c110
< Log.e(TAG, "Was unable to copy " + lang + " traineddata " + e.toString());
---
> Log.e(TAG, "Was unable to copy " + one + " traineddata " + e.toString());
98,106d112
<
<
<
< // _image = (ImageView) findViewById(R.id.image);
< _field = (EditText) findViewById(R.id.field);
< _button = (Button) findViewById(R.id.button);
< _button.setOnClickListener(new ButtonClickHandler());
<
< _path = DATA_PATH + "/ocr.jpg";
226,228c232,234
< if ( lang.equalsIgnoreCase("eng") ) {
< recognizedText = recognizedText.replaceAll("[^a-zA-Z0-9]+", " ");
< }
---
> //if ( lang.equalsIgnoreCase("eng") ) {
> // recognizedText = recognizedText.replaceAll("[^a-zA-Z0-9]+", " ");
> //}
전체 소스입니다.
package com.datumdroid.android.ocr.simple; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.zip.GZIPInputStream; import android.app.Activity; import android.content.Intent; import android.content.res.AssetManager; import android.graphics.Bitmap; import android.graphics.BitmapFactory; import android.graphics.Matrix; import android.media.ExifInterface; import android.net.Uri; import android.os.Bundle; import android.os.Environment; import android.provider.MediaStore; import android.util.Log; import android.view.View; import android.widget.Button; import android.widget.EditText; import com.googlecode.tesseract.android.TessBaseAPI; public class SimpleAndroidOCRActivity extends Activity { public static final String PACKAGE_NAME = "com.datumdroid.android.ocr.simple"; public static final String DATA_PATH = Environment .getExternalStorageDirectory().toString() + "/SimpleAndroidOCR/"; // You should have the trained data file in assets folder
// You can get them at:
// http://code.google.com/p/tesseract-ocr/downloads/list
public static final String lang = "eng+kor"; private static final String TAG = "SimpleAndroidOCR.java"; protected Button _button; // protected ImageView _image; protected EditText _field; protected String _path; protected boolean _taken; protected static final String PHOTO_TAKEN = "photo_taken"; @Override public void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.main); String[] paths = new String[] { DATA_PATH, DATA_PATH + "tessdata/" }; for (String path : paths) { File dir = new File(path); if (!dir.exists()) { if (!dir.mkdirs()) { Log.v(TAG, "ERROR: Creation of directory " + path + " on sdcard failed"); return; } else { Log.v(TAG, "Created directory " + path + " on sdcard"); } } } String lang_one[] = lang.split("\\+"); for(String one : lang_one) { dataCopy(one); } // _image = (ImageView) findViewById(R.id.image);
_field = (EditText) findViewById(R.id.field); _button = (Button) findViewById(R.id.button); _button.setOnClickListener(new ButtonClickHandler()); _path = DATA_PATH + "/ocr.jpg"; } private void dataCopy(String one) { // lang.traineddata file with the app (in assets folder)
// You can get them at:
// http://code.google.com/p/tesseract-ocr/downloads/list
// This area needs work and optimization if (!(new File(DATA_PATH + "tessdata/" + one + ".traineddata")).exists()) { try { AssetManager assetManager = getAssets(); InputStream in = assetManager.open("tessdata/" + one + ".traineddata"); //GZIPInputStream gin = new GZIPInputStream(in);
OutputStream out = new FileOutputStream(DATA_PATH
+ "tessdata/" + one + ".traineddata"); // Transfer bytes from in to out
byte[] buf = new byte[1024]; int len; //while ((lenf = gin.read(buff)) > 0) {
while ((len = in.read(buf)) > 0) { out.write(buf, 0, len); } in.close(); //gin.close();
out.close(); Log.v(TAG, "Copied " + one + " traineddata"); } catch (IOException e) { Log.e(TAG, "Was unable to copy " + one + " traineddata " + e.toString()); } } } public class ButtonClickHandler implements View.OnClickListener { public void onClick(View view) { Log.v(TAG, "Starting Camera app"); startCameraActivity(); } } // Simple android photo capture:
// http://labs.makemachine.net/2010/03/simple-android-photo-capture/ protected void startCameraActivity() { File file = new File(_path); Uri outputFileUri = Uri.fromFile(file); final Intent intent = new Intent(MediaStore.ACTION_IMAGE_CAPTURE); intent.putExtra(MediaStore.EXTRA_OUTPUT, outputFileUri); startActivityForResult(intent, 0); } @Override protected void onActivityResult(int requestCode, int resultCode, Intent data) { Log.i(TAG, "resultCode: " + resultCode); if (resultCode == -1) { onPhotoTaken(); } else { Log.v(TAG, "User cancelled"); } } @Override protected void onSaveInstanceState(Bundle outState) { outState.putBoolean(SimpleAndroidOCRActivity.PHOTO_TAKEN, _taken); } @Override protected void onRestoreInstanceState(Bundle savedInstanceState) { Log.i(TAG, "onRestoreInstanceState()"); if (savedInstanceState.getBoolean(SimpleAndroidOCRActivity.PHOTO_TAKEN)) { onPhotoTaken(); } } protected void onPhotoTaken() { _taken = true; BitmapFactory.Options options = new BitmapFactory.Options(); options.inSampleSize = 4; Bitmap bitmap = BitmapFactory.decodeFile(_path, options); try { ExifInterface exif = new ExifInterface(_path); int exifOrientation = exif.getAttributeInt( ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL); Log.v(TAG, "Orient: " + exifOrientation); int rotate = 0; switch (exifOrientation) { case ExifInterface.ORIENTATION_ROTATE_90: rotate = 90; break; case ExifInterface.ORIENTATION_ROTATE_180: rotate = 180; break; case ExifInterface.ORIENTATION_ROTATE_270: rotate = 270; break; } Log.v(TAG, "Rotation: " + rotate); if (rotate != 0) { // Getting width & height of the given image.
int w = bitmap.getWidth(); int h = bitmap.getHeight(); // Setting pre rotate
Matrix mtx = new Matrix(); mtx.preRotate(rotate); // Rotating Bitmap
bitmap = Bitmap.createBitmap(bitmap, 0, 0, w, h, mtx, false);
}
// Convert to ARGB_8888, required by tess
bitmap = bitmap.copy(Bitmap.Config.ARGB_8888, true); } catch (IOException e) { Log.e(TAG, "Couldn't correct orientation: " + e.toString()); } // _image.setImageBitmap( bitmap ); Log.v(TAG, "Before baseApi"); TessBaseAPI baseApi = new TessBaseAPI(); baseApi.setDebug(true); baseApi.init(DATA_PATH, lang); baseApi.setImage(bitmap); String recognizedText = baseApi.getUTF8Text(); baseApi.end(); // You now have the text in recognizedText var, you can do anything with it.
// We will display a stripped out trimmed alpha-numeric version of it (if lang is eng)
// so that garbage doesn't make it to the display. Log.v(TAG, "OCRED TEXT: " + recognizedText); //if ( lang.equalsIgnoreCase("eng") ) {
// recognizedText = recognizedText.replaceAll("[^a-zA-Z0-9]+", " ");
//} recognizedText = recognizedText.trim(); if ( recognizedText.length() != 0 ) { _field.setText(_field.getText().toString().length() == 0 ? recognizedText : _field.getText() + " " + recognizedText); _field.setSelection(_field.getText().toString().length()); } // Cycle done.
} // www.Gaut.am was here
// Thanks for reading!}
실행 화면
안드로이드를 공부하고 있는 초보자입니다.
답글 삭제
xml 파일도 올려주실 수 있나요?
따라 하다 보니 실행은 되는데, 사진을 찍고 난 후 나오는 두 번째 화면의
'저장'과 '저장 안 함' 버튼에서 에러가 나서 앱이 종료됩니다.
도움 부탁드려요!!
어떤 xml 을 의미하는지 모르겠습니다.
삭제
기본적으로 제가 작성한 OCR 문서들은
https://github.com/GautamGupta/Simple-Android-OCR 여기서 받은 소스에서 위 소스 한 곳만 수정한 것입니다. xml이라면 위 링크를 참조하시면 될 겁니다.