Android 开发 OCR Tesseract4Android图片文字识别巨详细全部代码教程-编程知识

下面是整个详解步骤过程

效果图
一、OCR的含义
二、前提准备
三、使用步骤详情
- 1、将 JitPack 存储库添加到存储库末尾的项目根文件
- 2、将依赖项添加到应用模块文件
- 3、数据文件下载路径
- 4、详情代码案例Demo如下：
  - Main.xml
  - Main.java

效果图

流程：获取assets中的图片显示到页面，点击提取图片内的文字

一、OCR的含义

OCR 是 Optical Character Recognition（光学字符识别）的缩写，是指电子设备（例如扫描仪或数码相机）检查纸上打印的字符，通过检测暗、亮的模式确定其形状，然后用字符识别方法将形状翻译成计算机文字的过程。

二、前提准备

官网奉上

注意：
1、 Android 4.1 （API 16）或更高版本
2、JDK 版本需为 Java 17（Java 17 获取）

下载后把项目的 JDK 换成 Java 17，如下图：
在这里插入图片描述

三、使用步骤详情

1、将 JitPack 存储库添加到存储库末尾的项目根文件

// Project-level build file: append the JitPack repository AFTER the
// repositories that are already declared there.
allprojects {
    repositories {
        // ... keep your existing repositories here ...
        maven { url 'https://jitpack.io' }
    }
}

请根据自己 Android Studio 的版本在对应位置添加；我使用的是新版 Android Studio，添加位置如下图所示

2、将依赖项添加到应用模块文件

// App-module build file: add the Tesseract4Android dependency.
dependencies {
    // To use Standard variant:
    implementation 'cz.adaptech.tesseract4android:tesseract4android:4.7.0'
}

这里我们使用的是单线程依赖

在这里插入图片描述

3、数据文件下载路径

数据文件下载路径，或者直接下载中文数据包（chi_sim.traineddata）
数据包下载下来放到assets文件夹下，cs.png是随便找的一张图片，测试用的

4、详情代码案例Demo如下：

Main.xml（布局文件，对应代码中的 R.layout.activity_main，即 activity_main.xml）

<?xml version="1.0" encoding="utf-8"?>
<!-- Vertical layout: an "extract" button, the image to recognize, and the OCR result text. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical"
    tools:context=".MainActivity">

    <!-- Starts text recognition on the displayed image. -->
    <Button
        android:id="@+id/btn_tiqu"
        android:text="提取"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content" />

    <!-- Shows the image to recognize.
         NOTE(review): ImageView does not render android:text; the attribute is kept
         from the original layout but has no visible effect. -->
    <ImageView
        android:id="@+id/image"
        android:text="拍照"
        android:layout_width="match_parent"
        android:layout_height="400dp" />

    <!-- Receives the recognized text. -->
    <TextView
        android:id="@+id/tv_result"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content" />

</LinearLayout>

Main.java（即 MainActivity.java）

package com.example.tesseract4application;

import androidx.appcompat.app.AppCompatActivity;

import android.annotation.SuppressLint;
import android.content.Context;
import android.content.Intent;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.os.Bundle;
import android.provider.MediaStore;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.ImageView;
import android.widget.TextView;
import android.widget.Toast;

import com.googlecode.tesseract.android.TessBaseAPI;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Demo activity: shows {@code cs.png} from the app's assets and, when the button is
 * tapped, runs Tesseract OCR on it and displays the recognized text.
 *
 * <p>The trained-data file is shipped in assets and copied once into
 * {@code <external files dir>/tessdata}, because Tesseract needs a directory it can
 * read directly from the file system.
 */
public class MainActivity extends AppCompatActivity {

    private static final String TAG = "TAG";

    private static final int REQUEST_IMAGE_CAPTURE = 1;

    /** Sub-directory (below the app's external files dir) holding the trained data. */
    public static final String TESS_DATA = "/tessdata";

    /** Tesseract language code; use "eng" (with eng.traineddata in assets) for English. */
    private static final String LANGUAGE = "chi_sim";

    /** Asset name of the trained-data file; always kept in sync with {@link #LANGUAGE}. */
    private static final String DATA_FILENAME = LANGUAGE + ".traineddata";

    private TextView tv_result;
    private Button btn_tiqu;
    private ImageView image;

    @SuppressLint("MissingInflatedId")
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        tv_result = findViewById(R.id.tv_result);
        btn_tiqu = findViewById(R.id.btn_tiqu);
        image = findViewById(R.id.image);

        // May be null when the asset is missing or undecodable; both calls below cope with that.
        final Bitmap bitmapFromAssets = getBitmapFromAssets(MainActivity.this, "cs.png");
        image.setImageBitmap(bitmapFromAssets);

        btn_tiqu.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View view) {
                recognizeTextFromBitmap(bitmapFromAssets);
            }
        });
    }

    /**
     * Reads a bitmap from the app's assets.
     *
     * @param context  context whose {@link AssetManager} is used
     * @param filename asset file name, e.g. {@code "cs.png"}
     * @return the decoded bitmap, or {@code null} if the asset could not be opened or decoded
     */
    private Bitmap getBitmapFromAssets(Context context, String filename) {
        AssetManager assetManager = context.getAssets();
        // try-with-resources closes the stream even when decoding throws.
        try (InputStream is = assetManager.open(filename)) {
            Bitmap bitmap = BitmapFactory.decodeStream(is);
            Log.i(TAG, "图片读取成功。");
            return bitmap;
        } catch (IOException e) {
            Log.e(TAG, "图片读取失败。", e);
            return null;
        }
    }

    /**
     * Launches a camera app to capture a picture; the result arrives in
     * {@link #onActivityResult(int, int, Intent)}. Nothing in this class calls it yet.
     */
    private void dispatchTakePictureIntent() {
        Intent takePictureIntent = new Intent(MediaStore.ACTION_IMAGE_CAPTURE);
        if (takePictureIntent.resolveActivity(getPackageManager()) != null) {
            startActivityForResult(takePictureIntent, REQUEST_IMAGE_CAPTURE);
        }
    }

    /**
     * Receives the camera thumbnail (extra {@code "data"}), shows it and runs OCR on it.
     */
    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
        super.onActivityResult(requestCode, resultCode, data);
        if (requestCode != REQUEST_IMAGE_CAPTURE || resultCode != RESULT_OK) {
            return;
        }
        // The result intent and its extras may both be absent; guard instead of crashing.
        Bundle extras = (data == null) ? null : data.getExtras();
        Object thumbnail = (extras == null) ? null : extras.get("data");
        if (!(thumbnail instanceof Bitmap)) {
            Log.w(TAG, "Camera result contained no bitmap.");
            return;
        }
        Bitmap imageBitmap = (Bitmap) thumbnail;
        image.setImageBitmap(imageBitmap);
        recognizeTextFromBitmap(imageBitmap);
    }

    /**
     * Runs OCR on {@code image} and writes the recognized text into the result view.
     * Does nothing (apart from logging) when the bitmap is {@code null}, the trained data
     * cannot be prepared, or Tesseract fails to initialize.
     *
     * <p>NOTE(review): recognition runs synchronously on the calling thread; consider
     * moving it off the UI thread for large images.
     *
     * @param image bitmap to recognize; may be {@code null}
     */
    private void recognizeTextFromBitmap(Bitmap image) {
        if (image == null) {
            Log.w(TAG, "No bitmap to recognize.");
            return;
        }

        // The path given to init() must be the directory that CONTAINS "tessdata",
        // and it must be directly readable by the app.
        File filesRoot = getExternalFilesDir(null);
        if (filesRoot == null) {
            Log.e(TAG, "External files directory is unavailable.");
            return;
        }
        if (!prepareTess()) {
            return;
        }
        String dataPath = filesRoot.getPath() + "/";

        // Creating the Java object also creates the native Tesseract instance.
        TessBaseAPI tess = new TessBaseAPI();
        try {
            // The language argument may list several languages, e.g. "eng+deu+fra".
            if (!tess.init(dataPath, LANGUAGE)) {
                Log.e(TAG, "Tesseract init failed for data path " + dataPath);
                return;
            }
            tess.setImage(image);
            String text = tess.getUTF8Text();
            tv_result.setText(text);
        } finally {
            // Always release the native instance; no method may be called on it afterwards.
            tess.recycle();
        }
    }

    /**
     * Makes sure the trained-data file exists in the {@code tessdata} directory, copying
     * it from assets on first use.
     *
     * @return {@code true} if the data file is in place, {@code false} otherwise
     */
    private boolean prepareTess() {
        File dir = getExternalFilesDir(TESS_DATA);
        if (dir == null) {
            Log.e(TAG, "External files directory is unavailable.");
            return false;
        }
        if (!dir.exists() && !dir.mkdirs()) {
            Toast.makeText(getApplicationContext(),
                    "目录" + dir.getPath() + "没有创建成功", Toast.LENGTH_SHORT).show();
            return false;
        }

        File dataFile = new File(dir, DATA_FILENAME);
        if (dataFile.exists()) {
            return true;
        }

        // Copy to a temporary file first so that an interrupted copy never leaves a
        // truncated trained-data file that later runs would mistake for a valid one.
        File tmpFile = new File(dir, DATA_FILENAME + ".tmp");
        try (InputStream in = getAssets().open(DATA_FILENAME);
             OutputStream out = new FileOutputStream(tmpFile)) {
            byte[] buff = new byte[8192];
            int len;
            while ((len = in.read(buff)) != -1) {
                out.write(buff, 0, len);
            }
        } catch (IOException e) {
            Log.e(TAG, "Copying " + DATA_FILENAME + " from assets failed.", e);
            discard(tmpFile);
            return false;
        }

        if (!tmpFile.renameTo(dataFile)) {
            Log.e(TAG, "Could not move " + tmpFile.getPath() + " to " + dataFile.getPath());
            discard(tmpFile);
            return false;
        }
        return true;
    }

    /** Best-effort removal of a leftover temporary file. */
    private static void discard(File file) {
        if (file.exists() && !file.delete()) {
            Log.w(TAG, "Could not delete " + file.getPath());
        }
    }
}