前言:
目前大家对“apache验证码”都比较关注,我们有必要了解一些与“apache验证码”相关的内容。小编在网上搜集了一些关于“apache验证码”的资料,希望朋友们能喜欢,咱们快快来学习一下吧!
1.找开发人员去掉验证码或者使用万能验证码
2.使用OCR自动识别
使用OCR自动化识别,一般识别率不是太高,处理一般简单验证码还是没问题
这里使用的是Tesseract-OCR,下载地址:https://github.com/tesseract-ocr/tesseract
怎么使用呢?
进入安装后的目录:
tesseract.exe test.png test -1
准备一份网页,上面使用该验证码
<html><head><title>Table test by Young</title></head><body><br/><h1>Test</h1><img src="验证码图片地址"/><br/></body></html>
要识别验证码,首先得取得验证码,这里采用的是对页面元素进行局部截图的方式:首先获取整个页面的截图
然后找到页面元素坐标进行截取
/**
 * Takes a screenshot through the driver and crops it down to one element.
 *
 * <p>The element's location and size are used as the crop rectangle. The
 * rectangle is clipped to the bounds of the captured image, so an element
 * that is only partly inside the screenshot yields a partial crop instead
 * of an exception. The cropped image is written as PNG over the temporary
 * screenshot file and then copied to {@code path}.
 *
 * <p>I/O failures are reported on standard error and otherwise ignored, so
 * the destination file may be missing after this method returns.
 *
 * @param driver  driver used for the capture; it is cast to
 *                {@link TakesScreenshot}
 * @param element element whose rectangle is cut out of the screenshot
 * @param path    destination file for the cropped PNG
 * @throws InterruptedException never thrown any more; kept in the
 *         signature so existing callers keep compiling
 * @throws java.awt.image.RasterFormatException if the element rectangle
 *         does not overlap the captured screenshot at all
 */
public static void screenShotForElement(WebDriver driver, WebElement element,
        String path) throws InterruptedException {
    File scrFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
    try {
        BufferedImage img = ImageIO.read(scrFile);
        if (img == null) {
            // ImageIO.read returns null when no registered reader can decode the file.
            throw new IOException("Screenshot could not be decoded: " + scrFile);
        }

        Point p = element.getLocation();
        Rectangle wanted = new Rectangle(p.getX(), p.getY(),
                element.getSize().getWidth(), element.getSize().getHeight());
        // Clip to the image so getSubimage is never asked for pixels outside it.
        Rectangle crop = wanted.intersection(
                new Rectangle(img.getWidth(), img.getHeight()));
        if (crop.isEmpty()) {
            throw new java.awt.image.RasterFormatException(
                    "Element rectangle " + wanted + " does not overlap the "
                            + img.getWidth() + "x" + img.getHeight() + " screenshot");
        }

        BufferedImage dest = img.getSubimage(crop.x, crop.y, crop.width, crop.height);
        // ImageIO.write has finished writing when it returns, so no pause is
        // needed before the file is copied.
        ImageIO.write(dest, "png", scrFile);
        FileUtils.copyFile(scrFile, new File(path));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
截取完元素,就可以调用Tesseract-OCR生成text
// use Tesseract to get strings Runtime rt = Runtime.getRuntime(); rt.exec("cmd.exe /C tesseract.exe D:\\Tesseract-OCR\\test.png D:\\Tesseract-OCR\\test -1 ");
接下来通过java读取txt
/** * This method for read TXT file * * @param filePath */ public static void readTextFile(String filePath) { try { String encoding = "GBK"; File file = new File(filePath); if (file.isFile() && file.exists()) { // 判断文件是否存在 InputStreamReader read = new InputStreamReader( new FileInputStream(file), encoding);// 考虑到编码格式 BufferedReader bufferedReader = new BufferedReader(read); String lineTxt = null; while ((lineTxt = bufferedReader.readLine()) != null) { System.out.println(lineTxt); } read.close(); } else { System.out.println("找不到指定的文件"); } } catch (Exception e) { System.out.println("读取文件内容出错"); e.printStackTrace(); } }
整体代码如下:
package com.dbyl.tests; import java.awt.Rectangle;import java.awt.image.BufferedImage;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStreamReader;import java.io.Reader;import java.util.concurrent.TimeUnit;import javax.imageio.ImageIO;import org.apache.commons.io.FileUtils;import org.openqa.selenium.By;import org.openqa.selenium.OutputType;import org.openqa.selenium.Point;import org.openqa.selenium.TakesScreenshot;import org.openqa.selenium.WebDriver;import org.openqa.selenium.WebElement;import com.dbyl.libarary.utils.DriverFactory;public class TesseractTest { public static void main(String[] args) throws IOException, InterruptedException { WebDriver driver = DriverFactory.getChromeDriver(); driver.get(";); driver.manage().timeouts().pageLoadTimeout(30, TimeUnit.SECONDS); WebElement element = driver.findElement(By.xpath("//img")); // take screen shot for element screenShotForElement(driver, element, "D:\\Tesseract-OCR\\test.png"); driver.quit(); // use Tesseract to get strings Runtime rt = Runtime.getRuntime(); rt.exec("cmd.exe /C tesseract.exe D:\\Tesseract-OCR\\test.png D:\\Tesseract-OCR\\test -1 "); Thread.sleep(1000); // Read text readTextFile("D:\\Tesseract-OCR\\test.txt"); } /** * This method for read TXT file * * @param filePath */ public static void readTextFile(String filePath) { try { String encoding = "GBK"; File file = new File(filePath); if (file.isFile() && file.exists()) { // 判断文件是否存在 InputStreamReader read = new InputStreamReader( new FileInputStream(file), encoding);// 考虑到编码格式 BufferedReader bufferedReader = new BufferedReader(read); String lineTxt = null; while ((lineTxt = bufferedReader.readLine()) != null) { System.out.println(lineTxt); } read.close(); } else { System.out.println("找不到指定的文件"); } } catch (Exception e) { System.out.println("读取文件内容出错"); e.printStackTrace(); } } /** * This method for screen shot element * * @param driver * @param element * @param path * 
@throws InterruptedException */ public static void screenShotForElement(WebDriver driver, WebElement element, String path) throws InterruptedException { File scrFile = ((TakesScreenshot) driver) .getScreenshotAs(OutputType.FILE); try { Point p = element.getLocation(); int width = element.getSize().getWidth(); int height = element.getSize().getHeight(); Rectangle rect = new Rectangle(width, height); BufferedImage img = ImageIO.read(scrFile); BufferedImage dest = img.getSubimage(p.getX(), p.getY(), rect.width, rect.height); ImageIO.write(dest, "png", scrFile); Thread.sleep(1000); FileUtils.copyFile(scrFile, new File(path)); } catch (IOException e) { e.printStackTrace(); } } }
版权声明:
本站文章均来自互联网搜集,如有侵犯您的权益,请联系我们删除,谢谢。
标签: #apache验证码