package cube.file;

import cell.util.Utils;
import cube.file.OCRFile;
import cube.vision.BoundingBox;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.json.JSONObject;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:cube/file/TesseractHocrFile.class */
/**
 * OCR result read from an hOCR document.
 *
 * <p>The document is walked as a fixed hierarchy identified by the {@code class}
 * attribute of each element: {@code ocr_page} &rarr; {@code ocr_carea} &rarr;
 * {@code ocr_par} &rarr; {@code ocr_line} &rarr; {@code ocrx_word}. The bounding box of
 * every level is taken from the {@code bbox} property of the element's {@code title}
 * attribute.
 *
 * <p>Parsing is best-effort: I/O and parse failures are printed to standard error and
 * leave the instance with whatever pages were read before the failure.
 */
public class TesseractHocrFile extends OCRFile {
    /** Keyword that introduces the bounding-box property inside a {@code title} attribute. */
    private static final String BBOX_PREFIX = "bbox";

    /** Simple name (no directory part) of the file this instance was read from. */
    private String filename;

    /**
     * Reads and parses the given hOCR file.
     *
     * @param file the hOCR file to read
     */
    public TesseractHocrFile(File file) {
        this.filename = file.getName();
        File cleaned = preproccess(file);
        if (null != cleaned) {
            try {
                readXML(cleaned);
            } finally {
                // The cleaned copy is only an intermediate artefact; never leave it behind.
                cleaned.delete();
            }
        }
    }

    /**
     * Restores an instance from its JSON form.
     *
     * @param jSONObject JSON previously produced by {@link #toJSON()}; the
     *                   {@code "filename"} key is optional
     */
    public TesseractHocrFile(JSONObject jSONObject) {
        super(jSONObject);
        if (jSONObject.has("filename")) {
            this.filename = jSONObject.getString("filename");
        }
    }

    /**
     * Serialises this file, adding the {@code "filename"} key to the parent's JSON.
     *
     * @return the JSON representation
     */
    @Override // cube.file.OCRFile, cube.common.JSONable
    public JSONObject toJSON() {
        JSONObject json = super.toJSON();
        json.put("filename", this.filename);
        return json;
    }

    /**
     * Writes a copy of {@code file} with the {@code <!DOCTYPE} declaration removed.
     *
     * <p>Any line starting with {@code <!DOCTYPE} is dropped together with the line that
     * follows it (the declaration is expected to span two lines); presumably this keeps
     * the XML parser from trying to resolve the referenced DTD. The copy is created next
     * to the source file, or in the working directory when the source has no parent
     * component.
     *
     * @param file the original hOCR file
     * @return the cleaned temporary file, or {@code null} if it could not be produced
     */
    private File preproccess(File file) {
        // File(File, String) tolerates a null parent, unlike string concatenation which
        // would produce the literal directory name "null".
        File cleaned = new File(file.getParentFile(), "tmp_" + Utils.randomString(4) + ".xml");

        // ISO-8859-1 maps every byte to exactly one char and back, so the copy is
        // byte-preserving regardless of the platform default charset; the XML parser
        // then decodes the document according to its own declaration.
        try (BufferedReader reader = Files.newBufferedReader(file.toPath(), StandardCharsets.ISO_8859_1);
                BufferedWriter writer = Files.newBufferedWriter(cleaned.toPath(), StandardCharsets.ISO_8859_1)) {
            boolean skipNext = false;
            String text;
            while ((text = reader.readLine()) != null) {
                if (skipNext) {
                    // Second line of the DOCTYPE declaration.
                    skipNext = false;
                } else if (text.startsWith("<!DOCTYPE")) {
                    skipNext = true;
                } else {
                    writer.write(text);
                    writer.write("\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            // Do not hand a missing or truncated copy to the parser.
            cleaned.delete();
            return null;
        }
        return cleaned;
    }

    /**
     * Parses the cleaned document and appends every {@code ocr_page} found under
     * {@code <body>} to {@code pages}.
     *
     * <p>Failures are printed and abort the parse; pages added before the failure are kept.
     *
     * @param file the DOCTYPE-free copy produced by {@link #preproccess(File)}
     */
    private void readXML(File file) {
        try {
            NodeList bodies = DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder()
                    .parse(file)
                    .getElementsByTagName("body");
            if (bodies.getLength() == 0) {
                // Nothing to read; leave the page list untouched.
                return;
            }
            NodeList divs = ((Element) bodies.item(0)).getElementsByTagName("div");
            for (int i = 0; i < divs.getLength(); i++) {
                Element div = (Element) divs.item(i);
                if ("ocr_page".equals(div.getAttribute("class"))) {
                    this.pages.add(parsePage(div));
                }
            }
        } catch (IOException | ParserConfigurationException | SAXException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a page from an {@code ocr_page} element and its {@code ocr_carea} children.
     *
     * @param element the {@code ocr_page} element
     * @return the parsed page; its bounding box is only set when the title carries a
     *         well-formed {@code bbox} property
     */
    private OCRFile.Page parsePage(Element element) {
        OCRFile.Page page = new OCRFile.Page();
        BoundingBox bbox = findBoundingBox(element.getAttribute("title"));
        if (null != bbox) {
            page.bbox = bbox;
        }
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Element child = asElementOfClass(children.item(i), "ocr_carea");
            if (null != child) {
                parseArea(page, child);
            }
        }
        return page;
    }

    /**
     * Builds an area from an {@code ocr_carea} element, adds it to {@code page} and
     * parses its {@code ocr_par} children.
     *
     * @param page    the page that receives the area
     * @param element the {@code ocr_carea} element
     * @return the parsed area
     * @throws IllegalArgumentException if the title has no well-formed {@code bbox}
     */
    private OCRFile.Area parseArea(OCRFile.Page page, Element element) {
        OCRFile.Area area = new OCRFile.Area();
        area.bbox = requireBoundingBox(element.getAttribute("title"), "ocr_carea");
        page.areas.add(area);
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Element child = asElementOfClass(children.item(i), "ocr_par");
            if (null != child) {
                parsePart(area, child);
            }
        }
        return area;
    }

    /**
     * Builds a paragraph from an {@code ocr_par} element, adds it to {@code area} and
     * parses its {@code ocr_line} children. The paragraph language is read from the
     * {@code lang} attribute.
     *
     * @param area    the area that receives the paragraph
     * @param element the {@code ocr_par} element
     * @return the parsed paragraph
     * @throws IllegalArgumentException if the title has no well-formed {@code bbox}
     */
    private OCRFile.Part parsePart(OCRFile.Area area, Element element) {
        OCRFile.Part part = new OCRFile.Part();
        part.language = element.getAttribute("lang");
        part.bbox = requireBoundingBox(element.getAttribute("title"), "ocr_par");
        area.parts.add(part);
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Element child = asElementOfClass(children.item(i), "ocr_line");
            if (null != child) {
                parseLine(part, child);
            }
        }
        return part;
    }

    /**
     * Builds a line from an {@code ocr_line} element, adds it to {@code part} and parses
     * its {@code ocrx_word} children.
     *
     * @param part    the paragraph that receives the line
     * @param element the {@code ocr_line} element
     * @return the parsed line; its bounding box is only set when the title carries a
     *         well-formed {@code bbox} property
     */
    private OCRFile.Line parseLine(OCRFile.Part part, Element element) {
        OCRFile.Line line = new OCRFile.Line();
        BoundingBox bbox = findBoundingBox(element.getAttribute("title"));
        if (null != bbox) {
            line.bbox = bbox;
        }
        part.lines.add(line);
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Element child = asElementOfClass(children.item(i), "ocrx_word");
            if (null != child) {
                parseWord(line, child);
            }
        }
        return line;
    }

    /**
     * Builds a word from an {@code ocrx_word} element and adds it to {@code line}. The
     * word text is the element's text content.
     *
     * @param line    the line that receives the word
     * @param element the {@code ocrx_word} element
     * @return the parsed word; its bounding box is only set when the title carries a
     *         well-formed {@code bbox} property
     */
    private OCRFile.Word parseWord(OCRFile.Line line, Element element) {
        OCRFile.Word word = new OCRFile.Word(element.getTextContent());
        BoundingBox bbox = findBoundingBox(element.getAttribute("title"));
        if (null != bbox) {
            word.bbox = bbox;
        }
        line.words.add(word);
        return word;
    }

    /**
     * Extracts the bounding box from a {@code title} attribute.
     *
     * <p>The title is a {@code ;}-separated property list. Only the first property that
     * starts with {@code bbox} is considered, and it must consist of the keyword followed
     * by exactly four integers.
     *
     * @param title the raw {@code title} attribute value (may be empty)
     * @return the bounding box, or {@code null} if there is no {@code bbox} property or
     *         it does not have exactly four values
     * @throws NumberFormatException if one of the four values is not an integer
     */
    private static BoundingBox findBoundingBox(String title) {
        for (String property : title.split(";")) {
            String trimmed = property.trim();
            if (!trimmed.startsWith(BBOX_PREFIX)) {
                continue;
            }
            String[] tokens = trimmed.split("\\s+");
            if (tokens.length != 5) {
                return null;
            }
            return new BoundingBox(
                    Integer.parseInt(tokens[1]),
                    Integer.parseInt(tokens[2]),
                    Integer.parseInt(tokens[3]),
                    Integer.parseInt(tokens[4]));
        }
        return null;
    }

    /**
     * Same as {@link #findBoundingBox(String)} but for elements whose bounding box is
     * mandatory.
     *
     * @param title        the raw {@code title} attribute value
     * @param elementClass hOCR class of the element, used in the error message
     * @return the bounding box, never {@code null}
     * @throws IllegalArgumentException if the title has no well-formed {@code bbox}
     */
    private static BoundingBox requireBoundingBox(String title, String elementClass) {
        BoundingBox bbox = findBoundingBox(title);
        if (null == bbox) {
            throw new IllegalArgumentException(
                    "Missing or malformed bbox in " + elementClass + " title: \"" + title + "\"");
        }
        return bbox;
    }

    /**
     * Returns {@code node} as an {@link Element} when it is an element whose
     * {@code class} attribute equals {@code cssClass}.
     *
     * @param node     the candidate child node
     * @param cssClass the required value of the {@code class} attribute
     * @return the element, or {@code null} if the node is not a matching element
     */
    private static Element asElementOfClass(Node node, String cssClass) {
        if (Node.ELEMENT_NODE != node.getNodeType()) {
            return null;
        }
        Element candidate = (Element) node;
        return cssClass.equals(candidate.getAttribute("class")) ? candidate : null;
    }
}
