/*
 * Decompiled with CFR 0.152.
 */
package org.neoref.spider;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.neoref.spider.MeshErrorHandler;
import org.neoref.spider.NucleoDBBuildHandler;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Worker thread that queries the NCBI E-utilities "nucleotide" database for a
 * search term, then fetches the matching records in batches as XML and feeds
 * each batch through a SAX parser backed by {@link NucleoDBBuildHandler}.
 *
 * <p>{@link #main(String[])} reads a configuration file and runs one builder
 * per configuration line, one after the other.
 */
public class NucleoDBBuilder
extends Thread {
    /** Base URL of the E-utilities endpoints (HTTPS: plain-HTTP redirects are not followed across protocols). */
    private static final String EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";

    /**
     * Set to the esearch hit count at the start of {@link #run()} and reset to 0
     * after a parse failure. NOTE(review): the recovery arithmetic in
     * {@link #run()} suggests the content handler also updates this field while
     * parsing -- that code is not visible here; confirm before changing it.
     */
    static int count;
    /** Closing tag appended to a fetched batch whose XML does not end in {@code t>}. */
    static String GBSET = "</GBSet>";
    /** Zero-based index of the first record to fetch. */
    int start;
    /** Zero-based index of the last record to fetch; clamped to the hit count in {@link #run()}. */
    int end;
    /** Number of records requested per efetch call ({@code retmax}). */
    int step;
    /** Pause between batches, in milliseconds. */
    int interval;
    /** Search term sent to esearch. */
    String kingdom;

    /**
     * @param start    zero-based index of the first record to fetch
     * @param end      zero-based index of the last record; a negative or too-large value means "up to the last hit"
     * @param step     records per efetch request; must be positive
     * @param kingdom  esearch term (inserted into the URL as-is, not URL-encoded)
     * @param interval pause between batches in milliseconds
     */
    NucleoDBBuilder(int start, int end, int step, String kingdom, int interval) {
        this.start = start;
        this.end = end;
        this.step = step;
        this.kingdom = kingdom;
        this.interval = interval;
    }

    /**
     * Runs the esearch query, then fetches and parses the records from
     * {@code start} to {@code end} in batches of {@code step}. Errors are logged
     * rather than propagated; an interrupt stops the run and restores the
     * thread's interrupt flag.
     */
    @Override
    public void run() {
        try {
            if (this.step < 1) {
                // A non-positive step would make the fetch loop below never terminate.
                throw new IllegalArgumentException("step must be positive, got " + this.step);
            }
            URL esearchURL = new URL(EUTILS_BASE + "esearch.fcgi?db=nucleotide&term=" + this.kingdom + "&usehistory=y");
            Document xmlDoc;
            InputStream searchIn = esearchURL.openConnection().getInputStream();
            try {
                DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
                xmlDoc = db.parse(new InputSource(searchIn));
            }
            finally {
                NucleoDBBuilder.closeQuietly(searchIn);
            }
            count = Integer.parseInt(NucleoDBBuilder.firstText(xmlDoc, "Count"));
            String webEnv = NucleoDBBuilder.firstText(xmlDoc, "WebEnv");
            int query_key = Integer.parseInt(NucleoDBBuilder.firstText(xmlDoc, "QueryKey"));
            // Valid zero-based indices are 0..count-1, so end == count is already out of range.
            if (this.end < 0 || this.end >= count) {
                this.end = count - 1;
            }
            XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
            NucleoDBBuildHandler handler = new NucleoDBBuildHandler();
            parser.setFeature("http://xml.org/sax/features/validation", true);
            parser.setContentHandler(handler);
            parser.setErrorHandler(new MeshErrorHandler());
            for (int retstart = this.start; retstart <= this.end; retstart += this.step) {
                // Near the end of the range, fall back to fetching one record at a time.
                if (retstart + this.step > this.end && this.step > 1) {
                    this.step = 1;
                }
                NucleoDBBuilder.log("Fetch a list of records from " + retstart);
                URL efetchURL = new URL(EUTILS_BASE + "efetch.fcgi?db=nucleotide&WebEnv=" + webEnv + "&query_key=" + query_key + "&retstart=" + retstart + "&retmax=" + this.step + "&retmode=xml");
                byte[] payload = NucleoDBBuilder.repairTruncatedXml(NucleoDBBuilder.readFully(efetchURL));
                try {
                    NucleoDBBuilder.log("Parse the xml");
                    parser.parse(new InputSource(new ByteArrayInputStream(payload)));
                }
                catch (Exception e) {
                    NucleoDBBuilder.log("XML Error Message: " + e.getMessage() + ", skip gene...");
                    // Jump past the offending record. NOTE(review): this relies on `count`
                    // reflecting progress within the batch -- see the field comment.
                    retstart = retstart + count + 1;
                    NucleoDBBuilder.log("recover automatically from gene: " + retstart);
                    // Compensate for the loop's own "retstart += step".
                    retstart -= this.step;
                    count = 0;
                }
                // Throttle between requests, after success and after a recovered failure alike.
                Thread.sleep(this.interval);
            }
            NucleoDBBuilder.log("Done with NucleoDBBuild");
        }
        catch (InterruptedException e) {
            // Restore the flag so whoever owns this thread can observe the interrupt.
            Thread.currentThread().interrupt();
            NucleoDBBuilder.log("General Error Message: interrupted, stopping");
        }
        catch (Exception e) {
            NucleoDBBuilder.log("General Error Message: " + e.getMessage());
        }
    }

    /** Returns the text of the first {@code tag} element in {@code doc}, failing with a descriptive message if it is absent or empty. */
    private static String firstText(Document doc, String tag) {
        org.w3c.dom.Node element = doc.getElementsByTagName(tag).item(0);
        if (element == null || element.getFirstChild() == null) {
            throw new IllegalStateException("esearch response has no <" + tag + "> value");
        }
        return element.getFirstChild().getNodeValue();
    }

    /** Downloads the complete response body of {@code url} and closes the stream afterwards. */
    private static byte[] readFully(URL url) throws Exception {
        InputStream in = url.openConnection().getInputStream();
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[1024];
            int read;
            while ((read = in.read(chunk)) != -1) {
                collected.write(chunk, 0, read);
            }
            return collected.toByteArray();
        }
        finally {
            NucleoDBBuilder.closeQuietly(in);
        }
    }

    /**
     * Returns {@code raw} unchanged when its last {@code '>'} is preceded by
     * {@code 't'}; otherwise cuts the data after the last {@code '>'} and
     * appends {@link #GBSET}. Data containing no {@code '>'} at all is returned
     * unchanged so the parser reports the problem.
     */
    private static byte[] repairTruncatedXml(byte[] raw) {
        int len = raw.length;
        while (len > 0 && raw[len - 1] != '>') {
            --len;
        }
        if (len == 0) {
            return raw;
        }
        if (len >= 2 && raw[len - 2] == 't') {
            return raw;
        }
        NucleoDBBuilder.log("Fixing detected xml error");
        byte[] closing = GBSET.getBytes();
        ByteArrayOutputStream fixed = new ByteArrayOutputStream(len + closing.length);
        fixed.write(raw, 0, len);
        fixed.write(closing, 0, closing.length);
        return fixed.toByteArray();
    }

    /** Closes {@code in}, ignoring failures: nothing useful can be done if close itself fails. */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        }
        catch (java.io.IOException ignored) {
            // Intentionally ignored; the data has already been consumed.
        }
    }

    /** Prints {@code entry} to standard output, prefixed with the current date and a colon. */
    public static void log(String entry) {
        System.out.println(new Date() + ":" + entry);
    }

    /**
     * Reads the configuration file named by {@code args[0]} and runs one builder
     * per line, waiting for each to finish before starting the next.
     *
     * <p>Each line holds five space-separated fields:
     * {@code start end step term intervalSeconds}, where {@code start} and
     * {@code end} are one-based. Lines starting with {@code #} and blank lines
     * are skipped. A line with fewer than five fields prints
     * {@code bad config file} and exits with status -1.
     *
     * @param args command-line arguments; {@code args[0]} is the config file path
     * @throws Exception if the file cannot be read or a numeric field is malformed
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.out.println("Usage: nucleoDBBuild config-file");
            System.exit(-1);
        }
        BufferedReader cfgIn = new BufferedReader(new FileReader(args[0]));
        try {
            String cfgStr;
            while ((cfgStr = cfgIn.readLine()) != null) {
                if (cfgStr.startsWith("#") || cfgStr.trim().length() == 0) {
                    continue;
                }
                StringTokenizer st = new StringTokenizer(cfgStr, " ");
                if (st.countTokens() < 5) {
                    System.out.println("bad config file");
                    System.exit(-1);
                }
                // The file uses one-based record positions; the builder works zero-based.
                int start = Integer.parseInt(st.nextToken()) - 1;
                int end = Integer.parseInt(st.nextToken()) - 1;
                int step = Integer.parseInt(st.nextToken());
                String kingdom = st.nextToken();
                // Configured in seconds, used in milliseconds.
                int interval = Integer.parseInt(st.nextToken()) * 1000;
                NucleoDBBuilder.log("Build gene database for " + kingdom);
                NucleoDBBuilder ndbd = new NucleoDBBuilder(start, end, step, kingdom, interval);
                ndbd.start();
                ndbd.join();
            }
        }
        finally {
            cfgIn.close();
        }
    }
}

