/*
 * Decompiled with CFR 0.152.
 */
package org.neoref.spider;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import org.neoref.spider.HTMLParse;
import org.neoref.spider.ISpiderReportable;

/**
 * A simple single-threaded web spider.
 *
 * <p>URLs are tracked in three workloads: <em>waiting</em> (queued, not yet
 * fetched), <em>processed</em> (fetched, or skipped because the content was
 * not text) and <em>error</em> (fetching or parsing raised an
 * {@link IOException}). Links discovered while parsing a page are offered to
 * the {@link ISpiderReportable} supplied at construction time; those it
 * accepts are queued via {@link #addURL(URL)}.
 *
 * <p>NOTE(review): no synchronization is performed in this class. If
 * {@link #cancel()} is meant to be invoked from a thread other than the one
 * running {@link #begin()}, the {@code cancel} flag probably needs to be
 * {@code volatile} - confirm against callers.
 */
public class Spider {
    /** URLs whose processing failed with an {@link IOException}. */
    protected Collection workloadError = new ArrayList(3);
    /** URLs queued for processing. */
    protected Collection workloadWaiting = new ArrayList(3);
    /** URLs that were processed (or skipped as non-text) without an I/O error. */
    protected Collection workloadProcessed = new ArrayList(3);
    /** Receiver of spider events (found URLs, found e-mail links, URL errors). */
    protected ISpiderReportable report;
    /** Set by {@link #cancel()}; checked by {@link #begin()} between URLs. */
    protected boolean cancel = false;

    /**
     * Creates a spider that reports its findings to the given callback.
     *
     * @param report the object notified about found URLs, found e-mail links
     *               and URL errors
     */
    public Spider(ISpiderReportable report) {
        this.report = report;
    }

    /**
     * Returns the live collection of URLs that failed to process.
     *
     * @return the error workload (not a copy)
     */
    public Collection getWorkloadError() {
        return this.workloadError;
    }

    /**
     * Returns the live collection of URLs still waiting to be processed.
     *
     * @return the waiting workload (not a copy)
     */
    public Collection getWorkloadWaiting() {
        return this.workloadWaiting;
    }

    /**
     * Returns the live collection of URLs that have been processed.
     *
     * @return the processed workload (not a copy)
     */
    public Collection getWorkloadProcessed() {
        return this.workloadProcessed;
    }

    /** Empties the error, waiting and processed workloads. */
    public void clear() {
        this.getWorkloadError().clear();
        this.getWorkloadWaiting().clear();
        this.getWorkloadProcessed().clear();
    }

    /**
     * Requests that a running {@link #begin()} loop stop. The flag is checked
     * before each URL is processed; a URL already being processed is not
     * interrupted.
     */
    public void cancel() {
        this.cancel = true;
    }

    /**
     * Queues a URL for processing unless it is already present in any of the
     * three workloads.
     *
     * <p>Membership is decided by {@code Collection.contains}, i.e. by
     * {@link URL#equals(Object)}, which according to the JDK documentation may
     * resolve host names and can therefore block.
     *
     * @param url the URL to queue
     */
    public void addURL(URL url) {
        if (this.getWorkloadWaiting().contains(url)) {
            return;
        }
        if (this.getWorkloadError().contains(url)) {
            return;
        }
        if (this.getWorkloadProcessed().contains(url)) {
            return;
        }
        this.log("Adding to workload: " + url);
        this.getWorkloadWaiting().add(url);
    }

    /**
     * Fetches a single URL and, when its content type is textual (or unknown),
     * parses it as HTML so that contained links are reported and queued.
     *
     * <p>On completion the URL is moved from the waiting workload to the
     * processed workload. If an {@link IOException} occurs it is moved to the
     * error workload instead and {@code report.spiderURLError(url)} is called.
     *
     * @param url the URL to fetch and parse
     */
    public void processURL(URL url) {
        try {
            this.log("Processing: " + url);
            URLConnection connection = url.openConnection();
            String contentType = connection.getContentType();
            // Locale-independent, case-insensitive prefix test. The previous
            // toLowerCase() depended on the default locale.
            if (contentType != null && !contentType.regionMatches(true, 0, "text/", 0, 5)) {
                this.getWorkloadWaiting().remove(url);
                this.getWorkloadProcessed().add(url);
                this.log("Not processing because content type is: " + contentType);
                return;
            }
            InputStream is = connection.getInputStream();
            try {
                // Bytes are decoded with the JVM's default charset.
                InputStreamReader r = new InputStreamReader(is);
                HTMLEditorKit.Parser parse = new HTMLParse().getParser();
                parse.parse(r, new Parser(url), true);
            }
            finally {
                // Always release the stream; previously it was never closed.
                try {
                    is.close();
                }
                catch (IOException ignored) {
                    // A failure while closing a stream we only read from does
                    // not change whether the page was processed.
                }
            }
        }
        catch (IOException e) {
            this.getWorkloadWaiting().remove(url);
            this.getWorkloadError().add(url);
            this.log("Error: " + url);
            this.report.spiderURLError(url);
            return;
        }
        this.getWorkloadWaiting().remove(url);
        this.getWorkloadProcessed().add(url);
        this.log("Complete: " + url);
    }

    /**
     * Processes URLs until the waiting workload is empty or {@link #cancel()}
     * has been called. The cancel flag is reset on entry.
     */
    public void begin() {
        this.cancel = false;
        while (!this.getWorkloadWaiting().isEmpty() && !this.cancel) {
            // Work on a snapshot: processURL() removes from, and may add to,
            // the waiting workload while we iterate.
            Object[] list = this.getWorkloadWaiting().toArray();
            for (int i = 0; i < list.length && !this.cancel; ++i) {
                this.processURL((URL)list[i]);
            }
        }
    }

    /**
     * Writes a timestamped entry to standard output.
     *
     * @param entry the message to log
     */
    public void log(String entry) {
        System.out.println(new Date() + ":" + entry);
    }

    /**
     * Parser callback that extracts links from the tags of one page and hands
     * them to the enclosing {@link Spider}.
     */
    protected class Parser
    extends HTMLEditorKit.ParserCallback {
        /** URL of the page being parsed; relative links are resolved against it. */
        protected URL base;

        /**
         * @param base the URL of the page being parsed
         */
        public Parser(URL base) {
            this.base = base;
        }

        /**
         * Examines a tag for a link. The {@code href} attribute is used; for
         * {@code FRAME} tags without an {@code href}, the {@code src}
         * attribute is used instead. Any fragment ({@code #...}) is removed.
         * {@code mailto:} links are reported via
         * {@code report.spiderFoundEMail}; all other links go to
         * {@link #handleLink(URL, String)}.
         *
         * @param t   the tag encountered
         * @param a   the tag's attributes
         * @param pos position in the document (unused here)
         */
        public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            String href = (String)a.getAttribute(HTML.Attribute.HREF);
            if (href == null && t == HTML.Tag.FRAME) {
                href = (String)a.getAttribute(HTML.Attribute.SRC);
            }
            if (href == null) {
                return;
            }
            int i = href.indexOf('#');
            if (i != -1) {
                href = href.substring(0, i);
            }
            // Locale-independent, case-insensitive prefix test: with
            // toLowerCase() a Turkish default locale turns "MAILTO:" into
            // "ma\u0131lto:" and the link would be missed.
            if (href.regionMatches(true, 0, "mailto:", 0, 7)) {
                Spider.this.report.spiderFoundEMail(href);
                return;
            }
            this.handleLink(this.base, href);
        }

        /**
         * Treats start tags exactly like simple tags.
         *
         * @param t   the tag encountered
         * @param a   the tag's attributes
         * @param pos position in the document
         */
        public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            this.handleSimpleTag(t, a, pos);
        }

        /**
         * Resolves a link against a base URL, offers it to
         * {@code report.spiderFoundURL} and queues it when that call returns
         * {@code true}. Malformed links are logged and otherwise ignored.
         *
         * @param base the URL of the page containing the link
         * @param str  the link text, possibly relative
         */
        protected void handleLink(URL base, String str) {
            try {
                URL url = new URL(base, str);
                if (Spider.this.report.spiderFoundURL(base, url)) {
                    Spider.this.addURL(url);
                }
            }
            catch (MalformedURLException e) {
                Spider.this.log("Found malformed URL: " + str);
            }
        }
    }
}

