Merge pull request #220 from Wiiplay123/master

Added FurAffinity Description Ripping
2015-12-04 09:51:26 -08:00 · 2015-12-04 09:51:26 -08:00 · 4433e5a2fe
commit 4433e5a2fe
parent 87353a44ca e1fd37993d
2 changed files with 109 additions and 3 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@ -56,6 +56,9 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
    /**
     * Default description fetcher. This base implementation never produces a
     * description: it unconditionally fails, so rippers that want descriptions
     * must provide their own version.
     *
     * @param page the description URL handed over by {@code rip()}
     * @return never returns normally in this implementation
     * @throws IOException always, with the message "getDescription not implemented"
     */
    public String getDescription(String page) throws IOException {
        // Open question from the original author: keep throwing here, or make this abstract?
        final String reason = "getDescription not implemented";
        throw new IOException(reason);
    }
    /**
     * Pause applied before each description fetch; the value is passed to
     * {@code sleep(...)} inside {@code rip()}. The default is no pause.
     *
     * @return the delay value, 0 here (presumably milliseconds; confirm against {@code sleep})
     */
    public int descSleepTime() {
        final int noDelay = 0;
        return noDelay;
    }
    @Override
    public void rip() throws IOException {
        int index = 0;
@ -89,12 +92,14 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
                logger.debug("Fetching description(s) from " + doc.location());
            	List<String> textURLs = getDescriptionsFromPage(doc);
            	if (textURLs.size() > 0) {
                    logger.debug("Found description link(s) from " + doc.location());
            		for (String textURL : textURLs) {
            			if (isStopped()) {
            				break;
            			}
            			textindex += 1;
-            			logger.debug("Getting decription from " + textURL);
+            			logger.debug("Getting description from " + textURL);
                        sleep(descSleepTime());
            			String tempDesc = getDescription(textURL);
            			if (tempDesc != null) {
            			    logger.debug("Got description: " + tempDesc);
@ -125,6 +130,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
        waitForThreads();
    }
    public boolean saveText(URL url, String subdirectory, String text, int index) {
        // Not the best for some cases, like FurAffinity. Overridden there.
        try {
            stopCheck();
        } catch (IOException e) {
@ -141,7 +147,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
            if (!subdirectory.equals("")) { // Not sure about this part
                subdirectory = File.separator + subdirectory;
            }
-            // TODO Get prefix working again, probably requires reworking a lot of stuff!
+            // TODO Get prefix working again, probably requires reworking a lot of stuff! (Might be fixed now)
            saveFileAs = new File(
                    workingDir.getCanonicalPath()
                    + subdirectory
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
@ -1,5 +1,7 @@
 package com.rarchives.ripme.ripper.rippers;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
@ -10,10 +12,13 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import com.rarchives.ripme.utils.Utils;
 import org.jsoup.Connection.Method;
 import org.jsoup.Connection.Response;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.safety.Whitelist;
 import org.jsoup.select.Elements;
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@ -48,7 +53,10 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
    public String getHost() {
        // Fixed host identifier for this ripper.
        final String host = "furaffinity";
        return host;
    }
-
+    @Override
    public boolean hasDescriptionSupport() {
        // This ripper implements getDescriptionsFromPage/getDescription, so it
        // reports description ripping as available.
        final boolean supported = true;
        return supported;
    }
    @Override
    public Document getFirstPage() throws IOException {
        if (cookies == null || cookies.size() == 0) {
@ -110,7 +118,99 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
        }
        return urls;
    }
    /**
     * Collects the URLs of the pages whose descriptions should be ripped.
     * Every {@code <b>} element whose id starts with {@code sid_} is inspected
     * and the {@code href} of its first anchor is appended to {@code urlBase}.
     *
     * @param page the gallery page to scan
     * @return the description page URLs found, in document order; empty if none
     */
    @Override
    public List<String> getDescriptionsFromPage(Document page) {
        List<String> urls = new ArrayList<String>();
        for (Element entry : page.select("b[id^=sid_]")) {
            Element link = entry.select("a").first();
            if (link == null) {
                // first() returns null when the entry holds no anchor; skip it
                // instead of failing the whole page with a NullPointerException.
                continue;
            }
            String descriptionUrl = urlBase + link.attr("href");
            urls.add(descriptionUrl);
            logger.debug("Desc2 " + descriptionUrl);
        }
        return urls;
    }
    /**
     * Delay used between description fetches for this site.
     *
     * @return 400 (presumably milliseconds; confirm against the {@code sleep} helper)
     */
    @Override
    public int descSleepTime() {
        final int delayBetweenDescriptions = 400;
        return delayBetweenDescriptions;
    }
    /**
     * Downloads a submission page and extracts its title and description as
     * plain text.
     *
     * The description cell's HTML is flattened: {@code <br>} and {@code <p>}
     * are turned into line breaks (using the platform line separator) and all
     * remaining markup is stripped.
     *
     * @param page URL of the submission page to fetch
     * @return the title, a {@code "\n"}, then the description text (the
     *         overridden saveText uses the first line as the file name);
     *         {@code null} if the page could not be fetched or does not
     *         contain the expected title/description elements
     */
    @Override
    public String getDescription(String page) {
        try {
            // Fetch the image page
            Response resp = Http.url(page)
                    .referrer(this.url)
                    .cookies(cookies)
                    .response();
            cookies.putAll(resp.cookies());

            // Parse the response body a single time and reuse the document.
            Document doc = resp.parse();
            doc.outputSettings(new Document.OutputSettings().prettyPrint(false));

            // Try to find the description
            // Would break completely if FurAffinity changed site layout.
            Elements descriptionCells = doc.select("td[class=alt1][width=\"70%\"]");
            if (descriptionCells.isEmpty()) {
                logger.debug("No description at " + page);
                throw new IOException("No description found");
            }
            logger.debug("Description found!");
            Element description = descriptionCells.get(0); // This is where the description is.

            // Mark line breaks with a literal backslash-n placeholder so they
            // can be converted to real line separators before tags are stripped.
            description.select("br").append("\\n");
            description.select("p").prepend("\\n\\n");
            logger.debug("Returning description at " + page);
            String withLineBreaks = description.html()
                    .replace("\\n", System.getProperty("line.separator"));
            String tempPage = Jsoup.clean(
                    withLineBreaks,
                    "",
                    Whitelist.none(),
                    new Document.OutputSettings().prettyPrint(false));

            // A missing title is reported like a missing description rather
            // than escaping as an unchecked IndexOutOfBoundsException.
            Element title = doc.select("td[class=\"cat\"][valign=\"top\"] > b").first();
            if (title == null) {
                throw new IOException("No title found");
            }
            return title.text() + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
        } catch (IOException ioe) {
            logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
            return null;
        }
    }
    /**
     * Writes a description to a {@code .txt} file inside the working directory.
     *
     * The file name is built from the index prefix, the first line of
     * {@code text} (the title), a space, and the last path segment of
     * {@code url}. Path separators are removed from both parts so they cannot
     * redirect the file into another directory. The complete {@code text},
     * title line included, is written as the file content.
     *
     * @param url          page the description was taken from
     * @param subdirectory optional subdirectory below the working directory; "" for none
     * @param text         title on the first line, followed by the description
     * @param index        index passed to {@code getPrefix} for the file name prefix
     * @return {@code true} if the file was written; {@code false} if the rip
     *         was stopped or an I/O error occurred
     */
    @Override
    public boolean saveText(URL url, String subdirectory, String text, int index) {
        //TODO Make this better please?
        try {
            stopCheck();
        } catch (IOException e) {
            return false;
        }
        // The first line is the title; strip path separators before using it in a file name.
        String saveAs = text.split("\n")[0]
                .replace("\\", "")
                .replace("/", "");
        File saveFileAs;
        try {
            if (!subdirectory.equals("")) {
                subdirectory = File.separator + subdirectory;
            }
            // Take everything after the second-to-last '/' of the URL.
            String urlString = url.toString();
            int o = urlString.lastIndexOf('/') - 1;
            String test = urlString.substring(urlString.lastIndexOf('/', o) + 1);
            test = test.replace("/", ""); // This is probably not the best way to do this.
            test = test.replace("\\", ""); // CLOSE ENOUGH!
            saveFileAs = new File(
                    workingDir.getCanonicalPath()
                            + subdirectory
                            + File.separator
                            + getPrefix(index)
                            + saveAs
                            + " "
                            + test
                            + ".txt");
            logger.debug("Downloading " + url + "'s description to " + saveFileAs);
            // The target directory must exist before the stream is opened,
            // otherwise the very first write into a new directory fails.
            File parentDir = saveFileAs.getParentFile();
            if (!parentDir.exists()) {
                logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
                parentDir.mkdirs();
            }
            // Write the file (platform default charset, as before)
            FileOutputStream out = new FileOutputStream(saveFileAs);
            try {
                out.write(text.getBytes());
            } finally {
                // Release the file handle even when the write fails.
                out.close();
            }
        } catch (IOException e) {
            logger.error("[!] Error creating save file path for description '" + url + "':", e);
            return false;
        }
        return true;
    }
    @Override
    public void downloadURL(URL url, int index) {
        furaffinityThreadPool.addThread(new FuraffinityDocumentThread(url));