From e1fd37993dec4dce813fa14f686425787b3aef49 Mon Sep 17 00:00:00 2001
From: Wiiplay123
Date: Fri, 29 May 2015 13:26:48 -0500
Subject: [PATCH] Added FurAffinity Description Ripping

Also added description sleep time value to AbstractHTMLRipper, to avoid
read timed out.
---
 .../ripme/ripper/AbstractHTMLRipper.java      |  10 +-
 .../ripper/rippers/FuraffinityRipper.java     | 140 +++++++++++++++++-
 2 files changed, 147 insertions(+), 3 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
index c9bb0259..e8621e7b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@@ -56,6 +56,9 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
     public String getDescription(String page) throws IOException {
         throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
     }
+    public int descSleepTime() {
+        return 0;
+    }
     @Override
     public void rip() throws IOException {
         int index = 0;
@@ -89,12 +92,14 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
                 logger.debug("Fetching description(s) from " + doc.location());
                 List<String> textURLs = getDescriptionsFromPage(doc);
                 if (textURLs.size() > 0) {
+                    logger.debug("Found description link(s) from " + doc.location());
                     for (String textURL : textURLs) {
                         if (isStopped()) {
                             break;
                         }
                         textindex += 1;
-                        logger.debug("Getting decription from " + textURL);
+                        logger.debug("Getting description from " + textURL);
+                        sleep(descSleepTime());
                         String tempDesc = getDescription(textURL);
                         if (tempDesc != null) {
                             logger.debug("Got description: " + tempDesc);
@@ -125,6 +130,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         waitForThreads();
     }
     public boolean saveText(URL url, String subdirectory, String text, int index) {
+        // Not the best for some cases, like FurAffinity. Overridden there.
         try {
             stopCheck();
         } catch (IOException e) {
@@ -141,7 +147,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
             if (!subdirectory.equals("")) { // Not sure about this part
                 subdirectory = File.separator + subdirectory;
             }
-            // TODO Get prefix working again, probably requires reworking a lot of stuff!
+            // TODO Get prefix working again, probably requires reworking a lot of stuff! (Might be fixed now)
             saveFileAs = new File(
                     workingDir.getCanonicalPath()
                     + subdirectory
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
index 9a44e162..ed63305d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
@@ -1,5 +1,7 @@
 package com.rarchives.ripme.ripper.rippers;
 
+import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -10,10 +12,13 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.rarchives.ripme.utils.Utils;
 import org.jsoup.Connection.Method;
 import org.jsoup.Connection.Response;
+import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.safety.Whitelist;
 import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -48,7 +53,10 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
     public String getHost() {
         return "furaffinity";
     }
-
+    @Override
+    public boolean hasDescriptionSupport() {
+        return true;
+    }
     @Override
     public Document getFirstPage() throws IOException {
         if (cookies == null || cookies.size() == 0) {
@@ -110,7 +118,137 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
         }
         return urls;
     }
+    /**
+     * Collects the links found in the entries of a listing page.
+     *
+     * @param page page to scan for entries matching b[id^=sid_]
+     * @return absolute URLs (urlBase + href); empty when no entry has a link
+     */
+    @Override
+    public List<String> getDescriptionsFromPage(Document page) {
+        List<String> urls = new ArrayList<String>();
+        for (Element e : page.select("b[id^=sid_]")) {
+            // first() yields null when the entry contains no link; skip it rather than crash.
+            Element link = e.select("a").first();
+            if (link == null) {
+                continue;
+            }
+            String descUrl = urlBase + link.attr("href");
+            logger.debug("Desc2 " + descUrl);
+            urls.add(descUrl);
+        }
+        return urls;
+    }
+
+    /** Delay handed to sleep() before each description request, to avoid read timeouts. */
+    @Override
+    public int descSleepTime() {
+        return 400;
+    }
+
+    /**
+     * Fetches a page and returns its title and description as plain text.
+     *
+     * @param page URL of the page to fetch
+     * @return the title, a newline, then the description; null when the page
+     *         cannot be fetched or lacks the expected title/description cells
+     */
+    @Override
+    public String getDescription(String page) {
+        try {
+            // Fetch the image page
+            Response resp = Http.url(page)
+                                .referrer(this.url)
+                                .cookies(cookies)
+                                .response();
+            cookies.putAll(resp.cookies());
+            // Parse the response once and reuse the document for every lookup.
+            Document documentz = resp.parse();
+            documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
+            // This is where the description is.
+            // Would break completely if FurAffinity changed site layout.
+            Element ele = documentz.select("td[class=alt1][width=\"70%\"]").first();
+            if (ele == null) {
+                logger.debug("No description at " + page);
+                throw new IOException("No description found");
+            }
+            logger.debug("Description found!");
+            // Put a literal backslash-n marker at every break; it becomes a real line separator below, before the tags are stripped.
+            ele.select("br").append("\\n");
+            ele.select("p").prepend("\\n\\n");
+            logger.debug("Returning description at " + page);
+            String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
+            // first() yields null when the title cell is missing; report it like a missing description.
+            Element title = documentz.select("td[class=\"cat\"][valign=\"top\"] > b").first();
+            if (title == null) {
+                throw new IOException("No title found");
+            }
+            return title.text() + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
+        } catch (IOException ioe) {
+            logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
+            return null;
+        }
+    }
+
+    /**
+     * Saves a description as a text file named after its first line (the title)
+     * and the last path segment of the URL.
+     *
+     * @param url          page the description was taken from
+     * @param subdirectory subdirectory of the working directory, or "" for none
+     * @param text         title on the first line, description after it
+     * @param index        index handed to getPrefix() for the file name prefix
+     * @return true when the file was written; false when stopCheck() fails or on an I/O error
+     */
+    @Override
+    public boolean saveText(URL url, String subdirectory, String text, int index) {
+        try {
+            stopCheck();
+        } catch (IOException e) {
+            return false;
+        }
+        // The title becomes part of the file name, so drop characters that are not allowed there.
+        String saveAs = text.split("\n")[0].replaceAll("[\\\\/:*?\"<>|~]", "");
+        // Last path segment of the URL, whether or not the URL ends with a slash.
+        String location = url.toString();
+        while (location.endsWith("/")) {
+            location = location.substring(0, location.length() - 1);
+        }
+        String test = location.substring(location.lastIndexOf('/') + 1).replace("\\", "");
+        File saveFileAs;
+        try {
+            if (!subdirectory.equals("")) {
+                subdirectory = File.separator + subdirectory;
+            }
+            saveFileAs = new File(
+                    workingDir.getCanonicalPath()
+                    + subdirectory
+                    + File.separator
+                    + getPrefix(index)
+                    + saveAs
+                    + " "
+                    + test
+                    + ".txt");
+            // The directory has to exist before the file can be opened for writing.
+            if (!saveFileAs.getParentFile().exists()) {
+                logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
+                saveFileAs.getParentFile().mkdirs();
+            }
+            logger.debug("Downloading " + url + "'s description to " + saveFileAs);
+            // Write the file; close the stream even when the write fails.
+            FileOutputStream out = new FileOutputStream(saveFileAs);
+            try {
+                out.write(text.getBytes("UTF-8"));
+            } finally {
+                out.close();
+            }
+        } catch (IOException e) {
+            logger.error("[!] Error creating save file path for description '" + url + "':", e);
+            return false;
+        }
+        return true;
+    }
 
     @Override
     public void downloadURL(URL url, int index) {
         furaffinityThreadPool.addThread(new FuraffinityDocumentThread(url));