ImgScroll/src/main/java/com/rarchives/ripme/ripper/rippers/FuraffinityRipper.java
2017-11-21 12:51:00 -08:00

247 lines
8.7 KiB
Java
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Base64;
import com.rarchives.ripme.utils.Http;
public class FuraffinityRipper extends AbstractHTMLRipper {
private static Map<String, String> cookies=null;
private static final String urlBase = "https://www.furaffinity.net";
// Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool furaffinityThreadPool
= new DownloadThreadPool( "furaffinity");
@Override
public DownloadThreadPool getThreadPool() {
return furaffinityThreadPool;
}
public FuraffinityRipper(URL url) throws IOException {
super(url);
}
@Override
public String getDomain() {
return "furaffinity.net";
}
@Override
public String getHost() {
return "furaffinity";
}
@Override
public boolean hasDescriptionSupport() {
return false;
}
@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
private void login() throws IOException {
String user = new String(Base64.decode("cmlwbWU="));
String pass = new String(Base64.decode("cmlwbWVwYXNzd29yZA=="));
Response loginPage = Http.url(urlBase + "/login/")
.referrer(urlBase)
.response();
cookies = loginPage.cookies();
Map<String,String> formData = new HashMap<>();
formData.put("action", "login");
formData.put("retard_protection", "1");
formData.put("name", user);
formData.put("pass", pass);
formData.put("login", "Login to FurAffinity");
Response doLogin = Http.url(urlBase + "/login/?ref=" + url)
.referrer(urlBase + "/login/")
.data(formData)
.method(Method.POST)
.response();
cookies.putAll(doLogin.cookies());
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
Elements nextPageUrl = doc.select("td[align=right] form");
if (nextPageUrl.size() == 0) {
throw new IOException("No more pages");
}
String nextUrl = urlBase + nextPageUrl.first().attr("action");
sleep(500);
Document nextPage = Http.url(nextUrl).get();
Elements hrefs = nextPage.select("div#no-images");
if (hrefs.size() != 0) {
throw new IOException("No more pages");
}
return nextPage;
}
private String getImageFromPost(String url) {
try {
logger.info("found url " + Http.url(url).get().select("meta[property=og:image]").attr("content"));
return Http.url(url).get().select("meta[property=og:image]").attr("content");
} catch (IOException e) {
return "";
}
}
@Override
public List<String> getURLsFromPage(Document page) {
List<String> urls = new ArrayList<>();
Elements urlElements = page.select("figure.t-image > b > u > a");
for (Element e : urlElements) {
urls.add(getImageFromPost(urlBase + e.select("a").first().attr("href")));
}
return urls;
}
@Override
public List<String> getDescriptionsFromPage(Document page) {
List<String> urls = new ArrayList<>();
Elements urlElements = page.select("figure.t-image > b > u > a");
for (Element e : urlElements) {
urls.add(urlBase + e.select("a").first().attr("href"));
logger.debug("Desc2 " + urlBase + e.select("a").first().attr("href"));
}
return urls;
}
@Override
public int descSleepTime() {
return 400;
}
public String getDescription(String page) {
try {
// Fetch the image page
Response resp = Http.url(page)
.referrer(this.url)
.response();
cookies.putAll(resp.cookies());
// Try to find the description
Elements els = resp.parse().select("td[class=alt1][width=\"70%\"]");
if (els.size() == 0) {
logger.debug("No description at " + page);
throw new IOException("No description found");
}
logger.debug("Description found!");
Document documentz = resp.parse();
Element ele = documentz.select("td[class=alt1][width=\"70%\"]").get(0); // This is where the description is.
// Would break completely if FurAffinity changed site layout.
documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
ele.select("br").append("\\n");
ele.select("p").prepend("\\n\\n");
logger.debug("Returning description at " + page);
String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
} catch (IOException ioe) {
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
return null;
}
}
@Override
public boolean saveText(URL url, String subdirectory, String text, int index) {
//TODO Make this better please?
try {
stopCheck();
} catch (IOException e) {
return false;
}
String newText = "";
String saveAs = "";
File saveFileAs;
saveAs = text.split("\n")[0];
saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1");
for (int i = 1;i < text.split("\n").length; i++) {
newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
}
try {
if (!subdirectory.equals("")) {
subdirectory = File.separator + subdirectory;
}
int o = url.toString().lastIndexOf('/')-1;
String test = url.toString().substring(url.toString().lastIndexOf('/',o)+1);
test = test.replace("/",""); // This is probably not the best way to do this.
test = test.replace("\\",""); // CLOSE ENOUGH!
saveFileAs = new File(
workingDir.getCanonicalPath()
+ subdirectory
+ File.separator
+ saveAs
+ ".txt");
// Write the file
FileOutputStream out = (new FileOutputStream(saveFileAs));
out.write(text.getBytes());
out.close();
} catch (IOException e) {
logger.error("[!] Error creating save file path for description '" + url + "':", e);
return false;
}
logger.debug("Downloading " + url + "'s description to " + saveFileAs);
if (!saveFileAs.getParentFile().exists()) {
logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
saveFileAs.getParentFile().mkdirs();
}
return true;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern
.compile("^https?://www\\.furaffinity\\.net/gallery/([-_.0-9a-zA-Z]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Expected furaffinity.net URL format: "
+ "www.furaffinity.net/gallery/username - got " + url
+ " instead");
}
private class FuraffinityDocumentThread extends Thread {
private URL url;
FuraffinityDocumentThread(URL url) {
super();
this.url = url;
}
}
}