Moving from MultiPage ripper to HTML ripper, added JSON ripper
This commit is contained in:
parent
b7397cd31e
commit
c166f93d57
@ -10,9 +10,9 @@ import org.jsoup.nodes.Document;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public abstract class AbstractMultiPageRipper extends AlbumRipper {
|
||||
public abstract class AbstractHTMLRipper extends AlbumRipper {
|
||||
|
||||
public AbstractMultiPageRipper(URL url) throws IOException {
|
||||
public AbstractHTMLRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@ -23,6 +23,9 @@ public abstract class AbstractMultiPageRipper extends AlbumRipper {
|
||||
public abstract Document getNextPage(Document doc) throws IOException;
|
||||
public abstract List<String> getURLsFromPage(Document page);
|
||||
public abstract void downloadURL(URL url, int index);
|
||||
public DownloadThreadPool getThreadPool() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean keepSortOrder() {
|
||||
return true;
|
||||
@ -54,19 +57,29 @@ public abstract class AbstractMultiPageRipper extends AlbumRipper {
|
||||
|
||||
for (String imageURL : imageURLs) {
|
||||
if (isStopped()) {
|
||||
logger.info("Interrupted");
|
||||
break;
|
||||
}
|
||||
index += 1;
|
||||
downloadURL(new URL(imageURL), index);
|
||||
}
|
||||
|
||||
if (isStopped()) {
|
||||
break;
|
||||
}
|
||||
|
||||
try {
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, "next page");
|
||||
doc = getNextPage(doc);
|
||||
} catch (IOException e) {
|
||||
logger.info("Can't get next page: " + e.getMessage());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If they're using a thread pool, wait for it.
|
||||
if (getThreadPool() != null) {
|
||||
getThreadPool().waitForThreads();
|
||||
}
|
||||
waitForThreads();
|
||||
}
|
||||
|
@ -0,0 +1,93 @@
|
||||
package com.rarchives.ripme.ripper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public abstract class AbstractJSONRipper extends AlbumRipper {
|
||||
|
||||
public AbstractJSONRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
public abstract String getDomain();
|
||||
public abstract String getHost();
|
||||
|
||||
public abstract JSONObject getFirstPage() throws IOException;
|
||||
public abstract JSONObject getNextPage(JSONObject json) throws IOException;
|
||||
public abstract List<String> getURLsFromJSON(JSONObject json);
|
||||
public abstract void downloadURL(URL url, int index);
|
||||
public DownloadThreadPool getThreadPool() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean keepSortOrder() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith(getDomain());
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
return url;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
int index = 0;
|
||||
logger.info("Retrieving " + this.url);
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
||||
JSONObject json = getFirstPage();
|
||||
|
||||
while (json != null) {
|
||||
List<String> imageURLs = getURLsFromJSON(json);
|
||||
|
||||
if (imageURLs.size() == 0) {
|
||||
throw new IOException("No images found at " + this.url);
|
||||
}
|
||||
|
||||
for (String imageURL : imageURLs) {
|
||||
if (isStopped()) {
|
||||
break;
|
||||
}
|
||||
index += 1;
|
||||
downloadURL(new URL(imageURL), index);
|
||||
}
|
||||
|
||||
if (isStopped()) {
|
||||
break;
|
||||
}
|
||||
|
||||
try {
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, "next page");
|
||||
json = getNextPage(json);
|
||||
} catch (IOException e) {
|
||||
logger.info("Can't get next page: " + e.getMessage());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If they're using a thread pool, wait for it.
|
||||
if (getThreadPool() != null) {
|
||||
getThreadPool().waitForThreads();
|
||||
}
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
public String getPrefix(int index) {
|
||||
String prefix = "";
|
||||
if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
|
||||
prefix = String.format("%03d_", index);
|
||||
}
|
||||
return prefix;
|
||||
}
|
||||
}
|
@ -24,7 +24,7 @@ public abstract class AbstractSinglePageRipper extends AlbumRipper {
|
||||
public abstract void downloadURL(URL url, int index);
|
||||
|
||||
public boolean keepSortOrder() {
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -19,12 +19,12 @@ import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractMultiPageRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Base64;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class DeviantartRipper extends AbstractMultiPageRipper {
|
||||
public class DeviantartRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final int SLEEP_TIME = 2000;
|
||||
|
||||
|
@ -12,10 +12,10 @@ import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractMultiPageRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class DrawcrowdRipper extends AbstractMultiPageRipper {
|
||||
public class DrawcrowdRipper extends AbstractHTMLRipper {
|
||||
|
||||
public DrawcrowdRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
|
@ -4,7 +4,9 @@ import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@ -13,22 +15,26 @@ import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class EHentaiRipper extends AlbumRipper {
|
||||
public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
// All sleep times are in milliseconds
|
||||
private static final int PAGE_SLEEP_TIME = 3 * 1000;
|
||||
private static final int IMAGE_SLEEP_TIME = 1 * 1000;
|
||||
private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
|
||||
private static final int PAGE_SLEEP_TIME = 3000;
|
||||
private static final int IMAGE_SLEEP_TIME = 1500;
|
||||
private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
|
||||
|
||||
private static final String DOMAIN = "g.e-hentai.org", HOST = "e-hentai";
|
||||
private String lastURL = null;
|
||||
|
||||
// Thread pool for finding direct image links from "image" pages (html)
|
||||
private DownloadThreadPool ehentaiThreadPool = new DownloadThreadPool("ehentai");
|
||||
@Override
|
||||
public DownloadThreadPool getThreadPool() {
|
||||
return ehentaiThreadPool;
|
||||
}
|
||||
|
||||
// Current HTML document
|
||||
private Document albumDoc = null;
|
||||
@ -45,25 +51,22 @@ public class EHentaiRipper extends AlbumRipper {
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
return "e-hentai";
|
||||
}
|
||||
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
return url;
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "g.e-hentai.org";
|
||||
}
|
||||
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
if (albumDoc == null) {
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
|
||||
logger.info("Retrieving " + url);
|
||||
albumDoc = Http.url(url)
|
||||
.cookies(cookies)
|
||||
.get();
|
||||
albumDoc = getPageWithRetries(url);
|
||||
}
|
||||
Elements elems = albumDoc.select("#gn");
|
||||
return HOST + "_" + elems.get(0).text();
|
||||
return getHost() + "_" + elems.first().text();
|
||||
} catch (Exception e) {
|
||||
// Fall back to default album naming convention
|
||||
logger.warn("Failed to get album title from " + url, e);
|
||||
@ -88,94 +91,96 @@ public class EHentaiRipper extends AlbumRipper {
|
||||
+ " Got: " + url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
int index = 0, retries = 3;
|
||||
String nextUrl = this.url.toExternalForm();
|
||||
/**
|
||||
* Attempts to get page, checks for IP ban, waits.
|
||||
* @param url
|
||||
* @return Page document
|
||||
* @throws IOException If page loading errors, or if retries are exhausted
|
||||
*/
|
||||
private Document getPageWithRetries(URL url) throws IOException {
|
||||
Document doc;
|
||||
int retries = 3;
|
||||
while (true) {
|
||||
if (isStopped()) {
|
||||
break;
|
||||
}
|
||||
if (albumDoc == null) {
|
||||
logger.info(" Retrieving album page " + nextUrl);
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
|
||||
albumDoc = Http.url(nextUrl)
|
||||
.referrer(this.url)
|
||||
.cookies(cookies)
|
||||
.get();
|
||||
}
|
||||
// Check for rate limiting
|
||||
if (albumDoc.toString().contains("IP address will be automatically banned")) {
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
|
||||
logger.info("Retrieving " + url);
|
||||
doc = Http.url(url)
|
||||
.referrer(this.url)
|
||||
.cookies(cookies)
|
||||
.get();
|
||||
if (doc.toString().contains("IP address will be automatically banned")) {
|
||||
if (retries == 0) {
|
||||
logger.error("Hit rate limit and maximum number of retries, giving up");
|
||||
break;
|
||||
throw new IOException("Hit rate limit and maximum number of retries, giving up");
|
||||
}
|
||||
logger.warn("Hit rate limit while loading " + nextUrl + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining");
|
||||
logger.warn("Hit rate limit while loading " + url + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining");
|
||||
retries--;
|
||||
try {
|
||||
Thread.sleep(IP_BLOCK_SLEEP_TIME);
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("Interrupted while waiting for rate limit to subside", e);
|
||||
break;
|
||||
}
|
||||
albumDoc = null;
|
||||
continue;
|
||||
}
|
||||
// Find thumbnails
|
||||
Elements thumbs = albumDoc.select("#gdt > .gdtm a");
|
||||
if (thumbs.size() == 0) {
|
||||
logger.info("albumDoc: " + albumDoc);
|
||||
logger.info("No images found at " + nextUrl);
|
||||
break;
|
||||
}
|
||||
// Iterate over images on page
|
||||
for (Element thumb : thumbs) {
|
||||
if (isStopped()) {
|
||||
break;
|
||||
}
|
||||
index++;
|
||||
EHentaiImageThread t = new EHentaiImageThread(new URL(thumb.attr("href")), index, this.workingDir);
|
||||
ehentaiThreadPool.addThread(t);
|
||||
try {
|
||||
Thread.sleep(IMAGE_SLEEP_TIME);
|
||||
} catch (InterruptedException e) {
|
||||
logger.warn("Interrupted while waiting to load next image", e);
|
||||
throw new IOException("Interrupted while waiting for rate limit to subside");
|
||||
}
|
||||
}
|
||||
|
||||
if (isStopped()) {
|
||||
break;
|
||||
}
|
||||
// Find next page
|
||||
Elements hrefs = albumDoc.select(".ptt a");
|
||||
if (hrefs.size() == 0) {
|
||||
logger.info("No navigation links found at " + nextUrl);
|
||||
break;
|
||||
}
|
||||
// Ensure next page is different from the current page
|
||||
String lastUrl = nextUrl;
|
||||
nextUrl = hrefs.last().attr("href");
|
||||
if (lastUrl.equals(nextUrl)) {
|
||||
break; // We're on the last page
|
||||
}
|
||||
|
||||
// Reset albumDoc so we fetch the page next time
|
||||
albumDoc = null;
|
||||
|
||||
// Sleep before loading next page
|
||||
try {
|
||||
Thread.sleep(PAGE_SLEEP_TIME);
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("Interrupted while waiting to load next page", e);
|
||||
break;
|
||||
else {
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith(DOMAIN);
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
albumDoc = getPageWithRetries(this.url);
|
||||
}
|
||||
this.lastURL = this.url.toExternalForm();
|
||||
return albumDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
// Check if we've stopped
|
||||
if (isStopped()) {
|
||||
throw new IOException("Ripping interrupted");
|
||||
}
|
||||
// Find next page
|
||||
Elements hrefs = doc.select(".ptt a");
|
||||
if (hrefs.size() == 0) {
|
||||
logger.info("doc: " + doc.html());
|
||||
throw new IOException("No navigation links found");
|
||||
}
|
||||
// Ensure next page is different from the current page
|
||||
String nextURL = hrefs.last().attr("href");
|
||||
if (nextURL.equals(this.lastURL)) {
|
||||
logger.info("lastURL = nextURL : " + nextURL);
|
||||
throw new IOException("Reached last page of results");
|
||||
}
|
||||
// Sleep before loading next page
|
||||
sleep(PAGE_SLEEP_TIME);
|
||||
// Load next page
|
||||
Document nextPage = getPageWithRetries(new URL(nextURL));
|
||||
this.lastURL = nextURL;
|
||||
return nextPage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
List<String> imageURLs = new ArrayList<String>();
|
||||
Elements thumbs = page.select("#gdt > .gdtm a");
|
||||
// Iterate over images on page
|
||||
for (Element thumb : thumbs) {
|
||||
imageURLs.add(thumb.attr("href"));
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir);
|
||||
ehentaiThreadPool.addThread(t);
|
||||
try {
|
||||
Thread.sleep(IMAGE_SLEEP_TIME);
|
||||
}
|
||||
catch (InterruptedException e) {
|
||||
logger.warn("Interrupted while waiting to load next image", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -187,7 +192,6 @@ public class EHentaiRipper extends AlbumRipper {
|
||||
private URL url;
|
||||
private int index;
|
||||
private File workingDir;
|
||||
private int retries = 3;
|
||||
|
||||
public EHentaiImageThread(URL url, int index, File workingDir) {
|
||||
super();
|
||||
@ -203,27 +207,7 @@ public class EHentaiRipper extends AlbumRipper {
|
||||
|
||||
private void fetchImage() {
|
||||
try {
|
||||
Document doc = Http.url(this.url)
|
||||
.referrer(this.url)
|
||||
.cookies(cookies)
|
||||
.get();
|
||||
// Check for rate limit
|
||||
if (doc.toString().contains("IP address will be automatically banned")) {
|
||||
if (this.retries == 0) {
|
||||
logger.error("Rate limited & ran out of retries, skipping image at " + this.url);
|
||||
return;
|
||||
}
|
||||
logger.warn("Hit rate limit. Sleeping for " + IP_BLOCK_SLEEP_TIME + "ms");
|
||||
try {
|
||||
Thread.sleep(IP_BLOCK_SLEEP_TIME);
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("Interrupted while waiting for rate limit to subside", e);
|
||||
return;
|
||||
}
|
||||
this.retries--;
|
||||
fetchImage(); // Re-attempt to download the image
|
||||
return;
|
||||
}
|
||||
Document doc = getPageWithRetries(this.url);
|
||||
|
||||
// Find image
|
||||
Elements images = doc.select(".sni > a > img");
|
||||
|
@ -1,168 +1,41 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.jsoup.Connection.Response;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractSinglePageRipper;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class EightmusesRipper extends AlbumRipper {
|
||||
|
||||
private static final String DOMAIN = "8muses.com",
|
||||
HOST = "8muses";
|
||||
public class EightmusesRipper extends AbstractSinglePageRipper {
|
||||
|
||||
private Document albumDoc = null;
|
||||
private Map<String,String> cookies = new HashMap<String,String>();
|
||||
|
||||
public EightmusesRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith(DOMAIN);
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
return url;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
Element titleElement = albumDoc.select("meta[name=description]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.substring(title.lastIndexOf('/') + 1);
|
||||
return HOST + "_" + title.trim();
|
||||
} catch (IOException e) {
|
||||
// Fall back to default album naming convention
|
||||
logger.info("Unable to find title at " + url);
|
||||
}
|
||||
return super.getAlbumTitle(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
ripAlbum(this.url.toExternalForm(), this.workingDir);
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
private void ripAlbum(String url, File subdir) throws IOException {
|
||||
logger.info(" Retrieving " + url);
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, url);
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
|
||||
int index = 0; // Both album index and image index
|
||||
if (albumDoc.select(".preview > span").size() > 0) {
|
||||
// Page contains subalbums (not images)
|
||||
for (Element subalbum : albumDoc.select("a.preview")) {
|
||||
ripSubalbumFromPreview(subalbum, subdir, ++index);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Page contains images
|
||||
for (Element thumb : albumDoc.select("img")) {
|
||||
downloadImage(thumb, subdir, ++index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param subalbum Anchor element of a subalbum
|
||||
* @throws IOException
|
||||
*/
|
||||
private void ripSubalbumFromPreview(Element subalbum, File subdir, int index) throws IOException {
|
||||
// Find + sanitize URL from Element
|
||||
String subUrl = subalbum.attr("href");
|
||||
subUrl = subUrl.replaceAll("\\.\\./", "");
|
||||
if (subUrl.startsWith("//")) {
|
||||
subUrl = "http:";
|
||||
}
|
||||
else if (!subUrl.startsWith("http://")) {
|
||||
subUrl = "http://www.8muses.com/" + subUrl;
|
||||
}
|
||||
// Prepend image index if enabled
|
||||
// Get album title
|
||||
String subTitle = subalbum.attr("alt");
|
||||
if (subTitle.equals("")) {
|
||||
subTitle = getGID(new URL(subUrl));
|
||||
}
|
||||
subTitle = Utils.filesystemSafe(subTitle);
|
||||
// Create path to subdirectory
|
||||
File subDir = new File(subdir.getAbsolutePath() + File.separator + subTitle);
|
||||
if (!subDir.exists()) {
|
||||
subDir.mkdirs();
|
||||
}
|
||||
albumDoc = null;
|
||||
ripAlbum(subUrl, subDir);
|
||||
try {
|
||||
Thread.sleep(2000);
|
||||
} catch (InterruptedException e) {
|
||||
logger.warn("Interrupted whiel waiting to load next album");
|
||||
}
|
||||
}
|
||||
|
||||
private void downloadImage(Element thumb, File subdir, int index) {
|
||||
// Find thumbnail image source
|
||||
String image = null;
|
||||
if (thumb.hasAttr("data-cfsrc")) {
|
||||
image = thumb.attr("data-cfsrc");
|
||||
}
|
||||
else if (thumb.hasAttr("src")) {
|
||||
image = thumb.attr("src");
|
||||
}
|
||||
else {
|
||||
logger.warn("Thumb does not havedata-cfsrc or src: " + thumb);
|
||||
return;
|
||||
}
|
||||
// Remove relative directory path naming
|
||||
image = image.replaceAll("\\.\\./", "");
|
||||
if (image.startsWith("//")) {
|
||||
image = "http:" + image;
|
||||
}
|
||||
// Convert from thumb URL to full-size
|
||||
if (image.contains("-cu_")) {
|
||||
image = image.replaceAll("-cu_[^.]+", "-me");
|
||||
}
|
||||
// Set download path
|
||||
try {
|
||||
URL imageURL = new URL(image);
|
||||
String saveAs = subdir.getAbsolutePath() + File.separator;
|
||||
if (Utils.getConfigBoolean("download.save_order", true)) {
|
||||
// Append image index
|
||||
saveAs += String.format("%03d_", index);
|
||||
}
|
||||
// Append image title
|
||||
saveAs += Utils.filesystemSafe(thumb.attr("title"));
|
||||
// Append extension
|
||||
saveAs += image.substring(image.lastIndexOf('.'));
|
||||
File saveFile = new File(saveAs);
|
||||
// Download
|
||||
addURLToDownload(imageURL, saveFile, thumb.baseUri(), null);
|
||||
} catch (IOException e) {
|
||||
logger.error("Failed to download image at " + image, e);
|
||||
sendUpdate(STATUS.DOWNLOAD_ERRORED, "Failed to download image at " + image);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
return "8muses";
|
||||
}
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "8muses.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -175,4 +48,100 @@ public class EightmusesRipper extends AlbumRipper {
|
||||
return m.group(m.groupCount());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("meta[name=description]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.substring(title.lastIndexOf('/') + 1);
|
||||
return getHost() + "_" + title.trim();
|
||||
} catch (IOException e) {
|
||||
// Fall back to default album naming convention
|
||||
logger.info("Unable to find title at " + url);
|
||||
}
|
||||
return super.getAlbumTitle(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
Response resp = Http.url(url).response();
|
||||
cookies.putAll(resp.cookies());
|
||||
albumDoc = resp.parse();
|
||||
}
|
||||
return albumDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
List<String> imageURLs = new ArrayList<String>();
|
||||
if (page.select(".preview > span").size() > 0) {
|
||||
// Page contains subalbums (not images)
|
||||
Elements albumElements = page.select("a.preview");
|
||||
List<Element> albumsList = albumElements.subList(0, albumElements.size());
|
||||
Collections.reverse(albumsList);
|
||||
// Iterate over elements in reverse order
|
||||
for (Element subalbum : albumsList) {
|
||||
String subUrl = subalbum.attr("href");
|
||||
subUrl = subUrl.replaceAll("\\.\\./", "");
|
||||
if (subUrl.startsWith("//")) {
|
||||
subUrl = "http:";
|
||||
}
|
||||
else if (!subUrl.startsWith("http://")) {
|
||||
subUrl = "http://www.8muses.com/" + subUrl;
|
||||
}
|
||||
try {
|
||||
logger.info("Retrieving " + subUrl);
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
|
||||
Document subPage = Http.url(subUrl).get();
|
||||
// Get all images in subalbum, add to list.
|
||||
List<String> subalbumImages = getURLsFromPage(subPage);
|
||||
logger.info("Found " + subalbumImages.size() + " images in subalbum");
|
||||
imageURLs.addAll(subalbumImages);
|
||||
} catch (IOException e) {
|
||||
logger.warn("Error while loading subalbum " + subUrl, e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Page contains images
|
||||
for (Element thumb : page.select("img")) {
|
||||
// Find thumbnail image source
|
||||
String image = null;
|
||||
if (thumb.hasAttr("data-cfsrc")) {
|
||||
image = thumb.attr("data-cfsrc");
|
||||
}
|
||||
else if (thumb.hasAttr("src")) {
|
||||
image = thumb.attr("src");
|
||||
}
|
||||
else {
|
||||
logger.warn("Thumb does not have data-cfsrc or src: " + thumb);
|
||||
continue;
|
||||
}
|
||||
// Remove relative directory path naming
|
||||
image = image.replaceAll("\\.\\./", "");
|
||||
if (image.startsWith("//")) {
|
||||
image = "http:" + image;
|
||||
}
|
||||
// Convert from thumb URL to full-size
|
||||
if (image.contains("-cu_")) {
|
||||
image = image.replaceAll("-cu_[^.]+", "-me");
|
||||
}
|
||||
imageURLs.add(image);
|
||||
}
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPrefix(int index) {
|
||||
return String.format("%03d_", index);
|
||||
}
|
||||
}
|
||||
|
@ -3,82 +3,33 @@ package com.rarchives.ripme.ripper.rippers;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class FapprovedRipper extends AlbumRipper {
|
||||
public class FapprovedRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final String DOMAIN = "fapproved.com",
|
||||
HOST = "fapproved";
|
||||
private int pageIndex = 1;
|
||||
private String username = null;
|
||||
|
||||
public FapprovedRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
return (url.getHost().endsWith(DOMAIN));
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://fapproved\\.com/users/([a-zA-Z0-9\\-_]{1,}).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return new URL("http://fapproved.com/users/" + m.group(1));
|
||||
}
|
||||
throw new MalformedURLException("Expected username in URL (fapproved.com/users/username and not " + url);
|
||||
}
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
int index = 0, page = 0;
|
||||
String url, user = getGID(this.url);
|
||||
boolean hasNextPage = true;
|
||||
while (hasNextPage) {
|
||||
page++;
|
||||
url = "http://fapproved.com/users/" + user + "/images?page=" + page;
|
||||
this.sendUpdate(STATUS.LOADING_RESOURCE, url);
|
||||
logger.info(" Retrieving " + url);
|
||||
Document doc = Http.url(url)
|
||||
.ignoreContentType()
|
||||
.get();
|
||||
for (Element image : doc.select("div.actual-image img")) {
|
||||
String imageUrl = image.attr("src");
|
||||
if (imageUrl.startsWith("//")) {
|
||||
imageUrl = "http:" + imageUrl;
|
||||
}
|
||||
index++;
|
||||
String prefix = "";
|
||||
if (Utils.getConfigBoolean("download.save_order", true)) {
|
||||
prefix = String.format("%03d_", index);
|
||||
}
|
||||
addURLToDownload(new URL(imageUrl), prefix);
|
||||
}
|
||||
if ( (doc.select("div.pagination li.next.disabled").size() != 0)
|
||||
|| (doc.select("div.pagination").size() == 0) ) {
|
||||
break;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(3000);
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("[!] Interrupted while waiting to load next album:", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
return "fapproved";
|
||||
}
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "fapproved.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -86,9 +37,62 @@ public class FapprovedRipper extends AlbumRipper {
|
||||
Pattern p = Pattern.compile("^https?://[w.]*fapproved.com/users/([a-zA-Z0-9\\-_]{3,}).*$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
username = m.group(1);
|
||||
return username;
|
||||
}
|
||||
throw new MalformedURLException("Fapproved user not found in " + url + ", expected http://fapproved.com/users/username/images");
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
return new URL("http://fapproved.com/users/" + getGID(url));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
pageIndex = 1;
|
||||
String pageURL = getPageURL(pageIndex);
|
||||
return Http.url(pageURL)
|
||||
.ignoreContentType()
|
||||
.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
if ( (doc.select("div.pagination li.next.disabled").size() != 0)
|
||||
|| (doc.select("div.pagination").size() == 0) ) {
|
||||
throw new IOException("No more pages found");
|
||||
}
|
||||
sleep(1000);
|
||||
pageIndex++;
|
||||
String pageURL = getPageURL(pageIndex);
|
||||
return Http.url(pageURL)
|
||||
.ignoreContentType()
|
||||
.get();
|
||||
}
|
||||
|
||||
private String getPageURL(int index) throws IOException {
|
||||
if (username == null) {
|
||||
username = getGID(this.url);
|
||||
}
|
||||
return "http://fapproved.com/users/" + username + "/images?page=" + pageIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
List<String> imageURLs = new ArrayList<String>();
|
||||
for (Element image : page.select("div.actual-image img")) {
|
||||
String imageURL = image.attr("src");
|
||||
if (imageURL.startsWith("//")) {
|
||||
imageURL = "http:" + imageURL;
|
||||
}
|
||||
imageURLs.add(imageURL);
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
}
|
||||
|
@ -3,8 +3,10 @@ package com.rarchives.ripme.ripper.rippers;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
@ -17,19 +19,22 @@ import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
||||
import com.rarchives.ripme.utils.Base64;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class FlickrRipper extends AlbumRipper {
|
||||
public class FlickrRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final String DOMAIN = "flickr.com",
|
||||
HOST = "flickr";
|
||||
|
||||
private DownloadThreadPool flickrThreadPool;
|
||||
private int page = 1;
|
||||
private Set<String> attempted = new HashSet<String>();
|
||||
private Document albumDoc = null;
|
||||
private DownloadThreadPool flickrThreadPool;
|
||||
@Override
|
||||
public DownloadThreadPool getThreadPool() {
|
||||
return flickrThreadPool;
|
||||
}
|
||||
|
||||
public FlickrRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
@ -38,7 +43,11 @@ public class FlickrRipper extends AlbumRipper {
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
return "flickr";
|
||||
}
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "flickr.com";
|
||||
}
|
||||
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
@ -61,15 +70,13 @@ public class FlickrRipper extends AlbumRipper {
|
||||
}
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
Document doc = getFirstPage();
|
||||
String user = url.toExternalForm();
|
||||
user = user.substring(user.indexOf("/photos/") + "/photos/".length());
|
||||
user = user.substring(0, user.indexOf("/"));
|
||||
String title = albumDoc.select("meta[name=description]").get(0).attr("content");
|
||||
String title = doc.select("meta[name=description]").get(0).attr("content");
|
||||
if (!title.equals("")) {
|
||||
return HOST + "_" + user + "_" + title;
|
||||
return getHost() + "_" + user + "_" + title;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// Fall back to default album naming convention
|
||||
@ -114,79 +121,79 @@ public class FlickrRipper extends AlbumRipper {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
//Map<String,String> cookies = signinToFlickr();
|
||||
Set<String> attempted = new HashSet<String>();
|
||||
int index = 0, page = 1;
|
||||
String nextURL = this.url.toExternalForm();
|
||||
while (true) {
|
||||
if (isStopped()) {
|
||||
break;
|
||||
}
|
||||
logger.info(" Retrieving " + nextURL);
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(nextURL).get();
|
||||
}
|
||||
for (Element thumb : albumDoc.select("a[data-track=photo-click]")) {
|
||||
String imageTitle = null;
|
||||
if (thumb.hasAttr("title")) {
|
||||
imageTitle = thumb.attr("title");
|
||||
}
|
||||
String imagePage = thumb.attr("href");
|
||||
if (imagePage.startsWith("/")) {
|
||||
imagePage = "http://www.flickr.com" + imagePage;
|
||||
}
|
||||
if (imagePage.contains("/in/")) {
|
||||
imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1);
|
||||
}
|
||||
if (!imagePage.endsWith("/")) {
|
||||
imagePage += "/";
|
||||
}
|
||||
imagePage += "sizes/o/";
|
||||
|
||||
// Check for duplicates
|
||||
if (attempted.contains(imagePage)) {
|
||||
continue;
|
||||
}
|
||||
attempted.add(imagePage);
|
||||
|
||||
index += 1;
|
||||
// Add image page to threadpool to grab the image & download it
|
||||
FlickrImageThread mit = new FlickrImageThread(new URL(imagePage), imageTitle, index);
|
||||
flickrThreadPool.addThread(mit);
|
||||
}
|
||||
// Find how many pages there are
|
||||
int lastPage = 0;
|
||||
for (Element apage : albumDoc.select("a[data-track^=page-]")) {
|
||||
String lastPageStr = apage.attr("data-track").replace("page-", "");
|
||||
lastPage = Integer.parseInt(lastPageStr);
|
||||
}
|
||||
// If we're at the last page, stop.
|
||||
if (page >= lastPage) {
|
||||
break;
|
||||
}
|
||||
// Load the next page
|
||||
page++;
|
||||
albumDoc = null;
|
||||
nextURL = this.url.toExternalForm();
|
||||
if (!nextURL.endsWith("/")) {
|
||||
nextURL += "/";
|
||||
}
|
||||
nextURL += "page" + page + "/";
|
||||
// Wait a bit
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("Interrupted while waiting to load next page " + nextURL, e);
|
||||
break;
|
||||
}
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
flickrThreadPool.waitForThreads();
|
||||
waitForThreads();
|
||||
return albumDoc;
|
||||
}
|
||||
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith(DOMAIN);
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
// Find how many pages there are
|
||||
int lastPage = 0;
|
||||
for (Element apage : doc.select("a[data-track^=page-]")) {
|
||||
String lastPageStr = apage.attr("data-track").replace("page-", "");
|
||||
lastPage = Integer.parseInt(lastPageStr);
|
||||
}
|
||||
// If we're at the last page, stop.
|
||||
if (page >= lastPage) {
|
||||
throw new IOException("No more pages");
|
||||
}
|
||||
// Load the next page
|
||||
page++;
|
||||
albumDoc = null;
|
||||
String nextURL = this.url.toExternalForm();
|
||||
if (!nextURL.endsWith("/")) {
|
||||
nextURL += "/";
|
||||
}
|
||||
nextURL += "page" + page + "/";
|
||||
// Wait a bit
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException e) {
|
||||
throw new IOException("Interrupted while waiting to load next page " + nextURL);
|
||||
}
|
||||
return Http.url(nextURL).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document page) {
|
||||
List<String> imageURLs = new ArrayList<String>();
|
||||
for (Element thumb : page.select("a[data-track=photo-click]")) {
|
||||
/* TODO find a way to persist the image title
|
||||
String imageTitle = null;
|
||||
if (thumb.hasAttr("title")) {
|
||||
imageTitle = thumb.attr("title");
|
||||
}
|
||||
*/
|
||||
String imagePage = thumb.attr("href");
|
||||
if (imagePage.startsWith("/")) {
|
||||
imagePage = "http://www.flickr.com" + imagePage;
|
||||
}
|
||||
if (imagePage.contains("/in/")) {
|
||||
imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1);
|
||||
}
|
||||
if (!imagePage.endsWith("/")) {
|
||||
imagePage += "/";
|
||||
}
|
||||
imagePage += "sizes/o/";
|
||||
|
||||
// Check for duplicates
|
||||
if (attempted.contains(imagePage)) {
|
||||
continue;
|
||||
}
|
||||
attempted.add(imagePage);
|
||||
imageURLs.add(imagePage);
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
// Add image page to threadpool to grab the image & download it
|
||||
FlickrImageThread mit = new FlickrImageThread(url, index);
|
||||
flickrThreadPool.addThread(mit);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -224,13 +231,11 @@ public class FlickrRipper extends AlbumRipper {
|
||||
*/
|
||||
private class FlickrImageThread extends Thread {
|
||||
private URL url;
|
||||
private String title;
|
||||
private int index;
|
||||
|
||||
public FlickrImageThread(URL url, String title, int index) {
|
||||
public FlickrImageThread(URL url, int index) {
|
||||
super();
|
||||
this.url = url;
|
||||
this.title = title;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
@ -248,9 +253,8 @@ public class FlickrRipper extends AlbumRipper {
|
||||
if (Utils.getConfigBoolean("download.save_order", true)) {
|
||||
prefix = String.format("%03d_", index);
|
||||
}
|
||||
prefix += Utils.filesystemSafe(title);
|
||||
synchronized (flickrThreadPool) {
|
||||
addURLToDownload(new URL(fullsizeImages.get(0).attr("src")), prefix);
|
||||
addURLToDownload(new URL(fullsizeImages.first().attr("src")), prefix);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
|
@ -3,7 +3,9 @@ package com.rarchives.ripme.ripper.rippers;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@ -12,99 +14,27 @@ import org.jsoup.Connection.Method;
|
||||
import org.jsoup.Connection.Response;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class GifyoRipper extends AlbumRipper {
|
||||
public class GifyoRipper extends AbstractHTMLRipper {
|
||||
|
||||
private static final String DOMAIN = "gifyo.com",
|
||||
HOST = "gifyo";
|
||||
private int page = 0;
|
||||
private Map<String,String> cookies = new HashMap<String,String>();
|
||||
|
||||
public GifyoRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
return (url.getHost().endsWith(DOMAIN));
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("^https?://gifyo\\.com/([a-zA-Z0-9\\-_]+)/?$");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return new URL("http://gifyo.com/" + m.group(1) + "/");
|
||||
}
|
||||
throw new MalformedURLException("Expected username in URL (gifyo.com/username/ and not " + url);
|
||||
}
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
int page = 0;
|
||||
Map<String,String> cookies = new HashMap<String,String>();
|
||||
while (true) {
|
||||
this.sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm() + " (page #" + page + ")");
|
||||
logger.info(" Retrieving " + this.url + "(page #" + page + ")");
|
||||
Response resp = null;
|
||||
if (page == 0) {
|
||||
resp = Http.url(this.url)
|
||||
.ignoreContentType()
|
||||
.response();
|
||||
cookies = resp.cookies();
|
||||
}
|
||||
else {
|
||||
Map<String,String> postData = new HashMap<String,String>();
|
||||
postData.put("cmd", "refreshData");
|
||||
postData.put("view", "gif");
|
||||
postData.put("layout", "grid");
|
||||
postData.put("page", Integer.toString(page));
|
||||
resp = Http.url(this.url)
|
||||
.ignoreContentType()
|
||||
.data(postData)
|
||||
.cookies(cookies)
|
||||
.method(Method.POST)
|
||||
.response();
|
||||
cookies.putAll(resp.cookies());
|
||||
}
|
||||
Document doc = resp.parse();
|
||||
Elements images = doc.select("div.gif img");
|
||||
logger.info("Found " + images.size() + " images");
|
||||
for (Element image : images) {
|
||||
String imageUrl = image.attr("src");
|
||||
if (imageUrl.startsWith("//")) {
|
||||
imageUrl = "http:" + imageUrl;
|
||||
}
|
||||
imageUrl = imageUrl.replace("/medium/", "/large/");
|
||||
imageUrl = imageUrl.replace("_s.gif", ".gif");
|
||||
addURLToDownload(new URL(imageUrl));
|
||||
}
|
||||
if (images.size() == 0) {
|
||||
if (doc.html().contains("profile is private")) {
|
||||
sendUpdate(STATUS.RIP_ERRORED, "User has private profile");
|
||||
throw new IOException("User has private profile");
|
||||
}
|
||||
else {
|
||||
logger.info("Page " + page + " has 0 images");
|
||||
}
|
||||
break;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(3000);
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("[!] Interrupted while waiting to load next album:", e);
|
||||
break;
|
||||
}
|
||||
page++;
|
||||
}
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
return "gifyo";
|
||||
}
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "gifyo.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -117,4 +47,67 @@ public class GifyoRipper extends AlbumRipper {
|
||||
throw new MalformedURLException("Gifyo user not found in " + url + ", expected http://gifyo.com/username");
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
return new URL("http://gifyo.com/" + getGID(url) + "/");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
Response resp = Http.url(this.url)
|
||||
.ignoreContentType()
|
||||
.response();
|
||||
cookies = resp.cookies();
|
||||
|
||||
Document doc = resp.parse();
|
||||
if (doc.html().contains("profile is private")) {
|
||||
sendUpdate(STATUS.RIP_ERRORED, "User has private profile");
|
||||
throw new IOException("User has private profile");
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getNextPage(Document doc) throws IOException {
|
||||
page++;
|
||||
Map<String,String> postData = new HashMap<String,String>();
|
||||
postData.put("cmd", "refreshData");
|
||||
postData.put("view", "gif");
|
||||
postData.put("layout", "grid");
|
||||
postData.put("page", Integer.toString(page));
|
||||
Response resp = Http.url(this.url)
|
||||
.ignoreContentType()
|
||||
.data(postData)
|
||||
.cookies(cookies)
|
||||
.method(Method.POST)
|
||||
.response();
|
||||
cookies.putAll(resp.cookies());
|
||||
Document nextDoc = resp.parse();
|
||||
if (nextDoc.select("div.gif img").size() == 0) {
|
||||
throw new IOException("No more images found");
|
||||
}
|
||||
sleep(2000);
|
||||
return nextDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> imageURLs = new ArrayList<String>();
|
||||
for (Element image : doc.select("div.gif img")) {
|
||||
String imageUrl = image.attr("src");
|
||||
if (imageUrl.startsWith("//")) {
|
||||
imageUrl = "http:" + imageUrl;
|
||||
}
|
||||
imageUrl = imageUrl.replace("/medium/", "/large/");
|
||||
imageUrl = imageUrl.replace("_s.gif", ".gif");
|
||||
imageURLs.add(imageUrl);
|
||||
}
|
||||
logger.info("Found " + imageURLs.size() + " images");
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url);
|
||||
}
|
||||
}
|
||||
|
@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -10,16 +12,14 @@ import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractSinglePageRipper;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class GirlsOfDesireRipper extends AlbumRipper {
|
||||
public class GirlsOfDesireRipper extends AbstractSinglePageRipper {
|
||||
// All sleep times are in milliseconds
|
||||
private static final int IMAGE_SLEEP_TIME = 100;
|
||||
|
||||
private static final String DOMAIN = "girlsofdesire.org", HOST = "GirlsOfDesire";
|
||||
|
||||
// Current HTML document
|
||||
private Document albumDoc = null;
|
||||
|
||||
@ -29,23 +29,19 @@ public class GirlsOfDesireRipper extends AlbumRipper {
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
return "GirlsOfDesire";
|
||||
}
|
||||
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
return url;
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "girlsofdesire.org";
|
||||
}
|
||||
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
if (albumDoc == null) {
|
||||
logger.info(" Retrieving " + url.toExternalForm());
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
Elements elems = albumDoc.select(".albumName");
|
||||
return HOST + "_" + elems.first().text();
|
||||
Document doc = getFirstPage();
|
||||
Elements elems = doc.select(".albumName");
|
||||
return getHost() + "_" + elems.first().text();
|
||||
} catch (Exception e) {
|
||||
// Fall back to default album naming convention
|
||||
logger.warn("Failed to get album title from " + url, e);
|
||||
@ -70,6 +66,33 @@ public class GirlsOfDesireRipper extends AlbumRipper {
|
||||
+ " Got: " + url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
return albumDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromPage(Document doc) {
|
||||
List<String> imageURLs = new ArrayList<String>();
|
||||
for (Element thumb : doc.select("td.vtop > a > img")) {
|
||||
String imgSrc = thumb.attr("src");
|
||||
imgSrc = imgSrc.replaceAll("_thumb\\.", ".");
|
||||
if (imgSrc.startsWith("/")) {
|
||||
imgSrc = "http://www.girlsofdesire.org" + imgSrc;
|
||||
}
|
||||
imageURLs.add(imgSrc);
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
String nextUrl = this.url.toExternalForm();
|
||||
@ -107,8 +130,4 @@ public class GirlsOfDesireRipper extends AlbumRipper {
|
||||
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
public boolean canRip(URL url) {
|
||||
return url.getHost().endsWith(DOMAIN);
|
||||
}
|
||||
}
|
@ -3,21 +3,22 @@ package com.rarchives.ripme.ripper.rippers;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
|
||||
public class GonewildRipper extends AlbumRipper {
|
||||
|
||||
private static final String HOST = "gonewild";
|
||||
private static final int SLEEP_TIME = 1000;
|
||||
public class GonewildRipper extends AbstractJSONRipper {
|
||||
|
||||
private static final int count = 50;
|
||||
private int startIndex = 0;
|
||||
private static String API_DOMAIN;
|
||||
private String username;
|
||||
|
||||
@ -26,6 +27,15 @@ public class GonewildRipper extends AlbumRipper {
|
||||
API_DOMAIN = Utils.getConfigString("gw.api", "gonewild");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "gonewild";
|
||||
}
|
||||
@Override
|
||||
public String getDomain() {
|
||||
return "gonewild.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canRip(URL url) {
|
||||
return getUsernameMatcher(url).matches();
|
||||
@ -36,72 +46,64 @@ public class GonewildRipper extends AlbumRipper {
|
||||
return p.matcher(url.toExternalForm());
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||
return url;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rip() throws IOException {
|
||||
int start = 0,
|
||||
count = 50;
|
||||
String baseGwURL = "http://" + API_DOMAIN + ".rarchives.com/api.cgi"
|
||||
+ "?method=get_user"
|
||||
+ "&user=" + username
|
||||
+ "&count=" + count;
|
||||
String gwURL, imagePath;
|
||||
JSONArray posts, images;
|
||||
JSONObject json, post, image;
|
||||
while (true) {
|
||||
logger.info(" Retrieving posts by " + username);
|
||||
gwURL = baseGwURL
|
||||
+ "&start=" + start;
|
||||
start += count;
|
||||
json = Http.url(gwURL)
|
||||
.getJSON();
|
||||
if (json.has("error")) {
|
||||
logger.error("Error while retrieving user posts:" + json.getString("error"));
|
||||
break;
|
||||
}
|
||||
posts = json.getJSONArray("posts");
|
||||
if (posts.length() == 0) {
|
||||
break; // No more posts to get
|
||||
}
|
||||
for (int i = 0; i < posts.length(); i++) {
|
||||
post = (JSONObject) posts.get(i);
|
||||
images = post.getJSONArray("images");
|
||||
for (int j = 0; j < images.length(); j++) {
|
||||
image = (JSONObject) images.get(j);
|
||||
imagePath = image.getString("path");
|
||||
if (imagePath.startsWith("..")) {
|
||||
imagePath = imagePath.substring(2);
|
||||
}
|
||||
imagePath = "http://" + API_DOMAIN + ".rarchives.com" + imagePath;
|
||||
logger.info(" Found file: " + imagePath);
|
||||
addURLToDownload(new URL(imagePath));
|
||||
}
|
||||
}
|
||||
try {
|
||||
Thread.sleep(SLEEP_TIME);
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("[!] Interrupted while waiting to load more posts", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
waitForThreads();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return HOST;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Matcher m = getUsernameMatcher(url);
|
||||
if (m.matches()) {
|
||||
this.username = m.group(m.groupCount());
|
||||
}
|
||||
else {
|
||||
throw new MalformedURLException("Expected format: gonewild.com/<user>");
|
||||
}
|
||||
return username;
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject getFirstPage() throws IOException {
|
||||
String gwURL = "http://" + API_DOMAIN + ".rarchives.com/api.cgi"
|
||||
+ "?method=get_user"
|
||||
+ "&user=" + username
|
||||
+ "&count=" + count
|
||||
+ "&start=" + startIndex;
|
||||
JSONObject nextJSON = Http.url(gwURL).getJSON();
|
||||
if (nextJSON.has("error")) {
|
||||
throw new IOException(nextJSON.getString("error"));
|
||||
}
|
||||
if (nextJSON.getJSONArray("posts").length() == 0) {
|
||||
throw new IOException("No posts found");
|
||||
}
|
||||
return nextJSON;
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject getNextPage(JSONObject json) throws IOException {
|
||||
startIndex += count;
|
||||
sleep(1000);
|
||||
return getFirstPage();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getURLsFromJSON(JSONObject json) {
|
||||
List<String> imageURLs = new ArrayList<String>();
|
||||
JSONArray posts = json.getJSONArray("posts");
|
||||
for (int i = 0; i < posts.length(); i++) {
|
||||
JSONObject post = posts.getJSONObject(i);
|
||||
JSONArray images = post.getJSONArray("images");
|
||||
for (int j = 0; j < images.length(); j++) {
|
||||
JSONObject image = images.getJSONObject(j);
|
||||
String imagePath = image.getString("path");
|
||||
if (imagePath.startsWith("..")) {
|
||||
imagePath = imagePath.substring(2);
|
||||
}
|
||||
imagePath = "http://" + API_DOMAIN + ".rarchives.com" + imagePath;
|
||||
imageURLs.add(imagePath);
|
||||
}
|
||||
}
|
||||
return imageURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void downloadURL(URL url, int index) {
|
||||
addURLToDownload(url, getPrefix(index));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user