Moving from MultiPage ripper to HTML ripper, added JSON ripper
This commit is contained in:
parent
b7397cd31e
commit
c166f93d57
@ -10,9 +10,9 @@ import org.jsoup.nodes.Document;
|
|||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
public abstract class AbstractMultiPageRipper extends AlbumRipper {
|
public abstract class AbstractHTMLRipper extends AlbumRipper {
|
||||||
|
|
||||||
public AbstractMultiPageRipper(URL url) throws IOException {
|
public AbstractHTMLRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -23,6 +23,9 @@ public abstract class AbstractMultiPageRipper extends AlbumRipper {
|
|||||||
public abstract Document getNextPage(Document doc) throws IOException;
|
public abstract Document getNextPage(Document doc) throws IOException;
|
||||||
public abstract List<String> getURLsFromPage(Document page);
|
public abstract List<String> getURLsFromPage(Document page);
|
||||||
public abstract void downloadURL(URL url, int index);
|
public abstract void downloadURL(URL url, int index);
|
||||||
|
public DownloadThreadPool getThreadPool() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean keepSortOrder() {
|
public boolean keepSortOrder() {
|
||||||
return true;
|
return true;
|
||||||
@ -54,19 +57,29 @@ public abstract class AbstractMultiPageRipper extends AlbumRipper {
|
|||||||
|
|
||||||
for (String imageURL : imageURLs) {
|
for (String imageURL : imageURLs) {
|
||||||
if (isStopped()) {
|
if (isStopped()) {
|
||||||
logger.info("Interrupted");
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
index += 1;
|
index += 1;
|
||||||
downloadURL(new URL(imageURL), index);
|
downloadURL(new URL(imageURL), index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (isStopped()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
sendUpdate(STATUS.LOADING_RESOURCE, "next page");
|
||||||
doc = getNextPage(doc);
|
doc = getNextPage(doc);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.info("Can't get next page: " + e.getMessage());
|
logger.info("Can't get next page: " + e.getMessage());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If they're using a thread pool, wait for it.
|
||||||
|
if (getThreadPool() != null) {
|
||||||
|
getThreadPool().waitForThreads();
|
||||||
|
}
|
||||||
waitForThreads();
|
waitForThreads();
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,93 @@
|
|||||||
|
package com.rarchives.ripme.ripper;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
|
public abstract class AbstractJSONRipper extends AlbumRipper {
|
||||||
|
|
||||||
|
public AbstractJSONRipper(URL url) throws IOException {
|
||||||
|
super(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract String getDomain();
|
||||||
|
public abstract String getHost();
|
||||||
|
|
||||||
|
public abstract JSONObject getFirstPage() throws IOException;
|
||||||
|
public abstract JSONObject getNextPage(JSONObject json) throws IOException;
|
||||||
|
public abstract List<String> getURLsFromJSON(JSONObject json);
|
||||||
|
public abstract void downloadURL(URL url, int index);
|
||||||
|
public DownloadThreadPool getThreadPool() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean keepSortOrder() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canRip(URL url) {
|
||||||
|
return url.getHost().endsWith(getDomain());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void rip() throws IOException {
|
||||||
|
int index = 0;
|
||||||
|
logger.info("Retrieving " + this.url);
|
||||||
|
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
||||||
|
JSONObject json = getFirstPage();
|
||||||
|
|
||||||
|
while (json != null) {
|
||||||
|
List<String> imageURLs = getURLsFromJSON(json);
|
||||||
|
|
||||||
|
if (imageURLs.size() == 0) {
|
||||||
|
throw new IOException("No images found at " + this.url);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String imageURL : imageURLs) {
|
||||||
|
if (isStopped()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
index += 1;
|
||||||
|
downloadURL(new URL(imageURL), index);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isStopped()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
sendUpdate(STATUS.LOADING_RESOURCE, "next page");
|
||||||
|
json = getNextPage(json);
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.info("Can't get next page: " + e.getMessage());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If they're using a thread pool, wait for it.
|
||||||
|
if (getThreadPool() != null) {
|
||||||
|
getThreadPool().waitForThreads();
|
||||||
|
}
|
||||||
|
waitForThreads();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPrefix(int index) {
|
||||||
|
String prefix = "";
|
||||||
|
if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
|
||||||
|
prefix = String.format("%03d_", index);
|
||||||
|
}
|
||||||
|
return prefix;
|
||||||
|
}
|
||||||
|
}
|
@ -24,7 +24,7 @@ public abstract class AbstractSinglePageRipper extends AlbumRipper {
|
|||||||
public abstract void downloadURL(URL url, int index);
|
public abstract void downloadURL(URL url, int index);
|
||||||
|
|
||||||
public boolean keepSortOrder() {
|
public boolean keepSortOrder() {
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -19,12 +19,12 @@ import org.jsoup.nodes.Document;
|
|||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractMultiPageRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
import com.rarchives.ripme.utils.Base64;
|
import com.rarchives.ripme.utils.Base64;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
public class DeviantartRipper extends AbstractMultiPageRipper {
|
public class DeviantartRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private static final int SLEEP_TIME = 2000;
|
private static final int SLEEP_TIME = 2000;
|
||||||
|
|
||||||
|
@ -12,10 +12,10 @@ import org.jsoup.nodes.Document;
|
|||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractMultiPageRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
|
||||||
public class DrawcrowdRipper extends AbstractMultiPageRipper {
|
public class DrawcrowdRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
public DrawcrowdRipper(URL url) throws IOException {
|
public DrawcrowdRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
|
@ -4,7 +4,9 @@ import java.io.File;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
@ -13,22 +15,26 @@ import org.jsoup.nodes.Document;
|
|||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
public class EHentaiRipper extends AlbumRipper {
|
public class EHentaiRipper extends AbstractHTMLRipper {
|
||||||
// All sleep times are in milliseconds
|
// All sleep times are in milliseconds
|
||||||
private static final int PAGE_SLEEP_TIME = 3 * 1000;
|
private static final int PAGE_SLEEP_TIME = 3000;
|
||||||
private static final int IMAGE_SLEEP_TIME = 1 * 1000;
|
private static final int IMAGE_SLEEP_TIME = 1500;
|
||||||
private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
|
private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
|
||||||
|
|
||||||
private static final String DOMAIN = "g.e-hentai.org", HOST = "e-hentai";
|
private String lastURL = null;
|
||||||
|
|
||||||
// Thread pool for finding direct image links from "image" pages (html)
|
// Thread pool for finding direct image links from "image" pages (html)
|
||||||
private DownloadThreadPool ehentaiThreadPool = new DownloadThreadPool("ehentai");
|
private DownloadThreadPool ehentaiThreadPool = new DownloadThreadPool("ehentai");
|
||||||
|
@Override
|
||||||
|
public DownloadThreadPool getThreadPool() {
|
||||||
|
return ehentaiThreadPool;
|
||||||
|
}
|
||||||
|
|
||||||
// Current HTML document
|
// Current HTML document
|
||||||
private Document albumDoc = null;
|
private Document albumDoc = null;
|
||||||
@ -45,25 +51,22 @@ public class EHentaiRipper extends AlbumRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHost() {
|
public String getHost() {
|
||||||
return HOST;
|
return "e-hentai";
|
||||||
}
|
}
|
||||||
|
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
@Override
|
||||||
return url;
|
public String getDomain() {
|
||||||
|
return "g.e-hentai.org";
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
if (albumDoc == null) {
|
if (albumDoc == null) {
|
||||||
sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
|
albumDoc = getPageWithRetries(url);
|
||||||
logger.info("Retrieving " + url);
|
|
||||||
albumDoc = Http.url(url)
|
|
||||||
.cookies(cookies)
|
|
||||||
.get();
|
|
||||||
}
|
}
|
||||||
Elements elems = albumDoc.select("#gn");
|
Elements elems = albumDoc.select("#gn");
|
||||||
return HOST + "_" + elems.get(0).text();
|
return getHost() + "_" + elems.first().text();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Fall back to default album naming convention
|
// Fall back to default album naming convention
|
||||||
logger.warn("Failed to get album title from " + url, e);
|
logger.warn("Failed to get album title from " + url, e);
|
||||||
@ -88,94 +91,96 @@ public class EHentaiRipper extends AlbumRipper {
|
|||||||
+ " Got: " + url);
|
+ " Got: " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
public void rip() throws IOException {
|
* Attempts to get page, checks for IP ban, waits.
|
||||||
int index = 0, retries = 3;
|
* @param url
|
||||||
String nextUrl = this.url.toExternalForm();
|
* @return Page document
|
||||||
|
* @throws IOException If page loading errors, or if retries are exhausted
|
||||||
|
*/
|
||||||
|
private Document getPageWithRetries(URL url) throws IOException {
|
||||||
|
Document doc;
|
||||||
|
int retries = 3;
|
||||||
while (true) {
|
while (true) {
|
||||||
if (isStopped()) {
|
sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
|
||||||
break;
|
logger.info("Retrieving " + url);
|
||||||
}
|
doc = Http.url(url)
|
||||||
if (albumDoc == null) {
|
.referrer(this.url)
|
||||||
logger.info(" Retrieving album page " + nextUrl);
|
.cookies(cookies)
|
||||||
sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
|
.get();
|
||||||
albumDoc = Http.url(nextUrl)
|
if (doc.toString().contains("IP address will be automatically banned")) {
|
||||||
.referrer(this.url)
|
|
||||||
.cookies(cookies)
|
|
||||||
.get();
|
|
||||||
}
|
|
||||||
// Check for rate limiting
|
|
||||||
if (albumDoc.toString().contains("IP address will be automatically banned")) {
|
|
||||||
if (retries == 0) {
|
if (retries == 0) {
|
||||||
logger.error("Hit rate limit and maximum number of retries, giving up");
|
throw new IOException("Hit rate limit and maximum number of retries, giving up");
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
logger.warn("Hit rate limit while loading " + nextUrl + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining");
|
logger.warn("Hit rate limit while loading " + url + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining");
|
||||||
retries--;
|
retries--;
|
||||||
try {
|
try {
|
||||||
Thread.sleep(IP_BLOCK_SLEEP_TIME);
|
Thread.sleep(IP_BLOCK_SLEEP_TIME);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
logger.error("Interrupted while waiting for rate limit to subside", e);
|
throw new IOException("Interrupted while waiting for rate limit to subside");
|
||||||
break;
|
|
||||||
}
|
|
||||||
albumDoc = null;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Find thumbnails
|
|
||||||
Elements thumbs = albumDoc.select("#gdt > .gdtm a");
|
|
||||||
if (thumbs.size() == 0) {
|
|
||||||
logger.info("albumDoc: " + albumDoc);
|
|
||||||
logger.info("No images found at " + nextUrl);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Iterate over images on page
|
|
||||||
for (Element thumb : thumbs) {
|
|
||||||
if (isStopped()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
index++;
|
|
||||||
EHentaiImageThread t = new EHentaiImageThread(new URL(thumb.attr("href")), index, this.workingDir);
|
|
||||||
ehentaiThreadPool.addThread(t);
|
|
||||||
try {
|
|
||||||
Thread.sleep(IMAGE_SLEEP_TIME);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.warn("Interrupted while waiting to load next image", e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
if (isStopped()) {
|
return doc;
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Find next page
|
|
||||||
Elements hrefs = albumDoc.select(".ptt a");
|
|
||||||
if (hrefs.size() == 0) {
|
|
||||||
logger.info("No navigation links found at " + nextUrl);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Ensure next page is different from the current page
|
|
||||||
String lastUrl = nextUrl;
|
|
||||||
nextUrl = hrefs.last().attr("href");
|
|
||||||
if (lastUrl.equals(nextUrl)) {
|
|
||||||
break; // We're on the last page
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reset albumDoc so we fetch the page next time
|
|
||||||
albumDoc = null;
|
|
||||||
|
|
||||||
// Sleep before loading next page
|
|
||||||
try {
|
|
||||||
Thread.sleep(PAGE_SLEEP_TIME);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.error("Interrupted while waiting to load next page", e);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
waitForThreads();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean canRip(URL url) {
|
@Override
|
||||||
return url.getHost().endsWith(DOMAIN);
|
public Document getFirstPage() throws IOException {
|
||||||
|
if (albumDoc == null) {
|
||||||
|
albumDoc = getPageWithRetries(this.url);
|
||||||
|
}
|
||||||
|
this.lastURL = this.url.toExternalForm();
|
||||||
|
return albumDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
|
// Check if we've stopped
|
||||||
|
if (isStopped()) {
|
||||||
|
throw new IOException("Ripping interrupted");
|
||||||
|
}
|
||||||
|
// Find next page
|
||||||
|
Elements hrefs = doc.select(".ptt a");
|
||||||
|
if (hrefs.size() == 0) {
|
||||||
|
logger.info("doc: " + doc.html());
|
||||||
|
throw new IOException("No navigation links found");
|
||||||
|
}
|
||||||
|
// Ensure next page is different from the current page
|
||||||
|
String nextURL = hrefs.last().attr("href");
|
||||||
|
if (nextURL.equals(this.lastURL)) {
|
||||||
|
logger.info("lastURL = nextURL : " + nextURL);
|
||||||
|
throw new IOException("Reached last page of results");
|
||||||
|
}
|
||||||
|
// Sleep before loading next page
|
||||||
|
sleep(PAGE_SLEEP_TIME);
|
||||||
|
// Load next page
|
||||||
|
Document nextPage = getPageWithRetries(new URL(nextURL));
|
||||||
|
this.lastURL = nextURL;
|
||||||
|
return nextPage;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getURLsFromPage(Document page) {
|
||||||
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
|
Elements thumbs = page.select("#gdt > .gdtm a");
|
||||||
|
// Iterate over images on page
|
||||||
|
for (Element thumb : thumbs) {
|
||||||
|
imageURLs.add(thumb.attr("href"));
|
||||||
|
}
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void downloadURL(URL url, int index) {
|
||||||
|
EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir);
|
||||||
|
ehentaiThreadPool.addThread(t);
|
||||||
|
try {
|
||||||
|
Thread.sleep(IMAGE_SLEEP_TIME);
|
||||||
|
}
|
||||||
|
catch (InterruptedException e) {
|
||||||
|
logger.warn("Interrupted while waiting to load next image", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -187,7 +192,6 @@ public class EHentaiRipper extends AlbumRipper {
|
|||||||
private URL url;
|
private URL url;
|
||||||
private int index;
|
private int index;
|
||||||
private File workingDir;
|
private File workingDir;
|
||||||
private int retries = 3;
|
|
||||||
|
|
||||||
public EHentaiImageThread(URL url, int index, File workingDir) {
|
public EHentaiImageThread(URL url, int index, File workingDir) {
|
||||||
super();
|
super();
|
||||||
@ -203,27 +207,7 @@ public class EHentaiRipper extends AlbumRipper {
|
|||||||
|
|
||||||
private void fetchImage() {
|
private void fetchImage() {
|
||||||
try {
|
try {
|
||||||
Document doc = Http.url(this.url)
|
Document doc = getPageWithRetries(this.url);
|
||||||
.referrer(this.url)
|
|
||||||
.cookies(cookies)
|
|
||||||
.get();
|
|
||||||
// Check for rate limit
|
|
||||||
if (doc.toString().contains("IP address will be automatically banned")) {
|
|
||||||
if (this.retries == 0) {
|
|
||||||
logger.error("Rate limited & ran out of retries, skipping image at " + this.url);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
logger.warn("Hit rate limit. Sleeping for " + IP_BLOCK_SLEEP_TIME + "ms");
|
|
||||||
try {
|
|
||||||
Thread.sleep(IP_BLOCK_SLEEP_TIME);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.error("Interrupted while waiting for rate limit to subside", e);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
this.retries--;
|
|
||||||
fetchImage(); // Re-attempt to download the image
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find image
|
// Find image
|
||||||
Elements images = doc.select(".sni > a > img");
|
Elements images = doc.select(".sni > a > img");
|
||||||
|
@ -1,168 +1,41 @@
|
|||||||
package com.rarchives.ripme.ripper.rippers;
|
package com.rarchives.ripme.ripper.rippers;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.jsoup.Connection.Response;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
import com.rarchives.ripme.ripper.AbstractSinglePageRipper;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
|
||||||
|
|
||||||
public class EightmusesRipper extends AlbumRipper {
|
public class EightmusesRipper extends AbstractSinglePageRipper {
|
||||||
|
|
||||||
private static final String DOMAIN = "8muses.com",
|
|
||||||
HOST = "8muses";
|
|
||||||
|
|
||||||
private Document albumDoc = null;
|
private Document albumDoc = null;
|
||||||
|
private Map<String,String> cookies = new HashMap<String,String>();
|
||||||
|
|
||||||
public EightmusesRipper(URL url) throws IOException {
|
public EightmusesRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canRip(URL url) {
|
|
||||||
return url.getHost().endsWith(DOMAIN);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
||||||
return url;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
|
||||||
try {
|
|
||||||
// Attempt to use album title as GID
|
|
||||||
if (albumDoc == null) {
|
|
||||||
albumDoc = Http.url(url).get();
|
|
||||||
}
|
|
||||||
Element titleElement = albumDoc.select("meta[name=description]").first();
|
|
||||||
String title = titleElement.attr("content");
|
|
||||||
title = title.substring(title.lastIndexOf('/') + 1);
|
|
||||||
return HOST + "_" + title.trim();
|
|
||||||
} catch (IOException e) {
|
|
||||||
// Fall back to default album naming convention
|
|
||||||
logger.info("Unable to find title at " + url);
|
|
||||||
}
|
|
||||||
return super.getAlbumTitle(url);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void rip() throws IOException {
|
|
||||||
ripAlbum(this.url.toExternalForm(), this.workingDir);
|
|
||||||
waitForThreads();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void ripAlbum(String url, File subdir) throws IOException {
|
|
||||||
logger.info(" Retrieving " + url);
|
|
||||||
sendUpdate(STATUS.LOADING_RESOURCE, url);
|
|
||||||
if (albumDoc == null) {
|
|
||||||
albumDoc = Http.url(url).get();
|
|
||||||
}
|
|
||||||
|
|
||||||
int index = 0; // Both album index and image index
|
|
||||||
if (albumDoc.select(".preview > span").size() > 0) {
|
|
||||||
// Page contains subalbums (not images)
|
|
||||||
for (Element subalbum : albumDoc.select("a.preview")) {
|
|
||||||
ripSubalbumFromPreview(subalbum, subdir, ++index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Page contains images
|
|
||||||
for (Element thumb : albumDoc.select("img")) {
|
|
||||||
downloadImage(thumb, subdir, ++index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param subalbum Anchor element of a subalbum
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private void ripSubalbumFromPreview(Element subalbum, File subdir, int index) throws IOException {
|
|
||||||
// Find + sanitize URL from Element
|
|
||||||
String subUrl = subalbum.attr("href");
|
|
||||||
subUrl = subUrl.replaceAll("\\.\\./", "");
|
|
||||||
if (subUrl.startsWith("//")) {
|
|
||||||
subUrl = "http:";
|
|
||||||
}
|
|
||||||
else if (!subUrl.startsWith("http://")) {
|
|
||||||
subUrl = "http://www.8muses.com/" + subUrl;
|
|
||||||
}
|
|
||||||
// Prepend image index if enabled
|
|
||||||
// Get album title
|
|
||||||
String subTitle = subalbum.attr("alt");
|
|
||||||
if (subTitle.equals("")) {
|
|
||||||
subTitle = getGID(new URL(subUrl));
|
|
||||||
}
|
|
||||||
subTitle = Utils.filesystemSafe(subTitle);
|
|
||||||
// Create path to subdirectory
|
|
||||||
File subDir = new File(subdir.getAbsolutePath() + File.separator + subTitle);
|
|
||||||
if (!subDir.exists()) {
|
|
||||||
subDir.mkdirs();
|
|
||||||
}
|
|
||||||
albumDoc = null;
|
|
||||||
ripAlbum(subUrl, subDir);
|
|
||||||
try {
|
|
||||||
Thread.sleep(2000);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.warn("Interrupted whiel waiting to load next album");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void downloadImage(Element thumb, File subdir, int index) {
|
|
||||||
// Find thumbnail image source
|
|
||||||
String image = null;
|
|
||||||
if (thumb.hasAttr("data-cfsrc")) {
|
|
||||||
image = thumb.attr("data-cfsrc");
|
|
||||||
}
|
|
||||||
else if (thumb.hasAttr("src")) {
|
|
||||||
image = thumb.attr("src");
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
logger.warn("Thumb does not havedata-cfsrc or src: " + thumb);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Remove relative directory path naming
|
|
||||||
image = image.replaceAll("\\.\\./", "");
|
|
||||||
if (image.startsWith("//")) {
|
|
||||||
image = "http:" + image;
|
|
||||||
}
|
|
||||||
// Convert from thumb URL to full-size
|
|
||||||
if (image.contains("-cu_")) {
|
|
||||||
image = image.replaceAll("-cu_[^.]+", "-me");
|
|
||||||
}
|
|
||||||
// Set download path
|
|
||||||
try {
|
|
||||||
URL imageURL = new URL(image);
|
|
||||||
String saveAs = subdir.getAbsolutePath() + File.separator;
|
|
||||||
if (Utils.getConfigBoolean("download.save_order", true)) {
|
|
||||||
// Append image index
|
|
||||||
saveAs += String.format("%03d_", index);
|
|
||||||
}
|
|
||||||
// Append image title
|
|
||||||
saveAs += Utils.filesystemSafe(thumb.attr("title"));
|
|
||||||
// Append extension
|
|
||||||
saveAs += image.substring(image.lastIndexOf('.'));
|
|
||||||
File saveFile = new File(saveAs);
|
|
||||||
// Download
|
|
||||||
addURLToDownload(imageURL, saveFile, thumb.baseUri(), null);
|
|
||||||
} catch (IOException e) {
|
|
||||||
logger.error("Failed to download image at " + image, e);
|
|
||||||
sendUpdate(STATUS.DOWNLOAD_ERRORED, "Failed to download image at " + image);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHost() {
|
public String getHost() {
|
||||||
return HOST;
|
return "8muses";
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String getDomain() {
|
||||||
|
return "8muses.com";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -175,4 +48,100 @@ public class EightmusesRipper extends AlbumRipper {
|
|||||||
return m.group(m.groupCount());
|
return m.group(m.groupCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
|
try {
|
||||||
|
// Attempt to use album title as GID
|
||||||
|
Element titleElement = getFirstPage().select("meta[name=description]").first();
|
||||||
|
String title = titleElement.attr("content");
|
||||||
|
title = title.substring(title.lastIndexOf('/') + 1);
|
||||||
|
return getHost() + "_" + title.trim();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// Fall back to default album naming convention
|
||||||
|
logger.info("Unable to find title at " + url);
|
||||||
|
}
|
||||||
|
return super.getAlbumTitle(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getFirstPage() throws IOException {
|
||||||
|
if (albumDoc == null) {
|
||||||
|
Response resp = Http.url(url).response();
|
||||||
|
cookies.putAll(resp.cookies());
|
||||||
|
albumDoc = resp.parse();
|
||||||
|
}
|
||||||
|
return albumDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getURLsFromPage(Document page) {
|
||||||
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
|
if (page.select(".preview > span").size() > 0) {
|
||||||
|
// Page contains subalbums (not images)
|
||||||
|
Elements albumElements = page.select("a.preview");
|
||||||
|
List<Element> albumsList = albumElements.subList(0, albumElements.size());
|
||||||
|
Collections.reverse(albumsList);
|
||||||
|
// Iterate over elements in reverse order
|
||||||
|
for (Element subalbum : albumsList) {
|
||||||
|
String subUrl = subalbum.attr("href");
|
||||||
|
subUrl = subUrl.replaceAll("\\.\\./", "");
|
||||||
|
if (subUrl.startsWith("//")) {
|
||||||
|
subUrl = "http:";
|
||||||
|
}
|
||||||
|
else if (!subUrl.startsWith("http://")) {
|
||||||
|
subUrl = "http://www.8muses.com/" + subUrl;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
logger.info("Retrieving " + subUrl);
|
||||||
|
sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
|
||||||
|
Document subPage = Http.url(subUrl).get();
|
||||||
|
// Get all images in subalbum, add to list.
|
||||||
|
List<String> subalbumImages = getURLsFromPage(subPage);
|
||||||
|
logger.info("Found " + subalbumImages.size() + " images in subalbum");
|
||||||
|
imageURLs.addAll(subalbumImages);
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.warn("Error while loading subalbum " + subUrl, e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Page contains images
|
||||||
|
for (Element thumb : page.select("img")) {
|
||||||
|
// Find thumbnail image source
|
||||||
|
String image = null;
|
||||||
|
if (thumb.hasAttr("data-cfsrc")) {
|
||||||
|
image = thumb.attr("data-cfsrc");
|
||||||
|
}
|
||||||
|
else if (thumb.hasAttr("src")) {
|
||||||
|
image = thumb.attr("src");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
logger.warn("Thumb does not have data-cfsrc or src: " + thumb);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Remove relative directory path naming
|
||||||
|
image = image.replaceAll("\\.\\./", "");
|
||||||
|
if (image.startsWith("//")) {
|
||||||
|
image = "http:" + image;
|
||||||
|
}
|
||||||
|
// Convert from thumb URL to full-size
|
||||||
|
if (image.contains("-cu_")) {
|
||||||
|
image = image.replaceAll("-cu_[^.]+", "-me");
|
||||||
|
}
|
||||||
|
imageURLs.add(image);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void downloadURL(URL url, int index) {
|
||||||
|
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getPrefix(int index) {
|
||||||
|
return String.format("%03d_", index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,82 +3,33 @@ package com.rarchives.ripme.ripper.rippers;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
|
||||||
|
|
||||||
public class FapprovedRipper extends AlbumRipper {
|
public class FapprovedRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private static final String DOMAIN = "fapproved.com",
|
private int pageIndex = 1;
|
||||||
HOST = "fapproved";
|
private String username = null;
|
||||||
|
|
||||||
public FapprovedRipper(URL url) throws IOException {
|
public FapprovedRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canRip(URL url) {
|
|
||||||
return (url.getHost().endsWith(DOMAIN));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
||||||
Pattern p = Pattern.compile("^https?://fapproved\\.com/users/([a-zA-Z0-9\\-_]{1,}).*$");
|
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
return new URL("http://fapproved.com/users/" + m.group(1));
|
|
||||||
}
|
|
||||||
throw new MalformedURLException("Expected username in URL (fapproved.com/users/username and not " + url);
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void rip() throws IOException {
|
|
||||||
int index = 0, page = 0;
|
|
||||||
String url, user = getGID(this.url);
|
|
||||||
boolean hasNextPage = true;
|
|
||||||
while (hasNextPage) {
|
|
||||||
page++;
|
|
||||||
url = "http://fapproved.com/users/" + user + "/images?page=" + page;
|
|
||||||
this.sendUpdate(STATUS.LOADING_RESOURCE, url);
|
|
||||||
logger.info(" Retrieving " + url);
|
|
||||||
Document doc = Http.url(url)
|
|
||||||
.ignoreContentType()
|
|
||||||
.get();
|
|
||||||
for (Element image : doc.select("div.actual-image img")) {
|
|
||||||
String imageUrl = image.attr("src");
|
|
||||||
if (imageUrl.startsWith("//")) {
|
|
||||||
imageUrl = "http:" + imageUrl;
|
|
||||||
}
|
|
||||||
index++;
|
|
||||||
String prefix = "";
|
|
||||||
if (Utils.getConfigBoolean("download.save_order", true)) {
|
|
||||||
prefix = String.format("%03d_", index);
|
|
||||||
}
|
|
||||||
addURLToDownload(new URL(imageUrl), prefix);
|
|
||||||
}
|
|
||||||
if ( (doc.select("div.pagination li.next.disabled").size() != 0)
|
|
||||||
|| (doc.select("div.pagination").size() == 0) ) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
Thread.sleep(3000);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.error("[!] Interrupted while waiting to load next album:", e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
waitForThreads();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHost() {
|
public String getHost() {
|
||||||
return HOST;
|
return "fapproved";
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String getDomain() {
|
||||||
|
return "fapproved.com";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -86,9 +37,62 @@ public class FapprovedRipper extends AlbumRipper {
|
|||||||
Pattern p = Pattern.compile("^https?://[w.]*fapproved.com/users/([a-zA-Z0-9\\-_]{3,}).*$");
|
Pattern p = Pattern.compile("^https?://[w.]*fapproved.com/users/([a-zA-Z0-9\\-_]{3,}).*$");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(1);
|
username = m.group(1);
|
||||||
|
return username;
|
||||||
}
|
}
|
||||||
throw new MalformedURLException("Fapproved user not found in " + url + ", expected http://fapproved.com/users/username/images");
|
throw new MalformedURLException("Fapproved user not found in " + url + ", expected http://fapproved.com/users/username/images");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
|
return new URL("http://fapproved.com/users/" + getGID(url));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getFirstPage() throws IOException {
|
||||||
|
pageIndex = 1;
|
||||||
|
String pageURL = getPageURL(pageIndex);
|
||||||
|
return Http.url(pageURL)
|
||||||
|
.ignoreContentType()
|
||||||
|
.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
|
if ( (doc.select("div.pagination li.next.disabled").size() != 0)
|
||||||
|
|| (doc.select("div.pagination").size() == 0) ) {
|
||||||
|
throw new IOException("No more pages found");
|
||||||
|
}
|
||||||
|
sleep(1000);
|
||||||
|
pageIndex++;
|
||||||
|
String pageURL = getPageURL(pageIndex);
|
||||||
|
return Http.url(pageURL)
|
||||||
|
.ignoreContentType()
|
||||||
|
.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getPageURL(int index) throws IOException {
|
||||||
|
if (username == null) {
|
||||||
|
username = getGID(this.url);
|
||||||
|
}
|
||||||
|
return "http://fapproved.com/users/" + username + "/images?page=" + pageIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getURLsFromPage(Document page) {
|
||||||
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
|
for (Element image : page.select("div.actual-image img")) {
|
||||||
|
String imageURL = image.attr("src");
|
||||||
|
if (imageURL.startsWith("//")) {
|
||||||
|
imageURL = "http:" + imageURL;
|
||||||
|
}
|
||||||
|
imageURLs.add(imageURL);
|
||||||
|
}
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void downloadURL(URL url, int index) {
|
||||||
|
addURLToDownload(url, getPrefix(index));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,8 +3,10 @@ package com.rarchives.ripme.ripper.rippers;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
@ -17,19 +19,22 @@ import org.jsoup.nodes.Document;
|
|||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
||||||
import com.rarchives.ripme.utils.Base64;
|
import com.rarchives.ripme.utils.Base64;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
public class FlickrRipper extends AlbumRipper {
|
public class FlickrRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private static final String DOMAIN = "flickr.com",
|
private int page = 1;
|
||||||
HOST = "flickr";
|
private Set<String> attempted = new HashSet<String>();
|
||||||
|
|
||||||
private DownloadThreadPool flickrThreadPool;
|
|
||||||
private Document albumDoc = null;
|
private Document albumDoc = null;
|
||||||
|
private DownloadThreadPool flickrThreadPool;
|
||||||
|
@Override
|
||||||
|
public DownloadThreadPool getThreadPool() {
|
||||||
|
return flickrThreadPool;
|
||||||
|
}
|
||||||
|
|
||||||
public FlickrRipper(URL url) throws IOException {
|
public FlickrRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
@ -38,7 +43,11 @@ public class FlickrRipper extends AlbumRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHost() {
|
public String getHost() {
|
||||||
return HOST;
|
return "flickr";
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String getDomain() {
|
||||||
|
return "flickr.com";
|
||||||
}
|
}
|
||||||
|
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
@ -61,15 +70,13 @@ public class FlickrRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
if (albumDoc == null) {
|
Document doc = getFirstPage();
|
||||||
albumDoc = Http.url(url).get();
|
|
||||||
}
|
|
||||||
String user = url.toExternalForm();
|
String user = url.toExternalForm();
|
||||||
user = user.substring(user.indexOf("/photos/") + "/photos/".length());
|
user = user.substring(user.indexOf("/photos/") + "/photos/".length());
|
||||||
user = user.substring(0, user.indexOf("/"));
|
user = user.substring(0, user.indexOf("/"));
|
||||||
String title = albumDoc.select("meta[name=description]").get(0).attr("content");
|
String title = doc.select("meta[name=description]").get(0).attr("content");
|
||||||
if (!title.equals("")) {
|
if (!title.equals("")) {
|
||||||
return HOST + "_" + user + "_" + title;
|
return getHost() + "_" + user + "_" + title;
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Fall back to default album naming convention
|
// Fall back to default album naming convention
|
||||||
@ -114,79 +121,79 @@ public class FlickrRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
//Map<String,String> cookies = signinToFlickr();
|
if (albumDoc == null) {
|
||||||
Set<String> attempted = new HashSet<String>();
|
albumDoc = Http.url(url).get();
|
||||||
int index = 0, page = 1;
|
|
||||||
String nextURL = this.url.toExternalForm();
|
|
||||||
while (true) {
|
|
||||||
if (isStopped()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
logger.info(" Retrieving " + nextURL);
|
|
||||||
if (albumDoc == null) {
|
|
||||||
albumDoc = Http.url(nextURL).get();
|
|
||||||
}
|
|
||||||
for (Element thumb : albumDoc.select("a[data-track=photo-click]")) {
|
|
||||||
String imageTitle = null;
|
|
||||||
if (thumb.hasAttr("title")) {
|
|
||||||
imageTitle = thumb.attr("title");
|
|
||||||
}
|
|
||||||
String imagePage = thumb.attr("href");
|
|
||||||
if (imagePage.startsWith("/")) {
|
|
||||||
imagePage = "http://www.flickr.com" + imagePage;
|
|
||||||
}
|
|
||||||
if (imagePage.contains("/in/")) {
|
|
||||||
imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1);
|
|
||||||
}
|
|
||||||
if (!imagePage.endsWith("/")) {
|
|
||||||
imagePage += "/";
|
|
||||||
}
|
|
||||||
imagePage += "sizes/o/";
|
|
||||||
|
|
||||||
// Check for duplicates
|
|
||||||
if (attempted.contains(imagePage)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
attempted.add(imagePage);
|
|
||||||
|
|
||||||
index += 1;
|
|
||||||
// Add image page to threadpool to grab the image & download it
|
|
||||||
FlickrImageThread mit = new FlickrImageThread(new URL(imagePage), imageTitle, index);
|
|
||||||
flickrThreadPool.addThread(mit);
|
|
||||||
}
|
|
||||||
// Find how many pages there are
|
|
||||||
int lastPage = 0;
|
|
||||||
for (Element apage : albumDoc.select("a[data-track^=page-]")) {
|
|
||||||
String lastPageStr = apage.attr("data-track").replace("page-", "");
|
|
||||||
lastPage = Integer.parseInt(lastPageStr);
|
|
||||||
}
|
|
||||||
// If we're at the last page, stop.
|
|
||||||
if (page >= lastPage) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Load the next page
|
|
||||||
page++;
|
|
||||||
albumDoc = null;
|
|
||||||
nextURL = this.url.toExternalForm();
|
|
||||||
if (!nextURL.endsWith("/")) {
|
|
||||||
nextURL += "/";
|
|
||||||
}
|
|
||||||
nextURL += "page" + page + "/";
|
|
||||||
// Wait a bit
|
|
||||||
try {
|
|
||||||
Thread.sleep(1000);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.error("Interrupted while waiting to load next page " + nextURL, e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
flickrThreadPool.waitForThreads();
|
return albumDoc;
|
||||||
waitForThreads();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean canRip(URL url) {
|
@Override
|
||||||
return url.getHost().endsWith(DOMAIN);
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
|
// Find how many pages there are
|
||||||
|
int lastPage = 0;
|
||||||
|
for (Element apage : doc.select("a[data-track^=page-]")) {
|
||||||
|
String lastPageStr = apage.attr("data-track").replace("page-", "");
|
||||||
|
lastPage = Integer.parseInt(lastPageStr);
|
||||||
|
}
|
||||||
|
// If we're at the last page, stop.
|
||||||
|
if (page >= lastPage) {
|
||||||
|
throw new IOException("No more pages");
|
||||||
|
}
|
||||||
|
// Load the next page
|
||||||
|
page++;
|
||||||
|
albumDoc = null;
|
||||||
|
String nextURL = this.url.toExternalForm();
|
||||||
|
if (!nextURL.endsWith("/")) {
|
||||||
|
nextURL += "/";
|
||||||
|
}
|
||||||
|
nextURL += "page" + page + "/";
|
||||||
|
// Wait a bit
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
throw new IOException("Interrupted while waiting to load next page " + nextURL);
|
||||||
|
}
|
||||||
|
return Http.url(nextURL).get();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getURLsFromPage(Document page) {
|
||||||
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
|
for (Element thumb : page.select("a[data-track=photo-click]")) {
|
||||||
|
/* TODO find a way to persist the image title
|
||||||
|
String imageTitle = null;
|
||||||
|
if (thumb.hasAttr("title")) {
|
||||||
|
imageTitle = thumb.attr("title");
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
String imagePage = thumb.attr("href");
|
||||||
|
if (imagePage.startsWith("/")) {
|
||||||
|
imagePage = "http://www.flickr.com" + imagePage;
|
||||||
|
}
|
||||||
|
if (imagePage.contains("/in/")) {
|
||||||
|
imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1);
|
||||||
|
}
|
||||||
|
if (!imagePage.endsWith("/")) {
|
||||||
|
imagePage += "/";
|
||||||
|
}
|
||||||
|
imagePage += "sizes/o/";
|
||||||
|
|
||||||
|
// Check for duplicates
|
||||||
|
if (attempted.contains(imagePage)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
attempted.add(imagePage);
|
||||||
|
imageURLs.add(imagePage);
|
||||||
|
}
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void downloadURL(URL url, int index) {
|
||||||
|
// Add image page to threadpool to grab the image & download it
|
||||||
|
FlickrImageThread mit = new FlickrImageThread(url, index);
|
||||||
|
flickrThreadPool.addThread(mit);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -224,13 +231,11 @@ public class FlickrRipper extends AlbumRipper {
|
|||||||
*/
|
*/
|
||||||
private class FlickrImageThread extends Thread {
|
private class FlickrImageThread extends Thread {
|
||||||
private URL url;
|
private URL url;
|
||||||
private String title;
|
|
||||||
private int index;
|
private int index;
|
||||||
|
|
||||||
public FlickrImageThread(URL url, String title, int index) {
|
public FlickrImageThread(URL url, int index) {
|
||||||
super();
|
super();
|
||||||
this.url = url;
|
this.url = url;
|
||||||
this.title = title;
|
|
||||||
this.index = index;
|
this.index = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -248,9 +253,8 @@ public class FlickrRipper extends AlbumRipper {
|
|||||||
if (Utils.getConfigBoolean("download.save_order", true)) {
|
if (Utils.getConfigBoolean("download.save_order", true)) {
|
||||||
prefix = String.format("%03d_", index);
|
prefix = String.format("%03d_", index);
|
||||||
}
|
}
|
||||||
prefix += Utils.filesystemSafe(title);
|
|
||||||
synchronized (flickrThreadPool) {
|
synchronized (flickrThreadPool) {
|
||||||
addURLToDownload(new URL(fullsizeImages.get(0).attr("src")), prefix);
|
addURLToDownload(new URL(fullsizeImages.first().attr("src")), prefix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -3,7 +3,9 @@ package com.rarchives.ripme.ripper.rippers;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
@ -12,99 +14,27 @@ import org.jsoup.Connection.Method;
|
|||||||
import org.jsoup.Connection.Response;
|
import org.jsoup.Connection.Response;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
|
||||||
public class GifyoRipper extends AlbumRipper {
|
public class GifyoRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private static final String DOMAIN = "gifyo.com",
|
private int page = 0;
|
||||||
HOST = "gifyo";
|
private Map<String,String> cookies = new HashMap<String,String>();
|
||||||
|
|
||||||
public GifyoRipper(URL url) throws IOException {
|
public GifyoRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canRip(URL url) {
|
|
||||||
return (url.getHost().endsWith(DOMAIN));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
||||||
Pattern p = Pattern.compile("^https?://gifyo\\.com/([a-zA-Z0-9\\-_]+)/?$");
|
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
|
||||||
if (m.matches()) {
|
|
||||||
return new URL("http://gifyo.com/" + m.group(1) + "/");
|
|
||||||
}
|
|
||||||
throw new MalformedURLException("Expected username in URL (gifyo.com/username/ and not " + url);
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void rip() throws IOException {
|
|
||||||
int page = 0;
|
|
||||||
Map<String,String> cookies = new HashMap<String,String>();
|
|
||||||
while (true) {
|
|
||||||
this.sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm() + " (page #" + page + ")");
|
|
||||||
logger.info(" Retrieving " + this.url + "(page #" + page + ")");
|
|
||||||
Response resp = null;
|
|
||||||
if (page == 0) {
|
|
||||||
resp = Http.url(this.url)
|
|
||||||
.ignoreContentType()
|
|
||||||
.response();
|
|
||||||
cookies = resp.cookies();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Map<String,String> postData = new HashMap<String,String>();
|
|
||||||
postData.put("cmd", "refreshData");
|
|
||||||
postData.put("view", "gif");
|
|
||||||
postData.put("layout", "grid");
|
|
||||||
postData.put("page", Integer.toString(page));
|
|
||||||
resp = Http.url(this.url)
|
|
||||||
.ignoreContentType()
|
|
||||||
.data(postData)
|
|
||||||
.cookies(cookies)
|
|
||||||
.method(Method.POST)
|
|
||||||
.response();
|
|
||||||
cookies.putAll(resp.cookies());
|
|
||||||
}
|
|
||||||
Document doc = resp.parse();
|
|
||||||
Elements images = doc.select("div.gif img");
|
|
||||||
logger.info("Found " + images.size() + " images");
|
|
||||||
for (Element image : images) {
|
|
||||||
String imageUrl = image.attr("src");
|
|
||||||
if (imageUrl.startsWith("//")) {
|
|
||||||
imageUrl = "http:" + imageUrl;
|
|
||||||
}
|
|
||||||
imageUrl = imageUrl.replace("/medium/", "/large/");
|
|
||||||
imageUrl = imageUrl.replace("_s.gif", ".gif");
|
|
||||||
addURLToDownload(new URL(imageUrl));
|
|
||||||
}
|
|
||||||
if (images.size() == 0) {
|
|
||||||
if (doc.html().contains("profile is private")) {
|
|
||||||
sendUpdate(STATUS.RIP_ERRORED, "User has private profile");
|
|
||||||
throw new IOException("User has private profile");
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
logger.info("Page " + page + " has 0 images");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
Thread.sleep(3000);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.error("[!] Interrupted while waiting to load next album:", e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
page++;
|
|
||||||
}
|
|
||||||
waitForThreads();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHost() {
|
public String getHost() {
|
||||||
return HOST;
|
return "gifyo";
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String getDomain() {
|
||||||
|
return "gifyo.com";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -117,4 +47,67 @@ public class GifyoRipper extends AlbumRipper {
|
|||||||
throw new MalformedURLException("Gifyo user not found in " + url + ", expected http://gifyo.com/username");
|
throw new MalformedURLException("Gifyo user not found in " + url + ", expected http://gifyo.com/username");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
|
return new URL("http://gifyo.com/" + getGID(url) + "/");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getFirstPage() throws IOException {
|
||||||
|
Response resp = Http.url(this.url)
|
||||||
|
.ignoreContentType()
|
||||||
|
.response();
|
||||||
|
cookies = resp.cookies();
|
||||||
|
|
||||||
|
Document doc = resp.parse();
|
||||||
|
if (doc.html().contains("profile is private")) {
|
||||||
|
sendUpdate(STATUS.RIP_ERRORED, "User has private profile");
|
||||||
|
throw new IOException("User has private profile");
|
||||||
|
}
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
|
page++;
|
||||||
|
Map<String,String> postData = new HashMap<String,String>();
|
||||||
|
postData.put("cmd", "refreshData");
|
||||||
|
postData.put("view", "gif");
|
||||||
|
postData.put("layout", "grid");
|
||||||
|
postData.put("page", Integer.toString(page));
|
||||||
|
Response resp = Http.url(this.url)
|
||||||
|
.ignoreContentType()
|
||||||
|
.data(postData)
|
||||||
|
.cookies(cookies)
|
||||||
|
.method(Method.POST)
|
||||||
|
.response();
|
||||||
|
cookies.putAll(resp.cookies());
|
||||||
|
Document nextDoc = resp.parse();
|
||||||
|
if (nextDoc.select("div.gif img").size() == 0) {
|
||||||
|
throw new IOException("No more images found");
|
||||||
|
}
|
||||||
|
sleep(2000);
|
||||||
|
return nextDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getURLsFromPage(Document doc) {
|
||||||
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
|
for (Element image : doc.select("div.gif img")) {
|
||||||
|
String imageUrl = image.attr("src");
|
||||||
|
if (imageUrl.startsWith("//")) {
|
||||||
|
imageUrl = "http:" + imageUrl;
|
||||||
|
}
|
||||||
|
imageUrl = imageUrl.replace("/medium/", "/large/");
|
||||||
|
imageUrl = imageUrl.replace("_s.gif", ".gif");
|
||||||
|
imageURLs.add(imageUrl);
|
||||||
|
}
|
||||||
|
logger.info("Found " + imageURLs.size() + " images");
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void downloadURL(URL url, int index) {
|
||||||
|
addURLToDownload(url);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
@ -10,16 +12,14 @@ import org.jsoup.nodes.Document;
|
|||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
import com.rarchives.ripme.ripper.AbstractSinglePageRipper;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
|
||||||
public class GirlsOfDesireRipper extends AlbumRipper {
|
public class GirlsOfDesireRipper extends AbstractSinglePageRipper {
|
||||||
// All sleep times are in milliseconds
|
// All sleep times are in milliseconds
|
||||||
private static final int IMAGE_SLEEP_TIME = 100;
|
private static final int IMAGE_SLEEP_TIME = 100;
|
||||||
|
|
||||||
private static final String DOMAIN = "girlsofdesire.org", HOST = "GirlsOfDesire";
|
|
||||||
|
|
||||||
// Current HTML document
|
// Current HTML document
|
||||||
private Document albumDoc = null;
|
private Document albumDoc = null;
|
||||||
|
|
||||||
@ -29,23 +29,19 @@ public class GirlsOfDesireRipper extends AlbumRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHost() {
|
public String getHost() {
|
||||||
return HOST;
|
return "GirlsOfDesire";
|
||||||
}
|
}
|
||||||
|
@Override
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public String getDomain() {
|
||||||
return url;
|
return "girlsofdesire.org";
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
if (albumDoc == null) {
|
Document doc = getFirstPage();
|
||||||
logger.info(" Retrieving " + url.toExternalForm());
|
Elements elems = doc.select(".albumName");
|
||||||
sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
|
return getHost() + "_" + elems.first().text();
|
||||||
albumDoc = Http.url(url).get();
|
|
||||||
}
|
|
||||||
Elements elems = albumDoc.select(".albumName");
|
|
||||||
return HOST + "_" + elems.first().text();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Fall back to default album naming convention
|
// Fall back to default album naming convention
|
||||||
logger.warn("Failed to get album title from " + url, e);
|
logger.warn("Failed to get album title from " + url, e);
|
||||||
@ -70,6 +66,33 @@ public class GirlsOfDesireRipper extends AlbumRipper {
|
|||||||
+ " Got: " + url);
|
+ " Got: " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document getFirstPage() throws IOException {
|
||||||
|
if (albumDoc == null) {
|
||||||
|
albumDoc = Http.url(url).get();
|
||||||
|
}
|
||||||
|
return albumDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getURLsFromPage(Document doc) {
|
||||||
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
|
for (Element thumb : doc.select("td.vtop > a > img")) {
|
||||||
|
String imgSrc = thumb.attr("src");
|
||||||
|
imgSrc = imgSrc.replaceAll("_thumb\\.", ".");
|
||||||
|
if (imgSrc.startsWith("/")) {
|
||||||
|
imgSrc = "http://www.girlsofdesire.org" + imgSrc;
|
||||||
|
}
|
||||||
|
imageURLs.add(imgSrc);
|
||||||
|
}
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void downloadURL(URL url, int index) {
|
||||||
|
addURLToDownload(url, getPrefix(index));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public void rip() throws IOException {
|
||||||
String nextUrl = this.url.toExternalForm();
|
String nextUrl = this.url.toExternalForm();
|
||||||
@ -107,8 +130,4 @@ public class GirlsOfDesireRipper extends AlbumRipper {
|
|||||||
|
|
||||||
waitForThreads();
|
waitForThreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean canRip(URL url) {
|
|
||||||
return url.getHost().endsWith(DOMAIN);
|
|
||||||
}
|
|
||||||
}
|
}
|
@ -3,21 +3,22 @@ package com.rarchives.ripme.ripper.rippers;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
public class GonewildRipper extends AlbumRipper {
|
public class GonewildRipper extends AbstractJSONRipper {
|
||||||
|
|
||||||
private static final String HOST = "gonewild";
|
|
||||||
private static final int SLEEP_TIME = 1000;
|
|
||||||
|
|
||||||
|
private static final int count = 50;
|
||||||
|
private int startIndex = 0;
|
||||||
private static String API_DOMAIN;
|
private static String API_DOMAIN;
|
||||||
private String username;
|
private String username;
|
||||||
|
|
||||||
@ -26,6 +27,15 @@ public class GonewildRipper extends AlbumRipper {
|
|||||||
API_DOMAIN = Utils.getConfigString("gw.api", "gonewild");
|
API_DOMAIN = Utils.getConfigString("gw.api", "gonewild");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHost() {
|
||||||
|
return "gonewild";
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String getDomain() {
|
||||||
|
return "gonewild.com";
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean canRip(URL url) {
|
public boolean canRip(URL url) {
|
||||||
return getUsernameMatcher(url).matches();
|
return getUsernameMatcher(url).matches();
|
||||||
@ -36,72 +46,64 @@ public class GonewildRipper extends AlbumRipper {
|
|||||||
return p.matcher(url.toExternalForm());
|
return p.matcher(url.toExternalForm());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
||||||
return url;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void rip() throws IOException {
|
|
||||||
int start = 0,
|
|
||||||
count = 50;
|
|
||||||
String baseGwURL = "http://" + API_DOMAIN + ".rarchives.com/api.cgi"
|
|
||||||
+ "?method=get_user"
|
|
||||||
+ "&user=" + username
|
|
||||||
+ "&count=" + count;
|
|
||||||
String gwURL, imagePath;
|
|
||||||
JSONArray posts, images;
|
|
||||||
JSONObject json, post, image;
|
|
||||||
while (true) {
|
|
||||||
logger.info(" Retrieving posts by " + username);
|
|
||||||
gwURL = baseGwURL
|
|
||||||
+ "&start=" + start;
|
|
||||||
start += count;
|
|
||||||
json = Http.url(gwURL)
|
|
||||||
.getJSON();
|
|
||||||
if (json.has("error")) {
|
|
||||||
logger.error("Error while retrieving user posts:" + json.getString("error"));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
posts = json.getJSONArray("posts");
|
|
||||||
if (posts.length() == 0) {
|
|
||||||
break; // No more posts to get
|
|
||||||
}
|
|
||||||
for (int i = 0; i < posts.length(); i++) {
|
|
||||||
post = (JSONObject) posts.get(i);
|
|
||||||
images = post.getJSONArray("images");
|
|
||||||
for (int j = 0; j < images.length(); j++) {
|
|
||||||
image = (JSONObject) images.get(j);
|
|
||||||
imagePath = image.getString("path");
|
|
||||||
if (imagePath.startsWith("..")) {
|
|
||||||
imagePath = imagePath.substring(2);
|
|
||||||
}
|
|
||||||
imagePath = "http://" + API_DOMAIN + ".rarchives.com" + imagePath;
|
|
||||||
logger.info(" Found file: " + imagePath);
|
|
||||||
addURLToDownload(new URL(imagePath));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
Thread.sleep(SLEEP_TIME);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.error("[!] Interrupted while waiting to load more posts", e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
waitForThreads();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getHost() {
|
|
||||||
return HOST;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Matcher m = getUsernameMatcher(url);
|
Matcher m = getUsernameMatcher(url);
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
this.username = m.group(m.groupCount());
|
this.username = m.group(m.groupCount());
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
throw new MalformedURLException("Expected format: gonewild.com/<user>");
|
||||||
|
}
|
||||||
return username;
|
return username;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject getFirstPage() throws IOException {
|
||||||
|
String gwURL = "http://" + API_DOMAIN + ".rarchives.com/api.cgi"
|
||||||
|
+ "?method=get_user"
|
||||||
|
+ "&user=" + username
|
||||||
|
+ "&count=" + count
|
||||||
|
+ "&start=" + startIndex;
|
||||||
|
JSONObject nextJSON = Http.url(gwURL).getJSON();
|
||||||
|
if (nextJSON.has("error")) {
|
||||||
|
throw new IOException(nextJSON.getString("error"));
|
||||||
|
}
|
||||||
|
if (nextJSON.getJSONArray("posts").length() == 0) {
|
||||||
|
throw new IOException("No posts found");
|
||||||
|
}
|
||||||
|
return nextJSON;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject getNextPage(JSONObject json) throws IOException {
|
||||||
|
startIndex += count;
|
||||||
|
sleep(1000);
|
||||||
|
return getFirstPage();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getURLsFromJSON(JSONObject json) {
|
||||||
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
|
JSONArray posts = json.getJSONArray("posts");
|
||||||
|
for (int i = 0; i < posts.length(); i++) {
|
||||||
|
JSONObject post = posts.getJSONObject(i);
|
||||||
|
JSONArray images = post.getJSONArray("images");
|
||||||
|
for (int j = 0; j < images.length(); j++) {
|
||||||
|
JSONObject image = images.getJSONObject(j);
|
||||||
|
String imagePath = image.getString("path");
|
||||||
|
if (imagePath.startsWith("..")) {
|
||||||
|
imagePath = imagePath.substring(2);
|
||||||
|
}
|
||||||
|
imagePath = "http://" + API_DOMAIN + ".rarchives.com" + imagePath;
|
||||||
|
imageURLs.add(imagePath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void downloadURL(URL url, int index) {
|
||||||
|
addURLToDownload(url, getPrefix(index));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user