Migrated existing rippers to use more abstraction; added 2 new video rippers

4pr0n 2014-06-24 19:05:54 -07:00
parent b9e3d77449
commit 7de64ffd5a
16 changed files with 701 additions and 628 deletions
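For orientation: the migration in this commit replaces each ripper's hand-rolled rip() loop with the template methods of AbstractHTMLRipper / AbstractJSONRipper, as the diffs below show. A minimal sketch of what a migrated HTML ripper looks like, assuming a made-up site — ExampleRipper, example.com, and the selector are invented; the method signatures are the ones appearing in the diffs, and canRip()/getNextPage() are presumably supplied by the base class via getDomain() and a throwing default:

    package com.rarchives.ripme.ripper.rippers;

    import java.io.IOException;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.List;

    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    import com.rarchives.ripme.ripper.AbstractHTMLRipper;
    import com.rarchives.ripme.utils.Http;

    // Hypothetical ripper sketch; shows the methods every migrated HTML ripper implements.
    public class ExampleRipper extends AbstractHTMLRipper {

        public ExampleRipper(URL url) throws IOException {
            super(url);
        }

        @Override
        public String getHost() {
            return "example";              // short name, used in album naming
        }

        @Override
        public String getDomain() {
            return "example.com";          // presumably used by the base class canRip()
        }

        @Override
        public String getGID(URL url) throws MalformedURLException {
            return "example-album";        // placeholder; real rippers parse this out of the URL
        }

        @Override
        public Document getFirstPage() throws IOException {
            return Http.url(url).get();    // fetch the album page
        }

        // getNextPage() not overridden: single-page site, rely on the base default.

        @Override
        public List<String> getURLsFromPage(Document doc) {
            List<String> imageURLs = new ArrayList<String>();
            for (Element img : doc.select("img")) {   // site-specific selector goes here
                imageURLs.add(img.attr("src"));
            }
            return imageURLs;
        }

        @Override
        public void downloadURL(URL url, int index) {
            addURLToDownload(url, getPrefix(index));  // numbering/prefix handled by the base class
        }
    }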

View File

@ -23,7 +23,9 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
public abstract String getHost();
public abstract JSONObject getFirstPage() throws IOException;
public abstract JSONObject getNextPage(JSONObject json) throws IOException;
public JSONObject getNextPage(JSONObject doc) throws IOException {
throw new IOException("getNextPage not implemented");
}
public abstract List<String> getURLsFromJSON(JSONObject json);
public abstract void downloadURL(URL url, int index);
public DownloadThreadPool getThreadPool() {
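This hunk turns getNextPage from an abstract method into a default that throws, so single-page JSON rippers can simply not override it. Conversely, a paginating ripper overrides it and, judging by the migrated rippers below (Imagestash, Instagram, Imagefap), signals the end of pagination by throwing an IOException itself. A hedged sketch of such an override, as a fragment inside some AbstractJSONRipper subclass — the "page" counter, query parameter, and "done" field are invented; the sleep-then-refetch shape mirrors ImagestashRipper below:

    // Sketch only: a paginating override inside a hypothetical AbstractJSONRipper subclass.
    @Override
    public JSONObject getNextPage(JSONObject json) throws IOException {
        if (json.getBoolean("done")) {
            throw new IOException("No more pages");   // an IOException here ends the rip loop
        }
        sleep(1000);                                   // be polite between API calls
        page++;
        return Http.url("https://api.example.com/images?page=" + page).getJSON();
    }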

View File

@ -3,6 +3,9 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -12,95 +15,23 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class HentaifoundryRipper extends AlbumRipper {
private static final String DOMAIN = "hentai-foundry.com",
HOST = "hentai-foundry";
public class HentaifoundryRipper extends AbstractHTMLRipper {
private Map<String,String> cookies = new HashMap<String,String>();
public HentaifoundryRipper(URL url) throws IOException {
super(url);
}
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
}
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
Pattern imgRegex = Pattern.compile(".*/user/([a-zA-Z0-9\\-_]+)/(\\d+)/.*");
String nextURL = this.url.toExternalForm();
int index = 0;
// Get cookies
Response resp = Http.url("http://www.hentai-foundry.com/").response();
Map<String,String> cookies = resp.cookies();
resp = Http.url("http://www.hentai-foundry.com/?enterAgree=1&size=1500")
.referrer("http://www.hentai-foundry.com/")
.cookies(cookies)
.response();
cookies = resp.cookies();
logger.info("cookies: " + cookies);
// Iterate over every page
while (true) {
if (isStopped()) {
break;
}
sendUpdate(STATUS.LOADING_RESOURCE, nextURL);
Document doc = Http.url(nextURL)
.referrer(this.url)
.cookies(cookies)
.get();
for (Element thumb : doc.select("td > a:first-child")) {
if (isStopped()) {
break;
}
Matcher imgMatcher = imgRegex.matcher(thumb.attr("href"));
if (!imgMatcher.matches()) {
logger.info("Couldn't find user & image ID in " + thumb.attr("href"));
continue;
}
String user = imgMatcher.group(1),
imageId = imgMatcher.group(2);
String image = "http://pictures.hentai-foundry.com//";
logger.info("user: " + user + "; imageId: " + imageId + "; image: " + image);
image += user.toLowerCase().charAt(0);
image += "/" + user + "/" + imageId + ".jpg";
index += 1;
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(image), prefix);
}
if (doc.select("li.next.hidden").size() > 0) {
// Last page
break;
}
Elements els = doc.select("li.next > a");
logger.info("li.next > a : " + els);
Element first = els.first();
logger.info("li.next > a .first() : " + first);
nextURL = first.attr("href");
logger.info("first().attr(href) : " + nextURL);
nextURL = "http://www.hentai-foundry.com" + nextURL;
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
return "hentai-foundry";
}
@Override
public String getDomain() {
return "hentai-foundry.com";
}
@Override
@ -115,4 +46,67 @@ public class HentaifoundryRipper extends AlbumRipper {
+ "hentai-foundry.com/pictures/user/USERNAME"
+ " Got: " + url);
}
@Override
public Document getFirstPage() throws IOException {
Response resp = Http.url("http://www.hentai-foundry.com/").response();
cookies = resp.cookies();
resp = Http.url("http://www.hentai-foundry.com/?enterAgree=1&size=1500")
.referrer("http://www.hentai-foundry.com/")
.cookies(cookies)
.response();
cookies.putAll(resp.cookies());
sleep(500);
resp = Http.url(url)
.referrer("http://www.hentai-foundry.com/")
.cookies(cookies)
.response();
cookies.putAll(resp.cookies());
return resp.parse();
}
@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("li.next.hidden").size() > 0) {
// Last page
throw new IOException("No more pages");
}
Elements els = doc.select("li.next > a");
Element first = els.first();
String nextURL = first.attr("href");
nextURL = "http://www.hentai-foundry.com" + nextURL;
return Http.url(nextURL)
.referrer(url)
.cookies(cookies)
.get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
Pattern imgRegex = Pattern.compile(".*/user/([a-zA-Z0-9\\-_]+)/(\\d+)/.*");
for (Element thumb : doc.select("td > a:first-child")) {
if (isStopped()) {
break;
}
Matcher imgMatcher = imgRegex.matcher(thumb.attr("href"));
if (!imgMatcher.matches()) {
logger.info("Couldn't find user & image ID in " + thumb.attr("href"));
continue;
}
String user = imgMatcher.group(1),
imageId = imgMatcher.group(2);
String image = "http://pictures.hentai-foundry.com//";
image += user.toLowerCase().charAt(0);
image += "/" + user + "/" + imageId + ".jpg";
imageURLs.add(image);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
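A pattern repeated throughout this commit: the per-ripper index bookkeeping (the String.format("%03d_", index) block guarded by the download.save_order config, removed above) is replaced by addURLToDownload(url, getPrefix(index)). getPrefix itself is not shown in any hunk here; presumably it lives in the abstract base class and does roughly what the removed code did, along these lines:

    // Hedged reconstruction of what the base class's getPrefix(int) presumably does,
    // based on the per-ripper code it replaces; the actual implementation is not in this diff.
    public String getPrefix(int index) {
        String prefix = "";
        if (Utils.getConfigBoolean("download.save_order", true)) {
            prefix = String.format("%03d_", index);
        }
        return prefix;
    }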

View File

@ -3,28 +3,43 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class ImagearnRipper extends AlbumRipper {
private static final String DOMAIN = "imagearn.com",
HOST = "imagearn";
public class ImagearnRipper extends AbstractHTMLRipper {
public ImagearnRipper(URL url) throws IOException {
super(url);
}
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
@Override
public String getHost() {
return "imagearn";
}
@Override
public String getDomain() {
return "imagearn.com";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*imagearn.com/{1,}gallery.php\\?id=([0-9]{1,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected imagearn.com gallery formats: "
+ "imagearn.com/gallery.php?id=####..."
+ " Got: " + url);
}
public URL sanitizeURL(URL url) throws MalformedURLException {
@ -56,42 +71,24 @@ public class ImagearnRipper extends AlbumRipper {
}
@Override
public void rip() throws IOException {
int index = 0;
logger.info("Retrieving " + this.url.toExternalForm());
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
Document doc = Http.url(this.url).get();
for (Element thumb : doc.select("img.border")) {
if (isStopped()) {
break;
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
for (Element thumb : doc.select("img.border")) {
String image = thumb.attr("src");
image = image.replaceAll("thumbs[0-9]*\\.imagearn\\.com/", "img.imagearn.com/imags/");
index += 1;
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
imageURLs.add(image);
}
addURLToDownload(new URL(image), prefix);
}
waitForThreads();
return imageURLs;
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^.*imagearn.com/{1,}gallery.php\\?id=([0-9]{1,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected imagearn.com gallery formats: "
+ "imagearn.com/gallery.php?id=####..."
+ " Got: " + url);
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
sleep(1000);
}
}

View File

@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -10,61 +12,34 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class ImagebamRipper extends AlbumRipper {
private static final int IMAGE_SLEEP_TIME = 250,
PAGE_SLEEP_TIME = 3000;
private static final String DOMAIN = "imagebam.com", HOST = "imagebam";
// Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
public class ImagebamRipper extends AbstractHTMLRipper {
// Current HTML document
private Document albumDoc = null;
// Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
@Override
public DownloadThreadPool getThreadPool() {
return imagebamThreadPool;
}
public ImagebamRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return HOST;
return "imagebam";
}
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
if (albumDoc == null) {
logger.info(" Retrieving " + url.toExternalForm());
sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
albumDoc = Http.url(url).get();
}
Elements elems = albumDoc.select("legend");
String title = elems.first().text();
logger.info("Title text: '" + title + "'");
Pattern p = Pattern.compile("^(.*)\\s\\d* image.*$");
Matcher m = p.matcher(title);
if (m.matches()) {
logger.info("matches!");
return HOST + "_" + getGID(url) + " (" + m.group(1).trim() + ")";
}
logger.info("Doesn't match " + p.pattern());
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
@Override
public String getDomain() {
return "imagebam.com";
}
@Override
@ -85,71 +60,59 @@ public class ImagebamRipper extends AlbumRipper {
}
@Override
public void rip() throws IOException {
int index = 0;
String nextUrl = this.url.toExternalForm();
while (true) {
if (isStopped()) {
break;
}
public Document getFirstPage() throws IOException {
if (albumDoc == null) {
logger.info(" Retrieving album page " + nextUrl);
sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
albumDoc = Http.url(nextUrl)
.referrer(this.url)
.get();
}
// Find thumbnails
Elements thumbs = albumDoc.select("div > a[target=_blank]:not(.footera)");
if (thumbs.size() == 0) {
logger.info("No images found at " + nextUrl);
break;
}
// Iterate over images on page
for (Element thumb : thumbs) {
if (isStopped()) {
break;
}
index++;
ImagebamImageThread t = new ImagebamImageThread(new URL(thumb.attr("href")), index);
imagebamThreadPool.addThread(t);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
} catch (InterruptedException e) {
logger.warn("Interrupted while waiting to load next image", e);
albumDoc = Http.url(url).get();
}
return albumDoc;
}
if (isStopped()) {
break;
}
@Override
public Document getNextPage(Document doc) throws IOException {
// Find next page
Elements hrefs = albumDoc.select("a.pagination_current + a.pagination_link");
Elements hrefs = doc.select("a.pagination_current + a.pagination_link");
if (hrefs.size() == 0) {
logger.info("No more pages found at " + nextUrl);
break;
throw new IOException("No more pages");
}
String nextUrl = "http://www.imagebam.com" + hrefs.first().attr("href");
sleep(500);
return Http.url(nextUrl).get();
}
nextUrl = "http://www.imagebam.com" + hrefs.first().attr("href");
logger.info("Found next page: " + nextUrl);
// Reset albumDoc so we fetch the page next time
albumDoc = null;
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) {
imageURLs.add(thumb.attr("href"));
}
return imageURLs;
}
// Sleep before loading next page
@Override
public void downloadURL(URL url, int index) {
ImagebamImageThread t = new ImagebamImageThread(url, index);
imagebamThreadPool.addThread(t);
sleep(500);
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Thread.sleep(PAGE_SLEEP_TIME);
} catch (InterruptedException e) {
logger.error("Interrupted while waiting to load next page", e);
break;
// Attempt to use album title as GID
Elements elems = getFirstPage().select("legend");
String title = elems.first().text();
logger.info("Title text: '" + title + "'");
Pattern p = Pattern.compile("^(.*)\\s\\d* image.*$");
Matcher m = p.matcher(title);
if (m.matches()) {
return getHost() + "_" + getGID(url) + " (" + m.group(1).trim() + ")";
}
logger.info("Doesn't match " + p.pattern());
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
}
imagebamThreadPool.waitForThreads();
waitForThreads();
}
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
return super.getAlbumTitle(url);
}
/**

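ImagebamRipper keeps its DownloadThreadPool because each thumbnail points at an intermediate HTML page rather than a direct image: downloadURL() just queues an ImagebamImageThread, and that worker resolves the real image link (ImagevenueRipper, further down, does the same). The worker classes are only partially visible in this diff; a rough sketch of the pattern, written as a private inner class of the ripper with simplified selectors and messages:

    // Sketch of the per-image worker pattern (selector and log messages are illustrative).
    private class ImagePageThread extends Thread {
        private URL url;    // the intermediate "image page", not the image itself
        private int index;

        ImagePageThread(URL url, int index) {
            this.url = url;
            this.index = index;
        }

        @Override
        public void run() {
            try {
                Document doc = Http.url(url).retries(3).get();
                Element img = doc.select("a > img").first();   // site-specific selector
                if (img == null) {
                    logger.warn("No image found at " + url);
                    return;
                }
                addURLToDownload(new URL(img.attr("src")), getPrefix(index));
            } catch (IOException e) {
                logger.error("Failed to fetch image page " + url, e);
            }
        }
    }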
View File

@ -3,22 +3,18 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class ImagefapRipper extends AlbumRipper {
private static final String DOMAIN = "imagefap.com",
HOST = "imagefap";
public class ImagefapRipper extends AbstractHTMLRipper {
private Document albumDoc = null;
@ -28,38 +24,25 @@ public class ImagefapRipper extends AlbumRipper {
@Override
public String getHost() {
return HOST;
return "imagefap";
}
@Override
public String getDomain() {
return "imagefap.com";
}
/**
* Reformat given URL into the desired format (all images on single page)
*/
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
String gid = getGID(url);
URL newURL = new URL("http://www.imagefap.com/gallery.php?gid="
+ gid + "&view=2");
logger.debug("Sanitized URL from " + url + " to " + newURL);
logger.debug("Changed URL from " + url + " to " + newURL);
return newURL;
}
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
// Attempt to use album title as GID
if (albumDoc == null) {
albumDoc = Http.url(url).get();
}
String title = albumDoc.title();
Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$");
Matcher m = p.matcher(title);
if (m.matches()) {
return m.group(1);
}
} catch (IOException e) {
// Fall back to default album naming convention
}
return super.getAlbumTitle(url);
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m;
@ -90,32 +73,15 @@ public class ImagefapRipper extends AlbumRipper {
}
@Override
public void rip() throws IOException {
int index = 0;
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
logger.info("Retrieving " + this.url);
public Document getFirstPage() throws IOException {
if (albumDoc == null) {
albumDoc = Http.url(this.url).get();
albumDoc = Http.url(url).get();
}
while (true) {
if (isStopped()) {
break;
}
for (Element thumb : albumDoc.select("#gallery img")) {
if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
continue;
}
String image = thumb.attr("src");
image = image.replaceAll(
"http://x.*.fap.to/images/thumb/",
"http://fap.to/images/full/");
index += 1;
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(image), prefix);
return albumDoc;
}
@Override
public Document getNextPage(Document doc) throws IOException {
String nextURL = null;
for (Element a : albumDoc.select("a.link3")) {
if (a.text().contains("next")) {
@ -125,24 +91,47 @@ public class ImagefapRipper extends AlbumRipper {
}
}
if (nextURL == null) {
break;
throw new IOException("No next page found");
}
else {
sleep(1000);
return Http.url(nextURL).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
for (Element thumb : albumDoc.select("#gallery img")) {
if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
continue;
}
String image = thumb.attr("src");
image = image.replaceAll(
"http://x.*.fap.to/images/thumb/",
"http://fap.to/images/full/");
imageURLs.add(image);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
logger.error("Interrupted while waiting to load next page", e);
throw new IOException(e);
// Attempt to use album title as GID
String title = getFirstPage().title();
Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$");
Matcher m = p.matcher(title);
if (m.matches()) {
return getHost() + "_" + m.group(1);
}
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
albumDoc = Jsoup.connect(nextURL).get();
} catch (IOException e) {
// Fall back to default album naming convention
}
}
waitForThreads();
}
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
return super.getAlbumTitle(url);
}
}

View File

@ -3,88 +3,32 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class ImagestashRipper extends AlbumRipper {
public class ImagestashRipper extends AbstractJSONRipper {
private static final String DOMAIN = "imagestash.org",
HOST = "imagestash";
private int page = 1;
public ImagestashRipper(URL url) throws IOException {
super(url);
}
public boolean canRip(URL url) {
return url.getHost().equals(DOMAIN);
}
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
// Given URL: https://imagestash.org/tag/everydayuncensor
// GID: "everydayuncensor"
// JSON URL: https://imagestash.org/images?tags=everydayuncensor&page=1
String baseURL = "https://imagestash.org/images?tags=" + getGID(this.url);
int page = 0, index = 0;
while (true) {
if (isStopped()) {
break;
}
page++;
String nextURL = baseURL + "&page=" + page;
logger.info("[ ] Retrieving " + nextURL);
sendUpdate(STATUS.LOADING_RESOURCE, nextURL);
JSONObject json = Http.url(nextURL).getJSON();
JSONArray images = json.getJSONArray("images");
for (int i = 0; i < images.length(); i++) {
if (isStopped()) {
break;
}
JSONObject image = images.getJSONObject(i);
String imageURL = image.getString("src");
if (imageURL.startsWith("/")) {
imageURL = "http://imagestash.org" + imageURL;
}
index += 1;
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(imageURL), prefix);
}
// Check if there are more images to fetch
int count = json.getInt("count"),
offset = json.getInt("offset"),
total = json.getInt("total");
if (count + offset >= total || images.length() == 0) {
break;
}
// Wait a bit
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
logger.error("Interrupted while waiting to load next page", e);
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
return "imagestash";
}
@Override
public String getDomain() {
return "imagestash.org";
}
@Override
@ -99,4 +43,45 @@ public class ImagestashRipper extends AlbumRipper {
+ "imagestash.org/tag/tagname"
+ " Got: " + url);
}
@Override
public JSONObject getFirstPage() throws IOException {
String baseURL = "https://imagestash.org/images?tags="
+ getGID(url)
+ "&page=" + page;
return Http.url(baseURL).getJSON();
}
@Override
public JSONObject getNextPage(JSONObject json) throws IOException {
int count = json.getInt("count"),
offset = json.getInt("offset"),
total = json.getInt("total");
if (count + offset >= total || json.getJSONArray("images").length() == 0) {
throw new IOException("No more images");
}
sleep(1000);
page++;
return getFirstPage();
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
JSONArray images = json.getJSONArray("images");
for (int i = 0; i < images.length(); i++) {
JSONObject image = images.getJSONObject(i);
String imageURL = image.getString("src");
if (imageURL.startsWith("/")) {
imageURL = "http://imagestash.org" + imageURL;
}
imageURLs.add(imageURL);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}
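The stopping condition in ImagestashRipper.getNextPage() works off the counters the API reports: once count + offset reaches total (or a page comes back empty) there is nothing left to fetch, so the method throws and the rip ends. A quick worked check with made-up numbers:

    // Made-up values, only to illustrate the check above.
    int count = 20, offset = 80, total = 100;
    boolean lastPage = (count + offset >= total);   // 20 + 80 >= 100 -> true, stop paging
    // With offset = 40 instead, 20 + 40 >= 100 is false, so the ripper requests the next page.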

View File

@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -10,20 +12,19 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class ImagevenueRipper extends AlbumRipper {
private static final int IMAGE_SLEEP_TIME = 0;
private static final String DOMAIN = "imagevenue.com", HOST = "imagevenue";
public class ImagevenueRipper extends AbstractHTMLRipper {
// Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool imagevenueThreadPool = new DownloadThreadPool("imagevenue");
@Override
public DownloadThreadPool getThreadPool() {
return imagevenueThreadPool;
}
public ImagevenueRipper(URL url) throws IOException {
super(url);
@ -31,11 +32,11 @@ public class ImagevenueRipper extends AlbumRipper {
@Override
public String getHost() {
return HOST;
return "imagevenue";
}
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
@Override
public String getDomain() {
return "imagevenue.com";
}
@Override
@ -56,41 +57,21 @@ public class ImagevenueRipper extends AlbumRipper {
}
@Override
public void rip() throws IOException {
int index = 0;
String nextUrl = this.url.toExternalForm();
logger.info(" Retrieving album page " + nextUrl);
sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
Document albumDoc = Http.url(nextUrl).get();
// Find thumbnails
Elements thumbs = albumDoc.select("a[target=_blank]");
if (thumbs.size() == 0) {
logger.info("No images found at " + nextUrl);
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
else {
// Iterate over images on page
for (Element thumb : thumbs) {
if (isStopped()) {
break;
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
for (Element thumb : doc.select("a[target=_blank]")) {
imageURLs.add(thumb.attr("href"));
}
index++;
ImagevenueImageThread t = new ImagevenueImageThread(new URL(thumb.attr("href")), index);
return imageURLs;
}
public void downloadURL(URL url, int index) {
ImagevenueImageThread t = new ImagevenueImageThread(url, index);
imagevenueThreadPool.addThread(t);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
} catch (InterruptedException e) {
logger.warn("Interrupted while waiting to load next image", e);
break;
}
}
}
imagevenueThreadPool.waitForThreads();
waitForThreads();
}
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
}
/**
@ -115,8 +96,9 @@ public class ImagevenueRipper extends AlbumRipper {
private void fetchImage() {
try {
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
Document doc = Http.url(this.url).get();
Document doc = Http.url(url)
.retries(3)
.get();
// Find image
Elements images = doc.select("a > img");
if (images.size() == 0) {

View File

@ -3,66 +3,30 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class ImgboxRipper extends AlbumRipper {
private static final String DOMAIN = "imgbox.com",
HOST = "imgbox";
public class ImgboxRipper extends AbstractHTMLRipper {
public ImgboxRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
Document doc = Http.url(this.url).get();
Elements images = doc.select("div.boxed-content > a > img");
if (images.size() == 0) {
logger.error("No images found at " + this.url);
throw new IOException("No images found at " + this.url);
}
int index = 0;
for (Element image : images) {
if (isStopped()) {
break;
}
index++;
String imageUrl = image.attr("src").replace("s.imgbox.com", "i.imgbox.com");
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(imageUrl), prefix);
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
return "imgbox";
}
@Override
public String getDomain() {
return "imgbox.com";
}
@Override
@ -75,4 +39,24 @@ public class ImgboxRipper extends AlbumRipper {
throw new MalformedURLException("Expected imgbox.com URL format: " +
"imgbox.com/g/albumid - got " + url + "instead");
}
@Override
public Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
for (Element thumb : doc.select("div.boxed-content > a > img")) {
String image = thumb.attr("src")
.replace("s.imgbox.com",
"i.imgbox.com");
imageURLs.add(image);
}
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -11,26 +13,44 @@ import org.json.JSONObject;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class InstagramRipper extends AlbumRipper {
public class InstagramRipper extends AbstractJSONRipper {
private static final String DOMAIN = "instagram.com",
HOST = "instagram";
private String userID;
public InstagramRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return "instagram";
}
@Override
public String getDomain() {
return "instagram.com";
}
@Override
public boolean canRip(URL url) {
return (url.getHost().endsWith(DOMAIN)
return (url.getHost().endsWith("instagram.com")
|| url.getHost().endsWith("statigr.am")
|| url.getHost().endsWith("iconosquare.com"));
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://iconosquare.com/([a-zA-Z0-9\\-_.]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Unable to find user in " + url);
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://instagram\\.com/p/([a-zA-Z0-9\\-_.]{1,}).*$");
@ -84,67 +104,66 @@ public class InstagramRipper extends AlbumRipper {
}
@Override
public void rip() throws IOException {
String userID = getUserID(this.url);
String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id=" + userID;
String params = "";
while (true) {
String url = baseURL + params;
this.sendUpdate(STATUS.LOADING_RESOURCE, url);
logger.info(" Retrieving " + url);
JSONObject json = Http.url(url).getJSON();
JSONArray datas = json.getJSONArray("data");
String nextMaxID = "";
if (datas.length() == 0) {
break;
public JSONObject getFirstPage() throws IOException {
userID = getUserID(url);
String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id="
+ userID;
logger.info("Loading " + baseURL);
return Http.url(baseURL).getJSON();
}
@Override
public JSONObject getNextPage(JSONObject json) throws IOException {
JSONObject pagination = json.getJSONObject("pagination");
String nextMaxID = "";
JSONArray datas = json.getJSONArray("data");
for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i);
JSONObject data = datas.getJSONObject(i);
if (data.has("id")) {
nextMaxID = data.getString("id");
}
String imageUrl;
}
if (nextMaxID.equals("")) {
if (!pagination.has("next_max_id")) {
throw new IOException("No next_max_id found, stopping");
}
nextMaxID = pagination.getString("next_max_id");
}
String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id="
+ userID
+ "&max_id=" + nextMaxID;
logger.info("Loading " + baseURL);
sleep(1000);
JSONObject nextJSON = Http.url(baseURL).getJSON();
datas = nextJSON.getJSONArray("data");
if (datas.length() == 0) {
throw new IOException("No more images found");
}
return nextJSON;
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
JSONArray datas = json.getJSONArray("data");
for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i);
String imageURL;
if (data.has("videos")) {
imageUrl = data.getJSONObject("videos").getJSONObject("standard_resolution").getString("url");
imageURL = data.getJSONObject("videos").getJSONObject("standard_resolution").getString("url");
} else if (data.has("images")) {
imageUrl = data.getJSONObject("images").getJSONObject("standard_resolution").getString("url");
imageURL = data.getJSONObject("images").getJSONObject("standard_resolution").getString("url");
} else {
continue;
}
addURLToDownload(new URL(imageUrl));
imageURLs.add(imageURL);
}
JSONObject pagination = json.getJSONObject("pagination");
if (nextMaxID.equals("")) {
if (!pagination.has("next_max_id")) {
break;
} else {
nextMaxID = pagination.getString("next_max_id");
}
}
params = "&max_id=" + nextMaxID;
try {
Thread.sleep(3000);
} catch (InterruptedException e) {
logger.error("[!] Interrupted while waiting to load next album:", e);
break;
}
}
waitForThreads();
return imageURLs;
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://iconosquare.com/([a-zA-Z0-9\\-_.]{3,}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Unable to find user in " + url);
public void downloadURL(URL url, int index) {
addURLToDownload(url);
}
}

View File

@ -3,56 +3,66 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.json.JSONArray;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum;
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class IrarchivesRipper extends AlbumRipper {
private static final String DOMAIN = "i.rarchives.com",
HOST = "irarchives";
public class IrarchivesRipper extends AbstractJSONRipper {
public IrarchivesRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
public String getHost() {
return "irarchives";
}
@Override
public String getDomain() {
return "i.rarchives.com";
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
String u = url.toExternalForm();
String searchTerm = u.substring(u.indexOf("?") + 1);
searchTerm = searchTerm.replace("%3A", "=");
if (searchTerm.startsWith("url=")) {
if (!searchTerm.contains("http")
&& !searchTerm.contains(":")) {
searchTerm = searchTerm.replace("url=", "user=");
}
}
searchTerm = searchTerm.replace("user=user=", "user=");
return new URL("http://i.rarchives.com/search.cgi?" + searchTerm);
}
@Override
public void rip() throws IOException {
logger.info(" Retrieving " + this.url);
JSONObject json = Http.url(url)
public String getGID(URL url) throws MalformedURLException {
String u = url.toExternalForm();
String searchTerm = u.substring(u.indexOf("?") + 1);
return Utils.filesystemSafe(searchTerm);
}
@Override
public JSONObject getFirstPage() throws IOException {
return Http.url(url)
.timeout(60 * 1000)
.getJSON();
JSONArray posts = json.getJSONArray("posts");
if (posts.length() == 0) {
logger.error("No posts found at " + this.url);
sendUpdate(STATUS.DOWNLOAD_ERRORED, "No posts found at " + this.url);
throw new IOException("No posts found at this URL");
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
JSONArray posts = json.getJSONArray("posts");
for (int i = 0; i < posts.length(); i++) {
JSONObject post = (JSONObject) posts.get(i);
String theUrl = post.getString("url");
@ -65,34 +75,18 @@ public class IrarchivesRipper extends AlbumRipper {
sendUpdate(STATUS.DOWNLOAD_ERRORED, "Can't download " + theUrl + " : " + e.getMessage());
continue;
}
int albumIndex = 0;
for (ImgurImage image : album.images) {
albumIndex++;
String saveAs = String.format("%s-", post.getString("hexid"));
if (Utils.getConfigBoolean("download.save_order", true)) {
saveAs += String.format("%03d_", albumIndex);
}
addURLToDownload(image.url, saveAs);
imageURLs.add(image.url.toExternalForm());
}
}
else {
theUrl = post.getString("imageurl");
String saveAs = String.format("%s-", post.getString("hexid"));
addURLToDownload(new URL(theUrl), saveAs);
imageURLs.add(post.getString("imageurl"));
}
}
waitForThreads();
return imageURLs;
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
String u = url.toExternalForm();
String searchTerm = u.substring(u.indexOf("?") + 1);
return Utils.filesystemSafe(searchTerm);
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
}

View File

@ -5,26 +5,24 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.InvalidAlgorithmParameterException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.SSLException;
import javax.swing.JOptionPane;
import org.json.JSONArray;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class MediacrushRipper extends AlbumRipper {
private static final String DOMAIN = "mediacru.sh",
HOST = "mediacrush";
public class MediacrushRipper extends AbstractJSONRipper {
/** Ordered list of preferred formats, sorted by preference (low-to-high) */
private static final Map<String, Integer> PREFERRED_FORMATS = new HashMap<String,Integer>();
@ -45,8 +43,23 @@ public class MediacrushRipper extends AlbumRipper {
}
@Override
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
public String getHost() {
return "mediacrush";
}
@Override
public String getDomain() {
return "mediacru.sh";
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://[wm.]*mediacru\\.sh/([a-zA-Z0-9]+).*");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Could not find mediacru.sh page ID from " + url
+ " expected format: http://mediacru.sh/pageid");
}
@Override
@ -64,16 +77,16 @@ public class MediacrushRipper extends AlbumRipper {
}
@Override
public void rip() throws IOException {
String url = this.url.toExternalForm();
logger.info(" Retrieving " + url);
sendUpdate(STATUS.LOADING_RESOURCE, url);
JSONObject json = null;
public JSONObject getFirstPage() throws IOException {
try {
json = Http.url(url).getJSON();
} catch (Exception re) {
String jsonString = Http.url(url)
.ignoreContentType()
.connection()
.execute().body();
jsonString = jsonString.replace("&quot;", "\"");
return new JSONObject(jsonString);
} catch (SSLException re) {
// Check for the >1024-bit encryption bug present in older versions of Java
if (re.getCause().getCause() instanceof InvalidAlgorithmParameterException) {
// It's the bug. Suggest downloading the latest version.
int selection = JOptionPane.showOptionDialog(null,
"You need to upgrade to the latest Java (7+) to rip this album.\n"
@ -91,16 +104,16 @@ public class MediacrushRipper extends AlbumRipper {
Desktop.getDesktop().browse(javaUrl.toURI());
} catch (URISyntaxException use) { }
}
return;
throw new IOException("Cannot rip due to limitations in Java installation, consider upgrading Java", re.getCause());
}
catch (Exception e) {
throw new IOException("Unexpected error: " + e.getMessage(), e);
}
throw new IOException("Unexpected error occurred", re);
}
// Convert to JSON
if (!json.has("files")) {
sendUpdate(STATUS.RIP_ERRORED, "No files found at " + url);
throw new IOException("Could not find any files at " + url);
}
@Override
public List<String> getURLsFromJSON(JSONObject json) {
List<String> imageURLs = new ArrayList<String>();
// Iterate over all files
JSONArray files = json.getJSONArray("files");
for (int i = 0; i < files.length(); i++) {
@ -109,18 +122,19 @@ public class MediacrushRipper extends AlbumRipper {
JSONArray subfiles = file.getJSONArray("files");
String preferredUrl = getPreferredUrl(subfiles);
if (preferredUrl == null) {
logger.warn("Could not find 'file' inside of " + file);
sendUpdate(STATUS.DOWNLOAD_ERRORED, "Could not find file inside of " + file);
continue;
}
// Download
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", i + 1);
imageURLs.add(preferredUrl);
}
addURLToDownload(new URL(preferredUrl), prefix);
return imageURLs;
}
waitForThreads();
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
/**
@ -149,20 +163,4 @@ public class MediacrushRipper extends AlbumRipper {
}
return preferredUrl;
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("https?://[wm.]*mediacru\\.sh/([a-zA-Z0-9]+).*");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Could not find mediacru.sh page ID from " + url
+ " expected format: http://mediacru.sh/pageid");
}
}

View File

@ -41,6 +41,25 @@ public class MotherlessRipper extends AlbumRipper {
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
/*
@Override
public Document getFirstPage() throws IOException {
}
@Override
public Document getNextPage(Document doc) throws IOException {
}
@Override
public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<String>();
return imageURLs;
}
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
}
*/
@Override
public String getGID(URL url) throws MalformedURLException {

View File

@ -15,8 +15,7 @@ import com.rarchives.ripme.utils.Utils;
public class XhamsterRipper extends AlbumRipper {
private static final String DOMAIN = "xhamster.com",
HOST = "xhamster";
private static final String HOST = "xhamster";
public XhamsterRipper(URL url) throws IOException {
super(url);
@ -24,7 +23,9 @@ public class XhamsterRipper extends AlbumRipper {
@Override
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
Pattern p = Pattern.compile("^https?://[wm.]*xhamster\\.com/photos/gallery/[0-9]+.*$");
Matcher m = p.matcher(url.toExternalForm());
return m.matches();
}
@Override

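The album ripper's canRip() is narrowed from "any xhamster.com host" to a photo-gallery URL pattern, presumably so it no longer claims the /movies/ URLs handled by the new XhamsterRipper video ripper added below. A small standalone check with made-up URLs; the two regexes are copied from this commit:

    import java.util.regex.Pattern;

    public class XhamsterPatternDemo {
        public static void main(String[] args) {
            Pattern gallery = Pattern.compile("^https?://[wm.]*xhamster\\.com/photos/gallery/[0-9]+.*$");
            Pattern movie   = Pattern.compile("^https?://[wm.]*xhamster\\.com/movies/[0-9]+.*$");
            // Made-up URLs, only to show which ripper claims which pattern.
            System.out.println(gallery.matcher("http://xhamster.com/photos/gallery/123456/foo.html").matches()); // true  -> album ripper
            System.out.println(movie.matcher("http://xhamster.com/photos/gallery/123456/foo.html").matches());   // false
            System.out.println(movie.matcher("http://xhamster.com/movies/654321/foo.html").matches());           // true  -> video ripper
        }
    }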
View File

@ -0,0 +1,77 @@
package com.rarchives.ripme.ripper.rippers.video;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONObject;
import com.rarchives.ripme.ripper.VideoRipper;
import com.rarchives.ripme.utils.Base64;
import com.rarchives.ripme.utils.Http;
public class CliphunterRipper extends VideoRipper {
private static final String HOST = "cliphunter";
private static final String decryptString="{'$':':','&':'.','(':'=','-':'-','_':'_','^':'&','a':'h','c':'c','b':'b','e':'v','d':'e','g':'f','f':'o','i':'d','m':'a','l':'n','n':'m','q':'t','p':'u','r':'s','w':'w','v':'p','y':'l','x':'r','z':'i','=':'/','?':'?'}";
private static final JSONObject decryptDict = new JSONObject(decryptString);
public CliphunterRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return HOST;
}
@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://[wm.]*cliphunter\\.com/w/[0-9]+.*$");
Matcher m = p.matcher(url.toExternalForm());
return m.matches();
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[wm.]*cliphunter\\.com/w/([0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected cliphunter format:"
+ "cliphunter.com/w/####..."
+ " Got: " + url);
}
@Override
public void rip() throws IOException {
logger.info("Retrieving " + this.url);
String html = Http.url(url).get().html();
String jsonString = html.substring(html.indexOf("var flashVars = {d: '") + 21);
jsonString = jsonString.substring(0, jsonString.indexOf("'"));
JSONObject json = new JSONObject(new String(Base64.decode(jsonString)));
JSONObject jsonURL = new JSONObject(new String(Base64.decode(json.getString("url"))));
String encryptedURL = jsonURL.getJSONObject("u").getString("l");
String vidURL = "";
for (char c : encryptedURL.toCharArray()) {
if (decryptDict.has(Character.toString(c))) {
vidURL += decryptDict.getString(Character.toString(c));
}
else {
vidURL += c;
}
}
addURLToDownload(new URL(vidURL), HOST + "_" + getGID(this.url));
waitForThreads();
}
}
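CliphunterRipper recovers the final video URL by mapping each character of the encrypted string through decryptDict, passing through any character the table does not know. As a standalone sanity check against the table above, the made-up input "aqqv$==www&" decodes to "http://www." (a→h, q→t, v→p, $→:, =→/, &→.):

    import org.json.JSONObject;

    public class CliphunterDecodeDemo {
        public static void main(String[] args) {
            // Same substitution table as CliphunterRipper.decryptDict.
            JSONObject decryptDict = new JSONObject(
                  "{'$':':','&':'.','(':'=','-':'-','_':'_','^':'&','a':'h','c':'c','b':'b','e':'v',"
                + "'d':'e','g':'f','f':'o','i':'d','m':'a','l':'n','n':'m','q':'t','p':'u','r':'s',"
                + "'w':'w','v':'p','y':'l','x':'r','z':'i','=':'/','?':'?'}");
            String encrypted = "aqqv$==www&";   // made-up sample, not a real cliphunter payload
            String decoded = "";
            for (char c : encrypted.toCharArray()) {
                String key = Character.toString(c);
                if (decryptDict.has(key)) {
                    decoded += decryptDict.getString(key);
                } else {
                    decoded += c;               // unknown characters pass through unchanged
                }
            }
            System.out.println(decoded);        // prints: http://www.
        }
    }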

View File

@ -0,0 +1,66 @@
package com.rarchives.ripme.ripper.rippers.video;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.VideoRipper;
import com.rarchives.ripme.utils.Http;
public class XhamsterRipper extends VideoRipper {
private static final String HOST = "xhamster";
public XhamsterRipper(URL url) throws IOException {
super(url);
}
@Override
public String getHost() {
return HOST;
}
@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://[wm.]*xhamster\\.com/movies/[0-9]+.*$");
Matcher m = p.matcher(url.toExternalForm());
return m.matches();
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[wm.]*xhamster\\.com/movies/([0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException(
"Expected xhamster format:"
+ "xhamster.com/movies/####"
+ " Got: " + url);
}
@Override
public void rip() throws IOException {
logger.info("Retrieving " + this.url);
Document doc = Http.url(url).get();
Elements videos = doc.select("video");
if (videos.size() == 0) {
throw new IOException("Could not find Embed code at " + url);
}
String vidUrl = videos.first().attr("file");
addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
waitForThreads();
}
}

View File

@ -103,6 +103,9 @@ public class Http {
}
// Getters
public Connection connection() {
return connection;
}
public Document get() throws IOException {
connection.method(Method.GET);
return response().parse();
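The new connection() getter exposes the underlying Jsoup Connection so a caller can execute the request itself when get()/getJSON() are too restrictive; MediacrushRipper above uses it to pull the raw response body and un-escape &quot; before parsing JSON. A short sketch of that call path (jsonUrl is a placeholder, not a real endpoint):

    // Mirrors MediacrushRipper.getFirstPage(); jsonUrl is hypothetical.
    String jsonString = Http.url(jsonUrl)
                            .ignoreContentType()    // response body is JSON, not HTML
                            .connection()           // drop down to the raw Jsoup Connection
                            .execute()
                            .body();
    jsonString = jsonString.replace("&quot;", "\"");   // undo HTML escaping
    JSONObject json = new JSONObject(jsonString);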