From 7de64ffd5adbf72da05452133983ef4f7890e46a Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Tue, 24 Jun 2014 19:05:54 -0700 Subject: [PATCH] Migrated existing rippers to use more abstraction, 2 new video rippers --- .../ripme/ripper/AbstractJSONRipper.java | 4 +- .../ripper/rippers/HentaifoundryRipper.java | 154 ++++++++-------- .../ripme/ripper/rippers/ImagearnRipper.java | 79 ++++---- .../ripme/ripper/rippers/ImagebamRipper.java | 169 +++++++----------- .../ripme/ripper/rippers/ImagefapRipper.java | 139 +++++++------- .../ripper/rippers/ImagestashRipper.java | 117 ++++++------ .../ripper/rippers/ImagevenueRipper.java | 78 ++++---- .../ripme/ripper/rippers/ImgboxRipper.java | 74 +++----- .../ripme/ripper/rippers/InstagramRipper.java | 145 ++++++++------- .../ripper/rippers/IrarchivesRipper.java | 70 ++++---- .../ripper/rippers/MediacrushRipper.java | 128 +++++++------ .../ripper/rippers/MotherlessRipper.java | 19 ++ .../ripme/ripper/rippers/XhamsterRipper.java | 7 +- .../rippers/video/CliphunterRipper.java | 77 ++++++++ .../ripper/rippers/video/XhamsterRipper.java | 66 +++++++ .../java/com/rarchives/ripme/utils/Http.java | 3 + 16 files changed, 701 insertions(+), 628 deletions(-) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/video/CliphunterRipper.java create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/video/XhamsterRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java index fea5c49e..7d9a5640 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java @@ -23,7 +23,9 @@ public abstract class AbstractJSONRipper extends AlbumRipper { public abstract String getHost(); public abstract JSONObject getFirstPage() throws IOException; - public abstract JSONObject getNextPage(JSONObject json) throws IOException; + public JSONObject 
getNextPage(JSONObject doc) throws IOException { + throw new IOException("getNextPage not implemented"); + } public abstract List getURLsFromJSON(JSONObject json); public abstract void downloadURL(URL url, int index); public DownloadThreadPool getThreadPool() { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java index f98b5e86..d476969d 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java @@ -3,6 +3,9 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -12,95 +15,23 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class HentaifoundryRipper extends AlbumRipper { - - private static final String DOMAIN = "hentai-foundry.com", - HOST = "hentai-foundry"; +public class HentaifoundryRipper extends AbstractHTMLRipper { + private Map cookies = new HashMap(); public HentaifoundryRipper(URL url) throws IOException { super(url); } - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); - } - - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - @Override - public void rip() throws IOException { - Pattern imgRegex = Pattern.compile(".*/user/([a-zA-Z0-9\\-_]+)/(\\d+)/.*"); - String nextURL = this.url.toExternalForm(); - int index = 0; - - // Get cookies - Response 
resp = Http.url("http://www.hentai-foundry.com/").response(); - Map cookies = resp.cookies(); - resp = Http.url("http://www.hentai-foundry.com/?enterAgree=1&size=1500") - .referrer("http://www.hentai-foundry.com/") - .cookies(cookies) - .response(); - cookies = resp.cookies(); - logger.info("cookies: " + cookies); - - // Iterate over every page - while (true) { - if (isStopped()) { - break; - } - sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - Document doc = Http.url(nextURL) - .referrer(this.url) - .cookies(cookies) - .get(); - for (Element thumb : doc.select("td > a:first-child")) { - if (isStopped()) { - break; - } - Matcher imgMatcher = imgRegex.matcher(thumb.attr("href")); - if (!imgMatcher.matches()) { - logger.info("Couldn't find user & image ID in " + thumb.attr("href")); - continue; - } - String user = imgMatcher.group(1), - imageId = imgMatcher.group(2); - String image = "http://pictures.hentai-foundry.com//"; - logger.info("user: " + user + "; imageId: " + imageId + "; image: " + image); - image += user.toLowerCase().charAt(0); - image += "/" + user + "/" + imageId + ".jpg"; - index += 1; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(image), prefix); - } - - if (doc.select("li.next.hidden").size() > 0) { - // Last page - break; - } - Elements els = doc.select("li.next > a"); - logger.info("li.next > a : " + els); - Element first = els.first(); - logger.info("li.next > a .first() : " + first); - nextURL = first.attr("href"); - logger.info("first().attr(href) : " + nextURL); - nextURL = "http://www.hentai-foundry.com" + nextURL; - } - waitForThreads(); - } - @Override public String getHost() { - return HOST; + return "hentai-foundry"; + } + @Override + public String getDomain() { + return "hentai-foundry.com"; } @Override @@ -115,4 +46,67 @@ public class HentaifoundryRipper extends AlbumRipper { + "hentai-foundry.com/pictures/user/USERNAME" + " 
Got: " + url); } + + @Override + public Document getFirstPage() throws IOException { + Response resp = Http.url("http://www.hentai-foundry.com/").response(); + cookies = resp.cookies(); + resp = Http.url("http://www.hentai-foundry.com/?enterAgree=1&size=1500") + .referrer("http://www.hentai-foundry.com/") + .cookies(cookies) + .response(); + cookies.putAll(resp.cookies()); + sleep(500); + resp = Http.url(url) + .referrer("http://www.hentai-foundry.com/") + .cookies(cookies) + .response(); + cookies.putAll(resp.cookies()); + return resp.parse(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + if (doc.select("li.next.hidden").size() > 0) { + // Last page + throw new IOException("No more pages"); + } + Elements els = doc.select("li.next > a"); + Element first = els.first(); + String nextURL = first.attr("href"); + nextURL = "http://www.hentai-foundry.com" + nextURL; + return Http.url(nextURL) + .referrer(url) + .cookies(cookies) + .get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + Pattern imgRegex = Pattern.compile(".*/user/([a-zA-Z0-9\\-_]+)/(\\d+)/.*"); + for (Element thumb : doc.select("td > a:first-child")) { + if (isStopped()) { + break; + } + Matcher imgMatcher = imgRegex.matcher(thumb.attr("href")); + if (!imgMatcher.matches()) { + logger.info("Couldn't find user & image ID in " + thumb.attr("href")); + continue; + } + String user = imgMatcher.group(1), + imageId = imgMatcher.group(2); + String image = "http://pictures.hentai-foundry.com//"; + image += user.toLowerCase().charAt(0); + image += "/" + user + "/" + imageId + ".jpg"; + imageURLs.add(image); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java index d9d35375..9fede305 
100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java @@ -3,28 +3,43 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class ImagearnRipper extends AlbumRipper { - - private static final String DOMAIN = "imagearn.com", - HOST = "imagearn"; +public class ImagearnRipper extends AbstractHTMLRipper { public ImagearnRipper(URL url) throws IOException { super(url); } - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + @Override + public String getHost() { + return "imagearn"; + } + @Override + public String getDomain() { + return "imagearn.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^.*imagearn.com/{1,}gallery.php\\?id=([0-9]{1,}).*$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException( + "Expected imagearn.com gallery formats: " + + "imagearn.com/gallery.php?id=####..." 
+ + " Got: " + url); } public URL sanitizeURL(URL url) throws MalformedURLException { @@ -54,44 +69,26 @@ public class ImagearnRipper extends AlbumRipper { } throw new IOException("Failed to find gallery at URL " + url); } - + @Override - public void rip() throws IOException { - int index = 0; - logger.info("Retrieving " + this.url.toExternalForm()); - sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - Document doc = Http.url(this.url).get(); + public Document getFirstPage() throws IOException { + return Http.url(url).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); for (Element thumb : doc.select("img.border")) { - if (isStopped()) { - break; - } String image = thumb.attr("src"); image = image.replaceAll("thumbs[0-9]*\\.imagearn\\.com/", "img.imagearn.com/imags/"); - index += 1; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(image), prefix); + imageURLs.add(image); } - waitForThreads(); + return imageURLs; } - + @Override - public String getHost() { - return HOST; - } - - @Override - public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^.*imagearn.com/{1,}gallery.php\\?id=([0-9]{1,}).*$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } - throw new MalformedURLException( - "Expected imagearn.com gallery formats: " - + "imagearn.com/gallery.php?id=####..." 
- + " Got: " + url); + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + sleep(1000); } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java index 0b0c4fbd..dd4ad482 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java @@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -10,61 +12,34 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class ImagebamRipper extends AlbumRipper { - - private static final int IMAGE_SLEEP_TIME = 250, - PAGE_SLEEP_TIME = 3000; - - private static final String DOMAIN = "imagebam.com", HOST = "imagebam"; - - // Thread pool for finding direct image links from "image" pages (html) - private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam"); +public class ImagebamRipper extends AbstractHTMLRipper { // Current HTML document private Document albumDoc = null; + // Thread pool for finding direct image links from "image" pages (html) + private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam"); + @Override + public DownloadThreadPool getThreadPool() { + return imagebamThreadPool; + } + public ImagebamRipper(URL url) throws IOException { super(url); } @Override public String getHost() { - return HOST; + 
return "imagebam"; } - - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - public String getAlbumTitle(URL url) throws MalformedURLException { - try { - // Attempt to use album title as GID - if (albumDoc == null) { - logger.info(" Retrieving " + url.toExternalForm()); - sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = Http.url(url).get(); - } - Elements elems = albumDoc.select("legend"); - String title = elems.first().text(); - logger.info("Title text: '" + title + "'"); - Pattern p = Pattern.compile("^(.*)\\s\\d* image.*$"); - Matcher m = p.matcher(title); - if (m.matches()) { - logger.info("matches!"); - return HOST + "_" + getGID(url) + " (" + m.group(1).trim() + ")"; - } - logger.info("Doesn't match " + p.pattern()); - } catch (Exception e) { - // Fall back to default album naming convention - logger.warn("Failed to get album title from " + url, e); - } - return super.getAlbumTitle(url); + @Override + public String getDomain() { + return "imagebam.com"; } @Override @@ -83,73 +58,61 @@ public class ImagebamRipper extends AlbumRipper { + "http://www.imagebam.com/gallery/galleryid" + " Got: " + url); } - + @Override - public void rip() throws IOException { - int index = 0; - String nextUrl = this.url.toExternalForm(); - while (true) { - if (isStopped()) { - break; - } - if (albumDoc == null) { - logger.info(" Retrieving album page " + nextUrl); - sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = Http.url(nextUrl) - .referrer(this.url) - .get(); - } - // Find thumbnails - Elements thumbs = albumDoc.select("div > a[target=_blank]:not(.footera)"); - if (thumbs.size() == 0) { - logger.info("No images found at " + nextUrl); - break; - } - // Iterate over images on page - for (Element thumb : thumbs) { - if (isStopped()) { - break; - } - index++; - ImagebamImageThread t = new ImagebamImageThread(new URL(thumb.attr("href")), index); - imagebamThreadPool.addThread(t); - try { - Thread.sleep(IMAGE_SLEEP_TIME); - 
} catch (InterruptedException e) { - logger.warn("Interrupted while waiting to load next image", e); - } - } - - if (isStopped()) { - break; - } - // Find next page - Elements hrefs = albumDoc.select("a.pagination_current + a.pagination_link"); - if (hrefs.size() == 0) { - logger.info("No more pages found at " + nextUrl); - break; - } - nextUrl = "http://www.imagebam.com" + hrefs.first().attr("href"); - logger.info("Found next page: " + nextUrl); - - // Reset albumDoc so we fetch the page next time - albumDoc = null; - - // Sleep before loading next page - try { - Thread.sleep(PAGE_SLEEP_TIME); - } catch (InterruptedException e) { - logger.error("Interrupted while waiting to load next page", e); - break; - } + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + albumDoc = Http.url(url).get(); } - - imagebamThreadPool.waitForThreads(); - waitForThreads(); + return albumDoc; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + // Find next page + Elements hrefs = doc.select("a.pagination_current + a.pagination_link"); + if (hrefs.size() == 0) { + throw new IOException("No more pages"); + } + String nextUrl = "http://www.imagebam.com" + hrefs.first().attr("href"); + sleep(500); + return Http.url(nextUrl).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) { + imageURLs.add(thumb.attr("href")); + } + return imageURLs; } - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + @Override + public void downloadURL(URL url, int index) { + ImagebamImageThread t = new ImagebamImageThread(url, index); + imagebamThreadPool.addThread(t); + sleep(500); + } + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use album title as GID + Elements elems = getFirstPage().select("legend"); + String title = 
elems.first().text(); + logger.info("Title text: '" + title + "'"); + Pattern p = Pattern.compile("^(.*)\\s\\d* image.*$"); + Matcher m = p.matcher(title); + if (m.matches()) { + return getHost() + "_" + getGID(url) + " (" + m.group(1).trim() + ")"; + } + logger.info("Doesn't match " + p.pattern()); + } catch (Exception e) { + // Fall back to default album naming convention + logger.warn("Failed to get album title from " + url, e); + } + return super.getAlbumTitle(url); } /** diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java index c527741c..065451ad 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java @@ -3,22 +3,18 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class ImagefapRipper extends AlbumRipper { - - private static final String DOMAIN = "imagefap.com", - HOST = "imagefap"; +public class ImagefapRipper extends AbstractHTMLRipper { private Document albumDoc = null; @@ -28,37 +24,24 @@ public class ImagefapRipper extends AlbumRipper { @Override public String getHost() { - return HOST; + return "imagefap"; + } + @Override + public String getDomain() { + return "imagefap.com"; } /** * Reformat given URL into the desired format (all images on single page) */ + @Override public URL sanitizeURL(URL url) throws 
MalformedURLException { String gid = getGID(url); URL newURL = new URL("http://www.imagefap.com/gallery.php?gid=" + gid + "&view=2"); - logger.debug("Sanitized URL from " + url + " to " + newURL); + logger.debug("Changed URL from " + url + " to " + newURL); return newURL; } - - public String getAlbumTitle(URL url) throws MalformedURLException { - try { - // Attempt to use album title as GID - if (albumDoc == null) { - albumDoc = Http.url(url).get(); - } - String title = albumDoc.title(); - Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$"); - Matcher m = p.matcher(title); - if (m.matches()) { - return m.group(1); - } - } catch (IOException e) { - // Fall back to default album naming convention - } - return super.getAlbumTitle(url); - } @Override public String getGID(URL url) throws MalformedURLException { @@ -88,61 +71,67 @@ public class ImagefapRipper extends AlbumRipper { + "imagefap.com/pictures/####..." + " Got: " + url); } - + @Override - public void rip() throws IOException { - int index = 0; - sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - logger.info("Retrieving " + this.url); + public Document getFirstPage() throws IOException { if (albumDoc == null) { - albumDoc = Http.url(this.url).get(); + albumDoc = Http.url(url).get(); } - while (true) { - if (isStopped()) { + return albumDoc; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + String nextURL = null; + for (Element a : albumDoc.select("a.link3")) { + if (a.text().contains("next")) { + nextURL = a.attr("href"); + nextURL = "http://imagefap.com/gallery.php" + nextURL; break; } - for (Element thumb : albumDoc.select("#gallery img")) { - if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) { - continue; - } - String image = thumb.attr("src"); - image = image.replaceAll( - "http://x.*.fap.to/images/thumb/", - "http://fap.to/images/full/"); - index += 1; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - 
prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(image), prefix); - } - String nextURL = null; - for (Element a : albumDoc.select("a.link3")) { - if (a.text().contains("next")) { - nextURL = a.attr("href"); - nextURL = "http://imagefap.com/gallery.php" + nextURL; - break; - } - } - if (nextURL == null) { - break; - } - else { - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - logger.error("Interrupted while waiting to load next page", e); - throw new IOException(e); - } - sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - albumDoc = Jsoup.connect(nextURL).get(); - } } - waitForThreads(); + if (nextURL == null) { + throw new IOException("No next page found"); + } + sleep(1000); + return Http.url(nextURL).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + for (Element thumb : albumDoc.select("#gallery img")) { + if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) { + continue; + } + String image = thumb.attr("src"); + image = image.replaceAll( + "http://x.*.fap.to/images/thumb/", + "http://fap.to/images/full/"); + imageURLs.add(image); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); } - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use album title as GID + String title = getFirstPage().title(); + Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$"); + Matcher m = p.matcher(title); + if (m.matches()) { + return getHost() + "_" + m.group(1); + } + } catch (IOException e) { + // Fall back to default album naming convention + } + return super.getAlbumTitle(url); } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java 
b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java index 1023cb3c..b4bb1fd1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java @@ -3,88 +3,32 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.ripper.AbstractJSONRipper; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class ImagestashRipper extends AlbumRipper { +public class ImagestashRipper extends AbstractJSONRipper { - private static final String DOMAIN = "imagestash.org", - HOST = "imagestash"; + private int page = 1; public ImagestashRipper(URL url) throws IOException { super(url); } - public boolean canRip(URL url) { - return url.getHost().equals(DOMAIN); - } - - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - @Override - public void rip() throws IOException { - // Given URL: https://imagestash.org/tag/everydayuncensor - // GID: "everydayuncensor" - // JSON URL: https://imagestash.org/images?tags=everydayuncensor&page=1 - String baseURL = "https://imagestash.org/images?tags=" + getGID(this.url); - int page = 0, index = 0; - while (true) { - if (isStopped()) { - break; - } - page++; - String nextURL = baseURL + "&page=" + page; - logger.info("[ ] Retrieving " + nextURL); - sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - JSONObject json = Http.url(nextURL).getJSON(); - JSONArray images = json.getJSONArray("images"); - for (int i = 0; i < images.length(); i++) { - if (isStopped()) { - break; - } - JSONObject image = 
images.getJSONObject(i); - String imageURL = image.getString("src"); - if (imageURL.startsWith("/")) { - imageURL = "http://imagestash.org" + imageURL; - } - index += 1; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(imageURL), prefix); - } - // Check if there are more images to fetch - int count = json.getInt("count"), - offset = json.getInt("offset"), - total = json.getInt("total"); - if (count + offset >= total || images.length() == 0) { - break; - } - // Wait a bit - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - logger.error("Interrupted while waiting to load next page", e); - break; - } - } - waitForThreads(); - } - @Override public String getHost() { - return HOST; + return "imagestash"; + } + @Override + public String getDomain() { + return "imagestash.org"; } @Override @@ -99,4 +43,45 @@ public class ImagestashRipper extends AlbumRipper { + "imagestash.org/tag/tagname" + " Got: " + url); } + + @Override + public JSONObject getFirstPage() throws IOException { + String baseURL = "https://imagestash.org/images?tags=" + + getGID(url) + + "&page=" + page; + return Http.url(baseURL).getJSON(); + } + + @Override + public JSONObject getNextPage(JSONObject json) throws IOException { + int count = json.getInt("count"), + offset = json.getInt("offset"), + total = json.getInt("total"); + if (count + offset >= total || json.getJSONArray("images").length() == 0) { + throw new IOException("No more images"); + } + sleep(1000); + page++; + return getFirstPage(); + } + + @Override + public List getURLsFromJSON(JSONObject json) { + List imageURLs = new ArrayList(); + JSONArray images = json.getJSONArray("images"); + for (int i = 0; i < images.length(); i++) { + JSONObject image = images.getJSONObject(i); + String imageURL = image.getString("src"); + if (imageURL.startsWith("/")) { + imageURL = "http://imagestash.org" + imageURL; + } + 
imageURLs.add(imageURL); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java index 749b0dd4..0bb2b6a4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java @@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -10,20 +12,19 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class ImagevenueRipper extends AlbumRipper { - - private static final int IMAGE_SLEEP_TIME = 0; - - private static final String DOMAIN = "imagevenue.com", HOST = "imagevenue"; +public class ImagevenueRipper extends AbstractHTMLRipper { // Thread pool for finding direct image links from "image" pages (html) private DownloadThreadPool imagevenueThreadPool = new DownloadThreadPool("imagevenue"); + @Override + public DownloadThreadPool getThreadPool() { + return imagevenueThreadPool; + } public ImagevenueRipper(URL url) throws IOException { super(url); @@ -31,11 +32,11 @@ public class ImagevenueRipper extends AlbumRipper { @Override public String getHost() { - return HOST; + return "imagevenue"; } - - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; + @Override + public 
String getDomain() { + return "imagevenue.com"; } @Override @@ -54,43 +55,23 @@ public class ImagevenueRipper extends AlbumRipper { + "http://...imagevenue.com/galshow.php?gal=gallery_...." + " Got: " + url); } - + @Override - public void rip() throws IOException { - int index = 0; - String nextUrl = this.url.toExternalForm(); - logger.info(" Retrieving album page " + nextUrl); - sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - Document albumDoc = Http.url(nextUrl).get(); - // Find thumbnails - Elements thumbs = albumDoc.select("a[target=_blank]"); - if (thumbs.size() == 0) { - logger.info("No images found at " + nextUrl); - } - else { - // Iterate over images on page - for (Element thumb : thumbs) { - if (isStopped()) { - break; - } - index++; - ImagevenueImageThread t = new ImagevenueImageThread(new URL(thumb.attr("href")), index); - imagevenueThreadPool.addThread(t); - try { - Thread.sleep(IMAGE_SLEEP_TIME); - } catch (InterruptedException e) { - logger.warn("Interrupted while waiting to load next image", e); - break; - } - } - } - - imagevenueThreadPool.waitForThreads(); - waitForThreads(); + public Document getFirstPage() throws IOException { + return Http.url(url).get(); } - - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + for (Element thumb : doc.select("a[target=_blank]")) { + imageURLs.add(thumb.attr("href")); + } + return imageURLs; + } + + public void downloadURL(URL url, int index) { + ImagevenueImageThread t = new ImagevenueImageThread(url, index); + imagevenueThreadPool.addThread(t); } /** @@ -115,8 +96,9 @@ public class ImagevenueRipper extends AlbumRipper { private void fetchImage() { try { - sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - Document doc = Http.url(this.url).get(); + Document doc = Http.url(url) + .retries(3) + .get(); // Find image Elements images = doc.select("a > img"); if (images.size() == 0) { diff 
--git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java index e7451979..dfbacf24 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java @@ -3,66 +3,30 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class ImgboxRipper extends AlbumRipper { +public class ImgboxRipper extends AbstractHTMLRipper { - private static final String DOMAIN = "imgbox.com", - HOST = "imgbox"; - public ImgboxRipper(URL url) throws IOException { super(url); } - @Override - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); - } - - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - @Override - public void rip() throws IOException { - sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); - Document doc = Http.url(this.url).get(); - Elements images = doc.select("div.boxed-content > a > img"); - if (images.size() == 0) { - logger.error("No images found at " + this.url); - throw new IOException("No images found at " + this.url); - } - int index = 0; - for (Element image : images) { - if (isStopped()) { - break; - } - index++; - String imageUrl = image.attr("src").replace("s.imgbox.com", "i.imgbox.com"); - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = 
String.format("%03d_", index); - } - addURLToDownload(new URL(imageUrl), prefix); - } - - waitForThreads(); - } - @Override public String getHost() { - return HOST; + return "imgbox"; + } + @Override + public String getDomain() { + return "imgbox.com"; } @Override @@ -75,4 +39,24 @@ public class ImgboxRipper extends AlbumRipper { throw new MalformedURLException("Expected imgbox.com URL format: " + "imgbox.com/g/albumid - got " + url + "instead"); } + + @Override + public Document getFirstPage() throws IOException { + return Http.url(url).get(); + } + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + for (Element thumb : doc.select("div.boxed-content > a > img")) { + String image = thumb.attr("src") + .replace("s.imgbox.com", + "i.imgbox.com"); + imageURLs.add(image); + } + return imageURLs; + } + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 4ef7c5e1..95a2a0c9 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -11,26 +13,44 @@ import org.json.JSONObject; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractJSONRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; -public class InstagramRipper extends AlbumRipper { +public class InstagramRipper extends AbstractJSONRipper { - private 
static final String DOMAIN = "instagram.com", - HOST = "instagram"; + private String userID; public InstagramRipper(URL url) throws IOException { super(url); } + @Override + public String getHost() { + return "instagram"; + } + @Override + public String getDomain() { + return "instagram.com"; + } + @Override public boolean canRip(URL url) { - return (url.getHost().endsWith(DOMAIN) + return (url.getHost().endsWith("instagram.com") || url.getHost().endsWith("statigr.am") || url.getHost().endsWith("iconosquare.com")); } + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://iconosquare.com/([a-zA-Z0-9\\-_.]{3,}).*$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Unable to find user in " + url); + } + @Override public URL sanitizeURL(URL url) throws MalformedURLException { Pattern p = Pattern.compile("^https?://instagram\\.com/p/([a-zA-Z0-9\\-_.]{1,}).*$"); @@ -82,69 +102,68 @@ public class InstagramRipper extends AlbumRipper { } throw new IOException("Unable to find userID at " + this.url); } - + @Override - public void rip() throws IOException { - String userID = getUserID(this.url); - String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id=" + userID; - String params = ""; - while (true) { - String url = baseURL + params; - this.sendUpdate(STATUS.LOADING_RESOURCE, url); - logger.info(" Retrieving " + url); - JSONObject json = Http.url(url).getJSON(); - JSONArray datas = json.getJSONArray("data"); - String nextMaxID = ""; - if (datas.length() == 0) { - break; - } - for (int i = 0; i < datas.length(); i++) { - JSONObject data = (JSONObject) datas.get(i); - if (data.has("id")) { - nextMaxID = data.getString("id"); - } - String imageUrl; - if (data.has("videos")) { - imageUrl = data.getJSONObject("videos").getJSONObject("standard_resolution").getString("url"); - } else if (data.has("images")) 
{ - imageUrl = data.getJSONObject("images").getJSONObject("standard_resolution").getString("url"); - } else { - continue; - } - addURLToDownload(new URL(imageUrl)); - } - JSONObject pagination = json.getJSONObject("pagination"); - if (nextMaxID.equals("")) { - if (!pagination.has("next_max_id")) { - break; - } else { - nextMaxID = pagination.getString("next_max_id"); - } - } - params = "&max_id=" + nextMaxID; - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - logger.error("[!] Interrupted while waiting to load next album:", e); - break; - } - } - waitForThreads(); + public JSONObject getFirstPage() throws IOException { + userID = getUserID(url); + String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id=" + + userID; + logger.info("Loading " + baseURL); + return Http.url(baseURL).getJSON(); } @Override - public String getHost() { - return HOST; - } - - @Override - public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://iconosquare.com/([a-zA-Z0-9\\-_.]{3,}).*$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); + public JSONObject getNextPage(JSONObject json) throws IOException { + JSONObject pagination = json.getJSONObject("pagination"); + String nextMaxID = ""; + JSONArray datas = json.getJSONArray("data"); + for (int i = 0; i < datas.length(); i++) { + JSONObject data = datas.getJSONObject(i); + if (data.has("id")) { + nextMaxID = data.getString("id"); + } } - throw new MalformedURLException("Unable to find user in " + url); + if (nextMaxID.equals("")) { + if (!pagination.has("next_max_id")) { + throw new IOException("No next_max_id found, stopping"); + } + nextMaxID = pagination.getString("next_max_id"); + } + String baseURL = "http://iconosquare.com/controller_nl.php?action=getPhotoUserPublic&user_id=" + + userID + + "&max_id=" + nextMaxID; + logger.info("Loading " + baseURL); + sleep(1000); + JSONObject nextJSON = 
Http.url(baseURL).getJSON(); + datas = nextJSON.getJSONArray("data"); + if (datas.length() == 0) { + throw new IOException("No more images found"); + } + return nextJSON; + } + + @Override + public List getURLsFromJSON(JSONObject json) { + List imageURLs = new ArrayList(); + JSONArray datas = json.getJSONArray("data"); + for (int i = 0; i < datas.length(); i++) { + JSONObject data = (JSONObject) datas.get(i); + String imageURL; + if (data.has("videos")) { + imageURL = data.getJSONObject("videos").getJSONObject("standard_resolution").getString("url"); + } else if (data.has("images")) { + imageURL = data.getJSONObject("images").getJSONObject("standard_resolution").getString("url"); + } else { + continue; + } + imageURLs.add(imageURL); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url); } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java index 5096830f..b0360cb6 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java @@ -3,56 +3,66 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import org.json.JSONArray; import org.json.JSONObject; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractJSONRipper; import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class IrarchivesRipper extends AlbumRipper { - - private static final String DOMAIN = "i.rarchives.com", - HOST = "irarchives"; +public class IrarchivesRipper 
extends AbstractJSONRipper { public IrarchivesRipper(URL url) throws IOException { super(url); } @Override - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + public String getHost() { + return "irarchives"; + } + @Override + public String getDomain() { + return "i.rarchives.com"; } @Override public URL sanitizeURL(URL url) throws MalformedURLException { String u = url.toExternalForm(); String searchTerm = u.substring(u.indexOf("?") + 1); + searchTerm = searchTerm.replace("%3A", "="); if (searchTerm.startsWith("url=")) { if (!searchTerm.contains("http") && !searchTerm.contains(":")) { searchTerm = searchTerm.replace("url=", "user="); } } + searchTerm = searchTerm.replace("user=user=", "user="); return new URL("http://i.rarchives.com/search.cgi?" + searchTerm); } @Override - public void rip() throws IOException { - logger.info(" Retrieving " + this.url); - JSONObject json = Http.url(url) - .timeout(60 * 1000) - .getJSON(); + public String getGID(URL url) throws MalformedURLException { + String u = url.toExternalForm(); + String searchTerm = u.substring(u.indexOf("?") + 1); + return Utils.filesystemSafe(searchTerm); + } + + @Override + public JSONObject getFirstPage() throws IOException { + return Http.url(url) + .timeout(60 * 1000) + .getJSON(); + } + @Override + public List getURLsFromJSON(JSONObject json) { + List imageURLs = new ArrayList(); JSONArray posts = json.getJSONArray("posts"); - if (posts.length() == 0) { - logger.error("No posts found at " + this.url); - sendUpdate(STATUS.DOWNLOAD_ERRORED, "No posts found at " + this.url); - throw new IOException("No posts found at this URL"); - } for (int i = 0; i < posts.length(); i++) { JSONObject post = (JSONObject) posts.get(i); String theUrl = post.getString("url"); @@ -65,34 +75,18 @@ public class IrarchivesRipper extends AlbumRipper { sendUpdate(STATUS.DOWNLOAD_ERRORED, "Can't download " + theUrl + " : " + e.getMessage()); continue; } - int albumIndex = 0; for (ImgurImage image : 
album.images) { - albumIndex++; - String saveAs = String.format("%s-", post.getString("hexid")); - if (Utils.getConfigBoolean("download.save_order", true)) { - saveAs += String.format("%03d_", albumIndex); - } - addURLToDownload(image.url, saveAs); + imageURLs.add(image.url.toExternalForm()); } } else { - theUrl = post.getString("imageurl"); - String saveAs = String.format("%s-", post.getString("hexid")); - addURLToDownload(new URL(theUrl), saveAs); + imageURLs.add(post.getString("imageurl")); } } - waitForThreads(); + return imageURLs; } - @Override - public String getHost() { - return HOST; - } - - @Override - public String getGID(URL url) throws MalformedURLException { - String u = url.toExternalForm(); - String searchTerm = u.substring(u.indexOf("?") + 1); - return Utils.filesystemSafe(searchTerm); + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java index e801b2ad..8970c22b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java @@ -5,27 +5,25 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; -import java.security.InvalidAlgorithmParameterException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.net.ssl.SSLException; import javax.swing.JOptionPane; import org.json.JSONArray; import org.json.JSONObject; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractJSONRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class 
MediacrushRipper extends AlbumRipper { +public class MediacrushRipper extends AbstractJSONRipper { - private static final String DOMAIN = "mediacru.sh", - HOST = "mediacrush"; - /** Ordered list of preferred formats, sorted by preference (low-to-high) */ private static final Map PREFERRED_FORMATS = new HashMap(); static { @@ -45,8 +43,23 @@ public class MediacrushRipper extends AlbumRipper { } @Override - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + public String getHost() { + return "mediacrush"; + } + @Override + public String getDomain() { + return "mediacru.sh"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://[wm.]*mediacru\\.sh/([a-zA-Z0-9]+).*"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Could not find mediacru.sh page ID from " + url + + " expected format: http://mediacru.sh/pageid"); } @Override @@ -64,43 +77,43 @@ public class MediacrushRipper extends AlbumRipper { } @Override - public void rip() throws IOException { - String url = this.url.toExternalForm(); - logger.info(" Retrieving " + url); - sendUpdate(STATUS.LOADING_RESOURCE, url); - JSONObject json = null; + public JSONObject getFirstPage() throws IOException { try { - json = Http.url(url).getJSON(); - } catch (Exception re) { + String jsonString = Http.url(url) + .ignoreContentType() + .connection() + .execute().body(); + jsonString = jsonString.replace("&quot;", "\""); + return new JSONObject(jsonString); + } catch (SSLException re) { // Check for >1024 bit encryption but in older versions of Java - if (re.getCause().getCause() instanceof InvalidAlgorithmParameterException) { - // It's the bug. Suggest downloading the latest version.
- int selection = JOptionPane.showOptionDialog(null, - "You need to upgrade to the latest Java (7+) to rip this album.\n" - + "Do you want to open java.com and download the latest version?", - "RipMe - Java Error", - JOptionPane.OK_CANCEL_OPTION, - JOptionPane.ERROR_MESSAGE, - null, - new String[] {"Go to java.com", "Cancel"}, - 0); - sendUpdate(STATUS.RIP_ERRORED, "Your version of Java can't handle some secure websites"); - if (selection == 0) { - URL javaUrl = new URL("https://www.java.com/en/download/"); - try { - Desktop.getDesktop().browse(javaUrl.toURI()); - } catch (URISyntaxException use) { } - } - return; + // It's the bug. Suggest downloading the latest version. + int selection = JOptionPane.showOptionDialog(null, + "You need to upgrade to the latest Java (7+) to rip this album.\n" + + "Do you want to open java.com and download the latest version?", + "RipMe - Java Error", + JOptionPane.OK_CANCEL_OPTION, + JOptionPane.ERROR_MESSAGE, + null, + new String[] {"Go to java.com", "Cancel"}, + 0); + sendUpdate(STATUS.RIP_ERRORED, "Your version of Java can't handle some secure websites"); + if (selection == 0) { + URL javaUrl = new URL("https://www.java.com/en/download/"); + try { + Desktop.getDesktop().browse(javaUrl.toURI()); + } catch (URISyntaxException use) { } } - throw new IOException("Unexpected error occurred", re); + throw new IOException("Cannot rip due to limitations in Java installation, consider upgrading Java", re.getCause()); } + catch (Exception e) { + throw new IOException("Unexpected error: " + e.getMessage(), e); + } + } - // Convert to JSON - if (!json.has("files")) { - sendUpdate(STATUS.RIP_ERRORED, "No files found at " + url); - throw new IOException("Could not find any files at " + url); - } + @Override + public List getURLsFromJSON(JSONObject json) { + List imageURLs = new ArrayList(); // Iterate over all files JSONArray files = json.getJSONArray("files"); for (int i = 0; i < files.length(); i++) { @@ -109,20 +122,21 @@ public class 
MediacrushRipper extends AlbumRipper { JSONArray subfiles = file.getJSONArray("files"); String preferredUrl = getPreferredUrl(subfiles); if (preferredUrl == null) { + logger.warn("Could not find 'file' inside of " + file); sendUpdate(STATUS.DOWNLOAD_ERRORED, "Could not find file inside of " + file); continue; } - // Download - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", i + 1); - } - addURLToDownload(new URL(preferredUrl), prefix); + imageURLs.add(preferredUrl); } - waitForThreads(); + return imageURLs; } - + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + /** * Iterates over list if "file" objects and returns the preferred * image format. @@ -149,20 +163,4 @@ public class MediacrushRipper extends AlbumRipper { } return preferredUrl; } - - @Override - public String getHost() { - return HOST; - } - - @Override - public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("https?://[wm.]*mediacru\\.sh/([a-zA-Z0-9]+).*"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } - throw new MalformedURLException("Could not find mediacru.sh page ID from " + url - + " expected format: http://mediacru.sh/pageid"); - } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java index 62369ce7..17dcc15b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java @@ -41,6 +41,25 @@ public class MotherlessRipper extends AlbumRipper { public URL sanitizeURL(URL url) throws MalformedURLException { return url; } + /* + @Override + public Document getFirstPage() throws IOException { + + } + @Override + public Document getNextPage(Document doc) throws IOException { + + } + @Override + 
public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + return imageURLs; + } + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + */ @Override public String getGID(URL url) throws MalformedURLException { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java index 5d0932c8..7704d633 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java @@ -15,8 +15,7 @@ import com.rarchives.ripme.utils.Utils; public class XhamsterRipper extends AlbumRipper { - private static final String DOMAIN = "xhamster.com", - HOST = "xhamster"; + private static final String HOST = "xhamster"; public XhamsterRipper(URL url) throws IOException { super(url); @@ -24,7 +23,9 @@ public class XhamsterRipper extends AlbumRipper { @Override public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + Pattern p = Pattern.compile("^https?://[wm.]*xhamster\\.com/photos/gallery/[0-9]+.*$"); + Matcher m = p.matcher(url.toExternalForm()); + return m.matches(); } @Override diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/CliphunterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/CliphunterRipper.java new file mode 100644 index 00000000..19048f8a --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/CliphunterRipper.java @@ -0,0 +1,77 @@ +package com.rarchives.ripme.ripper.rippers.video; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONObject; + +import com.rarchives.ripme.ripper.VideoRipper; +import com.rarchives.ripme.utils.Base64; +import com.rarchives.ripme.utils.Http; + +public class CliphunterRipper extends VideoRipper { 
+ + private static final String HOST = "cliphunter"; + private static final String decryptString="{'$':':','&':'.','(':'=','-':'-','_':'_','^':'&','a':'h','c':'c','b':'b','e':'v','d':'e','g':'f','f':'o','i':'d','m':'a','l':'n','n':'m','q':'t','p':'u','r':'s','w':'w','v':'p','y':'l','x':'r','z':'i','=':'/','?':'?'}"; + private static final JSONObject decryptDict = new JSONObject(decryptString); + + public CliphunterRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return HOST; + } + + @Override + public boolean canRip(URL url) { + Pattern p = Pattern.compile("^https?://[wm.]*cliphunter\\.com/w/[0-9]+.*$"); + Matcher m = p.matcher(url.toExternalForm()); + return m.matches(); + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + return url; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://[wm.]*cliphunter\\.com/w/([0-9]+).*$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + + throw new MalformedURLException( + "Expected cliphunter format:" + + "cliphunter.com/w/####..." 
+ + " Got: " + url); + } + + @Override + public void rip() throws IOException { + logger.info("Retrieving " + this.url); + String html = Http.url(url).get().html(); + String jsonString = html.substring(html.indexOf("var flashVars = {d: '") + 21); + jsonString = jsonString.substring(0, jsonString.indexOf("'")); + JSONObject json = new JSONObject(new String(Base64.decode(jsonString))); + JSONObject jsonURL = new JSONObject(new String(Base64.decode(json.getString("url")))); + String encryptedURL = jsonURL.getJSONObject("u").getString("l"); + String vidURL = ""; + for (char c : encryptedURL.toCharArray()) { + if (decryptDict.has(Character.toString(c))) { + vidURL += decryptDict.getString(Character.toString(c)); + } + else { + vidURL += c; + } + } + addURLToDownload(new URL(vidURL), HOST + "_" + getGID(this.url)); + waitForThreads(); + } +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XhamsterRipper.java new file mode 100644 index 00000000..c6c48c2d --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XhamsterRipper.java @@ -0,0 +1,66 @@ +package com.rarchives.ripme.ripper.rippers.video; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; + +import com.rarchives.ripme.ripper.VideoRipper; +import com.rarchives.ripme.utils.Http; + +public class XhamsterRipper extends VideoRipper { + + private static final String HOST = "xhamster"; + + public XhamsterRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return HOST; + } + + @Override + public boolean canRip(URL url) { + Pattern p = Pattern.compile("^https?://[wm.]*xhamster\\.com/movies/[0-9]+.*$"); + Matcher m = p.matcher(url.toExternalForm()); + return 
m.matches(); + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + return url; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://[wm.]*xhamster\\.com/movies/([0-9]+).*$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + + throw new MalformedURLException( + "Expected xhamster format:" + + "xhamster.com/movies/####" + + " Got: " + url); + } + + @Override + public void rip() throws IOException { + logger.info("Retrieving " + this.url); + Document doc = Http.url(url).get(); + Elements videos = doc.select("video"); + if (videos.size() == 0) { + throw new IOException("Could not find Embed code at " + url); + } + String vidUrl = videos.first().attr("file"); + addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url)); + waitForThreads(); + } +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/utils/Http.java b/src/main/java/com/rarchives/ripme/utils/Http.java index 9a97a00a..3b3c4aca 100644 --- a/src/main/java/com/rarchives/ripme/utils/Http.java +++ b/src/main/java/com/rarchives/ripme/utils/Http.java @@ -103,6 +103,9 @@ public class Http { } // Getters + public Connection connection() { + return connection; + } public Document get() throws IOException { connection.method(Method.GET); return response().parse();