From cf414e1d44d6546e3390fd0f2122296135b9e5c4 Mon Sep 17 00:00:00 2001 From: Gaboso Date: Mon, 19 Feb 2018 21:34:51 -0400 Subject: [PATCH 01/61] Added shebang line to build.sh --- build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build.sh b/build.sh index 7e7c3221..a3ec0242 100755 --- a/build.sh +++ b/build.sh @@ -1 +1,2 @@ +#!/usr/bin/env bash mvn clean compile assembly:single \ No newline at end of file From 4b7db493d1dbd9f39038b70b822a32bf1965f89c Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 21 Feb 2018 18:38:10 -0500 Subject: [PATCH 02/61] Added support for incase.buttsimthy.com --- .../ripme/ripper/rippers/WordpressComicRipper.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java index 9401297d..dbc44585 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java @@ -37,6 +37,7 @@ public class WordpressComicRipper extends AbstractHTMLRipper { private static List explicit_domains = Arrays.asList( "www.totempole666.com", "buttsmithy.com", + "incase.buttsmithy.com", "themonsterunderthebed.net", "prismblush.com", "www.konradokonski.com", @@ -87,6 +88,12 @@ public class WordpressComicRipper extends AbstractHTMLRipper { return true; } + Pattern buttsmithyIncasePat = Pattern.compile("https?://incase.buttsmithy.com/comic/([a-zA-Z0-9_-]*)/?$"); + Matcher buttsmithyIncaseMat = buttsmithyIncasePat.matcher(url.toExternalForm()); + if (buttsmithyIncaseMat.matches()) { + return true; + } + Pattern theMonsterUnderTheBedPat = Pattern.compile("https?://themonsterunderthebed.net/\\?comic=([a-zA-Z0-9_-]*)/?$"); Matcher theMonsterUnderTheBedMat = theMonsterUnderTheBedPat.matcher(url.toExternalForm()); if (theMonsterUnderTheBedMat.matches()) { @@ -178,6 +185,12 @@ public class WordpressComicRipper extends AbstractHTMLRipper { return getHost() + "_" + prismblushMat.group(1); } + Pattern buttsmithyIncasePat = Pattern.compile("https?://incase.buttsmithy.com/comic/([a-zA-Z0-9_-]*)/?$"); + Matcher buttsmithyIncaseMat = buttsmithyIncasePat.matcher(url.toExternalForm()); + if (buttsmithyIncaseMat.matches()) { + return getHost() + "_" + buttsmithyIncaseMat.group(1).replaceAll("-page-\\d", "").replaceAll("-pg-\\d", ""); + } + Pattern comicsxxxPat = Pattern.compile("https?://comics-xxx.com/([a-zA-Z0-9_\\-]*)/?$"); Matcher comicsxxxMat = comicsxxxPat.matcher(url.toExternalForm()); if (comicsxxxMat.matches()) { From 66739e4c7cba2b049dd01b554947ac6c705a8fca Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 18:10:30 -0500 Subject: [PATCH 03/61] Implemented a last seen feature; Added comments --- src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index edddea78..edcff83d 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -44,7 +44,8 @@ public abstract class AbstractRipper public abstract String getHost(); public abstract String getGID(URL url) throws MalformedURLException; public boolean hasASAPRipping() { return false; } - + // Everytime addUrlToDownload skips a already downloaded url this increases by 1 + public int alreadyDownloadedUrls = 0; private boolean shouldStop = false; private boolean thisIsATest = false; @@ -194,9 +195,11 @@ public abstract class AbstractRipper * False if failed to download */ protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + // Don't re-add the url if it was downloaded in a previous rip if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) { if (hasDownloadedURL(url.toExternalForm())) { sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm()); + alreadyDownloadedUrls += 1; return false; } } From 8c86377a7faccbb941b82802e295a983b925251d Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 18:10:38 -0500 Subject: [PATCH 04/61] Implemented a last seen feature; Added comments --- .../com/rarchives/ripme/ripper/AbstractHTMLRipper.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index e71eb389..d49c3292 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -68,7 +68,14 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { Document doc = getFirstPage(); while (doc != null) { + logger.debug("alreadyDownloadedUrls is " + alreadyDownloadedUrls); + if (alreadyDownloadedUrls >= Utils.getConfigInteger("skip_after_already_seen", -1) && !isThisATest()) { + sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip"); + break; + } List imageURLs = getURLsFromPage(doc); + // If hasASAPRipping() returns true then the ripper will handle downloading the files + // if not it's done in the following block of code if (!hasASAPRipping()) { // Remove all but 1 image if (isThisATest()) { From c0a0b9dce809c40efda7aa5f0f3c5cf559a68c1b Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 19:51:28 -0500 Subject: [PATCH 05/61] Removed debug statement; Changed option name --- .../java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index d49c3292..49a0fc7e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -68,8 +68,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { Document doc = getFirstPage(); while (doc != null) { - logger.debug("alreadyDownloadedUrls is " + alreadyDownloadedUrls); - if (alreadyDownloadedUrls >= Utils.getConfigInteger("skip_after_already_seen", -1) && !isThisATest()) { + if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", -1) && !isThisATest()) { sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip"); break; } From 850be1ed1b2e0b8cbc4846a9ae91c62fb3b44445 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 19:54:36 -0500 Subject: [PATCH 06/61] Disabled FivehundredpxRipper test --- .../rippers/FivehundredpxRipperTest.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java index 538d493c..214220b8 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java @@ -1,13 +1,15 @@ -package com.rarchives.ripme.tst.ripper.rippers; +//package com.rarchives.ripme.tst.ripper.rippers; +// +//import java.io.IOException; +//import java.net.URL; +// +//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper; +// +//public class FivehundredpxRipperTest extends RippersTest { +// public void test500pxAlbum() throws IOException { +// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman")); +// testRipper(ripper); +// } +//} -import java.io.IOException; -import java.net.URL; - -import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper; - -public class FivehundredpxRipperTest extends RippersTest { - public void test500pxAlbum() throws IOException { - FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman")); - testRipper(ripper); - } -} +// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438 \ No newline at end of file From cac3ca161525a28fb12a08a645f399eda500bff5 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 20:22:07 -0500 Subject: [PATCH 07/61] Fixed 8muses --- .../com/rarchives/ripme/ripper/rippers/EightmusesRipper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index b7b5658f..621a4786 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { Elements pageImages = page.getElementsByClass("c-tile"); for (Element thumb : pageImages) { // If true this link is a sub album - if (thumb.attr("href").contains("/comix/album/")) { + if (thumb.attr("href").contains("/comics/album/")) { String subUrl = "https://www.8muses.com" + thumb.attr("href"); try { logger.info("Retrieving " + subUrl); @@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.warn("Error while loading subalbum " + subUrl, e); } - } else if (thumb.attr("href").contains("/comix/picture/")) { + } else if (thumb.attr("href").contains("/comics/picture/")) { + logger.info("This page is a album"); logger.info("Ripping image"); if (super.isStopped()) break; // Find thumbnail image source From c965de94929799f3326d5779774613de51fde409 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 20:26:06 -0500 Subject: [PATCH 08/61] Updated regex --- .../com/rarchives/ripme/ripper/rippers/EightmusesRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 621a4786..43873cf9 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$"); + Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$"); Matcher m = p.matcher(url.toExternalForm()); if (!m.matches()) { throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url); From a5f08ff7c1b728af54e9d619cc48f3e66a35d65f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 20:37:36 -0500 Subject: [PATCH 09/61] Added unit test for new url format --- .../ripme/tst/ripper/rippers/EightmusesRipperTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java index 469c330a..4a6c3539 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java @@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest { // A simple image album EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); testRipper(ripper); + // Test the new url format + ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); + testRipper(ripper); // Test pages with subalbums ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor")); testRipper(ripper); From af2c5f3fe09e3446aae75047daa6232ff2e39f90 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 21:02:22 -0500 Subject: [PATCH 10/61] 1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 476068b2..dbf36b0d 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.19 + 1.7.20 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index de0dc0a6..f691644e 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.19", + "latestVersion": "1.7.20", "changeList": [ + "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", "1.7.19: imgurRipper no longer tries to add ?1 to file names", "1.7.18: AlbumRipper now removes bad chars from file names", "1.7.17: Fixed hentai.cafe autorip from clipboard", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 37ce6cfe..57a30407 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.19"; + private static final String DEFAULT_VERSION = "1.7.20"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 34e27cd2facaba6016e228afc6e717e59d8fca6a Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 21:14:35 -0500 Subject: [PATCH 11/61] Changed default value for history.end_rip_after_already_seen --- .../java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 49a0fc7e..e0fd3548 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -68,7 +68,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { Document doc = getFirstPage(); while (doc != null) { - if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", -1) && !isThisATest()) { + if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) { sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip"); break; } From 9f922e79650a7f03818f70b4170bc84771f64c43 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 21:23:31 -0500 Subject: [PATCH 12/61] 1.7.21: Fixed last seen feature --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index dbf36b0d..f732d378 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.20 + 1.7.21 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index f691644e..bfb0354f 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.20", + "latestVersion": "1.7.21", "changeList": [ + "1.7.21: Fixed last seen feature", "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", "1.7.19: imgurRipper no longer tries to add ?1 to file names", "1.7.18: AlbumRipper now removes bad chars from file names", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 57a30407..27b42258 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.20"; + private static final String DEFAULT_VERSION = "1.7.21"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From e6c43bb48210f55d59725b7158a29adbaa3d977d Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 5 Mar 2018 13:16:19 -0500 Subject: [PATCH 13/61] Added the normalizeUrl func, which allows a ripper to normalize a url before adding it to url histroy/check if its in url history --- .../rarchives/ripme/ripper/AbstractRipper.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index edcff83d..ff6b4102 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -61,7 +61,13 @@ public abstract class AbstractRipper } } + + /** + * Adds a URL to the url history file + * @param downloadedURL URL to check if downloaded + */ private void writeDownloadedURL(String downloadedURL) throws IOException { + downloadedURL = normalizeUrl(downloadedURL); BufferedWriter bw = null; FileWriter fw = null; try { @@ -86,6 +92,15 @@ public abstract class AbstractRipper } } } + + + /** + * Normalize a URL + * @param url URL to check if downloaded + */ + public String normalizeUrl(String url) { + return url; + } /** * Checks to see if Ripme has already downloaded a URL @@ -96,6 +111,7 @@ public abstract class AbstractRipper */ private boolean hasDownloadedURL(String url) { File file = new File(URLHistoryFile); + url = normalizeUrl(url); try { Scanner scanner = new Scanner(file); while (scanner.hasNextLine()) { From 48ffcf68d37d9bce2e866b4456af8f65dd8f3fdf Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 5 Mar 2018 13:20:12 -0500 Subject: [PATCH 14/61] Added normalizeUrl --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 076fcfc6..d1f16535 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -52,6 +52,12 @@ public class InstagramRipper extends AbstractHTMLRipper { return san_url; } + @Override + public String normalizeUrl(String url) { + // Remove the date sig from the url + return url.replaceAll("/[A-Z0-9]{8}/", "/"); + } + private List getPostsFromSinglePage(Document Doc) { List imageURLs = new ArrayList<>(); JSONArray datas; From a54b71065c6eda9272510b755d8304a4db04c72f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 6 Mar 2018 06:58:21 -0500 Subject: [PATCH 15/61] 1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index f732d378..9859442c 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.21 + 1.7.22 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index bfb0354f..33c2848c 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.21", + "latestVersion": "1.7.22", "changeList": [ + "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", "1.7.21: Fixed last seen feature", "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", "1.7.19: imgurRipper no longer tries to add ?1 to file names", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 27b42258..e198d672 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.21"; + private static final String DEFAULT_VERSION = "1.7.22"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 0aada1fd903f902108e74c0e4053de2a584814d7 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 8 Mar 2018 06:15:27 -0500 Subject: [PATCH 16/61] Added more features to readme --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 390106c2..e56276c1 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,9 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/ * Quickly downloads all images in an online album (see supported sites below) * Easily re-rip albums to fetch new content +* Built in updater +* Can rip images from tumblr in the size they were uploaded in [See here for how to enable](https://github.com/RipMeApp/ripme/wiki/Config-options#tumblrget_raw_image) +* Skips already downloaded images by default ## [List of Supported Sites](https://github.com/ripmeapp/ripme/wiki/Supported-Sites) From da8d6541f55cfefab56f074d22d73a2cac9799c9 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 8 Mar 2018 07:33:35 -0500 Subject: [PATCH 17/61] Fixed xvideos ripper --- .../ripper/rippers/video/XvideosRipper.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java index 06841ce9..4f2bac97 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java @@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper { public void rip() throws IOException { logger.info(" Retrieving " + this.url); Document doc = Http.url(this.url).get(); - Elements embeds = doc.select("embed"); - if (embeds.size() == 0) { - throw new IOException("Could not find Embed code at " + url); - } - Element embed = embeds.get(0); - String vars = embed.attr("flashvars"); - for (String var : vars.split("&")) { - if (var.startsWith("flv_url=")) { - String vidUrl = var.substring("flv_url=".length()); - vidUrl = URLDecoder.decode(vidUrl, "UTF-8"); - addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url)); + Elements scripts = doc.select("script"); + for (Element e : scripts) { + if (e.html().contains("html5player.setVideoUrlHigh")) { + logger.info("Found the right script"); + String[] lines = e.html().split("\n"); + for (String line: lines) { + if (line.contains("html5player.setVideoUrlHigh")) { + String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", ""); + addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url)); + waitForThreads(); + return; + } + } } } - waitForThreads(); + throw new IOException("Unable to find video url at " + this.url.toExternalForm()); } } \ No newline at end of file From e5096736bfa99217bea948dc8c06486ac8409a38 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 09:25:50 -0500 Subject: [PATCH 18/61] Added tsuminoRipper --- .../ripme/ripper/rippers/TsuminoRipper.java | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java new file mode 100644 index 00000000..ff6e8829 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -0,0 +1,108 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class TsuminoRipper extends AbstractHTMLRipper { + private Map cookies = new HashMap<>(); + + public TsuminoRipper(URL url) throws IOException { + super(url); + } + + private JSONArray getPageUrls() { + String postURL = "http://www.tsumino.com/Read/Load"; + try { + // This sessionId will expire and need to be replaced + cookies.put("ASP.NET_SessionId","c4rbzccf0dvy3e0cloolmlkq"); + logger.info(cookies); + Document doc = Jsoup.connect(postURL).data("q", getAlbumID()).userAgent(USER_AGENT).cookies(cookies).referrer("http://www.tsumino.com/Read/View/" + getAlbumID()).post(); + String jsonInfo = doc.html().replaceAll("","").replaceAll("", "").replaceAll("", "").replaceAll("", "") + .replaceAll("", "").replaceAll("\n", ""); + logger.info(jsonInfo); + JSONObject json = new JSONObject(jsonInfo); + logger.info(json.getJSONArray("reader_page_urls")); + return json.getJSONArray("reader_page_urls"); + } catch (IOException e) { + logger.info(e); + return null; + } + } + + @Override + public String getHost() { + return "tsumino"; + } + + @Override + public String getDomain() { + return "tsumino.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/([a-zA-Z0-9_-]*)"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1) + "_" + m.group(2); + } + throw new MalformedURLException("Expected tsumino URL format: " + + "tsumino.com/Book/Info/ID/TITLE - got " + url + " instead"); + } + + private String getAlbumID() { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/\\S*"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + return null; + } + + @Override + public Document getFirstPage() throws IOException { + Connection.Response resp = Http.url(url).response(); + cookies.putAll(resp.cookies()); + // We need to perform a get on http://www.tsumino.com/Read/View/albumID/1 or else the + //www.tsumino.com/Read/Load endpoint 404s + resp = Http.url("http://www.tsumino.com/Book/Info/" + getAlbumID()).response(); + cookies.putAll(resp.cookies()); + return resp.parse(); + } + + @Override + public List getURLsFromPage(Document doc) { + JSONArray imageIds = getPageUrls(); + List result = new ArrayList<>(); + for (int i = 0; i < imageIds.length(); i++) { + result.add("http://www.tsumino.com/Image/Object?name=" + URLEncoder.encode(imageIds.getString(i))); + } + + return result; + } + + @Override + public void downloadURL(URL url, int index) { + sleep(1000); + addURLToDownload(url, getPrefix(index)); + } +} From 6769bbfb7d6c0defa070ffe29dd084b9f7af0894 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 10:39:02 -0500 Subject: [PATCH 19/61] Now tells the user if they need to fill out a captcha --- .../com/rarchives/ripme/ripper/rippers/TsuminoRipper.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java index ff6e8829..9ca91e45 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ui.RipStatusMessage; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Connection; @@ -44,6 +45,8 @@ public class TsuminoRipper extends AbstractHTMLRipper { return json.getJSONArray("reader_page_urls"); } catch (IOException e) { logger.info(e); + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_ERRORED, "Unable to download album, please compete the captcha at http://www.tsumino.com/Read/Auth/" + + getAlbumID() + " and try again"); return null; } } @@ -82,10 +85,7 @@ public class TsuminoRipper extends AbstractHTMLRipper { public Document getFirstPage() throws IOException { Connection.Response resp = Http.url(url).response(); cookies.putAll(resp.cookies()); - // We need to perform a get on http://www.tsumino.com/Read/View/albumID/1 or else the - //www.tsumino.com/Read/Load endpoint 404s - resp = Http.url("http://www.tsumino.com/Book/Info/" + getAlbumID()).response(); - cookies.putAll(resp.cookies()); + logger.info(resp.parse()); return resp.parse(); } From 4af469189321057fef03c167f0177d2086d3086c Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 13:36:53 -0500 Subject: [PATCH 20/61] lastseen feature now works with instagram --- .../ripme/ripper/rippers/InstagramRipper.java | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index d1f16535..364b645c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils; public class InstagramRipper extends AbstractHTMLRipper { + String nextPageID = ""; private String userID; @@ -198,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper { @Override public List getURLsFromPage(Document doc) { - String nextPageID = ""; List imageURLs = new ArrayList<>(); JSONObject json = new JSONObject(); try { @@ -261,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper { break; } } - // Rip the next page - if (!nextPageID.equals("") && !isThisATest()) { - if (url.toExternalForm().contains("/tags/")) { - try { - // Sleep for a while to avoid a ban - sleep(2500); - if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get()); - } else { - getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get()); - } - } catch (IOException e) { - return imageURLs; - } - - } - try { - // Sleep for a while to avoid a ban - sleep(2500); - getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get()); - } catch (IOException e) { - return imageURLs; - } - } else { - logger.warn("Can't get net page"); - } } else { // We're ripping from a single page logger.info("Ripping from single page"); imageURLs = getPostsFromSinglePage(doc); @@ -296,6 +270,37 @@ public class InstagramRipper extends AbstractHTMLRipper { return imageURLs; } + @Override + public Document getNextPage(Document doc) { + if (!nextPageID.equals("") && !isThisATest()) { + if (url.toExternalForm().contains("/tags/")) { + try { + // Sleep for a while to avoid a ban + sleep(2500); + if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { + return Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); + } else { + return Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); + } + + } catch (IOException e) { + return null; + } + + } + try { + // Sleep for a while to avoid a ban + sleep(2500); + return Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); + } catch (IOException e) { + return null; + } + } else { + logger.warn("Can't get net page"); + } + return null; + } + @Override public void downloadURL(URL url, int index) { addURLToDownload(url); From ecf7a4b623605cb071f0e8299c9d51f09461e716 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 18:06:01 -0500 Subject: [PATCH 21/61] IG ripper now no longer errors out on last page --- .../ripme/ripper/rippers/InstagramRipper.java | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 364b645c..ab44edfd 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -271,34 +271,43 @@ public class InstagramRipper extends AbstractHTMLRipper { } @Override - public Document getNextPage(Document doc) { + public Document getNextPage(Document doc) throws IOException { + Document toreturn; if (!nextPageID.equals("") && !isThisATest()) { if (url.toExternalForm().contains("/tags/")) { try { // Sleep for a while to avoid a ban sleep(2500); if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - return Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); } else { - return Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); } + logger.info(toreturn.html()); + if (!hasImage(toreturn)) { + throw new IOException("No more pages"); + } + return toreturn; } catch (IOException e) { - return null; + throw new IOException("No more pages"); } } try { // Sleep for a while to avoid a ban sleep(2500); - return Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); + toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); + if (!hasImage(toreturn)) { + throw new IOException("No more pages"); + } + return toreturn; } catch (IOException e) { return null; } } else { - logger.warn("Can't get net page"); + throw new IOException("No more pages"); } - return null; } @Override @@ -306,4 +315,20 @@ public class InstagramRipper extends AbstractHTMLRipper { addURLToDownload(url); } + private boolean hasImage(Document doc) { + try { + JSONObject json = getJSONFromPage(doc); + JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); + logger.info(datas.length()); + if (datas.length() == 0) { + return false; + } + return true; + } catch (IOException e) { + return false; + } + + } + } From a4880ecf804fb91a0c559e38351556cb0641e20f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 10 Mar 2018 12:33:31 -0500 Subject: [PATCH 22/61] 1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 9859442c..547d26e9 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.22 + 1.7.23 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 33c2848c..bc2acca8 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.22", + "latestVersion": "1.7.23", "changeList": [ + "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", "1.7.21: Fixed last seen feature", "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index e198d672..318eec4c 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.22"; + private static final String DEFAULT_VERSION = "1.7.23"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From e88392b8b09467f3f8e92f909222df04fc383e11 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 10 Mar 2018 22:53:27 -0500 Subject: [PATCH 23/61] Added sta ripper --- .../ripme/ripper/rippers/StaRipper.java | 112 ++++++++++++++++++ .../tst/ripper/rippers/StaRipperTest.java | 13 ++ 2 files changed, 125 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/StaRipper.java create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/StaRipperTest.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/StaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/StaRipper.java new file mode 100644 index 00000000..4cfaf485 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/StaRipper.java @@ -0,0 +1,112 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class StaRipper extends AbstractHTMLRipper { + + public StaRipper(URL url) throws IOException { + super(url); + } + + private Map cookies = new HashMap<>(); + + @Override + public String getHost() { + return "sta"; + } + + @Override + public String getDomain() { + return "sta.sh"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https://sta.sh/([A-Za-z0-9]+)"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected sta.sh URL format: " + + "sta.sh/ALBUMID - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + for (Element el : doc.select("span > span > a.thumb")) { + String thumbPageURL = el.attr("href"); + Document thumbPage = null; + if (checkURL(thumbPageURL)) { + try { + Connection.Response resp = Http.url(new URL(thumbPageURL)).response(); + cookies.putAll(resp.cookies()); + thumbPage = resp.parse(); + } catch (MalformedURLException e) { + logger.info(thumbPageURL + " is a malformed URL"); + } catch (IOException e) { + logger.info(e.getMessage()); + } + String imageDownloadUrl = thumbPage.select("a.dev-page-download").attr("href"); + if (imageDownloadUrl != null && !imageDownloadUrl.equals("")) { + result.add(getImageLinkFromDLLink(imageDownloadUrl)); + } + } + + } + return result; + } + + private boolean checkURL(String url) { + try { + new URL(url); + return true; + } catch (MalformedURLException e) { + return false; + } + } + + private String getImageLinkFromDLLink(String url) { + try { + Connection.Response response = Jsoup.connect(url) + .userAgent(USER_AGENT) + .timeout(10000) + .cookies(cookies) + .followRedirects(false) + .execute(); + String imageURL = response.header("Location"); + logger.info(imageURL); + return imageURL; + } catch (IOException e) { + logger.info("Got error message " + e.getMessage() + " trying to download " + url); + return null; + } + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/StaRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/StaRipperTest.java new file mode 100644 index 00000000..128f3f17 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/StaRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.StaRipper; + +public class StaRipperTest extends RippersTest { + public void testRip() throws IOException { + StaRipper ripper = new StaRipper(new URL("https://sta.sh/2hn9rtavr1g")); + testRipper(ripper); + } +} \ No newline at end of file From 6245680fefdfbd13f63b838b6023a93e64bdeb1d Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 11 Mar 2018 22:35:27 -0400 Subject: [PATCH 24/61] Added ripper for femjoyhunter.com --- .../ripper/rippers/FemjoyhunterRipper.java | 64 +++++++++++++++++++ .../tst/ripper/rippers/FemJoyRipperTest.java | 4 ++ .../rippers/FemJoyhunterRipperTest.java | 13 ++++ 3 files changed, 81 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyhunterRipperTest.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java new file mode 100644 index 00000000..522a9720 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java @@ -0,0 +1,64 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class FemjoyhunterRipper extends AbstractHTMLRipper { + + public FemjoyhunterRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "femjoyhunter"; + } + + @Override + public String getDomain() { + return "femjoyhunter.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://www.femjoyhunter.com/([a-zA-Z0-9_-]+)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected femjoyhunter URL format: " + + "femjoyhunter.com/ID - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + for (Element el : doc.select("ul.gallery-b > li > a")) { + result.add(el.attr("href")); + } + return result; + } + + @Override + public void downloadURL(URL url, int index) { + + addURLToDownload(url, getPrefix(index), "", "https://a2h6m3w6.ssl.hwcdn.net/", null); + } +} \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java new file mode 100644 index 00000000..85a26d07 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java @@ -0,0 +1,4 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +public class FemJoyRipperTest { +} diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyhunterRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyhunterRipperTest.java new file mode 100644 index 00000000..2d7dbdc8 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyhunterRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.FemjoyhunterRipper; + +public class FemjoyRipperTest extends RippersTest { + public void testRip() throws IOException { + FemjoyhunterRipper ripper = new FemjoyhunterRipper(new URL("https://www.femjoyhunter.com/alisa-i-got-nice-big-breasts-and-fine-ass-so-she-seems-to-be-a-hottest-brunette-5936/")); + testRipper(ripper); + } +} \ No newline at end of file From cdc70f50cc8b7c549b18cbba9b445ab49aedc321 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 11 Mar 2018 22:35:52 -0400 Subject: [PATCH 25/61] Removed wrongly named file --- .../rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java deleted file mode 100644 index 85a26d07..00000000 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyRipperTest.java +++ /dev/null @@ -1,4 +0,0 @@ -package com.rarchives.ripme.tst.ripper.rippers; - -public class FemJoyRipperTest { -} From 1555a6c276eeed0ca0c2c58f382c84656f76f7a5 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 11 Mar 2018 22:42:07 -0400 Subject: [PATCH 26/61] Fixed test --- ...{FemJoyhunterRipperTest.java => FemjoyhunterRipperTest.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/test/java/com/rarchives/ripme/tst/ripper/rippers/{FemJoyhunterRipperTest.java => FemjoyhunterRipperTest.java} (88%) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyhunterRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemjoyhunterRipperTest.java similarity index 88% rename from src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyhunterRipperTest.java rename to src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemjoyhunterRipperTest.java index 2d7dbdc8..fee634ef 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemJoyhunterRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemjoyhunterRipperTest.java @@ -5,7 +5,7 @@ import java.net.URL; import com.rarchives.ripme.ripper.rippers.FemjoyhunterRipper; -public class FemjoyRipperTest extends RippersTest { +public class FemjoyhunterRipperTest extends RippersTest { public void testRip() throws IOException { FemjoyhunterRipper ripper = new FemjoyhunterRipper(new URL("https://www.femjoyhunter.com/alisa-i-got-nice-big-breasts-and-fine-ass-so-she-seems-to-be-a-hottest-brunette-5936/")); testRipper(ripper); From 28ce42a54f2ad36de99fe825f0e8b4f0203d9076 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 11 Mar 2018 23:03:24 -0400 Subject: [PATCH 27/61] Added sinfest ripper --- .../ripme/ripper/rippers/SinfestRipper.java | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java new file mode 100644 index 00000000..d30e9b63 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java @@ -0,0 +1,80 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class SinfestRipper extends AbstractHTMLRipper { + + public SinfestRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "sinfest"; + } + + @Override + public String getDomain() { + return "sinfest.net"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://sinfest.net/view.php\\?date=([0-9-]*)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected sinfest URL format: " + + "sinfest.net/view.php?date=XXXX-XX-XX/ - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + Element elem = doc.select("td.style5 > a > img").last(); + logger.info(elem.parent().attr("href")); + if (elem == null || elem.parent().attr("href").equals("view.php?date=")) { + throw new IOException("No more pages"); + } + String nextPage = elem.parent().attr("href"); + // Some times this returns a empty string + // This for stops that + if (nextPage == "") { + return null; + } + else { + return Http.url("http://sinfest.net/" + nextPage).get(); + } + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + Element elem = doc.select("tbody > tr > td > img").last(); + result.add("http://sinfest.net/" + elem.attr("src")); + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} From 8a145d1ff458b13c007db5c14ca46b5c12d69111 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 11 Mar 2018 23:05:23 -0400 Subject: [PATCH 28/61] Added sinfest ripper test --- .../ripme/tst/ripper/rippers/SinfestRipperTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinfestRipperTest.java diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinfestRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinfestRipperTest.java new file mode 100644 index 00000000..c46e922c --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinfestRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.SinfestRipper; + +public class SinfestRipperTest extends RippersTest { + public void testRip() throws IOException { + SinfestRipper ripper = new SinfestRipper(new URL("http://sinfest.net/view.php?date=2000-01-17")); + testRipper(ripper); + } +} \ No newline at end of file From f51d9c03e933674e93094f709290b6e3c2edfc70 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 12 Mar 2018 11:35:26 -0400 Subject: [PATCH 29/61] Disabled flaky unit test --- .../tst/ripper/rippers/WordpressComicRipperTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/WordpressComicRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/WordpressComicRipperTest.java index e3d5c4a0..2f7dbcf9 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/WordpressComicRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/WordpressComicRipperTest.java @@ -83,10 +83,10 @@ public class WordpressComicRipperTest extends RippersTest { new URL("http://tnbtu.com/comic/01-00/")); testRipper(ripper); } - - public void test_pepsaga() throws IOException { - WordpressComicRipper ripper = new WordpressComicRipper( - new URL("http://shipinbottle.pepsaga.com/?p=281")); - testRipper(ripper); - } + // https://github.com/RipMeApp/ripme/issues/269 - Disabled test - WordpressRipperTest: various domains flaky in CI +// public void test_pepsaga() throws IOException { +// WordpressComicRipper ripper = new WordpressComicRipper( +// new URL("http://shipinbottle.pepsaga.com/?p=281")); +// testRipper(ripper); +// } } From c1f46d82516cda02cb086841d7f0dea14a402c4c Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 12 Mar 2018 11:50:03 -0400 Subject: [PATCH 30/61] 1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 547d26e9..7c7b831f 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.23 + 1.7.24 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index bc2acca8..3fc7fde5 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.23", + "latestVersion": "1.7.24", "changeList": [ + "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", "1.7.21: Fixed last seen feature", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 318eec4c..98191c33 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.23"; + private static final String DEFAULT_VERSION = "1.7.24"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From c9c85429456543cd4fedc8083b1b9614e1b84bc8 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 12 Mar 2018 12:40:13 -0400 Subject: [PATCH 31/61] Added another overload to addURLToDownload which allows the ripper to set the name of the file; Fixed 8muses filename length issue --- .../ripme/ripper/AbstractRipper.java | 29 +++++++++++++++---- .../ripper/rippers/EightmusesRipper.java | 4 +-- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index ff6b4102..6068ed18 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -174,7 +174,7 @@ public abstract class AbstractRipper * URL of the file * @param saveAs * Path of the local file to save the content to. - * @return True on success, flase on failure. + * @return True on success, false on failure. */ public abstract boolean addURLToDownload(URL url, File saveAs); @@ -206,11 +206,13 @@ public abstract class AbstractRipper * The HTTP referrer to use while downloading this file. * @param cookies * The cookies to send to the server while downloading this file. + * @param fileName + * The name that file will be written to * @return * True if downloaded successfully * False if failed to download */ - protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies, String fileName) { // Don't re-add the url if it was downloaded in a previous rip if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) { if (hasDownloadedURL(url.toExternalForm())) { @@ -225,9 +227,18 @@ public abstract class AbstractRipper logger.debug("Ripper has been stopped"); return false; } - logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies); - String saveAs = url.toExternalForm(); - saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies + ", fileName: " + fileName); + String saveAs; + if (fileName != null) { + saveAs = fileName; + // Get the extension of the file + String extension = url.toExternalForm().substring(url.toExternalForm().lastIndexOf(".") + 1); + saveAs = saveAs + "." + extension; + } else { + saveAs = url.toExternalForm(); + saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + } + if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } @@ -274,7 +285,11 @@ public abstract class AbstractRipper * @return True on success, flase on failure. */ protected boolean addURLToDownload(URL url, String prefix, String subdirectory) { - return addURLToDownload(url, prefix, subdirectory, null, null); + return addURLToDownload(url, prefix, subdirectory, null, null, null); + } + + protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + return addURLToDownload(url, prefix, subdirectory, referrer, cookies, null); } /** @@ -290,6 +305,8 @@ public abstract class AbstractRipper // Use empty subdirectory return addURLToDownload(url, prefix, ""); } + + /** * Waits for downloading threads to complete. */ diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 43873cf9..80ac5b93 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -125,7 +125,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.info("Retrieving full-size image location from " + imageHref); image = getFullSizeImage(imageHref); URL imageUrl = new URL(image); - addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies); + addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, ""); // X is our page index x++; @@ -180,6 +180,6 @@ public class EightmusesRipper extends AbstractHTMLRipper { @Override public String getPrefix(int index) { - return String.format("%03d_", index); + return String.format("%03d", index); } } From d4c3d6a025c25c4fbb143d6577d9ac5e5e65be49 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 12 Mar 2018 15:02:34 -0400 Subject: [PATCH 32/61] Added an option to use short names for 8muses --- .../ripme/ripper/rippers/EightmusesRipper.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 80ac5b93..77ca9102 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.utils.Utils; import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -125,7 +126,11 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.info("Retrieving full-size image location from " + imageHref); image = getFullSizeImage(imageHref); URL imageUrl = new URL(image); - addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, ""); + if (Utils.getConfigBoolean("8muses.use_short_names", false)) { + addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, ""); + } else { + addURLToDownload(imageUrl, getPrefixLong(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies); + } // X is our page index x++; @@ -178,8 +183,11 @@ public class EightmusesRipper extends AbstractHTMLRipper { addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies); } - @Override - public String getPrefix(int index) { + public String getPrefixLong(int index) { + return String.format("%03d_", index); + } + + public String getPrefixShort(int index) { return String.format("%03d", index); } } From fd2ff087dd430b4f46b5fcb56ba1e35ffb8d2055 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 13 Mar 2018 20:41:55 -0400 Subject: [PATCH 33/61] Fixed instagram ripper --- .../ripme/ripper/rippers/InstagramRipper.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index ab44edfd..14f78cf2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -212,7 +212,8 @@ public class InstagramRipper extends AbstractHTMLRipper { JSONArray datas = new JSONArray(); try { JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); + datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); } catch (JSONException e) { // Handle hashtag pages datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0) @@ -220,15 +221,16 @@ public class InstagramRipper extends AbstractHTMLRipper { } for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); - Long epoch = data.getLong("date"); + data = data.getJSONObject("node"); + Long epoch = data.getLong("taken_at_timestamp"); Instant instant = Instant.ofEpochSecond(epoch); String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC)); if (data.getString("__typename").equals("GraphSidecar")) { try { - Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("code"))).get(); + Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("shortcode"))).get(); List toAdd = getPostsFromSinglePage(slideShowDoc); for (int slideShowInt=0; slideShowInt Date: Tue, 13 Mar 2018 20:45:28 -0400 Subject: [PATCH 34/61] Ripper no longer supports tags --- .../ripme/ripper/rippers/InstagramRipper.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 14f78cf2..dbadf355 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -127,11 +127,11 @@ public class InstagramRipper extends AbstractHTMLRipper { return m.group(1); } - p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } +// p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); +// m = p.matcher(url.toExternalForm()); +// if (m.matches()) { +// return m.group(1); +// } throw new MalformedURLException("Unable to find user in " + url); } From 7356a13da13cd6dac580aa34ce197a690284aa46 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 14 Mar 2018 12:35:03 -0400 Subject: [PATCH 35/61] Minor code clean up --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index dbadf355..e78834ba 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -227,7 +227,7 @@ public class InstagramRipper extends AbstractHTMLRipper { String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC)); if (data.getString("__typename").equals("GraphSidecar")) { try { - Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("shortcode"))).get(); + Document slideShowDoc = Http.url(new URL("https://www.instagram.com/p/" + data.getString("shortcode"))).get(); List toAdd = getPostsFromSinglePage(slideShowDoc); for (int slideShowInt=0; slideShowInt Date: Wed, 14 Mar 2018 12:58:34 -0400 Subject: [PATCH 36/61] 1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 7c7b831f..a36b52ef 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.24 + 1.7.25 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 3fc7fde5..1e4fb4dc 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.24", + "latestVersion": "1.7.25", "changeList": [ + "1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 98191c33..6c31f378 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.24"; + private static final String DEFAULT_VERSION = "1.7.25"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 545bfce7c9f8b8845c617954b65cd934d7af1a75 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 15 Mar 2018 13:18:29 -0400 Subject: [PATCH 37/61] Insagram ripper now can get all pages of a profile --- .../ripme/ripper/rippers/InstagramRipper.java | 93 ++++++++++++++----- 1 file changed, 69 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index e78834ba..93de0e8c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -1,8 +1,11 @@ package com.rarchives.ripme.ripper.rippers; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; +import java.net.URLConnection; import java.time.*; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -25,6 +28,7 @@ import com.rarchives.ripme.utils.Utils; public class InstagramRipper extends AbstractHTMLRipper { String nextPageID = ""; + private String qHash; private String userID; @@ -136,7 +140,21 @@ public class InstagramRipper extends AbstractHTMLRipper { throw new MalformedURLException("Unable to find user in " + url); } + private String stripHTMLTags(String t) { + t = t.replaceAll("\n" + + " \n" + + " ", ""); + t.replaceAll("\n" + + "", ""); + return t; + } + + private JSONObject getJSONFromPage(Document firstPage) throws IOException { + // Check if this page is HTML + JSON or jsut json + if (!firstPage.html().contains("window._sharedData =")) { + return new JSONObject(stripHTMLTags(firstPage.html())); + } String jsonText = ""; try { for (Element script : firstPage.select("script[type=text/javascript]")) { @@ -153,8 +171,10 @@ public class InstagramRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { - userID = getGID(url); - return Http.url(url).get(); + Document p = Http.url(url).get(); + // Get the query hash so we can download the next page + qHash = getQHash(p); + return p; } private String getVideoFromPage(String videoID) { @@ -210,14 +230,15 @@ public class InstagramRipper extends AbstractHTMLRipper { if (!url.toExternalForm().contains("/p/")) { JSONArray datas = new JSONArray(); + // This first try only works on data from the first page try { JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", ""); datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); } catch (JSONException e) { - // Handle hashtag pages - datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0) - .getJSONObject("tag").getJSONObject("media").getJSONArray("nodes"); + datas = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); } for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); @@ -281,14 +302,11 @@ public class InstagramRipper extends AbstractHTMLRipper { // Sleep for a while to avoid a ban sleep(2500); if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).ignoreContentType().get(); } else { - toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).ignoreContentType().get(); } logger.info(toreturn.html()); - if (!hasImage(toreturn)) { - throw new IOException("No more pages"); - } return toreturn; } catch (IOException e) { @@ -299,8 +317,9 @@ public class InstagramRipper extends AbstractHTMLRipper { try { // Sleep for a while to avoid a ban sleep(2500); - toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); - if (!hasImage(toreturn)) { + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + + "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}").ignoreContentType().get(); + if (!pageHasImages(toreturn)) { throw new IOException("No more pages"); } return toreturn; @@ -317,20 +336,46 @@ public class InstagramRipper extends AbstractHTMLRipper { addURLToDownload(url); } - private boolean hasImage(Document doc) { - try { - JSONObject json = getJSONFromPage(doc); - JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - JSONArray datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") - .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); - logger.info(datas.length()); - if (datas.length() == 0) { - return false; - } - return true; - } catch (IOException e) { + private boolean pageHasImages(Document doc) { + JSONObject json = new JSONObject(stripHTMLTags(doc.html())); + int numberOfImages = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length(); + if (numberOfImages == 0) { return false; } + return true; + } + + private String getQHash(Document doc) { + String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href"); + StringBuilder sb = new StringBuilder(); + Document jsPage; + try { + // We can't use Jsoup here because it won't download a non-html file larger than a MB + // even if you set maxBodySize to 0 + URLConnection connection = new URL(jsFileURL).openConnection(); + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream())); + String line; + while ((line = in.readLine()) != null) { + sb.append(line); + } + in.close(); + + } catch (MalformedURLException e) { + logger.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL"); + return null; + } catch (IOException e) { + logger.info("Unable to get query_hash"); + logger.info(e.getMessage()); + return null; + } + Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } + logger.info("Could not find query_hash on " + jsFileURL); + return null; } From da338eb7164e33e37f0b63a02e33ea645eb9e339 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 15 Mar 2018 13:40:11 -0400 Subject: [PATCH 38/61] 1.7.26: fixed instagram ripper --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index a36b52ef..35e1c586 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.25 + 1.7.26 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 1e4fb4dc..6674d63f 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.25", + "latestVersion": "1.7.26", "changeList": [ + "1.7.26: fixed instagram ripper", "1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 6c31f378..8fada879 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.25"; + private static final String DEFAULT_VERSION = "1.7.26"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 5600b375b4f9595762f980a3c01cbe51e81abcf1 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 15 Mar 2018 16:27:15 -0400 Subject: [PATCH 39/61] IG ripper can now rip from tags --- .../ripme/ripper/rippers/InstagramRipper.java | 93 ++++++++++++------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 93de0e8c..92138861 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -29,6 +29,8 @@ import com.rarchives.ripme.utils.Utils; public class InstagramRipper extends AbstractHTMLRipper { String nextPageID = ""; private String qHash; + private boolean rippingTag = false; + private String tagName; private String userID; @@ -131,11 +133,13 @@ public class InstagramRipper extends AbstractHTMLRipper { return m.group(1); } -// p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); -// m = p.matcher(url.toExternalForm()); -// if (m.matches()) { -// return m.group(1); -// } + p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + rippingTag = true; + tagName = m.group(1); + return m.group(1); + } throw new MalformedURLException("Unable to find user in " + url); } @@ -230,15 +234,26 @@ public class InstagramRipper extends AbstractHTMLRipper { if (!url.toExternalForm().contains("/p/")) { JSONArray datas = new JSONArray(); - // This first try only works on data from the first page - try { - JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", ""); - datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") - .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); - } catch (JSONException e) { - datas = json.getJSONObject("data").getJSONObject("user") - .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + if (!rippingTag) { + // This first try only works on data from the first page + try { + JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", ""); + datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + } catch (JSONException e) { + datas = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + } + } else { + try { + JSONArray tagPage = json.getJSONObject("entry_data").getJSONArray("TagPage"); + datas = tagPage.getJSONObject(0).getJSONObject("graphql").getJSONObject("hashtag") + .getJSONObject("edge_hashtag_to_media").getJSONArray("edges"); + } catch (JSONException e) { + datas = json.getJSONObject("data").getJSONObject("hashtag").getJSONObject("edge_hashtag_to_media") + .getJSONArray("edges"); + } } for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); @@ -246,17 +261,20 @@ public class InstagramRipper extends AbstractHTMLRipper { Long epoch = data.getLong("taken_at_timestamp"); Instant instant = Instant.ofEpochSecond(epoch); String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC)); - if (data.getString("__typename").equals("GraphSidecar")) { - try { - Document slideShowDoc = Http.url(new URL("https://www.instagram.com/p/" + data.getString("shortcode"))).get(); - List toAdd = getPostsFromSinglePage(slideShowDoc); - for (int slideShowInt=0; slideShowInt toAdd = getPostsFromSinglePage(slideShowDoc); + for (int slideShowInt = 0; slideShowInt < toAdd.size(); slideShowInt++) { + addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("shortcode")); + } + } catch (MalformedURLException e) { + logger.error("Unable to download slide show, URL was malformed"); + } catch (IOException e) { + logger.error("Unable to download slide show"); } - } catch (MalformedURLException e) { - logger.error("Unable to download slide show, URL was malformed"); - } catch (IOException e) { - logger.error("Unable to download slide show"); } } try { @@ -297,15 +315,12 @@ public class InstagramRipper extends AbstractHTMLRipper { public Document getNextPage(Document doc) throws IOException { Document toreturn; if (!nextPageID.equals("") && !isThisATest()) { - if (url.toExternalForm().contains("/tags/")) { + if (rippingTag) { try { - // Sleep for a while to avoid a ban sleep(2500); - if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).ignoreContentType().get(); - } else { - toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).ignoreContentType().get(); - } + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + + "&variables={\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}").ignoreContentType().get(); + // Sleep for a while to avoid a ban logger.info(toreturn.html()); return toreturn; @@ -369,10 +384,18 @@ public class InstagramRipper extends AbstractHTMLRipper { logger.info(e.getMessage()); return null; } - Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); - Matcher m = jsP.matcher(sb.toString()); - if (m.find()) { - return m.group(1); + if (!rippingTag) { + Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } + } else { + Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } } logger.info("Could not find query_hash on " + jsFileURL); return null; From 5b5e86ed3b869457a26fcb9340c0caf7bbfabc4f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 16 Mar 2018 16:50:29 -0400 Subject: [PATCH 40/61] Fixed json parsing errors --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 92138861..bb2998f8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -150,6 +150,8 @@ public class InstagramRipper extends AbstractHTMLRipper { " ", ""); t.replaceAll("\n" + "", ""); + t = t.replaceAll("\n", ""); + t = t.replaceAll("=\"\"", ""); return t; } @@ -352,6 +354,7 @@ public class InstagramRipper extends AbstractHTMLRipper { } private boolean pageHasImages(Document doc) { + logger.info("BAD DATA: " + stripHTMLTags(doc.html())); JSONObject json = new JSONObject(stripHTMLTags(doc.html())); int numberOfImages = json.getJSONObject("data").getJSONObject("user") .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length(); From 6cea9e3ff75d3378fb1693bee7d54d28a5f1a774 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 17 Mar 2018 16:09:34 -0400 Subject: [PATCH 41/61] 1.7.27: IG ripper can now rip from tags; fixed json parsing issues --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 35e1c586..77bedc9c 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.26 + 1.7.27 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 6674d63f..5f47ad6b 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.26", + "latestVersion": "1.7.27", "changeList": [ + "1.7.27: IG ripper can now rip from tags; fixed json parsing issues", "1.7.26: fixed instagram ripper", "1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 8fada879..80282ccb 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.26"; + private static final String DEFAULT_VERSION = "1.7.27"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From acb6356b946b433db563c2fda9ee6808a6b64f6f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 17 Mar 2018 19:52:47 -0400 Subject: [PATCH 42/61] Hentaifoundry Ripper no longer errors out when there is no next page --- .../rarchives/ripme/ripper/rippers/HentaifoundryRipper.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java index 561c4249..c0031548 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java @@ -68,8 +68,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { - if (doc.select("li.next.hidden").size() != 0) { - // Last page + if (doc.select("li.next > a").size() == 0) { throw new IOException("No more pages"); } Elements els = doc.select("li.next > a"); From 0f61dd00f0f88d12da327d946c8d675e3a65dfbf Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 21 Mar 2018 23:59:19 -0400 Subject: [PATCH 43/61] Reddit ripper now gets erome links --- .../ripme/ripper/rippers/RedditRipper.java | 6 +++ .../com/rarchives/ripme/utils/RipUtils.java | 40 +++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java index 52e9a6d2..e8798476 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java @@ -4,10 +4,13 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ripper.AbstractRipper; +import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; import org.json.JSONArray; import org.json.JSONObject; import org.json.JSONTokener; @@ -17,6 +20,9 @@ import com.rarchives.ripme.ui.UpdateUtils; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.RipUtils; import com.rarchives.ripme.utils.Utils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; public class RedditRipper extends AlbumRipper { diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java index b7b8c239..01d20e7c 100644 --- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java +++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java @@ -9,19 +9,18 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ripper.AbstractRipper; +import com.rarchives.ripme.ripper.rippers.EroShareRipper; +import com.rarchives.ripme.ripper.rippers.EromeRipper; +import com.rarchives.ripme.ripper.rippers.ImgurRipper; +import com.rarchives.ripme.ripper.rippers.VidbleRipper; +import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; import org.apache.commons.lang.math.NumberUtils; import org.apache.log4j.Logger; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AbstractRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage; -import com.rarchives.ripme.ripper.rippers.VidbleRipper; -import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; -import com.rarchives.ripme.ripper.rippers.EroShareRipper; public class RipUtils { private static final Logger logger = Logger.getLogger(RipUtils.class); @@ -35,8 +34,8 @@ public class RipUtils { && url.toExternalForm().contains("imgur.com/a/")) { try { logger.debug("Fetching imgur album at " + url); - ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url); - for (ImgurImage imgurImage : imgurAlbum.images) { + ImgurRipper.ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url); + for (ImgurRipper.ImgurImage imgurImage : imgurAlbum.images) { logger.debug("Got imgur image: " + imgurImage.url); result.add(imgurImage.url); } @@ -49,8 +48,8 @@ public class RipUtils { // Imgur image series. try { logger.debug("Fetching imgur series at " + url); - ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url); - for (ImgurImage imgurImage : imgurAlbum.images) { + ImgurRipper.ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url); + for (ImgurRipper.ImgurImage imgurImage : imgurAlbum.images) { logger.debug("Got imgur image: " + imgurImage.url); result.add(imgurImage.url); } @@ -91,6 +90,21 @@ public class RipUtils { return result; } + else if (url.toExternalForm().contains("erome.com")) { + try { + logger.info("Getting eroshare album " + url); + EromeRipper r = new EromeRipper(url); + Document tempDoc = r.getFirstPage(); + for (String u : r.getURLsFromPage(tempDoc)) { + result.add(new URL(u)); + } + } catch (IOException e) { + // Do nothing + logger.warn("Exception while retrieving eroshare page:", e); + } + return result; + } + Pattern p = Pattern.compile("https?://i.reddituploads.com/([a-zA-Z0-9]+)\\?.*"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { @@ -122,8 +136,8 @@ public class RipUtils { try { // Fetch the page Document doc = Jsoup.connect(url.toExternalForm()) - .userAgent(AbstractRipper.USER_AGENT) - .get(); + .userAgent(AbstractRipper.USER_AGENT) + .get(); for (Element el : doc.select("meta")) { if (el.attr("name").equals("twitter:image:src")) { result.add(new URL(el.attr("content"))); From 06e566ac63ff1205ed84379f3aa1628c9ceaa7da Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 22 Mar 2018 00:01:51 -0400 Subject: [PATCH 44/61] Revert "Hentaifoundry Ripper no longer errors out when there is no next page" This reverts commit acb6356b946b433db563c2fda9ee6808a6b64f6f. --- .../rarchives/ripme/ripper/rippers/HentaifoundryRipper.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java index c0031548..561c4249 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java @@ -68,7 +68,8 @@ public class HentaifoundryRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { - if (doc.select("li.next > a").size() == 0) { + if (doc.select("li.next.hidden").size() != 0) { + // Last page throw new IOException("No more pages"); } Elements els = doc.select("li.next > a"); From ace84b868befbdb4e96f85cec99bd85f52342306 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 23 Mar 2018 23:06:43 -0400 Subject: [PATCH 45/61] IG ripper now uses display_url when downloading images --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index bb2998f8..b38c2c08 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -284,9 +284,9 @@ public class InstagramRipper extends AbstractHTMLRipper { if (imageURLs.size() == 0) { // We add this one item to the array because either wise // the ripper will error out because we returned an empty array - imageURLs.add(getOriginalUrl(data.getString("thumbnail_src"))); + imageURLs.add(getOriginalUrl(data.getString("display_url"))); } - addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))), image_date); + addURLToDownload(new URL(data.getString("display_url")), image_date); } else { if (!Utils.getConfigBoolean("instagram.download_images_only", false)) { addURLToDownload(new URL(getVideoFromPage(data.getString("shortcode"))), image_date); From fc0e3588b63229cfb4f6d8b1a0c7fe568484fc03 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 23 Mar 2018 23:50:44 -0400 Subject: [PATCH 46/61] 1.7.28: IG ripper now uses display_url when downloading images; Reddit ripper now gets erome links; Hentaifoundry Ripper no longer errors out when there is no next page --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 77bedc9c..49b98ee0 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.27 + 1.7.28 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 5f47ad6b..5c3fc239 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.27", + "latestVersion": "1.7.28", "changeList": [ + "1.7.28: IG ripper now uses display_url when downloading images; Reddit ripper now gets erome links; Hentaifoundry Ripper no longer errors out when there is no next page", "1.7.27: IG ripper can now rip from tags; fixed json parsing issues", "1.7.26: fixed instagram ripper", "1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 80282ccb..9a34cf98 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.27"; + private static final String DEFAULT_VERSION = "1.7.28"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 8237841df43782474a64ee142a3ff394627ca258 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Mar 2018 01:56:07 -0400 Subject: [PATCH 47/61] Fixed eroshae ripper --- .../rarchives/ripme/ripper/rippers/EroShareRipper.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java index 30dcfd4f..e7019178 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java @@ -119,7 +119,7 @@ public class EroShareRipper extends AbstractHTMLRipper { for (Element img : imgs) { if (img.hasClass("album-image")) { String imageURL = img.attr("src"); - imageURL = "https:" + imageURL; + imageURL = imageURL; URLs.add(imageURL); } } @@ -129,7 +129,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add("https:" + videoURL); + URLs.add(videoURL); } } // Profile videos @@ -148,7 +148,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add("https:" + videoURL); + URLs.add(videoURL); } } } @@ -209,7 +209,6 @@ public class EroShareRipper extends AbstractHTMLRipper { for (Element img : imgs) { if (img.hasClass("album-image")) { String imageURL = img.attr("src"); - imageURL = "https:" + imageURL; URLs.add(new URL(imageURL)); } } @@ -219,7 +218,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add(new URL("https:" + videoURL)); + URLs.add(new URL(videoURL)); } } From a88dcaad6acbbd9403bd76e275f15e03c1f9aeac Mon Sep 17 00:00:00 2001 From: dex-11 Date: Sun, 25 Mar 2018 09:42:38 +0200 Subject: [PATCH 48/61] Added modelx ripper --- .../ripme/ripper/rippers/ModelxRipper.java | 64 +++++++++++++++++++ .../tst/ripper/rippers/ModelxRipperTest.java | 13 ++++ 2 files changed, 77 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/ModelxRipperTest.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java new file mode 100644 index 00000000..0b513b37 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java @@ -0,0 +1,64 @@ +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class ModelxRipper extends AbstractHTMLRipper { + + public ModelxRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "modelx"; + } + + @Override + public String getDomain() { + return "modelx.org"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^.*modelx.org/.*/(.+)$"); + Matcher m = p.matcher(url.toExternalForm()); + + if (m.matches()) { + return m.group(1); + } + + throw new MalformedURLException("Expected URL format: http://www.modelx.org/[category (one or more)]/xxxxx got: " + url); + } + + @Override + public Document getFirstPage() throws IOException { + return Http.url(url).get(); + } + + @Override + public List getURLsFromPage(Document page) { + List result = new ArrayList<>(); + + for (Element el : page.select(".gallery-icon > a")) { + result.add(el.attr("href")); + } + + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ModelxRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ModelxRipperTest.java new file mode 100644 index 00000000..2a0358d2 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ModelxRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.ModelxRipper; + +public class ModelxRipperTest extends RippersTest { + public void testModelxAlbum() throws IOException { + ModelxRipper ripper = new ModelxRipper(new URL("http://www.modelx.org/graphis-collection-2002-2016/ai-yuzuki-%e6%9f%9a%e6%9c%88%e3%81%82%e3%81%84-yuzuiro/")); + testRipper(ripper); + } +} \ No newline at end of file From b4832d32e8a6739cf7b6be5332c459adb5cf59db Mon Sep 17 00:00:00 2001 From: MrTimscampi Date: Fri, 30 Mar 2018 11:58:46 +0200 Subject: [PATCH 49/61] Improved handling of headless mode & OS-specific config This adds changes the check for command-lines arguments to avoir throwing an error when no argument is passed on an headless system. It will instead gracefully show the help page to the user. It also adds a check for the Mac OS-specific properties, to avoid setting them on other systems. The OS-specific properties and the app initialization have been moved to after the Headless check for cleanliness (No need to set GUI properties on an headless app and no need to initialize if we're only going to show an help page). --- src/main/java/com/rarchives/ripme/App.java | 48 ++++++++++++------- .../ripme/ripper/rippers/FacebookRipper.java | 4 ++ 2 files changed, 36 insertions(+), 16 deletions(-) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/FacebookRipper.java diff --git a/src/main/java/com/rarchives/ripme/App.java b/src/main/java/com/rarchives/ripme/App.java index 0bb5f3f6..9a6b1d09 100644 --- a/src/main/java/com/rarchives/ripme/App.java +++ b/src/main/java/com/rarchives/ripme/App.java @@ -1,5 +1,6 @@ package com.rarchives.ripme; +import java.awt.*; import java.io.File; import java.io.IOException; import java.io.BufferedReader; @@ -18,6 +19,7 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; +import org.apache.commons.lang.SystemUtils; import org.apache.log4j.Logger; import com.rarchives.ripme.ripper.AbstractRipper; @@ -34,35 +36,34 @@ import com.rarchives.ripme.utils.Utils; */ public class App { - public static final Logger logger; + public static final Logger logger = Logger.getLogger(App.class); private static final History HISTORY = new History(); - static { - //initialize logger - Utils.configureLogger(); - logger = Logger.getLogger(App.class); - } - public static void main(String[] args) throws MalformedURLException { CommandLine cl = getArgs(args); + if (args.length > 0 && cl.hasOption('v')){ - logger.error(UpdateUtils.getThisJarVersion()); + logger.info(UpdateUtils.getThisJarVersion()); System.exit(0); } - System.setProperty("apple.laf.useScreenMenuBar", "true"); - System.setProperty("com.apple.mrj.application.apple.menu.about.name", "RipMe"); - logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion()); - - if (args.length > 0) { - // CLI Mode + if (GraphicsEnvironment.isHeadless()) { handleArguments(args); } else { - // GUI Mode + if (SystemUtils.IS_OS_MAC_OSX) { + System.setProperty("apple.laf.useScreenMenuBar", "true"); + System.setProperty("com.apple.mrj.application.apple.menu.about.name", "RipMe"); + } + + Utils.configureLogger(); + + logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion()); + MainWindow mw = new MainWindow(); SwingUtilities.invokeLater(mw); } } + /** * Creates an abstract ripper and instructs it to rip. * @param url URL to be ripped @@ -80,20 +81,28 @@ public class App { */ private static void handleArguments(String[] args) { CommandLine cl = getArgs(args); - if (cl.hasOption('h')) { + + if (cl.hasOption('h') || args.length == 0) { HelpFormatter hf = new HelpFormatter(); hf.printHelp("java -jar ripme.jar [OPTIONS]", getOptions()); System.exit(0); } + + Utils.configureLogger(); + logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion()); + if (cl.hasOption('w')) { Utils.setConfigBoolean("file.overwrite", true); } + if (cl.hasOption('t')) { Utils.setConfigInteger("threads.size", Integer.parseInt(cl.getOptionValue('t'))); } + if (cl.hasOption('4')) { Utils.setConfigBoolean("errors.skip404", true); } + if (cl.hasOption('r')) { // Re-rip all via command-line List history = Utils.getConfigList("download.history"); @@ -115,6 +124,7 @@ public class App { // Exit System.exit(0); } + if (cl.hasOption('R')) { loadHistory(); if (HISTORY.toList().isEmpty()) { @@ -146,20 +156,25 @@ public class App { System.exit(-1); } } + if (cl.hasOption('d')) { Utils.setConfigBoolean("download.save_order", true); } + if (cl.hasOption('D')) { Utils.setConfigBoolean("download.save_order", false); } + if ((cl.hasOption('d'))&&(cl.hasOption('D'))) { logger.error("\nCannot specify '-d' and '-D' simultaneously"); System.exit(-1); } + if (cl.hasOption('l')) { // change the default rips directory Utils.setConfigString("rips.directory", cl.getOptionValue('l')); } + if (cl.hasOption('f')) { String filename = cl.getOptionValue('f'); try { @@ -175,6 +190,7 @@ public class App { logger.error("[!] Failed reading file containing list of URLs. Cannot continue."); } } + if (cl.hasOption('u')) { String url = cl.getOptionValue('u').trim(); ripURL(url, cl.hasOption("n")); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FacebookRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FacebookRipper.java new file mode 100644 index 00000000..acbf7a2e --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FacebookRipper.java @@ -0,0 +1,4 @@ +package com.rarchives.ripme.ripper.rippers; + +public class FacebookRipper { +} From b5abdb6ae057484b25c9724a1c6835e35d753eeb Mon Sep 17 00:00:00 2001 From: MrTimscampi Date: Fri, 30 Mar 2018 12:15:20 +0200 Subject: [PATCH 50/61] Removed empty file comitted by accident --- .../com/rarchives/ripme/ripper/rippers/FacebookRipper.java | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/FacebookRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FacebookRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FacebookRipper.java deleted file mode 100644 index acbf7a2e..00000000 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FacebookRipper.java +++ /dev/null @@ -1,4 +0,0 @@ -package com.rarchives.ripme.ripper.rippers; - -public class FacebookRipper { -} From 3716bbfb5414abfb30de77bfb19478b7d569d0e6 Mon Sep 17 00:00:00 2001 From: avesther Date: Mon, 2 Apr 2018 14:19:32 +0200 Subject: [PATCH 51/61] Fixes #410 Imgur - Cannot download albums with a single image in --- .../ripme/ripper/rippers/ImgurRipper.java | 138 +++++++++++------- .../tst/ripper/rippers/ImgurRipperTest.java | 16 +- 2 files changed, 100 insertions(+), 54 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java index b595d9e2..fe7937d3 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java @@ -36,6 +36,7 @@ public class ImgurRipper extends AlbumRipper { USER, USER_ALBUM, USER_IMAGES, + SINGLE_IMAGE, SERIES_OF_IMAGES, SUBREDDIT } @@ -155,34 +156,48 @@ public class ImgurRipper extends AlbumRipper { @Override public void rip() throws IOException { switch (albumType) { - case ALBUM: - // Fall-through - case USER_ALBUM: - logger.info("Album type is USER_ALBUM"); - // Don't call getAlbumTitle(this.url) with this - // as it seems to cause the album to be downloaded to a subdir. - ripAlbum(this.url); - break; - case SERIES_OF_IMAGES: - logger.info("Album type is SERIES_OF_IMAGES"); - ripAlbum(this.url); - break; - case USER: - logger.info("Album type is USER"); - ripUserAccount(url); - break; - case SUBREDDIT: - logger.info("Album type is SUBREDDIT"); - ripSubreddit(url); - break; - case USER_IMAGES: - logger.info("Album type is USER_IMAGES"); - ripUserImages(url); - break; + case ALBUM: + // Fall-through + case USER_ALBUM: + logger.info("Album type is USER_ALBUM"); + // Don't call getAlbumTitle(this.url) with this + // as it seems to cause the album to be downloaded to a subdir. + ripAlbum(this.url); + break; + case SERIES_OF_IMAGES: + logger.info("Album type is SERIES_OF_IMAGES"); + ripAlbum(this.url); + break; + case SINGLE_IMAGE: + logger.info("Album type is SINGLE_IMAGE"); + ripSingleImage(this.url); + break; + case USER: + logger.info("Album type is USER"); + ripUserAccount(url); + break; + case SUBREDDIT: + logger.info("Album type is SUBREDDIT"); + ripSubreddit(url); + break; + case USER_IMAGES: + logger.info("Album type is USER_IMAGES"); + ripUserImages(url); + break; } waitForThreads(); } + private void ripSingleImage(URL url) throws IOException { + String strUrl = url.toExternalForm(); + Document document = getDocument(strUrl); + Matcher m = getEmbeddedJsonMatcher(document); + if (m.matches()) { + JSONObject json = new JSONObject(m.group(1)).getJSONObject("image"); + addURLToDownload(extractImageUrlFromJson(json), ""); + } + } + private void ripAlbum(URL url) throws IOException { ripAlbum(url, ""); } @@ -257,38 +272,16 @@ public class ImgurRipper extends AlbumRipper { strUrl += "/all"; } logger.info(" Retrieving " + strUrl); - Document doc = Jsoup.connect(strUrl) - .userAgent(USER_AGENT) - .timeout(10 * 1000) - .maxBodySize(0) - .get(); - + Document doc = getDocument(strUrl); // Try to use embedded JSON to retrieve images - Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL); - Matcher m = p.matcher(doc.body().html()); + Matcher m = getEmbeddedJsonMatcher(doc); if (m.matches()) { try { - ImgurAlbum imgurAlbum = new ImgurAlbum(url); JSONObject json = new JSONObject(m.group(1)); - JSONArray images = json.getJSONObject("image") + JSONArray jsonImages = json.getJSONObject("image") .getJSONObject("album_images") .getJSONArray("images"); - int imagesLength = images.length(); - for (int i = 0; i < imagesLength; i++) { - JSONObject image = images.getJSONObject(i); - String ext = image.getString("ext"); - if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) { - ext = ".mp4"; - } - URL imageURL = new URL( - "http://i.imgur.com/" - + image.getString("hash") - + ext); - ImgurImage imgurImage = new ImgurImage(imageURL); - imgurImage.extension = ext; - imgurAlbum.addImage(imgurImage); - } - return imgurAlbum; + return createImgurAlbumFromJsonArray(url, jsonImages); } catch (JSONException e) { logger.debug("Error while parsing JSON at " + url + ", continuing", e); } @@ -330,6 +323,44 @@ public class ImgurRipper extends AlbumRipper { return imgurAlbum; } + private static Matcher getEmbeddedJsonMatcher(Document doc) { + Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL); + return p.matcher(doc.body().html()); + } + + private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException { + ImgurAlbum imgurAlbum = new ImgurAlbum(url); + int imagesLength = jsonImages.length(); + for (int i = 0; i < imagesLength; i++) { + JSONObject jsonImage = jsonImages.getJSONObject(i); + imgurAlbum.addImage(createImgurImageFromJson(jsonImage)); + } + return imgurAlbum; + } + + private static ImgurImage createImgurImageFromJson(JSONObject json) throws MalformedURLException { + return new ImgurImage(extractImageUrlFromJson(json)); + } + + private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException { + String ext = json.getString("ext"); + if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) { + ext = ".mp4"; + } + return new URL( + "http://i.imgur.com/" + + json.getString("hash") + + ext); + } + + private static Document getDocument(String strUrl) throws IOException { + return Jsoup.connect(strUrl) + .userAgent(USER_AGENT) + .timeout(10 * 1000) + .maxBodySize(0) + .get(); + } + /** * Rips all albums in an imgur user's account. * @param url @@ -507,6 +538,13 @@ public class ImgurRipper extends AlbumRipper { this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid); return "r_" + subreddit + "_" + gid; } + p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9]{5,})$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + // Single imgur image + albumType = ALBUM_TYPE.SINGLE_IMAGE; + return m.group(m.groupCount()); + } p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$"); m = p.matcher(url.toExternalForm()); if (m.matches()) { diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java index 46f5679f..c321a99e 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java @@ -1,13 +1,13 @@ package com.rarchives.ripme.tst.ripper.rippers; +import com.rarchives.ripme.ripper.rippers.ImgurRipper; +import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; + import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; -import com.rarchives.ripme.ripper.rippers.ImgurRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; - public class ImgurRipperTest extends RippersTest { public void testImgurURLFailures() throws IOException { @@ -17,7 +17,6 @@ public class ImgurRipperTest extends RippersTest { failURLs.add(new URL("http://imgur.com/")); failURLs.add(new URL("http://i.imgur.com")); failURLs.add(new URL("http://i.imgur.com/")); - failURLs.add(new URL("http://imgur.com/image")); failURLs.add(new URL("http://imgur.com/image.jpg")); failURLs.add(new URL("http://i.imgur.com/image.jpg")); for (URL url : failURLs) { @@ -50,6 +49,15 @@ public class ImgurRipperTest extends RippersTest { } } + public void testImgurSingleImage() throws IOException { + List contentURLs = new ArrayList<>(); + contentURLs.add(new URL("http://imgur.com/qbfcLyG")); // Single image URL + contentURLs.add(new URL("https://imgur.com/KexUO")); // Single image URL + for (URL url : contentURLs) { + ImgurRipper ripper = new ImgurRipper(url); + testRipper(ripper); + } + } public void testImgurAlbumWithMoreThan20Pictures() throws IOException { ImgurAlbum album = ImgurRipper.getImgurAlbum(new URL("http://imgur.com/a/HUMsq")); From 338edd18f9e194cfb1fd2718802443da86009cfa Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 2 Apr 2018 16:48:32 -0400 Subject: [PATCH 52/61] 1.7.29: Cano now download single images from imgur; Improved handling of headless mode & OS-specific config; Added modelx ripper; Fixed eroshae ripper --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 49b98ee0..842dbe01 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.28 + 1.7.29 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 5c3fc239..50040faf 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.28", + "latestVersion": "1.7.29", "changeList": [ + "1.7.29: Cano now download single images from imgur; Improved handling of headless mode & OS-specific config; Added modelx ripper; Fixed eroshae ripper", "1.7.28: IG ripper now uses display_url when downloading images; Reddit ripper now gets erome links; Hentaifoundry Ripper no longer errors out when there is no next page", "1.7.27: IG ripper can now rip from tags; fixed json parsing issues", "1.7.26: fixed instagram ripper", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 9a34cf98..ced70f44 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.28"; + private static final String DEFAULT_VERSION = "1.7.29"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 9e138f2aad0bd3a5b327d25275575160c3679255 Mon Sep 17 00:00:00 2001 From: MrTimscampi Date: Tue, 3 Apr 2018 12:29:04 +0200 Subject: [PATCH 53/61] Fix usage of command-line on non-headless systems --- src/main/java/com/rarchives/ripme/App.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/App.java b/src/main/java/com/rarchives/ripme/App.java index 9a6b1d09..67c44ab1 100644 --- a/src/main/java/com/rarchives/ripme/App.java +++ b/src/main/java/com/rarchives/ripme/App.java @@ -47,7 +47,7 @@ public class App { System.exit(0); } - if (GraphicsEnvironment.isHeadless()) { + if (GraphicsEnvironment.isHeadless() || args.length > 0) { handleArguments(args); } else { if (SystemUtils.IS_OS_MAC_OSX) { From 24f07523086414256400f6b41af7f58350ed996b Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 3 Apr 2018 10:45:10 -0400 Subject: [PATCH 54/61] 1.7.30: Fixed usage of command-line on non-headless systems --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 842dbe01..20564bb4 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.29 + 1.7.30 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 50040faf..78ba8967 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.29", + "latestVersion": "1.7.30", "changeList": [ + "1.7.30: Fixed usage of command-line on non-headless systems", "1.7.29: Cano now download single images from imgur; Improved handling of headless mode & OS-specific config; Added modelx ripper; Fixed eroshae ripper", "1.7.28: IG ripper now uses display_url when downloading images; Reddit ripper now gets erome links; Hentaifoundry Ripper no longer errors out when there is no next page", "1.7.27: IG ripper can now rip from tags; fixed json parsing issues", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index ced70f44..ed8b8d6e 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.29"; + private static final String DEFAULT_VERSION = "1.7.30"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 7a1d6263b175b70fa53fa525a21de6077b2c38d8 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 7 Apr 2018 19:42:51 -0400 Subject: [PATCH 55/61] InstagramRipper now send ig_pr=1 cookie when getting next page --- .../rarchives/ripme/ripper/rippers/InstagramRipper.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index b38c2c08..4b87f992 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -24,6 +24,7 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.utils.Utils; +import java.util.HashMap; public class InstagramRipper extends AbstractHTMLRipper { @@ -316,12 +317,15 @@ public class InstagramRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { Document toreturn; + java.util.Map cookies = new HashMap(); +// This shouldn't be hardcoded and will break one day + cookies.put("ig_pr", "1"); if (!nextPageID.equals("") && !isThisATest()) { if (rippingTag) { try { sleep(2500); toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + - "&variables={\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}").ignoreContentType().get(); + "&variables={\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}").cookies(cookies).ignoreContentType().get(); // Sleep for a while to avoid a ban logger.info(toreturn.html()); return toreturn; @@ -335,7 +339,7 @@ public class InstagramRipper extends AbstractHTMLRipper { // Sleep for a while to avoid a ban sleep(2500); toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + - "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}").ignoreContentType().get(); + "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}").cookies(cookies).ignoreContentType().get(); if (!pageHasImages(toreturn)) { throw new IOException("No more pages"); } From 23fee2779200020b876b93395b73e804d6a69f3f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 7 Apr 2018 20:00:14 -0400 Subject: [PATCH 56/61] 1.7.31: InstaGram ripper no longer errors out when getting next page --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 20564bb4..c6631f25 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.30 + 1.7.31 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 78ba8967..942daad4 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.30", + "latestVersion": "1.7.31", "changeList": [ + "1.7.31: InstaGram ripper no longer errors out when getting next page", "1.7.30: Fixed usage of command-line on non-headless systems", "1.7.29: Cano now download single images from imgur; Improved handling of headless mode & OS-specific config; Added modelx ripper; Fixed eroshae ripper", "1.7.28: IG ripper now uses display_url when downloading images; Reddit ripper now gets erome links; Hentaifoundry Ripper no longer errors out when there is no next page", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index ed8b8d6e..b02e7fed 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.30"; + private static final String DEFAULT_VERSION = "1.7.31"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 54f22eef3c4fd3056ce7fa5fed3ba9d266dbb89a Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 10 Apr 2018 18:23:31 -0400 Subject: [PATCH 57/61] Got getNextPage working again for non-tags --- .../ripme/ripper/rippers/InstagramRipper.java | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 4b87f992..e4e4e39f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -3,6 +3,7 @@ package com.rarchives.ripme.ripper.rippers; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; @@ -12,6 +13,7 @@ import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.security.*; import org.json.JSONArray; import org.json.JSONException; @@ -20,6 +22,7 @@ import org.json.JSONObject; import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; +import org.jsoup.Connection; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ui.RipStatusMessage; @@ -34,6 +37,10 @@ public class InstagramRipper extends AbstractHTMLRipper { private String tagName; private String userID; + private String rhx_gis = null; + private String csrftoken; + + public InstagramRipper(URL url) throws IOException { super(url); @@ -178,7 +185,10 @@ public class InstagramRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { - Document p = Http.url(url).get(); + Connection.Response resp = Http.url(url).response(); + logger.info(resp.cookies()); + csrftoken = resp.cookie("csrftoken"); + Document p = resp.parse(); // Get the query hash so we can download the next page qHash = getQHash(p); return p; @@ -234,7 +244,10 @@ public class InstagramRipper extends AbstractHTMLRipper { logger.warn("Unable to exact json from page"); } - + // get the rhx_gis value so we can get the next page later on + if (rhx_gis == null) { + rhx_gis = json.getString("rhx_gis"); + } if (!url.toExternalForm().contains("/p/")) { JSONArray datas = new JSONArray(); if (!rippingTag) { @@ -314,12 +327,33 @@ public class InstagramRipper extends AbstractHTMLRipper { return imageURLs; } + private String getIGGis(String variables) { + String stringToMD5 = rhx_gis + ":" + csrftoken + ":" + USER_AGENT + ":" + variables; + logger.debug("String to md5 is \"" + stringToMD5 + "\""); + try { + byte[] bytesOfMessage = stringToMD5.getBytes("UTF-8"); + + MessageDigest md = MessageDigest.getInstance("MD5"); + byte[] hash = md.digest(bytesOfMessage); + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < hash.length; ++i) { + sb.append(Integer.toHexString((hash[i] & 0xFF) | 0x100).substring(1,3)); + } + return sb.toString(); + } catch(UnsupportedEncodingException e) { + return null; + } catch(NoSuchAlgorithmException e) { + return null; + } + } + @Override public Document getNextPage(Document doc) throws IOException { Document toreturn; java.util.Map cookies = new HashMap(); // This shouldn't be hardcoded and will break one day cookies.put("ig_pr", "1"); + cookies.put("csrftoken", csrftoken); if (!nextPageID.equals("") && !isThisATest()) { if (rippingTag) { try { @@ -338,8 +372,11 @@ public class InstagramRipper extends AbstractHTMLRipper { try { // Sleep for a while to avoid a ban sleep(2500); - toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + - "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}").cookies(cookies).ignoreContentType().get(); + String vars = "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}"; + String ig_gis = getIGGis(vars); + logger.info(ig_gis); + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars + ).header("x-instagram-gis", ig_gis).cookies(cookies).ignoreContentType().get(); if (!pageHasImages(toreturn)) { throw new IOException("No more pages"); } @@ -358,7 +395,6 @@ public class InstagramRipper extends AbstractHTMLRipper { } private boolean pageHasImages(Document doc) { - logger.info("BAD DATA: " + stripHTMLTags(doc.html())); JSONObject json = new JSONObject(stripHTMLTags(doc.html())); int numberOfImages = json.getJSONObject("data").getJSONObject("user") .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length(); From 73478344d8d5c6f43b55d134e727b6d8dc48452e Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 10 Apr 2018 18:28:13 -0400 Subject: [PATCH 58/61] Got getNextPage working again for tags --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index e4e4e39f..3f1433d0 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -358,8 +358,10 @@ public class InstagramRipper extends AbstractHTMLRipper { if (rippingTag) { try { sleep(2500); + String vars = "{\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}"; + String ig_gis = getIGGis(vars); toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + - "&variables={\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}").cookies(cookies).ignoreContentType().get(); + "&variables=" + vars).header("x-instagram-gis", ig_gis).cookies(cookies).ignoreContentType().get(); // Sleep for a while to avoid a ban logger.info(toreturn.html()); return toreturn; From 6b4d01af1d8f9c07413fb40c96047fda36a2488e Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 10 Apr 2018 18:52:52 -0400 Subject: [PATCH 59/61] 1.7.32: Instagram ripper update to use new enpoints --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index c6631f25..7e7b1c05 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.31 + 1.7.32 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 942daad4..6770542a 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.31", + "latestVersion": "1.7.32", "changeList": [ + "1.7.32: Instagram ripper update to use new enpoints", "1.7.31: InstaGram ripper no longer errors out when getting next page", "1.7.30: Fixed usage of command-line on non-headless systems", "1.7.29: Cano now download single images from imgur; Improved handling of headless mode & OS-specific config; Added modelx ripper; Fixed eroshae ripper", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index b02e7fed..8a05aaa4 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.31"; + private static final String DEFAULT_VERSION = "1.7.32"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 2871805b5da83ee73a33aa42941f48962637eda6 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 13 Apr 2018 05:45:22 -0400 Subject: [PATCH 60/61] Instagram ripper no longer errors out when downloading from more than 1 page --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 3f1433d0..12842aa8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -328,7 +328,7 @@ public class InstagramRipper extends AbstractHTMLRipper { } private String getIGGis(String variables) { - String stringToMD5 = rhx_gis + ":" + csrftoken + ":" + USER_AGENT + ":" + variables; + String stringToMD5 = rhx_gis + ":" + csrftoken + ":" + variables; logger.debug("String to md5 is \"" + stringToMD5 + "\""); try { byte[] bytesOfMessage = stringToMD5.getBytes("UTF-8"); From fd0db7e2424348345d1d3fb18d942826417229ed Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 13 Apr 2018 05:49:47 -0400 Subject: [PATCH 61/61] 1.7.33: Instagram ripper no longer errors out when downloading from more than 1 page --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 7e7b1c05..6b7084c8 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.32 + 1.7.33 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 6770542a..8dc6e2cf 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.32", + "latestVersion": "1.7.33", "changeList": [ + "1.7.33: Instagram ripper no longer errors out when downloading from more than 1 page", "1.7.32: Instagram ripper update to use new enpoints", "1.7.31: InstaGram ripper no longer errors out when getting next page", "1.7.30: Fixed usage of command-line on non-headless systems", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 8a05aaa4..e1d579a4 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.32"; + private static final String DEFAULT_VERSION = "1.7.33"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar";