From 970c181ca51e681c4b32a210e6b2e00a28c498fe Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 11 Dec 2017 09:28:21 -0500 Subject: [PATCH 01/19] Revert "Added basic docs for some of ripmes options" --- docs/options.md | 40 ---------------------------------------- 1 file changed, 40 deletions(-) delete mode 100644 docs/options.md diff --git a/docs/options.md b/docs/options.md deleted file mode 100644 index d972561b..00000000 --- a/docs/options.md +++ /dev/null @@ -1,40 +0,0 @@ -file.overwrite | bool | If true ripme will overwrite existing files rather than skip them - -clipboard.autorip | bool | If true ripme will try to download any links in the clip board - -error.skip404 | bool | Don't retry on 404 errors - -download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the file was download - -auto.update | bool | If true ripme will auto-update every time it's started - -play.sound | bool | If true ripme will play a sound every time a rip finishes - -download.show_popup| bool | TODO figure out what this is for - -log.save | bool | If true ripme will save it's logs - -urls_only.save | bool | If true ripme will save all urls to a text file and download no files - -album_titles.save | bool | Currently does nothing - -prefer.mp4 | bool | Prefer mp4 when downloading a video that has more than 1 format - -download.timeout | int | File download timeout (in milliseconds) - -page.timeout | int | Page download timeout (in milliseconds) - -download.max_size | int | Maximum size of downloaded files in bytes - -threads.size | int | The number of threads to use - -twitter.auth | String | Twitter API key (Base64'd) - -tumblr.auth | String | Tumblr API key - -log.level | String | The debug log level (Example: Log level: Debug) - -gw.api | String | TODO figure out what this is for - -twitter.max_requests | int | TODO figure out what this is for - From cf414e1d44d6546e3390fd0f2122296135b9e5c4 Mon Sep 17 00:00:00 2001 From: Gaboso Date: 
Mon, 19 Feb 2018 21:34:51 -0400 Subject: [PATCH 02/19] Added shebang line to build.sh --- build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build.sh b/build.sh index 7e7c3221..a3ec0242 100755 --- a/build.sh +++ b/build.sh @@ -1 +1,2 @@ +#!/usr/bin/env bash mvn clean compile assembly:single \ No newline at end of file From 66739e4c7cba2b049dd01b554947ac6c705a8fca Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 18:10:30 -0500 Subject: [PATCH 03/19] Implemented a last seen feature; Added comments --- src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index edddea78..edcff83d 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -44,7 +44,8 @@ public abstract class AbstractRipper public abstract String getHost(); public abstract String getGID(URL url) throws MalformedURLException; public boolean hasASAPRipping() { return false; } - + // Everytime addUrlToDownload skips a already downloaded url this increases by 1 + public int alreadyDownloadedUrls = 0; private boolean shouldStop = false; private boolean thisIsATest = false; @@ -194,9 +195,11 @@ public abstract class AbstractRipper * False if failed to download */ protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + // Don't re-add the url if it was downloaded in a previous rip if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) { if (hasDownloadedURL(url.toExternalForm())) { sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm()); + alreadyDownloadedUrls += 1; return false; } } From 8c86377a7faccbb941b82802e295a983b925251d Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 18:10:38 
-0500 Subject: [PATCH 04/19] Implemented a last seen feature; Added comments --- .../com/rarchives/ripme/ripper/AbstractHTMLRipper.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index e71eb389..d49c3292 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -68,7 +68,14 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { Document doc = getFirstPage(); while (doc != null) { + logger.debug("alreadyDownloadedUrls is " + alreadyDownloadedUrls); + if (alreadyDownloadedUrls >= Utils.getConfigInteger("skip_after_already_seen", -1) && !isThisATest()) { + sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip"); + break; + } List imageURLs = getURLsFromPage(doc); + // If hasASAPRipping() returns true then the ripper will handle downloading the files + // if not it's done in the following block of code if (!hasASAPRipping()) { // Remove all but 1 image if (isThisATest()) { From c0a0b9dce809c40efda7aa5f0f3c5cf559a68c1b Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 19:51:28 -0500 Subject: [PATCH 05/19] Removed debug statement; Changed option name --- .../java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index d49c3292..49a0fc7e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -68,8 +68,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { Document doc = getFirstPage(); while (doc != null) { - logger.debug("alreadyDownloadedUrls is " + 
alreadyDownloadedUrls); - if (alreadyDownloadedUrls >= Utils.getConfigInteger("skip_after_already_seen", -1) && !isThisATest()) { + if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", -1) && !isThisATest()) { sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip"); break; } From 850be1ed1b2e0b8cbc4846a9ae91c62fb3b44445 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 19:54:36 -0500 Subject: [PATCH 06/19] Disabled FivehundredpxRipper test --- .../rippers/FivehundredpxRipperTest.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java index 538d493c..214220b8 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java @@ -1,13 +1,15 @@ -package com.rarchives.ripme.tst.ripper.rippers; +//package com.rarchives.ripme.tst.ripper.rippers; +// +//import java.io.IOException; +//import java.net.URL; +// +//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper; +// +//public class FivehundredpxRipperTest extends RippersTest { +// public void test500pxAlbum() throws IOException { +// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman")); +// testRipper(ripper); +// } +//} -import java.io.IOException; -import java.net.URL; - -import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper; - -public class FivehundredpxRipperTest extends RippersTest { - public void test500pxAlbum() throws IOException { - FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman")); - testRipper(ripper); - } -} +// Ripper is broken. 
See https://github.com/RipMeApp/ripme/issues/438 \ No newline at end of file From cac3ca161525a28fb12a08a645f399eda500bff5 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 20:22:07 -0500 Subject: [PATCH 07/19] Fixed 8muses --- .../com/rarchives/ripme/ripper/rippers/EightmusesRipper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index b7b5658f..621a4786 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { Elements pageImages = page.getElementsByClass("c-tile"); for (Element thumb : pageImages) { // If true this link is a sub album - if (thumb.attr("href").contains("/comix/album/")) { + if (thumb.attr("href").contains("/comics/album/")) { String subUrl = "https://www.8muses.com" + thumb.attr("href"); try { logger.info("Retrieving " + subUrl); @@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.warn("Error while loading subalbum " + subUrl, e); } - } else if (thumb.attr("href").contains("/comix/picture/")) { + } else if (thumb.attr("href").contains("/comics/picture/")) { + logger.info("This page is a album"); logger.info("Ripping image"); if (super.isStopped()) break; // Find thumbnail image source From c965de94929799f3326d5779774613de51fde409 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 20:26:06 -0500 Subject: [PATCH 08/19] Updated regex --- .../com/rarchives/ripme/ripper/rippers/EightmusesRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 621a4786..43873cf9 100644 --- 
a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$"); + Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$"); Matcher m = p.matcher(url.toExternalForm()); if (!m.matches()) { throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url); From a5f08ff7c1b728af54e9d619cc48f3e66a35d65f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 20:37:36 -0500 Subject: [PATCH 09/19] Added unit test for new url format --- .../ripme/tst/ripper/rippers/EightmusesRipperTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java index 469c330a..4a6c3539 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java @@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest { // A simple image album EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); testRipper(ripper); + // Test the new url format + ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); + testRipper(ripper); // Test pages with subalbums ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor")); testRipper(ripper); 
From af2c5f3fe09e3446aae75047daa6232ff2e39f90 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 21:02:22 -0500 Subject: [PATCH 10/19] 1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 476068b2..dbf36b0d 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.19 + 1.7.20 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index de0dc0a6..f691644e 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.19", + "latestVersion": "1.7.20", "changeList": [ + "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", "1.7.19: imgurRipper no longer tries to add ?1 to file names", "1.7.18: AlbumRipper now removes bad chars from file names", "1.7.17: Fixed hentai.cafe autorip from clipboard", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 37ce6cfe..57a30407 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.19"; + private static final String DEFAULT_VERSION = "1.7.20"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 34e27cd2facaba6016e228afc6e717e59d8fca6a Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 21:14:35 -0500 Subject: [PATCH 11/19] Changed default value for history.end_rip_after_already_seen 
--- .../java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 49a0fc7e..e0fd3548 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -68,7 +68,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { Document doc = getFirstPage(); while (doc != null) { - if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", -1) && !isThisATest()) { + if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) { sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip"); break; } From 9f922e79650a7f03818f70b4170bc84771f64c43 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 25 Feb 2018 21:23:31 -0500 Subject: [PATCH 12/19] 1.7.21: Fixed last seen feature --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index dbf36b0d..f732d378 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.20 + 1.7.21 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index f691644e..bfb0354f 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.20", + "latestVersion": "1.7.21", "changeList": [ + "1.7.21: Fixed last seen feature", "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", "1.7.19: imgurRipper no longer tries to add ?1 to file names", "1.7.18: AlbumRipper now removes bad chars from file names", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java 
b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 57a30407..27b42258 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.20"; + private static final String DEFAULT_VERSION = "1.7.21"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From e6c43bb48210f55d59725b7158a29adbaa3d977d Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 5 Mar 2018 13:16:19 -0500 Subject: [PATCH 13/19] Added the normalizeUrl func, which allows a ripper to normalize a url before adding it to url history/checking if it's in url history --- .../rarchives/ripme/ripper/AbstractRipper.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index edcff83d..ff6b4102 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -61,7 +61,13 @@ public abstract class AbstractRipper } } + + /** + * Adds a URL to the url history file + * @param downloadedURL URL to record as downloaded + */ private void writeDownloadedURL(String downloadedURL) throws IOException { + downloadedURL = normalizeUrl(downloadedURL); BufferedWriter bw = null; FileWriter fw = null; try { @@ -86,6 +92,15 @@ public abstract class AbstractRipper } } } + + + /** + * Normalize a URL + * @param url URL to normalize + */ + public String normalizeUrl(String url) { + return url; + } /** * Checks to see if Ripme has already downloaded a URL 
@@ -96,6 +111,7 @@ public abstract class AbstractRipper */ private boolean hasDownloadedURL(String url) { File file = new File(URLHistoryFile); + url = normalizeUrl(url); try { Scanner scanner = new Scanner(file); while (scanner.hasNextLine()) { From 48ffcf68d37d9bce2e866b4456af8f65dd8f3fdf Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 5 Mar 2018 13:20:12 -0500 Subject: [PATCH 14/19] Added normalizeUrl --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 076fcfc6..d1f16535 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -52,6 +52,12 @@ public class InstagramRipper extends AbstractHTMLRipper { return san_url; } + @Override + public String normalizeUrl(String url) { + // Remove the date sig from the url + return url.replaceAll("/[A-Z0-9]{8}/", "/"); + } + private List getPostsFromSinglePage(Document Doc) { List imageURLs = new ArrayList<>(); JSONArray datas; From a54b71065c6eda9272510b755d8304a4db04c72f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 6 Mar 2018 06:58:21 -0500 Subject: [PATCH 15/19] 1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index f732d378..9859442c 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.21 + 1.7.22 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index bfb0354f..33c2848c 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.21", + "latestVersion": "1.7.22", 
"changeList": [ + "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", "1.7.21: Fixed last seen feature", "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", "1.7.19: imgurRipper no longer tries to add ?1 to file names", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 27b42258..e198d672 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.21"; + private static final String DEFAULT_VERSION = "1.7.22"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From da8d6541f55cfefab56f074d22d73a2cac9799c9 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 8 Mar 2018 07:33:35 -0500 Subject: [PATCH 16/19] Fixed xvideos ripper --- .../ripper/rippers/video/XvideosRipper.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java index 06841ce9..4f2bac97 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java @@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper { public void rip() throws IOException { logger.info(" Retrieving " + this.url); Document doc = Http.url(this.url).get(); - Elements embeds = doc.select("embed"); - if 
(embeds.size() == 0) { - throw new IOException("Could not find Embed code at " + url); - } - Element embed = embeds.get(0); - String vars = embed.attr("flashvars"); - for (String var : vars.split("&")) { - if (var.startsWith("flv_url=")) { - String vidUrl = var.substring("flv_url=".length()); - vidUrl = URLDecoder.decode(vidUrl, "UTF-8"); - addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url)); + Elements scripts = doc.select("script"); + for (Element e : scripts) { + if (e.html().contains("html5player.setVideoUrlHigh")) { + logger.info("Found the right script"); + String[] lines = e.html().split("\n"); + for (String line: lines) { + if (line.contains("html5player.setVideoUrlHigh")) { + String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", ""); + addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url)); + waitForThreads(); + return; + } + } } } - waitForThreads(); + throw new IOException("Unable to find video url at " + this.url.toExternalForm()); } } \ No newline at end of file From 4af469189321057fef03c167f0177d2086d3086c Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 13:36:53 -0500 Subject: [PATCH 17/19] lastseen feature now works with instagram --- .../ripme/ripper/rippers/InstagramRipper.java | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index d1f16535..364b645c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils; public class InstagramRipper extends AbstractHTMLRipper { + String nextPageID = ""; private String userID; @@ -198,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper { 
@Override public List getURLsFromPage(Document doc) { - String nextPageID = ""; List imageURLs = new ArrayList<>(); JSONObject json = new JSONObject(); try { @@ -261,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper { break; } } - // Rip the next page - if (!nextPageID.equals("") && !isThisATest()) { - if (url.toExternalForm().contains("/tags/")) { - try { - // Sleep for a while to avoid a ban - sleep(2500); - if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get()); - } else { - getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get()); - } - } catch (IOException e) { - return imageURLs; - } - - } - try { - // Sleep for a while to avoid a ban - sleep(2500); - getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get()); - } catch (IOException e) { - return imageURLs; - } - } else { - logger.warn("Can't get net page"); - } } else { // We're ripping from a single page logger.info("Ripping from single page"); imageURLs = getPostsFromSinglePage(doc); @@ -296,6 +270,37 @@ public class InstagramRipper extends AbstractHTMLRipper { return imageURLs; } + @Override + public Document getNextPage(Document doc) { + if (!nextPageID.equals("") && !isThisATest()) { + if (url.toExternalForm().contains("/tags/")) { + try { + // Sleep for a while to avoid a ban + sleep(2500); + if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { + return Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); + } else { + return Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); + } + + } catch (IOException e) { + return null; + } + + } + try { + // Sleep for a while to avoid a ban + sleep(2500); + return Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); + } catch (IOException e) { + return null; + } + } else { + 
logger.warn("Can't get net page"); + } + return null; + } + @Override public void downloadURL(URL url, int index) { addURLToDownload(url); From ecf7a4b623605cb071f0e8299c9d51f09461e716 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 18:06:01 -0500 Subject: [PATCH 18/19] IG ripper now no longer errors out on last page --- .../ripme/ripper/rippers/InstagramRipper.java | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 364b645c..ab44edfd 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -271,34 +271,43 @@ public class InstagramRipper extends AbstractHTMLRipper { } @Override - public Document getNextPage(Document doc) { + public Document getNextPage(Document doc) throws IOException { + Document toreturn; if (!nextPageID.equals("") && !isThisATest()) { if (url.toExternalForm().contains("/tags/")) { try { // Sleep for a while to avoid a ban sleep(2500); if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - return Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); } else { - return Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); } + logger.info(toreturn.html()); + if (!hasImage(toreturn)) { + throw new IOException("No more pages"); + } + return toreturn; } catch (IOException e) { - return null; + throw new IOException("No more pages"); } } try { // Sleep for a while to avoid a ban sleep(2500); - return Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); + toreturn = Http.url("https://www.instagram.com/" + userID + 
"/?max_id=" + nextPageID).get(); + if (!hasImage(toreturn)) { + throw new IOException("No more pages"); + } + return toreturn; } catch (IOException e) { return null; } } else { - logger.warn("Can't get net page"); + throw new IOException("No more pages"); } - return null; } @Override @@ -306,4 +315,20 @@ public class InstagramRipper extends AbstractHTMLRipper { addURLToDownload(url); } + private boolean hasImage(Document doc) { + try { + JSONObject json = getJSONFromPage(doc); + JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); + logger.info(datas.length()); + if (datas.length() == 0) { + return false; + } + return true; + } catch (IOException e) { + return false; + } + + } + } From a4880ecf804fb91a0c559e38351556cb0641e20f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 10 Mar 2018 12:33:31 -0500 Subject: [PATCH 19/19] 1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 9859442c..547d26e9 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.22 + 1.7.23 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 33c2848c..bc2acca8 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.22", + "latestVersion": "1.7.23", "changeList": [ + "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", "1.7.21: Fixed last seen feature", "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java 
b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index e198d672..318eec4c 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.22"; + private static final String DEFAULT_VERSION = "1.7.23"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar";