diff --git a/build.sh b/build.sh
index 7e7c3221..a3ec0242 100755
--- a/build.sh
+++ b/build.sh
@@ -1 +1,2 @@
+#!/usr/bin/env bash
 mvn clean compile assembly:single
\ No newline at end of file
diff --git a/docs/options.md b/docs/options.md
deleted file mode 100644
index d972561b..00000000
--- a/docs/options.md
+++ /dev/null
@@ -1,40 +0,0 @@
-file.overwrite | bool | If true ripme will overwrite existing files rather than skip them
-
-clipboard.autorip | bool | If true ripme will try to download any links in the clip board
-
-error.skip404 | bool | Don't retry on 404 errors
-
-download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the file was download
-
-auto.update | bool | If true ripme will auto-update every time it's started
-
-play.sound | bool | If true ripme will play a sound every time a rip finishes
-
-download.show_popup| bool | TODO figure out what this is for
-
-log.save | bool | If true ripme will save it's logs
-
-urls_only.save | bool | If true ripme will save all urls to a text file and download no files
-
-album_titles.save | bool | Currently does nothing
-
-prefer.mp4 | bool | Prefer mp4 when downloading a video that has more than 1 format
-
-download.timeout | int | File download timeout (in milliseconds)
-
-page.timeout | int | Page download timeout (in milliseconds)
-
-download.max_size | int | Maximum size of downloaded files in bytes
-
-threads.size | int | The number of threads to use
-
-twitter.auth | String | Twitter API key (Base64'd)
-
-tumblr.auth | String | Tumblr API key
-
-log.level | String | The debug log level (Example: Log level: Debug)
-
-gw.api | String | TODO figure out what this is for
-
-twitter.max_requests | int | TODO figure out what this is for
-
diff --git a/pom.xml b/pom.xml
index 476068b2..547d26e9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
     <groupId>com.rarchives.ripme</groupId>
     <artifactId>ripme</artifactId>
     <packaging>jar</packaging>
-    <version>1.7.19</version>
+    <version>1.7.23</version>
     <name>ripme</name>
     <url>http://rip.rarchives.com</url>
diff --git a/ripme.json b/ripme.json
index de0dc0a6..bc2acca8 100644
--- a/ripme.json
+++ b/ripme.json
@@ -1,6 +1,10 @@
 {
-  "latestVersion": "1.7.19",
+  "latestVersion": "1.7.23",
   "changeList": [
+    "1.7.23: Fixed xvideos ripper; InstagramRipper now works with the last seen feature",
+    "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram",
+    "1.7.21: Fixed last seen feature",
+    "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test",
     "1.7.19: imgurRipper no longer tries to add ?1 to file names",
     "1.7.18: AlbumRipper now removes bad chars from file names",
     "1.7.17: Fixed hentai.cafe autorip from clipboard",
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
index e71eb389..e0fd3548 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         Document doc = getFirstPage();
 
         while (doc != null) {
+            if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
+                sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images; ending rip");
+                break;
+            }
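+            // Collect every image URL on the current page; URLs already recorded in the url
+            // history file are skipped by addURLToDownload, which also increments alreadyDownloadedUrls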
             List<String> imageURLs = getURLsFromPage(doc);
+            // If hasASAPRipping() returns true, the ripper handles downloading the files itself;
+            // if not, it is done in the following block of code
             if (!hasASAPRipping()) {
                 // Remove all but 1 image
                 if (isThisATest()) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index edddea78..ff6b4102 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -44,7 +44,8 @@ public abstract class AbstractRipper
     public abstract String getHost();
     public abstract String getGID(URL url) throws MalformedURLException;
     public boolean hasASAPRipping() { return false; }
-
+    // Every time addURLToDownload skips an already-downloaded URL this increases by 1
+    public int alreadyDownloadedUrls = 0;
     private boolean shouldStop = false;
     private boolean thisIsATest = false;
 
@@ -60,7 +61,13 @@
         }
     }
 
+
+    /**
+     * Adds a URL to the url history file
+     * @param downloadedURL URL to add to the history file
+     */
     private void writeDownloadedURL(String downloadedURL) throws IOException {
+        downloadedURL = normalizeUrl(downloadedURL);
         BufferedWriter bw = null;
         FileWriter fw = null;
         try {
@@ -85,6 +92,15 @@
             }
         }
     }
+
+
+    /**
+     * Normalize a URL
+     * @param url URL to normalize
+     */
+    public String normalizeUrl(String url) {
+        return url;
+    }
 
     /**
      * Checks to see if Ripme has already downloaded a URL
@@ -95,6 +111,7 @@
      */
     private boolean hasDownloadedURL(String url) {
         File file = new File(URLHistoryFile);
+        url = normalizeUrl(url);
         try {
             Scanner scanner = new Scanner(file);
             while (scanner.hasNextLine()) {
@@ -194,9 +211,11 @@
      *      False if failed to download
      */
     protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
+        // Don't re-add the url if it was downloaded in a previous rip
        if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
            if (hasDownloadedURL(url.toExternalForm())) {
                sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
+                alreadyDownloadedUrls += 1;
                return false;
            }
        }
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
index b7b5658f..43873cf9 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
+        Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$");
         Matcher m = p.matcher(url.toExternalForm());
         if (!m.matches()) {
             throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
         }
@@ -93,7 +93,7 @@
         Elements pageImages = page.getElementsByClass("c-tile");
         for (Element thumb : pageImages) {
             // If true this link is a sub album
-            if (thumb.attr("href").contains("/comix/album/")) {
+            if (thumb.attr("href").contains("/comics/album/")) {
                 String subUrl = "https://www.8muses.com" + thumb.attr("href");
                 try {
                     logger.info("Retrieving " + subUrl);
@@ -106,7 +106,8 @@
                     logger.warn("Error while loading subalbum " + subUrl, e);
                 }
-            } else if (thumb.attr("href").contains("/comix/picture/")) {
+            } else if (thumb.attr("href").contains("/comics/picture/")) {
+                logger.info("This page is an album");
                 logger.info("Ripping image");
                 if (super.isStopped()) break;
                 // Find thumbnail image source
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index 076fcfc6..ab44edfd 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
 
 public class InstagramRipper extends AbstractHTMLRipper {
+    String nextPageID = "";
 
     private String userID;
@@ -52,6 +53,12 @@
         return san_url;
     }
 
+    @Override
+    public String normalizeUrl(String url) {
+        // Remove the date sig from the url
+        return url.replaceAll("/[A-Z0-9]{8}/", "/");
+    }
+
     private List<String> getPostsFromSinglePage(Document Doc) {
         List<String> imageURLs = new ArrayList<>();
         JSONArray datas;
@@ -192,7 +199,6 @@
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        String nextPageID = "";
         List<String> imageURLs = new ArrayList<>();
         JSONObject json = new JSONObject();
         try {
@@ -255,33 +261,7 @@
                     break;
                 }
             }
-            // Rip the next page
-            if (!nextPageID.equals("") && !isThisATest()) {
-                if (url.toExternalForm().contains("/tags/")) {
-                    try {
-                        // Sleep for a while to avoid a ban
-                        sleep(2500);
-                        if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
-                            getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
-                        } else {
-                            getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
-                        }
-                    } catch (IOException e) {
-                        return imageURLs;
-                    }
-
-                }
-                try {
-                    // Sleep for a while to avoid a ban
-                    sleep(2500);
-                    getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
-                } catch (IOException e) {
-                    return imageURLs;
-                }
-            } else {
-                logger.warn("Can't get net page");
-            }
         } else { // We're ripping from a single page
             logger.info("Ripping from single page");
             imageURLs = getPostsFromSinglePage(doc);
         }
@@ -290,9 +270,65 @@
         return imageURLs;
     }
 
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        Document toreturn;
+        if (!nextPageID.equals("") && !isThisATest()) {
+            if (url.toExternalForm().contains("/tags/")) {
+                try {
+                    // Sleep for a while to avoid a ban
+                    sleep(2500);
+                    if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
+                        toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get();
+                    } else {
+                        toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get();
+                    }
+                    logger.info(toreturn.html());
+                    if (!hasImage(toreturn)) {
+                        throw new IOException("No more pages");
+                    }
+                    return toreturn;
+
+                } catch (IOException e) {
+                    throw new IOException("No more pages");
+                }
+
+            }
+            try {
+                // Sleep for a while to avoid a ban
+                sleep(2500);
+                toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
+                if (!hasImage(toreturn)) {
+                    throw new IOException("No more pages");
+                }
+                return toreturn;
+            } catch (IOException e) {
+                return null;
+            }
+        } else {
+            throw new IOException("No more pages");
+        }
+    }
+
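+    // downloadURL() below is called once for each URL returned by getURLsFromPage();
+    // it simply queues the file for download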
     @Override
     public void downloadURL(URL url, int index) {
         addURLToDownload(url);
     }
 
+    private boolean hasImage(Document doc) {
+        try {
+            JSONObject json = getJSONFromPage(doc);
+            JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
+            JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
+            logger.info(datas.length());
+            if (datas.length() == 0) {
+                return false;
+            }
+            return true;
+        } catch (IOException e) {
+            return false;
+        }
+
+    }
+
 }
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
index 06841ce9..4f2bac97 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
@@ -57,19 +57,21 @@
     public void rip() throws IOException {
         logger.info(" Retrieving " + this.url);
         Document doc = Http.url(this.url).get();
-        Elements embeds = doc.select("embed");
-        if (embeds.size() == 0) {
-            throw new IOException("Could not find Embed code at " + url);
-        }
-        Element embed = embeds.get(0);
-        String vars = embed.attr("flashvars");
-        for (String var : vars.split("&")) {
-            if (var.startsWith("flv_url=")) {
-                String vidUrl = var.substring("flv_url=".length());
-                vidUrl = URLDecoder.decode(vidUrl, "UTF-8");
-                addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
+        Elements scripts = doc.select("script");
+        for (Element e : scripts) {
+            if (e.html().contains("html5player.setVideoUrlHigh")) {
+                logger.info("Found the right script");
+                String[] lines = e.html().split("\n");
+                for (String line: lines) {
+                    if (line.contains("html5player.setVideoUrlHigh")) {
+                        String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
+                        addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url));
+                        waitForThreads();
+                        return;
+                    }
+                }
             }
         }
-        waitForThreads();
+        throw new IOException("Unable to find video url at " + this.url.toExternalForm());
     }
 }
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
index 37ce6cfe..318eec4c 100644
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
 
 public class UpdateUtils {
     private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.7.19";
+    private static final String DEFAULT_VERSION = "1.7.23";
     private static final String REPO_NAME = "ripmeapp/ripme";
     private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
     private static final String mainFileName = "ripme.jar";
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
index 469c330a..4a6c3539 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
@@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest {
         // A simple image album
URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); testRipper(ripper); + // Test the new url format + ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); + testRipper(ripper); // Test pages with subalbums ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor")); testRipper(ripper); diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java index 538d493c..214220b8 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java @@ -1,13 +1,15 @@ -package com.rarchives.ripme.tst.ripper.rippers; +//package com.rarchives.ripme.tst.ripper.rippers; +// +//import java.io.IOException; +//import java.net.URL; +// +//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper; +// +//public class FivehundredpxRipperTest extends RippersTest { +// public void test500pxAlbum() throws IOException { +// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman")); +// testRipper(ripper); +// } +//} -import java.io.IOException; -import java.net.URL; - -import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper; - -public class FivehundredpxRipperTest extends RippersTest { - public void test500pxAlbum() throws IOException { - FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman")); - testRipper(ripper); - } -} +// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438 \ No newline at end of file