diff --git a/build.sh b/build.sh
index 7e7c3221..a3ec0242 100755
--- a/build.sh
+++ b/build.sh
@@ -1 +1,2 @@
+#!/usr/bin/env bash
mvn clean compile assembly:single
\ No newline at end of file
diff --git a/docs/options.md b/docs/options.md
deleted file mode 100644
index d972561b..00000000
--- a/docs/options.md
+++ /dev/null
@@ -1,40 +0,0 @@
-file.overwrite | bool | If true ripme will overwrite existing files rather than skip them
-
-clipboard.autorip | bool | If true ripme will try to download any links in the clip board
-
-error.skip404 | bool | Don't retry on 404 errors
-
-download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the file was download
-
-auto.update | bool | If true ripme will auto-update every time it's started
-
-play.sound | bool | If true ripme will play a sound every time a rip finishes
-
-download.show_popup| bool | TODO figure out what this is for
-
-log.save | bool | If true ripme will save it's logs
-
-urls_only.save | bool | If true ripme will save all urls to a text file and download no files
-
-album_titles.save | bool | Currently does nothing
-
-prefer.mp4 | bool | Prefer mp4 when downloading a video that has more than 1 format
-
-download.timeout | int | File download timeout (in milliseconds)
-
-page.timeout | int | Page download timeout (in milliseconds)
-
-download.max_size | int | Maximum size of downloaded files in bytes
-
-threads.size | int | The number of threads to use
-
-twitter.auth | String | Twitter API key (Base64'd)
-
-tumblr.auth | String | Tumblr API key
-
-log.level | String | The debug log level (Example: Log level: Debug)
-
-gw.api | String | TODO figure out what this is for
-
-twitter.max_requests | int | TODO figure out what this is for
-
diff --git a/pom.xml b/pom.xml
index 476068b2..547d26e9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
com.rarchives.ripme
ripme
jar
- 1.7.19
+ 1.7.23
ripme
http://rip.rarchives.com
diff --git a/ripme.json b/ripme.json
index de0dc0a6..bc2acca8 100644
--- a/ripme.json
+++ b/ripme.json
@@ -1,6 +1,10 @@
{
- "latestVersion": "1.7.19",
+ "latestVersion": "1.7.23",
"changeList": [
+ "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature",
+ "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram",
+ "1.7.21: Fixed last seen feature",
+ "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test",
"1.7.19: imgurRipper no longer tries to add ?1 to file names",
"1.7.18: AlbumRipper now removes bad chars from file names",
"1.7.17: Fixed hentai.cafe autorip from clipboard",
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
index e71eb389..e0fd3548 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
Document doc = getFirstPage();
while (doc != null) {
+ if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
+ sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
+ break;
+ }
List imageURLs = getURLsFromPage(doc);
+ // If hasASAPRipping() returns true then the ripper will handle downloading the files
+ // if not it's done in the following block of code
if (!hasASAPRipping()) {
// Remove all but 1 image
if (isThisATest()) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index edddea78..ff6b4102 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -44,7 +44,8 @@ public abstract class AbstractRipper
public abstract String getHost();
public abstract String getGID(URL url) throws MalformedURLException;
public boolean hasASAPRipping() { return false; }
-
+ // Everytime addUrlToDownload skips a already downloaded url this increases by 1
+ public int alreadyDownloadedUrls = 0;
private boolean shouldStop = false;
private boolean thisIsATest = false;
@@ -60,7 +61,13 @@ public abstract class AbstractRipper
}
}
+
+ /**
+ * Adds a URL to the url history file
+ * @param downloadedURL URL to check if downloaded
+ */
private void writeDownloadedURL(String downloadedURL) throws IOException {
+ downloadedURL = normalizeUrl(downloadedURL);
BufferedWriter bw = null;
FileWriter fw = null;
try {
@@ -85,6 +92,15 @@ public abstract class AbstractRipper
}
}
}
+
+
+ /**
+ * Normalize a URL
+ * @param url URL to check if downloaded
+ */
+ public String normalizeUrl(String url) {
+ return url;
+ }
/**
* Checks to see if Ripme has already downloaded a URL
@@ -95,6 +111,7 @@ public abstract class AbstractRipper
*/
private boolean hasDownloadedURL(String url) {
File file = new File(URLHistoryFile);
+ url = normalizeUrl(url);
try {
Scanner scanner = new Scanner(file);
while (scanner.hasNextLine()) {
@@ -194,9 +211,11 @@ public abstract class AbstractRipper
* False if failed to download
*/
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) {
+ // Don't re-add the url if it was downloaded in a previous rip
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
if (hasDownloadedURL(url.toExternalForm())) {
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
+ alreadyDownloadedUrls += 1;
return false;
}
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
index b7b5658f..43873cf9 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
@Override
public String getGID(URL url) throws MalformedURLException {
- Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
+ Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) {
throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
@@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
Elements pageImages = page.getElementsByClass("c-tile");
for (Element thumb : pageImages) {
// If true this link is a sub album
- if (thumb.attr("href").contains("/comix/album/")) {
+ if (thumb.attr("href").contains("/comics/album/")) {
String subUrl = "https://www.8muses.com" + thumb.attr("href");
try {
logger.info("Retrieving " + subUrl);
@@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
logger.warn("Error while loading subalbum " + subUrl, e);
}
- } else if (thumb.attr("href").contains("/comix/picture/")) {
+ } else if (thumb.attr("href").contains("/comics/picture/")) {
+ logger.info("This page is a album");
logger.info("Ripping image");
if (super.isStopped()) break;
// Find thumbnail image source
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index 076fcfc6..ab44edfd 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
public class InstagramRipper extends AbstractHTMLRipper {
+ String nextPageID = "";
private String userID;
@@ -52,6 +53,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
return san_url;
}
+ @Override
+ public String normalizeUrl(String url) {
+ // Remove the date sig from the url
+ return url.replaceAll("/[A-Z0-9]{8}/", "/");
+ }
+
private List getPostsFromSinglePage(Document Doc) {
List imageURLs = new ArrayList<>();
JSONArray datas;
@@ -192,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
@Override
public List getURLsFromPage(Document doc) {
- String nextPageID = "";
List imageURLs = new ArrayList<>();
JSONObject json = new JSONObject();
try {
@@ -255,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
break;
}
}
- // Rip the next page
- if (!nextPageID.equals("") && !isThisATest()) {
- if (url.toExternalForm().contains("/tags/")) {
- try {
- // Sleep for a while to avoid a ban
- sleep(2500);
- if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
- getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
- } else {
- getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
- }
- } catch (IOException e) {
- return imageURLs;
- }
-
- }
- try {
- // Sleep for a while to avoid a ban
- sleep(2500);
- getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
- } catch (IOException e) {
- return imageURLs;
- }
- } else {
- logger.warn("Can't get net page");
- }
} else { // We're ripping from a single page
logger.info("Ripping from single page");
imageURLs = getPostsFromSinglePage(doc);
@@ -290,9 +270,65 @@ public class InstagramRipper extends AbstractHTMLRipper {
return imageURLs;
}
+ @Override
+ public Document getNextPage(Document doc) throws IOException {
+ Document toreturn;
+ if (!nextPageID.equals("") && !isThisATest()) {
+ if (url.toExternalForm().contains("/tags/")) {
+ try {
+ // Sleep for a while to avoid a ban
+ sleep(2500);
+ if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
+ toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get();
+ } else {
+ toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get();
+ }
+ logger.info(toreturn.html());
+ if (!hasImage(toreturn)) {
+ throw new IOException("No more pages");
+ }
+ return toreturn;
+
+ } catch (IOException e) {
+ throw new IOException("No more pages");
+ }
+
+ }
+ try {
+ // Sleep for a while to avoid a ban
+ sleep(2500);
+ toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
+ if (!hasImage(toreturn)) {
+ throw new IOException("No more pages");
+ }
+ return toreturn;
+ } catch (IOException e) {
+ return null;
+ }
+ } else {
+ throw new IOException("No more pages");
+ }
+ }
+
@Override
public void downloadURL(URL url, int index) {
addURLToDownload(url);
}
+ private boolean hasImage(Document doc) {
+ try {
+ JSONObject json = getJSONFromPage(doc);
+ JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
+ JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
+ logger.info(datas.length());
+ if (datas.length() == 0) {
+ return false;
+ }
+ return true;
+ } catch (IOException e) {
+ return false;
+ }
+
+ }
+
}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
index 06841ce9..4f2bac97 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
@@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper {
public void rip() throws IOException {
logger.info(" Retrieving " + this.url);
Document doc = Http.url(this.url).get();
- Elements embeds = doc.select("embed");
- if (embeds.size() == 0) {
- throw new IOException("Could not find Embed code at " + url);
- }
- Element embed = embeds.get(0);
- String vars = embed.attr("flashvars");
- for (String var : vars.split("&")) {
- if (var.startsWith("flv_url=")) {
- String vidUrl = var.substring("flv_url=".length());
- vidUrl = URLDecoder.decode(vidUrl, "UTF-8");
- addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
+ Elements scripts = doc.select("script");
+ for (Element e : scripts) {
+ if (e.html().contains("html5player.setVideoUrlHigh")) {
+ logger.info("Found the right script");
+ String[] lines = e.html().split("\n");
+ for (String line: lines) {
+ if (line.contains("html5player.setVideoUrlHigh")) {
+ String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
+ addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url));
+ waitForThreads();
+ return;
+ }
+ }
}
}
- waitForThreads();
+ throw new IOException("Unable to find video url at " + this.url.toExternalForm());
}
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
index 37ce6cfe..318eec4c 100644
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
- private static final String DEFAULT_VERSION = "1.7.19";
+ private static final String DEFAULT_VERSION = "1.7.23";
private static final String REPO_NAME = "ripmeapp/ripme";
private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
private static final String mainFileName = "ripme.jar";
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
index 469c330a..4a6c3539 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
@@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest {
// A simple image album
EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
testRipper(ripper);
+ // Test the new url format
+ ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
+ testRipper(ripper);
// Test pages with subalbums
ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
testRipper(ripper);
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java
index 538d493c..214220b8 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java
@@ -1,13 +1,15 @@
-package com.rarchives.ripme.tst.ripper.rippers;
+//package com.rarchives.ripme.tst.ripper.rippers;
+//
+//import java.io.IOException;
+//import java.net.URL;
+//
+//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
+//
+//public class FivehundredpxRipperTest extends RippersTest {
+// public void test500pxAlbum() throws IOException {
+// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
+// testRipper(ripper);
+// }
+//}
-import java.io.IOException;
-import java.net.URL;
-
-import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
-
-public class FivehundredpxRipperTest extends RippersTest {
- public void test500pxAlbum() throws IOException {
- FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
- testRipper(ripper);
- }
-}
+// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438
\ No newline at end of file