Merge pull request #5 from RipMeApp/master

Sync.
2018-03-12 10:16:58 -05:00 · 2018-03-12 10:16:58 -05:00 · 41eb9fd09b
commit 41eb9fd09b
parent d9423c1fae a4880ecf80
12 changed files with 132 additions and 98 deletions
--- a/build.sh
+++ b/build.sh
@ -1 +1,2 @@
 #!/usr/bin/env bash
 mvn clean compile assembly:single
--- a/docs/options.md
+++ b/docs/options.md
@ -1,40 +0,0 @@
 file.overwrite     | bool | If true ripme will overwrite existing files rather than skip them
 clipboard.autorip  | bool | If true ripme will try to download any links in the clipboard
 error.skip404      | bool | Don't retry on 404 errors
 download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the file was downloaded
 auto.update        | bool | If true ripme will auto-update every time it's started
 play.sound         | bool | If true ripme will play a sound every time a rip finishes
 download.show_popup| bool | TODO figure out what this is for
 log.save           | bool | If true ripme will save its logs
 urls_only.save     | bool | If true ripme will save all urls to a text file and download no files
 album_titles.save  | bool | Currently does nothing
 prefer.mp4         | bool | Prefer mp4 when downloading a video that has more than 1 format
 download.timeout   | int  | File download timeout (in milliseconds)
 page.timeout       | int  | Page download timeout (in milliseconds)
 download.max_size  | int  | Maximum size of downloaded files in bytes
 threads.size       | int  | The number of threads to use
 twitter.auth       | String | Twitter API key (Base64'd)
 tumblr.auth        | String | Tumblr API key
 log.level          | String | The debug log level (Example: Log level: Debug)
 gw.api             | String | TODO figure out what this is for
 twitter.max_requests | int | TODO figure out what this is for
--- a/pom.xml
+++ b/pom.xml
@ -4,7 +4,7 @@
  <groupId>com.rarchives.ripme</groupId>
  <artifactId>ripme</artifactId>
  <packaging>jar</packaging>
-  <version>1.7.19</version>
+  <version>1.7.23</version>
  <name>ripme</name>
  <url>http://rip.rarchives.com</url>
  <properties>
--- a/ripme.json
+++ b/ripme.json
@ -1,6 +1,10 @@
 {
-    "latestVersion": "1.7.19",
+    "latestVersion": "1.7.23",
    "changeList": [
        "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature",
        "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram",
        "1.7.21: Fixed last seen feature",
        "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test",
        "1.7.19: imgurRipper no longer tries to add ?1 to file names",
        "1.7.18: AlbumRipper now removes bad chars from file names",
        "1.7.17: Fixed hentai.cafe autorip from clipboard",
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
        Document doc = getFirstPage();
        while (doc != null) {
            if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) {
                sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
                break;
            }
            List<String> imageURLs = getURLsFromPage(doc);
            // If hasASAPRipping() returns true then the ripper will handle downloading the files
            // if not it's done in the following block of code
            if (!hasASAPRipping()) {
                // Remove all but 1 image
                if (isThisATest()) {
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@ -44,7 +44,8 @@ public abstract class AbstractRipper
    public abstract String getHost();
    public abstract String getGID(URL url) throws MalformedURLException;
    public boolean hasASAPRipping() { return false; }
-
+    // Every time addURLToDownload skips an already downloaded url this increases by 1
    public int alreadyDownloadedUrls = 0;
    private boolean shouldStop = false;
    private boolean thisIsATest = false;
@ -60,7 +61,13 @@ public abstract class AbstractRipper
        }
    }
    /**
     * Adds a URL to the url history file
     * @param downloadedURL URL to add to the url history file
     */
    private void writeDownloadedURL(String downloadedURL) throws IOException {
        downloadedURL = normalizeUrl(downloadedURL);
        BufferedWriter bw = null;
        FileWriter fw = null;
        try {
@ -86,6 +93,15 @@ public abstract class AbstractRipper
        }
    }
    /**
     * Normalizes a URL into the form used when reading from and writing to the url history file.
     * This implementation returns the URL unchanged.
     * @param url URL to normalize
     * @return the normalized URL (here, the same string that was passed in)
     */
    public String normalizeUrl(String url) {
        return url;
    }
    /**
     * Checks to see if Ripme has already downloaded a URL
     * @param url URL to check if downloaded
@ -95,6 +111,7 @@ public abstract class AbstractRipper
     */
    private boolean hasDownloadedURL(String url) {
        File file = new File(URLHistoryFile);
        url = normalizeUrl(url);
        try {
            Scanner scanner = new Scanner(file);
            while (scanner.hasNextLine()) {
@ -194,9 +211,11 @@ public abstract class AbstractRipper
     *      False if failed to download
     */
    protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
        // Don't re-add the url if it was downloaded in a previous rip
        if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
            if (hasDownloadedURL(url.toExternalForm())) {
                sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
                alreadyDownloadedUrls += 1;
                return false;
            }
        }
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@ -51,7 +51,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
    @Override
    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$");
+        Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$");
        Matcher m = p.matcher(url.toExternalForm());
        if (!m.matches()) {
            throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
@ -93,7 +93,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
        Elements pageImages = page.getElementsByClass("c-tile");
        for (Element thumb : pageImages) {
            // If true this link is a sub album
-            if (thumb.attr("href").contains("/comix/album/")) {
+            if (thumb.attr("href").contains("/comics/album/")) {
                String subUrl = "https://www.8muses.com" + thumb.attr("href");
                try {
                    logger.info("Retrieving " + subUrl);
@ -106,7 +106,8 @@ public class EightmusesRipper extends AbstractHTMLRipper {
                    logger.warn("Error while loading subalbum " + subUrl, e);
                }
-            } else if (thumb.attr("href").contains("/comix/picture/")) {
+            } else if (thumb.attr("href").contains("/comics/picture/")) {
                logger.info("This page is a album");
                logger.info("Ripping image");
                if (super.isStopped()) break;
                // Find thumbnail image source
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
 public class InstagramRipper extends AbstractHTMLRipper {
    String nextPageID = "";
    private String userID;
@ -52,6 +53,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
        return san_url;
    }
    /**
     * Normalizes an Instagram URL by collapsing every path segment that consists of
     * exactly eight uppercase letters or digits (the "date sig") into a single slash.
     * @param url URL to normalize
     * @return the URL with each such segment removed
     */
    @Override
    public String normalizeUrl(String url) {
        // Remove the date sig from the url
        java.util.regex.Matcher sigMatcher =
                java.util.regex.Pattern.compile("/[A-Z0-9]{8}/").matcher(url);
        return sigMatcher.replaceAll("/");
    }
    private List<String> getPostsFromSinglePage(Document Doc) {
        List<String> imageURLs = new ArrayList<>();
        JSONArray datas;
@ -192,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
    @Override
    public List<String> getURLsFromPage(Document doc) {
        String nextPageID = "";
        List<String> imageURLs = new ArrayList<>();
        JSONObject json = new JSONObject();
        try {
@ -255,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
                    break;
                }
            }
            // Rip the next page
            if (!nextPageID.equals("") && !isThisATest()) {
                if (url.toExternalForm().contains("/tags/")) {
                    try {
                        // Sleep for a while to avoid a ban
                        sleep(2500);
                        if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
                            getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
                        } else {
                            getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
                        }
                    } catch (IOException e) {
                        return imageURLs;
                    }
                }
                try {
                    // Sleep for a while to avoid a ban
                    sleep(2500);
                    getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
                } catch (IOException e) {
                    return imageURLs;
                }
            } else {
                logger.warn("Can't get net page");
            }
        } else { // We're ripping from a single page
            logger.info("Ripping from single page");
            imageURLs = getPostsFromSinglePage(doc);
@ -290,9 +270,65 @@ public class InstagramRipper extends AbstractHTMLRipper {
        return imageURLs;
    }
    /**
     * Loads the page identified by the current {@code nextPageID}.
     * The {@code doc} argument is not consulted; the target is built from {@code url}
     * (tag rips) or {@code userID} (all other rips) plus {@code nextPageID}.
     *
     * @param doc the page that was just processed (unused)
     * @return the next page, or {@code null} when a non-tag page could not be fetched
     *         or contains no images
     * @throws IOException when there is no next page id, when this is a test run, or when
     *         a tag page could not be fetched or contains no images
     */
    @Override
    public Document getNextPage(Document doc) throws IOException {
        if (nextPageID.equals("") || isThisATest()) {
            throw new IOException("No more pages");
        }

        String currentUrl = url.toExternalForm();
        if (currentUrl.contains("/tags/")) {
            Document nextTagPage;
            try {
                // Sleep for a while to avoid a ban
                sleep(2500);
                // Make sure exactly one slash separates the path from the query string
                String separator = currentUrl.endsWith("/") ? "" : "/";
                nextTagPage = Http.url(currentUrl + separator + "?max_id=" + nextPageID).get();
            } catch (IOException e) {
                // Keep the message callers see, but retain the underlying failure as the cause
                throw new IOException("No more pages", e);
            }
            if (!hasImage(nextTagPage)) {
                throw new IOException("No more pages");
            }
            return nextTagPage;
        }

        Document nextUserPage;
        try {
            // Sleep for a while to avoid a ban
            sleep(2500);
            nextUserPage = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
        } catch (IOException e) {
            // A failed fetch ends paging for this branch; record why instead of dropping it
            logger.warn("Error while loading next page", e);
            return null;
        }
        // An image-less page also ends paging, signalled with null in this branch
        return hasImage(nextUserPage) ? nextUserPage : null;
    }
    /**
     * Passes the given URL straight to {@code addURLToDownload(url)}.
     * @param url URL to hand to addURLToDownload
     * @param index index supplied by the caller; ignored by this implementation
     */
    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url);
    }
    /**
     * Reports whether the page's embedded JSON lists at least one media node.
     * Reads entry_data -> ProfilePage[0] -> user -> media -> nodes and logs the node count.
     * @param doc page to inspect
     * @return true when the nodes array is non-empty; false when it is empty or when
     *         an IOException is raised while reading the JSON
     */
    private boolean hasImage(Document doc) {
        try {
            JSONObject firstProfile = getJSONFromPage(doc)
                    .getJSONObject("entry_data")
                    .getJSONArray("ProfilePage")
                    .getJSONObject(0);
            JSONArray nodes = firstProfile
                    .getJSONObject("user")
                    .getJSONObject("media")
                    .getJSONArray("nodes");
            int nodeCount = nodes.length();
            logger.info(nodeCount);
            return nodeCount != 0;
        } catch (IOException e) {
            return false;
        }
    }
 }
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java
@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper {
    public void rip() throws IOException {
        logger.info("    Retrieving " + this.url);
        Document doc = Http.url(this.url).get();
-        Elements embeds = doc.select("embed");
+        Elements scripts = doc.select("script");
-        if (embeds.size() == 0) {
+        for (Element e : scripts) {
-            throw new IOException("Could not find Embed code at " + url);
+            if (e.html().contains("html5player.setVideoUrlHigh")) {
-        }
+                logger.info("Found the right script");
-        Element embed = embeds.get(0);
+                String[] lines = e.html().split("\n");
-        String vars = embed.attr("flashvars");
+                for (String line: lines) {
-        for (String var : vars.split("&")) {
+                    if (line.contains("html5player.setVideoUrlHigh")) {
-            if (var.startsWith("flv_url=")) {
+                        String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
-                String vidUrl = var.substring("flv_url=".length());
+                        addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url));
-                vidUrl = URLDecoder.decode(vidUrl, "UTF-8");
+                        waitForThreads();
-                addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
+                        return;
                    }
                }
            }
        }
-        waitForThreads();
+        throw new IOException("Unable to find video url at " + this.url.toExternalForm());
    }
 }
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
 public class UpdateUtils {
    private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.7.19";
+    private static final String DEFAULT_VERSION = "1.7.23";
    private static final String REPO_NAME = "ripmeapp/ripme";
    private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
    private static final String mainFileName = "ripme.jar";
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
@ -10,6 +10,9 @@ public class EightmusesRipperTest extends RippersTest {
        // A simple image album
        EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
        testRipper(ripper);
        // Test the new url format
        ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore"));
        testRipper(ripper);
        // Test pages with subalbums
        ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor"));
        testRipper(ripper);
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java
@ -1,13 +1,15 @@
-package com.rarchives.ripme.tst.ripper.rippers;
+//package com.rarchives.ripme.tst.ripper.rippers;
 //
 //import java.io.IOException;
 //import java.net.URL;
 //
 //import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
 //
 //public class FivehundredpxRipperTest extends RippersTest {
 //    public void test500pxAlbum() throws IOException {
 //        FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
 //        testRipper(ripper);
 //    }
 //}
-import java.io.IOException;
+// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438
 import java.net.URL;
 import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper;
 public class FivehundredpxRipperTest extends RippersTest {
    public void test500pxAlbum() throws IOException {
        FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman"));
        testRipper(ripper);
    }
 }
`@ -1 +1,2 @@`
		`#!/usr/bin/env bash`
	`mvn clean compile assembly:single`	`mvn clean compile assembly:single`