diff --git a/README.md b/README.md index 390106c2..e56276c1 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,9 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/ * Quickly downloads all images in an online album (see supported sites below) * Easily re-rip albums to fetch new content +* Built-in updater +* Can rip images from Tumblr in the size they were uploaded in ([see here for how to enable](https://github.com/RipMeApp/ripme/wiki/Config-options#tumblrget_raw_image)) +* Skips already downloaded images by default ## [List of Supported Sites](https://github.com/ripmeapp/ripme/wiki/Supported-Sites) diff --git a/build.sh b/build.sh index 7e7c3221..a3ec0242 100755 --- a/build.sh +++ b/build.sh @@ -1 +1,2 @@ +#!/usr/bin/env bash mvn clean compile assembly:single \ No newline at end of file diff --git a/pom.xml b/pom.xml index 476068b2..6b7084c8 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.19 + 1.7.33 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index de0dc0a6..8dc6e2cf 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,20 @@ { - "latestVersion": "1.7.19", + "latestVersion": "1.7.33", "changeList": [ + "1.7.33: Instagram ripper no longer errors out when downloading from more than 1 page", + "1.7.32: Instagram ripper updated to use new endpoints", + "1.7.31: Instagram ripper no longer errors out when getting next page", + "1.7.30: Fixed usage of command-line on non-headless systems", + "1.7.29: Can now download single images from imgur; Improved handling of headless mode & OS-specific config; Added modelx ripper; Fixed eroshare ripper", + "1.7.28: IG ripper now uses display_url when downloading images; Reddit ripper now gets erome links; Hentaifoundry Ripper no longer errors out when there is no next page", + "1.7.27: IG ripper can now rip from tags; fixed json parsing issues", + "1.7.26: fixed instagram ripper", + "1.7.25: Fixed instagram ripper; Added an option to use 
short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", + "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", + "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", + "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", + "1.7.21: Fixed last seen feature", + "1.7.20: Fixed 8muses ripper; Added last seen feature; disabled 500px test", "1.7.19: imgurRipper no longer tries to add ?1 to file names", "1.7.18: AlbumRipper now removes bad chars from file names", "1.7.17: Fixed hentai.cafe autorip from clipboard", diff --git a/src/main/java/com/rarchives/ripme/App.java b/src/main/java/com/rarchives/ripme/App.java index 0bb5f3f6..67c44ab1 100644 --- a/src/main/java/com/rarchives/ripme/App.java +++ b/src/main/java/com/rarchives/ripme/App.java @@ -1,5 +1,6 @@ package com.rarchives.ripme; +import java.awt.*; import java.io.File; import java.io.IOException; import java.io.BufferedReader; @@ -18,6 +19,7 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; +import org.apache.commons.lang.SystemUtils; import org.apache.log4j.Logger; import com.rarchives.ripme.ripper.AbstractRipper; @@ -34,35 +36,34 @@ import com.rarchives.ripme.utils.Utils; */ public class App { - public static final Logger logger; + public static final Logger logger = Logger.getLogger(App.class); private static final History HISTORY = new History(); - static { - //initialize logger - Utils.configureLogger(); - logger = Logger.getLogger(App.class); - } - public static void main(String[] args) throws MalformedURLException { CommandLine cl = getArgs(args); + if (args.length > 0 && cl.hasOption('v')){ - logger.error(UpdateUtils.getThisJarVersion()); + logger.info(UpdateUtils.getThisJarVersion()); 
System.exit(0); } - System.setProperty("apple.laf.useScreenMenuBar", "true"); - System.setProperty("com.apple.mrj.application.apple.menu.about.name", "RipMe"); - logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion()); - - if (args.length > 0) { - // CLI Mode + if (GraphicsEnvironment.isHeadless() || args.length > 0) { handleArguments(args); } else { - // GUI Mode + if (SystemUtils.IS_OS_MAC_OSX) { + System.setProperty("apple.laf.useScreenMenuBar", "true"); + System.setProperty("com.apple.mrj.application.apple.menu.about.name", "RipMe"); + } + + Utils.configureLogger(); + + logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion()); + MainWindow mw = new MainWindow(); SwingUtilities.invokeLater(mw); } } + /** * Creates an abstract ripper and instructs it to rip. * @param url URL to be ripped @@ -80,20 +81,28 @@ public class App { */ private static void handleArguments(String[] args) { CommandLine cl = getArgs(args); - if (cl.hasOption('h')) { + + if (cl.hasOption('h') || args.length == 0) { HelpFormatter hf = new HelpFormatter(); hf.printHelp("java -jar ripme.jar [OPTIONS]", getOptions()); System.exit(0); } + + Utils.configureLogger(); + logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion()); + if (cl.hasOption('w')) { Utils.setConfigBoolean("file.overwrite", true); } + if (cl.hasOption('t')) { Utils.setConfigInteger("threads.size", Integer.parseInt(cl.getOptionValue('t'))); } + if (cl.hasOption('4')) { Utils.setConfigBoolean("errors.skip404", true); } + if (cl.hasOption('r')) { // Re-rip all via command-line List history = Utils.getConfigList("download.history"); @@ -115,6 +124,7 @@ public class App { // Exit System.exit(0); } + if (cl.hasOption('R')) { loadHistory(); if (HISTORY.toList().isEmpty()) { @@ -146,20 +156,25 @@ public class App { System.exit(-1); } } + if (cl.hasOption('d')) { Utils.setConfigBoolean("download.save_order", true); } + if (cl.hasOption('D')) { Utils.setConfigBoolean("download.save_order", false); } + 
if ((cl.hasOption('d'))&&(cl.hasOption('D'))) { logger.error("\nCannot specify '-d' and '-D' simultaneously"); System.exit(-1); } + if (cl.hasOption('l')) { // change the default rips directory Utils.setConfigString("rips.directory", cl.getOptionValue('l')); } + if (cl.hasOption('f')) { String filename = cl.getOptionValue('f'); try { @@ -175,6 +190,7 @@ public class App { logger.error("[!] Failed reading file containing list of URLs. Cannot continue."); } } + if (cl.hasOption('u')) { String url = cl.getOptionValue('u').trim(); ripURL(url, cl.hasOption("n")); diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index e71eb389..e0fd3548 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -68,7 +68,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { Document doc = getFirstPage(); while (doc != null) { + if (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()) { + sendUpdate(STATUS.DOWNLOAD_COMPLETE, "Already seen the last " + alreadyDownloadedUrls + " images ending rip"); + break; + } List imageURLs = getURLsFromPage(doc); + // If hasASAPRipping() returns true then the ripper will handle downloading the files + // if not it's done in the following block of code if (!hasASAPRipping()) { // Remove all but 1 image if (isThisATest()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index edddea78..6068ed18 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -44,7 +44,8 @@ public abstract class AbstractRipper public abstract String getHost(); public abstract String getGID(URL url) throws MalformedURLException; public boolean 
hasASAPRipping() { return false; } - + // Every time addURLToDownload skips an already downloaded URL this increases by 1 + public int alreadyDownloadedUrls = 0; private boolean shouldStop = false; private boolean thisIsATest = false; @@ -60,7 +61,13 @@ public abstract class AbstractRipper } } + + /** + * Adds a URL to the url history file + * @param downloadedURL URL to record in the history file (normalized before it is written) + */ private void writeDownloadedURL(String downloadedURL) throws IOException { + downloadedURL = normalizeUrl(downloadedURL); BufferedWriter bw = null; FileWriter fw = null; try { @@ -85,6 +92,15 @@ public abstract class AbstractRipper } } } + + + /** + * Normalize a URL; this default implementation returns the URL unchanged + * @param url URL to normalize + */ + public String normalizeUrl(String url) { + return url; + } /** * Checks to see if Ripme has already downloaded a URL @@ -95,6 +111,7 @@ public abstract class AbstractRipper */ private boolean hasDownloadedURL(String url) { File file = new File(URLHistoryFile); + url = normalizeUrl(url); try { Scanner scanner = new Scanner(file); while (scanner.hasNextLine()) { @@ -157,7 +174,7 @@ public abstract class AbstractRipper * URL of the file * @param saveAs * Path of the local file to save the content to. - * @return True on success, flase on failure. + * @return True on success, false on failure. */ public abstract boolean addURLToDownload(URL url, File saveAs); @@ -189,14 +206,18 @@ public abstract class AbstractRipper * The HTTP referrer to use while downloading this file. * @param cookies * The cookies to send to the server while downloading this file. 
+ * @param fileName + * The name that file will be written to * @return * True if downloaded successfully * False if failed to download */ - protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies, String fileName) { + // Don't re-add the url if it was downloaded in a previous rip if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) { if (hasDownloadedURL(url.toExternalForm())) { sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm()); + alreadyDownloadedUrls += 1; return false; } } @@ -206,9 +227,18 @@ public abstract class AbstractRipper logger.debug("Ripper has been stopped"); return false; } - logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies); - String saveAs = url.toExternalForm(); - saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies + ", fileName: " + fileName); + String saveAs; + if (fileName != null) { + saveAs = fileName; + // Get the extension of the file + String extension = url.toExternalForm().substring(url.toExternalForm().lastIndexOf(".") + 1); + saveAs = saveAs + "." + extension; + } else { + saveAs = url.toExternalForm(); + saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + } + if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } @@ -255,7 +285,11 @@ public abstract class AbstractRipper * @return True on success, flase on failure. 
*/ protected boolean addURLToDownload(URL url, String prefix, String subdirectory) { - return addURLToDownload(url, prefix, subdirectory, null, null); + return addURLToDownload(url, prefix, subdirectory, null, null, null); + } + + protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + return addURLToDownload(url, prefix, subdirectory, referrer, cookies, null); } /** @@ -271,6 +305,8 @@ public abstract class AbstractRipper // Use empty subdirectory return addURLToDownload(url, prefix, ""); } + + /** * Waits for downloading threads to complete. */ diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index b7b5658f..77ca9102 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.utils.Utils; import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -51,7 +52,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$"); + Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/(comix|comics)/album/([a-zA-Z0-9\\-_]+).*$"); Matcher m = p.matcher(url.toExternalForm()); if (!m.matches()) { throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url); @@ -93,7 +94,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { Elements pageImages = page.getElementsByClass("c-tile"); for (Element thumb : pageImages) { // If true this link is a sub album - if (thumb.attr("href").contains("/comix/album/")) { 
+ if (thumb.attr("href").contains("/comics/album/")) { String subUrl = "https://www.8muses.com" + thumb.attr("href"); try { logger.info("Retrieving " + subUrl); @@ -106,7 +107,8 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.warn("Error while loading subalbum " + subUrl, e); } - } else if (thumb.attr("href").contains("/comix/picture/")) { + } else if (thumb.attr("href").contains("/comics/picture/")) { + logger.info("This page is a album"); logger.info("Ripping image"); if (super.isStopped()) break; // Find thumbnail image source @@ -124,7 +126,11 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.info("Retrieving full-size image location from " + imageHref); image = getFullSizeImage(imageHref); URL imageUrl = new URL(image); - addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies); + if (Utils.getConfigBoolean("8muses.use_short_names", false)) { + addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, ""); + } else { + addURLToDownload(imageUrl, getPrefixLong(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies); + } // X is our page index x++; @@ -177,8 +183,11 @@ public class EightmusesRipper extends AbstractHTMLRipper { addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies); } - @Override - public String getPrefix(int index) { + public String getPrefixLong(int index) { return String.format("%03d_", index); } + + public String getPrefixShort(int index) { + return String.format("%03d", index); + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java index 30dcfd4f..e7019178 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java @@ -119,7 +119,7 @@ 
public class EroShareRipper extends AbstractHTMLRipper { for (Element img : imgs) { if (img.hasClass("album-image")) { String imageURL = img.attr("src"); - imageURL = "https:" + imageURL; + imageURL = imageURL; URLs.add(imageURL); } } @@ -129,7 +129,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add("https:" + videoURL); + URLs.add(videoURL); } } // Profile videos @@ -148,7 +148,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add("https:" + videoURL); + URLs.add(videoURL); } } } @@ -209,7 +209,6 @@ public class EroShareRipper extends AbstractHTMLRipper { for (Element img : imgs) { if (img.hasClass("album-image")) { String imageURL = img.attr("src"); - imageURL = "https:" + imageURL; URLs.add(new URL(imageURL)); } } @@ -219,7 +218,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add(new URL("https:" + videoURL)); + URLs.add(new URL(videoURL)); } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java new file mode 100644 index 00000000..522a9720 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FemjoyhunterRipper.java @@ -0,0 +1,64 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import 
com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class FemjoyhunterRipper extends AbstractHTMLRipper { + + public FemjoyhunterRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "femjoyhunter"; + } + + @Override + public String getDomain() { + return "femjoyhunter.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://www.femjoyhunter.com/([a-zA-Z0-9_-]+)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected femjoyhunter URL format: " + + "femjoyhunter.com/ID - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + for (Element el : doc.select("ul.gallery-b > li > a")) { + result.add(el.attr("href")); + } + return result; + } + + @Override + public void downloadURL(URL url, int index) { + + addURLToDownload(url, getPrefix(index), "", "https://a2h6m3w6.ssl.hwcdn.net/", null); + } +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java index b595d9e2..fe7937d3 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java @@ -36,6 +36,7 @@ public class ImgurRipper extends AlbumRipper { USER, USER_ALBUM, USER_IMAGES, + SINGLE_IMAGE, SERIES_OF_IMAGES, SUBREDDIT } @@ -155,34 +156,48 @@ public class ImgurRipper extends AlbumRipper { @Override public void rip() throws IOException { switch (albumType) { - case ALBUM: - // Fall-through - case USER_ALBUM: - logger.info("Album type is 
USER_ALBUM"); - // Don't call getAlbumTitle(this.url) with this - // as it seems to cause the album to be downloaded to a subdir. - ripAlbum(this.url); - break; - case SERIES_OF_IMAGES: - logger.info("Album type is SERIES_OF_IMAGES"); - ripAlbum(this.url); - break; - case USER: - logger.info("Album type is USER"); - ripUserAccount(url); - break; - case SUBREDDIT: - logger.info("Album type is SUBREDDIT"); - ripSubreddit(url); - break; - case USER_IMAGES: - logger.info("Album type is USER_IMAGES"); - ripUserImages(url); - break; + case ALBUM: + // Fall-through + case USER_ALBUM: + logger.info("Album type is USER_ALBUM"); + // Don't call getAlbumTitle(this.url) with this + // as it seems to cause the album to be downloaded to a subdir. + ripAlbum(this.url); + break; + case SERIES_OF_IMAGES: + logger.info("Album type is SERIES_OF_IMAGES"); + ripAlbum(this.url); + break; + case SINGLE_IMAGE: + logger.info("Album type is SINGLE_IMAGE"); + ripSingleImage(this.url); + break; + case USER: + logger.info("Album type is USER"); + ripUserAccount(url); + break; + case SUBREDDIT: + logger.info("Album type is SUBREDDIT"); + ripSubreddit(url); + break; + case USER_IMAGES: + logger.info("Album type is USER_IMAGES"); + ripUserImages(url); + break; } waitForThreads(); } + private void ripSingleImage(URL url) throws IOException { + String strUrl = url.toExternalForm(); + Document document = getDocument(strUrl); + Matcher m = getEmbeddedJsonMatcher(document); + if (m.matches()) { + JSONObject json = new JSONObject(m.group(1)).getJSONObject("image"); + addURLToDownload(extractImageUrlFromJson(json), ""); + } + } + private void ripAlbum(URL url) throws IOException { ripAlbum(url, ""); } @@ -257,38 +272,16 @@ public class ImgurRipper extends AlbumRipper { strUrl += "/all"; } logger.info(" Retrieving " + strUrl); - Document doc = Jsoup.connect(strUrl) - .userAgent(USER_AGENT) - .timeout(10 * 1000) - .maxBodySize(0) - .get(); - + Document doc = getDocument(strUrl); // Try to use embedded 
JSON to retrieve images - Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL); - Matcher m = p.matcher(doc.body().html()); + Matcher m = getEmbeddedJsonMatcher(doc); if (m.matches()) { try { - ImgurAlbum imgurAlbum = new ImgurAlbum(url); JSONObject json = new JSONObject(m.group(1)); - JSONArray images = json.getJSONObject("image") + JSONArray jsonImages = json.getJSONObject("image") .getJSONObject("album_images") .getJSONArray("images"); - int imagesLength = images.length(); - for (int i = 0; i < imagesLength; i++) { - JSONObject image = images.getJSONObject(i); - String ext = image.getString("ext"); - if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) { - ext = ".mp4"; - } - URL imageURL = new URL( - "http://i.imgur.com/" - + image.getString("hash") - + ext); - ImgurImage imgurImage = new ImgurImage(imageURL); - imgurImage.extension = ext; - imgurAlbum.addImage(imgurImage); - } - return imgurAlbum; + return createImgurAlbumFromJsonArray(url, jsonImages); } catch (JSONException e) { logger.debug("Error while parsing JSON at " + url + ", continuing", e); } @@ -330,6 +323,44 @@ public class ImgurRipper extends AlbumRipper { return imgurAlbum; } + private static Matcher getEmbeddedJsonMatcher(Document doc) { + Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL); + return p.matcher(doc.body().html()); + } + + private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException { + ImgurAlbum imgurAlbum = new ImgurAlbum(url); + int imagesLength = jsonImages.length(); + for (int i = 0; i < imagesLength; i++) { + JSONObject jsonImage = jsonImages.getJSONObject(i); + imgurAlbum.addImage(createImgurImageFromJson(jsonImage)); + } + return imgurAlbum; + } + + private static ImgurImage createImgurImageFromJson(JSONObject json) throws MalformedURLException { + return new ImgurImage(extractImageUrlFromJson(json)); 
+ } + + private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException { + String ext = json.getString("ext"); + if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) { + ext = ".mp4"; + } + return new URL( + "http://i.imgur.com/" + + json.getString("hash") + + ext); + } + + private static Document getDocument(String strUrl) throws IOException { + return Jsoup.connect(strUrl) + .userAgent(USER_AGENT) + .timeout(10 * 1000) + .maxBodySize(0) + .get(); + } + /** * Rips all albums in an imgur user's account. * @param url @@ -507,6 +538,13 @@ public class ImgurRipper extends AlbumRipper { this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid); return "r_" + subreddit + "_" + gid; } + p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9]{5,})$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + // Single imgur image + albumType = ALBUM_TYPE.SINGLE_IMAGE; + return m.group(m.groupCount()); + } p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$"); m = p.matcher(url.toExternalForm()); if (m.matches()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 076fcfc6..12842aa8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -1,14 +1,19 @@ package com.rarchives.ripme.ripper.rippers; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; +import java.net.URLConnection; import java.time.*; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.security.*; import org.json.JSONArray; 
import org.json.JSONException; @@ -17,15 +22,25 @@ import org.json.JSONObject; import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; +import org.jsoup.Connection; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.utils.Utils; +import java.util.HashMap; public class InstagramRipper extends AbstractHTMLRipper { + String nextPageID = ""; + private String qHash; + private boolean rippingTag = false; + private String tagName; private String userID; + private String rhx_gis = null; + private String csrftoken; + + public InstagramRipper(URL url) throws IOException { super(url); @@ -52,6 +67,12 @@ public class InstagramRipper extends AbstractHTMLRipper { return san_url; } + @Override + public String normalizeUrl(String url) { + // Remove the date sig from the url + return url.replaceAll("/[A-Z0-9]{8}/", "/"); + } + private List getPostsFromSinglePage(Document Doc) { List imageURLs = new ArrayList<>(); JSONArray datas; @@ -123,13 +144,31 @@ public class InstagramRipper extends AbstractHTMLRipper { p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); m = p.matcher(url.toExternalForm()); if (m.matches()) { + rippingTag = true; + tagName = m.group(1); return m.group(1); } throw new MalformedURLException("Unable to find user in " + url); } + private String stripHTMLTags(String t) { + t = t.replaceAll("\n" + + " \n" + + " ", ""); + t.replaceAll("\n" + + "", ""); + t = t.replaceAll("\n", ""); + t = t.replaceAll("=\"\"", ""); + return t; + } + + private JSONObject getJSONFromPage(Document firstPage) throws IOException { + // Check if this page is HTML + JSON or just JSON + if (!firstPage.html().contains("window._sharedData =")) { + return new JSONObject(stripHTMLTags(firstPage.html())); + } String jsonText = ""; try { for (Element script : firstPage.select("script[type=text/javascript]")) { @@ -146,8 +185,13 @@ public class 
InstagramRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { - userID = getGID(url); - return Http.url(url).get(); + Connection.Response resp = Http.url(url).response(); + logger.info(resp.cookies()); + csrftoken = resp.cookie("csrftoken"); + Document p = resp.parse(); + // Get the query hash so we can download the next page + qHash = getQHash(p); + return p; } private String getVideoFromPage(String videoID) { @@ -192,7 +236,6 @@ public class InstagramRipper extends AbstractHTMLRipper { @Override public List getURLsFromPage(Document doc) { - String nextPageID = ""; List imageURLs = new ArrayList<>(); JSONObject json = new JSONObject(); try { @@ -201,33 +244,53 @@ public class InstagramRipper extends AbstractHTMLRipper { logger.warn("Unable to exact json from page"); } - + // get the rhx_gis value so we can get the next page later on + if (rhx_gis == null) { + rhx_gis = json.getString("rhx_gis"); + } if (!url.toExternalForm().contains("/p/")) { JSONArray datas = new JSONArray(); - try { - JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); - } catch (JSONException e) { - // Handle hashtag pages - datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0) - .getJSONObject("tag").getJSONObject("media").getJSONArray("nodes"); + if (!rippingTag) { + // This first try only works on data from the first page + try { + JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", ""); + datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + } catch (JSONException e) { + datas = json.getJSONObject("data").getJSONObject("user") + 
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + } + } else { + try { + JSONArray tagPage = json.getJSONObject("entry_data").getJSONArray("TagPage"); + datas = tagPage.getJSONObject(0).getJSONObject("graphql").getJSONObject("hashtag") + .getJSONObject("edge_hashtag_to_media").getJSONArray("edges"); + } catch (JSONException e) { + datas = json.getJSONObject("data").getJSONObject("hashtag").getJSONObject("edge_hashtag_to_media") + .getJSONArray("edges"); + } } for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); - Long epoch = data.getLong("date"); + data = data.getJSONObject("node"); + Long epoch = data.getLong("taken_at_timestamp"); Instant instant = Instant.ofEpochSecond(epoch); String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC)); - if (data.getString("__typename").equals("GraphSidecar")) { - try { - Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("code"))).get(); - List toAdd = getPostsFromSinglePage(slideShowDoc); - for (int slideShowInt=0; slideShowInt toAdd = getPostsFromSinglePage(slideShowDoc); + for (int slideShowInt = 0; slideShowInt < toAdd.size(); slideShowInt++) { + addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("shortcode")); + } + } catch (MalformedURLException e) { + logger.error("Unable to download slide show, URL was malformed"); + } catch (IOException e) { + logger.error("Unable to download slide show"); } - } catch (MalformedURLException e) { - logger.error("Unable to download slide show, URL was malformed"); - } catch (IOException e) { - logger.error("Unable to download slide show"); } } try { @@ -235,14 +298,14 @@ public class InstagramRipper extends AbstractHTMLRipper { if (imageURLs.size() == 0) { // We add this one item to the array because either wise // the ripper will error out because we returned an empty array - 
imageURLs.add(getOriginalUrl(data.getString("thumbnail_src"))); + imageURLs.add(getOriginalUrl(data.getString("display_url"))); } - addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))), image_date); + addURLToDownload(new URL(data.getString("display_url")), image_date); } else { if (!Utils.getConfigBoolean("instagram.download_images_only", false)) { - addURLToDownload(new URL(getVideoFromPage(data.getString("code"))), image_date); + addURLToDownload(new URL(getVideoFromPage(data.getString("shortcode"))), image_date); } else { - sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("code")); + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("shortcode")); } } } catch (MalformedURLException e) { @@ -255,33 +318,7 @@ public class InstagramRipper extends AbstractHTMLRipper { break; } } - // Rip the next page - if (!nextPageID.equals("") && !isThisATest()) { - if (url.toExternalForm().contains("/tags/")) { - try { - // Sleep for a while to avoid a ban - sleep(2500); - if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get()); - } else { - getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get()); - } - } catch (IOException e) { - return imageURLs; - } - - } - try { - // Sleep for a while to avoid a ban - sleep(2500); - getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get()); - } catch (IOException e) { - return imageURLs; - } - } else { - logger.warn("Can't get net page"); - } } else { // We're ripping from a single page logger.info("Ripping from single page"); imageURLs = getPostsFromSinglePage(doc); @@ -290,9 +327,124 @@ public class InstagramRipper extends AbstractHTMLRipper { return imageURLs; } + private String getIGGis(String variables) { + String stringToMD5 = rhx_gis + ":" + csrftoken + ":" + variables; 
+ logger.debug("String to md5 is \"" + stringToMD5 + "\""); + try { + byte[] bytesOfMessage = stringToMD5.getBytes("UTF-8"); + + MessageDigest md = MessageDigest.getInstance("MD5"); + byte[] hash = md.digest(bytesOfMessage); + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < hash.length; ++i) { + sb.append(Integer.toHexString((hash[i] & 0xFF) | 0x100).substring(1,3)); + } + return sb.toString(); + } catch(UnsupportedEncodingException e) { + return null; + } catch(NoSuchAlgorithmException e) { + return null; + } + } + + @Override + public Document getNextPage(Document doc) throws IOException { + Document toreturn; + java.util.Map cookies = new HashMap(); +// This shouldn't be hardcoded and will break one day + cookies.put("ig_pr", "1"); + cookies.put("csrftoken", csrftoken); + if (!nextPageID.equals("") && !isThisATest()) { + if (rippingTag) { + try { + sleep(2500); + String vars = "{\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}"; + String ig_gis = getIGGis(vars); + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + + "&variables=" + vars).header("x-instagram-gis", ig_gis).cookies(cookies).ignoreContentType().get(); + // Sleep for a while to avoid a ban + logger.info(toreturn.html()); + return toreturn; + + } catch (IOException e) { + throw new IOException("No more pages"); + } + + } + try { + // Sleep for a while to avoid a ban + sleep(2500); + String vars = "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}"; + String ig_gis = getIGGis(vars); + logger.info(ig_gis); + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars + ).header("x-instagram-gis", ig_gis).cookies(cookies).ignoreContentType().get(); + if (!pageHasImages(toreturn)) { + throw new IOException("No more pages"); + } + return toreturn; + } catch (IOException e) { + return null; + } + } else { + throw new IOException("No more pages"); + } + } + 
@Override public void downloadURL(URL url, int index) { addURLToDownload(url); } + private boolean pageHasImages(Document doc) { + JSONObject json = new JSONObject(stripHTMLTags(doc.html())); + int numberOfImages = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length(); + if (numberOfImages == 0) { + return false; + } + return true; + } + + private String getQHash(Document doc) { + String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href"); + StringBuilder sb = new StringBuilder(); + Document jsPage; + try { + // We can't use Jsoup here because it won't download a non-html file larger than a MB + // even if you set maxBodySize to 0 + URLConnection connection = new URL(jsFileURL).openConnection(); + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream())); + String line; + while ((line = in.readLine()) != null) { + sb.append(line); + } + in.close(); + + } catch (MalformedURLException e) { + logger.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL"); + return null; + } catch (IOException e) { + logger.info("Unable to get query_hash"); + logger.info(e.getMessage()); + return null; + } + if (!rippingTag) { + Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } + } else { + Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } + } + logger.info("Could not find query_hash on " + jsFileURL); + return null; + + } + } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java new file mode 100644 index 00000000..0b513b37 --- /dev/null +++ 
b/src/main/java/com/rarchives/ripme/ripper/rippers/ModelxRipper.java @@ -0,0 +1,64 @@ +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class ModelxRipper extends AbstractHTMLRipper { + + public ModelxRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "modelx"; + } + + @Override + public String getDomain() { + return "modelx.org"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^.*modelx.org/.*/(.+)$"); + Matcher m = p.matcher(url.toExternalForm()); + + if (m.matches()) { + return m.group(1); + } + + throw new MalformedURLException("Expected URL format: http://www.modelx.org/[category (one or more)]/xxxxx got: " + url); + } + + @Override + public Document getFirstPage() throws IOException { + return Http.url(url).get(); + } + + @Override + public List getURLsFromPage(Document page) { + List result = new ArrayList<>(); + + for (Element el : page.select(".gallery-icon > a")) { + result.add(el.attr("href")); + } + + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java index 52e9a6d2..e8798476 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java @@ -4,10 +4,13 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import 
java.net.URL; +import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ripper.AbstractRipper; +import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; import org.json.JSONArray; import org.json.JSONObject; import org.json.JSONTokener; @@ -17,6 +20,9 @@ import com.rarchives.ripme.ui.UpdateUtils; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.RipUtils; import com.rarchives.ripme.utils.Utils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; public class RedditRipper extends AlbumRipper { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java new file mode 100644 index 00000000..d30e9b63 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java @@ -0,0 +1,80 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class SinfestRipper extends AbstractHTMLRipper { + + public SinfestRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "sinfest"; + } + + @Override + public String getDomain() { + return "sinfest.net"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://sinfest.net/view.php\\?date=([0-9-]*)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected sinfest URL format: " + + 
"sinfest.net/view.php?date=XXXX-XX-XX/ - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + Element elem = doc.select("td.style5 > a > img").last(); + logger.info(elem.parent().attr("href")); + if (elem == null || elem.parent().attr("href").equals("view.php?date=")) { + throw new IOException("No more pages"); + } + String nextPage = elem.parent().attr("href"); + // Some times this returns a empty string + // This for stops that + if (nextPage == "") { + return null; + } + else { + return Http.url("http://sinfest.net/" + nextPage).get(); + } + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + Element elem = doc.select("tbody > tr > td > img").last(); + result.add("http://sinfest.net/" + elem.attr("src")); + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/StaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/StaRipper.java new file mode 100644 index 00000000..4cfaf485 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/StaRipper.java @@ -0,0 +1,112 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class StaRipper extends AbstractHTMLRipper { + + public 
StaRipper(URL url) throws IOException { + super(url); + } + + private Map cookies = new HashMap<>(); + + @Override + public String getHost() { + return "sta"; + } + + @Override + public String getDomain() { + return "sta.sh"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https://sta.sh/([A-Za-z0-9]+)"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected sta.sh URL format: " + + "sta.sh/ALBUMID - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + for (Element el : doc.select("span > span > a.thumb")) { + String thumbPageURL = el.attr("href"); + Document thumbPage = null; + if (checkURL(thumbPageURL)) { + try { + Connection.Response resp = Http.url(new URL(thumbPageURL)).response(); + cookies.putAll(resp.cookies()); + thumbPage = resp.parse(); + } catch (MalformedURLException e) { + logger.info(thumbPageURL + " is a malformed URL"); + } catch (IOException e) { + logger.info(e.getMessage()); + } + String imageDownloadUrl = thumbPage.select("a.dev-page-download").attr("href"); + if (imageDownloadUrl != null && !imageDownloadUrl.equals("")) { + result.add(getImageLinkFromDLLink(imageDownloadUrl)); + } + } + + } + return result; + } + + private boolean checkURL(String url) { + try { + new URL(url); + return true; + } catch (MalformedURLException e) { + return false; + } + } + + private String getImageLinkFromDLLink(String url) { + try { + Connection.Response response = Jsoup.connect(url) + .userAgent(USER_AGENT) + .timeout(10000) + .cookies(cookies) + .followRedirects(false) + .execute(); + String imageURL = response.header("Location"); + logger.info(imageURL); + return imageURL; + } catch 
(IOException e) { + logger.info("Got error message " + e.getMessage() + " trying to download " + url); + return null; + } + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java new file mode 100644 index 00000000..9ca91e45 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -0,0 +1,108 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.rarchives.ripme.ui.RipStatusMessage; +import org.json.JSONArray; +import org.json.JSONObject; +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class TsuminoRipper extends AbstractHTMLRipper { + private Map cookies = new HashMap<>(); + + public TsuminoRipper(URL url) throws IOException { + super(url); + } + + private JSONArray getPageUrls() { + String postURL = "http://www.tsumino.com/Read/Load"; + try { + // This sessionId will expire and need to be replaced + cookies.put("ASP.NET_SessionId","c4rbzccf0dvy3e0cloolmlkq"); + logger.info(cookies); + Document doc = Jsoup.connect(postURL).data("q", getAlbumID()).userAgent(USER_AGENT).cookies(cookies).referrer("http://www.tsumino.com/Read/View/" + getAlbumID()).post(); + String jsonInfo = doc.html().replaceAll("","").replaceAll("", "").replaceAll("", "").replaceAll("", "") + .replaceAll("", "").replaceAll("\n", ""); + logger.info(jsonInfo); + JSONObject json = new 
JSONObject(jsonInfo); + logger.info(json.getJSONArray("reader_page_urls")); + return json.getJSONArray("reader_page_urls"); + } catch (IOException e) { + logger.info(e); + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_ERRORED, "Unable to download album, please compete the captcha at http://www.tsumino.com/Read/Auth/" + + getAlbumID() + " and try again"); + return null; + } + } + + @Override + public String getHost() { + return "tsumino"; + } + + @Override + public String getDomain() { + return "tsumino.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/([a-zA-Z0-9_-]*)"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1) + "_" + m.group(2); + } + throw new MalformedURLException("Expected tsumino URL format: " + + "tsumino.com/Book/Info/ID/TITLE - got " + url + " instead"); + } + + private String getAlbumID() { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/\\S*"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + return null; + } + + @Override + public Document getFirstPage() throws IOException { + Connection.Response resp = Http.url(url).response(); + cookies.putAll(resp.cookies()); + logger.info(resp.parse()); + return resp.parse(); + } + + @Override + public List getURLsFromPage(Document doc) { + JSONArray imageIds = getPageUrls(); + List result = new ArrayList<>(); + for (int i = 0; i < imageIds.length(); i++) { + result.add("http://www.tsumino.com/Image/Object?name=" + URLEncoder.encode(imageIds.getString(i))); + } + + return result; + } + + @Override + public void downloadURL(URL url, int index) { + sleep(1000); + addURLToDownload(url, getPrefix(index)); + } +} diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java index 
9401297d..dbc44585 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java @@ -37,6 +37,7 @@ public class WordpressComicRipper extends AbstractHTMLRipper { private static List explicit_domains = Arrays.asList( "www.totempole666.com", "buttsmithy.com", + "incase.buttsmithy.com", "themonsterunderthebed.net", "prismblush.com", "www.konradokonski.com", @@ -87,6 +88,12 @@ public class WordpressComicRipper extends AbstractHTMLRipper { return true; } + Pattern buttsmithyIncasePat = Pattern.compile("https?://incase.buttsmithy.com/comic/([a-zA-Z0-9_-]*)/?$"); + Matcher buttsmithyIncaseMat = buttsmithyIncasePat.matcher(url.toExternalForm()); + if (buttsmithyIncaseMat.matches()) { + return true; + } + Pattern theMonsterUnderTheBedPat = Pattern.compile("https?://themonsterunderthebed.net/\\?comic=([a-zA-Z0-9_-]*)/?$"); Matcher theMonsterUnderTheBedMat = theMonsterUnderTheBedPat.matcher(url.toExternalForm()); if (theMonsterUnderTheBedMat.matches()) { @@ -178,6 +185,12 @@ public class WordpressComicRipper extends AbstractHTMLRipper { return getHost() + "_" + prismblushMat.group(1); } + Pattern buttsmithyIncasePat = Pattern.compile("https?://incase.buttsmithy.com/comic/([a-zA-Z0-9_-]*)/?$"); + Matcher buttsmithyIncaseMat = buttsmithyIncasePat.matcher(url.toExternalForm()); + if (buttsmithyIncaseMat.matches()) { + return getHost() + "_" + buttsmithyIncaseMat.group(1).replaceAll("-page-\\d", "").replaceAll("-pg-\\d", ""); + } + Pattern comicsxxxPat = Pattern.compile("https?://comics-xxx.com/([a-zA-Z0-9_\\-]*)/?$"); Matcher comicsxxxMat = comicsxxxPat.matcher(url.toExternalForm()); if (comicsxxxMat.matches()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java index 06841ce9..4f2bac97 100644 --- 
a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java @@ -57,19 +57,21 @@ public class XvideosRipper extends VideoRipper { public void rip() throws IOException { logger.info(" Retrieving " + this.url); Document doc = Http.url(this.url).get(); - Elements embeds = doc.select("embed"); - if (embeds.size() == 0) { - throw new IOException("Could not find Embed code at " + url); - } - Element embed = embeds.get(0); - String vars = embed.attr("flashvars"); - for (String var : vars.split("&")) { - if (var.startsWith("flv_url=")) { - String vidUrl = var.substring("flv_url=".length()); - vidUrl = URLDecoder.decode(vidUrl, "UTF-8"); - addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url)); + Elements scripts = doc.select("script"); + for (Element e : scripts) { + if (e.html().contains("html5player.setVideoUrlHigh")) { + logger.info("Found the right script"); + String[] lines = e.html().split("\n"); + for (String line: lines) { + if (line.contains("html5player.setVideoUrlHigh")) { + String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", ""); + addURLToDownload(new URL(videoURL), HOST + "_" + getGID(this.url)); + waitForThreads(); + return; + } + } } } - waitForThreads(); + throw new IOException("Unable to find video url at " + this.url.toExternalForm()); } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 37ce6cfe..e1d579a4 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.19"; + private static final 
String DEFAULT_VERSION = "1.7.33"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java index b7b8c239..01d20e7c 100644 --- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java +++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java @@ -9,19 +9,18 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ripper.AbstractRipper; +import com.rarchives.ripme.ripper.rippers.EroShareRipper; +import com.rarchives.ripme.ripper.rippers.EromeRipper; +import com.rarchives.ripme.ripper.rippers.ImgurRipper; +import com.rarchives.ripme.ripper.rippers.VidbleRipper; +import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; import org.apache.commons.lang.math.NumberUtils; import org.apache.log4j.Logger; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AbstractRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage; -import com.rarchives.ripme.ripper.rippers.VidbleRipper; -import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; -import com.rarchives.ripme.ripper.rippers.EroShareRipper; public class RipUtils { private static final Logger logger = Logger.getLogger(RipUtils.class); @@ -35,8 +34,8 @@ public class RipUtils { && url.toExternalForm().contains("imgur.com/a/")) { try { logger.debug("Fetching imgur album at " + url); - ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url); - for (ImgurImage imgurImage : imgurAlbum.images) { + ImgurRipper.ImgurAlbum imgurAlbum = 
ImgurRipper.getImgurAlbum(url); + for (ImgurRipper.ImgurImage imgurImage : imgurAlbum.images) { logger.debug("Got imgur image: " + imgurImage.url); result.add(imgurImage.url); } @@ -49,8 +48,8 @@ public class RipUtils { // Imgur image series. try { logger.debug("Fetching imgur series at " + url); - ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url); - for (ImgurImage imgurImage : imgurAlbum.images) { + ImgurRipper.ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url); + for (ImgurRipper.ImgurImage imgurImage : imgurAlbum.images) { logger.debug("Got imgur image: " + imgurImage.url); result.add(imgurImage.url); } @@ -91,6 +90,21 @@ public class RipUtils { return result; } + else if (url.toExternalForm().contains("erome.com")) { + try { + logger.info("Getting eroshare album " + url); + EromeRipper r = new EromeRipper(url); + Document tempDoc = r.getFirstPage(); + for (String u : r.getURLsFromPage(tempDoc)) { + result.add(new URL(u)); + } + } catch (IOException e) { + // Do nothing + logger.warn("Exception while retrieving eroshare page:", e); + } + return result; + } + Pattern p = Pattern.compile("https?://i.reddituploads.com/([a-zA-Z0-9]+)\\?.*"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { @@ -122,8 +136,8 @@ public class RipUtils { try { // Fetch the page Document doc = Jsoup.connect(url.toExternalForm()) - .userAgent(AbstractRipper.USER_AGENT) - .get(); + .userAgent(AbstractRipper.USER_AGENT) + .get(); for (Element el : doc.select("meta")) { if (el.attr("name").equals("twitter:image:src")) { result.add(new URL(el.attr("content"))); diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java index 469c330a..4a6c3539 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java @@ -10,6 +10,9 @@ public class 
EightmusesRipperTest extends RippersTest { // A simple image album EightmusesRipper ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); testRipper(ripper); + // Test the new url format + ripper = new EightmusesRipper(new URL("https://www.8muses.com/comics/album/Affect3D-Comics/TheDude3DX/Lust-Unleashed-The-Urge-To-Explore")); + testRipper(ripper); // Test pages with subalbums ripper = new EightmusesRipper(new URL("https://www.8muses.com/comix/album/Blacknwhitecomics_com-Comix/BlacknWhiteComics/The-Mayor")); testRipper(ripper); diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemjoyhunterRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemjoyhunterRipperTest.java new file mode 100644 index 00000000..fee634ef --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FemjoyhunterRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.FemjoyhunterRipper; + +public class FemjoyhunterRipperTest extends RippersTest { + public void testRip() throws IOException { + FemjoyhunterRipper ripper = new FemjoyhunterRipper(new URL("https://www.femjoyhunter.com/alisa-i-got-nice-big-breasts-and-fine-ass-so-she-seems-to-be-a-hottest-brunette-5936/")); + testRipper(ripper); + } +} \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java index 538d493c..214220b8 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/FivehundredpxRipperTest.java @@ -1,13 +1,15 @@ -package com.rarchives.ripme.tst.ripper.rippers; +//package com.rarchives.ripme.tst.ripper.rippers; +// +//import 
java.io.IOException; +//import java.net.URL; +// +//import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper; +// +//public class FivehundredpxRipperTest extends RippersTest { +// public void test500pxAlbum() throws IOException { +// FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman")); +// testRipper(ripper); +// } +//} -import java.io.IOException; -import java.net.URL; - -import com.rarchives.ripme.ripper.rippers.FivehundredpxRipper; - -public class FivehundredpxRipperTest extends RippersTest { - public void test500pxAlbum() throws IOException { - FivehundredpxRipper ripper = new FivehundredpxRipper(new URL("https://marketplace.500px.com/alexander_hurman")); - testRipper(ripper); - } -} +// Ripper is broken. See https://github.com/RipMeApp/ripme/issues/438 \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java index 46f5679f..c321a99e 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java @@ -1,13 +1,13 @@ package com.rarchives.ripme.tst.ripper.rippers; +import com.rarchives.ripme.ripper.rippers.ImgurRipper; +import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; + import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; -import com.rarchives.ripme.ripper.rippers.ImgurRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; - public class ImgurRipperTest extends RippersTest { public void testImgurURLFailures() throws IOException { @@ -17,7 +17,6 @@ public class ImgurRipperTest extends RippersTest { failURLs.add(new URL("http://imgur.com/")); failURLs.add(new URL("http://i.imgur.com")); failURLs.add(new URL("http://i.imgur.com/")); - failURLs.add(new URL("http://imgur.com/image")); 
failURLs.add(new URL("http://imgur.com/image.jpg")); failURLs.add(new URL("http://i.imgur.com/image.jpg")); for (URL url : failURLs) { @@ -50,6 +49,15 @@ public class ImgurRipperTest extends RippersTest { } } + public void testImgurSingleImage() throws IOException { + List contentURLs = new ArrayList<>(); + contentURLs.add(new URL("http://imgur.com/qbfcLyG")); // Single image URL + contentURLs.add(new URL("https://imgur.com/KexUO")); // Single image URL + for (URL url : contentURLs) { + ImgurRipper ripper = new ImgurRipper(url); + testRipper(ripper); + } + } public void testImgurAlbumWithMoreThan20Pictures() throws IOException { ImgurAlbum album = ImgurRipper.getImgurAlbum(new URL("http://imgur.com/a/HUMsq")); diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ModelxRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ModelxRipperTest.java new file mode 100644 index 00000000..2a0358d2 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ModelxRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.ModelxRipper; + +public class ModelxRipperTest extends RippersTest { + public void testModelxAlbum() throws IOException { + ModelxRipper ripper = new ModelxRipper(new URL("http://www.modelx.org/graphis-collection-2002-2016/ai-yuzuki-%e6%9f%9a%e6%9c%88%e3%81%82%e3%81%84-yuzuiro/")); + testRipper(ripper); + } +} \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinfestRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinfestRipperTest.java new file mode 100644 index 00000000..c46e922c --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/SinfestRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import 
com.rarchives.ripme.ripper.rippers.SinfestRipper; + +public class SinfestRipperTest extends RippersTest { + public void testRip() throws IOException { + SinfestRipper ripper = new SinfestRipper(new URL("http://sinfest.net/view.php?date=2000-01-17")); + testRipper(ripper); + } +} \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/StaRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/StaRipperTest.java new file mode 100644 index 00000000..128f3f17 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/StaRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.StaRipper; + +public class StaRipperTest extends RippersTest { + public void testRip() throws IOException { + StaRipper ripper = new StaRipper(new URL("https://sta.sh/2hn9rtavr1g")); + testRipper(ripper); + } +} \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/WordpressComicRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/WordpressComicRipperTest.java index e3d5c4a0..2f7dbcf9 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/WordpressComicRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/WordpressComicRipperTest.java @@ -83,10 +83,10 @@ public class WordpressComicRipperTest extends RippersTest { new URL("http://tnbtu.com/comic/01-00/")); testRipper(ripper); } - - public void test_pepsaga() throws IOException { - WordpressComicRipper ripper = new WordpressComicRipper( - new URL("http://shipinbottle.pepsaga.com/?p=281")); - testRipper(ripper); - } + // https://github.com/RipMeApp/ripme/issues/269 - Disabled test - WordpressRipperTest: various domains flaky in CI +// public void test_pepsaga() throws IOException { +// WordpressComicRipper ripper = new WordpressComicRipper( +// new 
URL("http://shipinbottle.pepsaga.com/?p=281")); +// testRipper(ripper); +// } }