diff --git a/README.md b/README.md index 390106c2..e56276c1 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,9 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/ * Quickly downloads all images in an online album (see supported sites below) * Easily re-rip albums to fetch new content +* Built in updater +* Can rip images from tumblr in the size they were uploaded in [See here for how to enable](https://github.com/RipMeApp/ripme/wiki/Config-options#tumblrget_raw_image) +* Skips already downloaded images by default ## [List of Supported Sites](https://github.com/ripmeapp/ripme/wiki/Supported-Sites) diff --git a/pom.xml b/pom.xml index 7c7b831f..77bedc9c 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.24 + 1.7.27 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 3fc7fde5..5f47ad6b 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,9 @@ { - "latestVersion": "1.7.24", + "latestVersion": "1.7.27", "changeList": [ + "1.7.27: IG ripper can now rip from tags; fixed json parsing issues", + "1.7.26: fixed instagram ripper", + "1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index ff6b4102..6068ed18 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -174,7 +174,7 @@ public abstract class AbstractRipper * URL of the file * @param saveAs * 
Path of the local file to save the content to. - * @return True on success, flase on failure. + * @return True on success, false on failure. */ public abstract boolean addURLToDownload(URL url, File saveAs); @@ -206,11 +206,13 @@ public abstract class AbstractRipper * The HTTP referrer to use while downloading this file. * @param cookies * The cookies to send to the server while downloading this file. + * @param fileName + * The name that file will be written to * @return * True if downloaded successfully * False if failed to download */ - protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies, String fileName) { // Don't re-add the url if it was downloaded in a previous rip if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) { if (hasDownloadedURL(url.toExternalForm())) { @@ -225,9 +227,18 @@ public abstract class AbstractRipper logger.debug("Ripper has been stopped"); return false; } - logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies); - String saveAs = url.toExternalForm(); - saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies + ", fileName: " + fileName); + String saveAs; + if (fileName != null) { + saveAs = fileName; + // Get the extension of the file + String extension = url.toExternalForm().substring(url.toExternalForm().lastIndexOf(".") + 1); + saveAs = saveAs + "." 
+ extension; + } else { + saveAs = url.toExternalForm(); + saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + } + if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } @@ -274,7 +285,11 @@ public abstract class AbstractRipper * @return True on success, flase on failure. */ protected boolean addURLToDownload(URL url, String prefix, String subdirectory) { - return addURLToDownload(url, prefix, subdirectory, null, null); + return addURLToDownload(url, prefix, subdirectory, null, null, null); + } + + protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + return addURLToDownload(url, prefix, subdirectory, referrer, cookies, null); } /** @@ -290,6 +305,8 @@ public abstract class AbstractRipper // Use empty subdirectory return addURLToDownload(url, prefix, ""); } + + /** * Waits for downloading threads to complete. 
*/ diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 43873cf9..77ca9102 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.utils.Utils; import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -125,7 +126,11 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.info("Retrieving full-size image location from " + imageHref); image = getFullSizeImage(imageHref); URL imageUrl = new URL(image); - addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies); + if (Utils.getConfigBoolean("8muses.use_short_names", false)) { + addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, ""); + } else { + addURLToDownload(imageUrl, getPrefixLong(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies); + } // X is our page index x++; @@ -178,8 +183,11 @@ public class EightmusesRipper extends AbstractHTMLRipper { addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies); } - @Override - public String getPrefix(int index) { + public String getPrefixLong(int index) { return String.format("%03d_", index); } + + public String getPrefixShort(int index) { + return String.format("%03d", index); + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index ab44edfd..bb2998f8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ 
b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -1,8 +1,11 @@ package com.rarchives.ripme.ripper.rippers; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; +import java.net.URLConnection; import java.time.*; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -25,6 +28,9 @@ import com.rarchives.ripme.utils.Utils; public class InstagramRipper extends AbstractHTMLRipper { String nextPageID = ""; + private String qHash; + private boolean rippingTag = false; + private String tagName; private String userID; @@ -130,13 +136,31 @@ public class InstagramRipper extends AbstractHTMLRipper { p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); m = p.matcher(url.toExternalForm()); if (m.matches()) { + rippingTag = true; + tagName = m.group(1); return m.group(1); } throw new MalformedURLException("Unable to find user in " + url); } + private String stripHTMLTags(String t) { + t = t.replaceAll("\n" + " \n" + " ", ""); + t.replaceAll("\n" + "", ""); + t = t.replaceAll("\n", ""); + t = t.replaceAll("=\"\"", ""); + return t; + } + + private JSONObject getJSONFromPage(Document firstPage) throws IOException { + // Check if this page is HTML + JSON or just JSON + if (!firstPage.html().contains("window._sharedData =")) { + return new JSONObject(stripHTMLTags(firstPage.html())); + } String jsonText = ""; try { for (Element script : firstPage.select("script[type=text/javascript]")) { @@ -153,8 +177,10 @@ public class InstagramRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { - userID = getGID(url); - return Http.url(url).get(); + Document p = Http.url(url).get(); + // Get the query hash so we can download the next page + qHash = getQHash(p); + return p; } private String getVideoFromPage(String videoID) { @@ -210,30 +236,47 @@ public class InstagramRipper extends
AbstractHTMLRipper { if (!url.toExternalForm().contains("/p/")) { JSONArray datas = new JSONArray(); - try { - JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); - } catch (JSONException e) { - // Handle hashtag pages - datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0) - .getJSONObject("tag").getJSONObject("media").getJSONArray("nodes"); + if (!rippingTag) { + // This first try only works on data from the first page + try { + JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", ""); + datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + } catch (JSONException e) { + datas = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + } + } else { + try { + JSONArray tagPage = json.getJSONObject("entry_data").getJSONArray("TagPage"); + datas = tagPage.getJSONObject(0).getJSONObject("graphql").getJSONObject("hashtag") + .getJSONObject("edge_hashtag_to_media").getJSONArray("edges"); + } catch (JSONException e) { + datas = json.getJSONObject("data").getJSONObject("hashtag").getJSONObject("edge_hashtag_to_media") + .getJSONArray("edges"); + } } for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); - Long epoch = data.getLong("date"); + data = data.getJSONObject("node"); + Long epoch = data.getLong("taken_at_timestamp"); Instant instant = Instant.ofEpochSecond(epoch); String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC)); - if (data.getString("__typename").equals("GraphSidecar")) { - try { - Document 
slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("code"))).get(); - List toAdd = getPostsFromSinglePage(slideShowDoc); - for (int slideShowInt=0; slideShowInt toAdd = getPostsFromSinglePage(slideShowDoc); + for (int slideShowInt = 0; slideShowInt < toAdd.size(); slideShowInt++) { + addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("shortcode")); + } + } catch (MalformedURLException e) { + logger.error("Unable to download slide show, URL was malformed"); + } catch (IOException e) { + logger.error("Unable to download slide show"); } - } catch (MalformedURLException e) { - logger.error("Unable to download slide show, URL was malformed"); - } catch (IOException e) { - logger.error("Unable to download slide show"); } } try { @@ -246,9 +289,9 @@ public class InstagramRipper extends AbstractHTMLRipper { addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))), image_date); } else { if (!Utils.getConfigBoolean("instagram.download_images_only", false)) { - addURLToDownload(new URL(getVideoFromPage(data.getString("code"))), image_date); + addURLToDownload(new URL(getVideoFromPage(data.getString("shortcode"))), image_date); } else { - sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("code")); + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("shortcode")); } } } catch (MalformedURLException e) { @@ -274,19 +317,13 @@ public class InstagramRipper extends AbstractHTMLRipper { public Document getNextPage(Document doc) throws IOException { Document toreturn; if (!nextPageID.equals("") && !isThisATest()) { - if (url.toExternalForm().contains("/tags/")) { + if (rippingTag) { try { - // Sleep for a while to avoid a ban sleep(2500); - if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); - } else { - toreturn = 
Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); - } + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + + "&variables={\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}").ignoreContentType().get(); + // Sleep for a while to avoid a ban logger.info(toreturn.html()); - if (!hasImage(toreturn)) { - throw new IOException("No more pages"); - } return toreturn; } catch (IOException e) { @@ -297,8 +334,9 @@ public class InstagramRipper extends AbstractHTMLRipper { try { // Sleep for a while to avoid a ban sleep(2500); - toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); - if (!hasImage(toreturn)) { + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + + "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}").ignoreContentType().get(); + if (!pageHasImages(toreturn)) { throw new IOException("No more pages"); } return toreturn; @@ -315,19 +353,55 @@ public class InstagramRipper extends AbstractHTMLRipper { addURLToDownload(url); } - private boolean hasImage(Document doc) { - try { - JSONObject json = getJSONFromPage(doc); - JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); - logger.info(datas.length()); - if (datas.length() == 0) { - return false; - } - return true; - } catch (IOException e) { + private boolean pageHasImages(Document doc) { + logger.info("BAD DATA: " + stripHTMLTags(doc.html())); + JSONObject json = new JSONObject(stripHTMLTags(doc.html())); + int numberOfImages = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length(); + if (numberOfImages == 0) { return false; } + return true; + } + + private String getQHash(Document doc) { + String jsFileURL 
= "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href"); + StringBuilder sb = new StringBuilder(); + Document jsPage; + try { + // We can't use Jsoup here because it won't download a non-html file larger than a MB + // even if you set maxBodySize to 0 + URLConnection connection = new URL(jsFileURL).openConnection(); + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream())); + String line; + while ((line = in.readLine()) != null) { + sb.append(line); + } + in.close(); + + } catch (MalformedURLException e) { + logger.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL"); + return null; + } catch (IOException e) { + logger.info("Unable to get query_hash"); + logger.info(e.getMessage()); + return null; + } + if (!rippingTag) { + Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } + } else { + Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } + } + logger.info("Could not find query_hash on " + jsFileURL); + return null; } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java new file mode 100644 index 00000000..9ca91e45 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -0,0 +1,108 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.rarchives.ripme.ui.RipStatusMessage; +import org.json.JSONArray; +import org.json.JSONObject; +import 
org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class TsuminoRipper extends AbstractHTMLRipper { + private Map cookies = new HashMap<>(); + + public TsuminoRipper(URL url) throws IOException { + super(url); + } + + private JSONArray getPageUrls() { + String postURL = "http://www.tsumino.com/Read/Load"; + try { + // This sessionId will expire and need to be replaced + cookies.put("ASP.NET_SessionId","c4rbzccf0dvy3e0cloolmlkq"); + logger.info(cookies); + Document doc = Jsoup.connect(postURL).data("q", getAlbumID()).userAgent(USER_AGENT).cookies(cookies).referrer("http://www.tsumino.com/Read/View/" + getAlbumID()).post(); + String jsonInfo = doc.html().replaceAll("","").replaceAll("", "").replaceAll("", "").replaceAll("", "") + .replaceAll("", "").replaceAll("\n", ""); + logger.info(jsonInfo); + JSONObject json = new JSONObject(jsonInfo); + logger.info(json.getJSONArray("reader_page_urls")); + return json.getJSONArray("reader_page_urls"); + } catch (IOException e) { + logger.info(e); + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_ERRORED, "Unable to download album, please complete the captcha at http://www.tsumino.com/Read/Auth/" + + getAlbumID() + " and try again"); + return null; + } + } + + @Override + public String getHost() { + return "tsumino"; + } + + @Override + public String getDomain() { + return "tsumino.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/([a-zA-Z0-9_-]*)"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1) + "_" + m.group(2); + } + throw new MalformedURLException("Expected tsumino URL format: " + + "tsumino.com/Book/Info/ID/TITLE - got " + url + " instead"); + } + + private String getAlbumID() { + Pattern p =
Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/\\S*"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + return null; + } + + @Override + public Document getFirstPage() throws IOException { + Connection.Response resp = Http.url(url).response(); + cookies.putAll(resp.cookies()); + logger.info(resp.parse()); + return resp.parse(); + } + + @Override + public List getURLsFromPage(Document doc) { + JSONArray imageIds = getPageUrls(); + List result = new ArrayList<>(); + for (int i = 0; i < imageIds.length(); i++) { + result.add("http://www.tsumino.com/Image/Object?name=" + URLEncoder.encode(imageIds.getString(i))); + } + + return result; + } + + @Override + public void downloadURL(URL url, int index) { + sleep(1000); + addURLToDownload(url, getPrefix(index)); + } +} diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java index 9401297d..dbc44585 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java @@ -37,6 +37,7 @@ public class WordpressComicRipper extends AbstractHTMLRipper { private static List explicit_domains = Arrays.asList( "www.totempole666.com", "buttsmithy.com", + "incase.buttsmithy.com", "themonsterunderthebed.net", "prismblush.com", "www.konradokonski.com", @@ -87,6 +88,12 @@ public class WordpressComicRipper extends AbstractHTMLRipper { return true; } + Pattern buttsmithyIncasePat = Pattern.compile("https?://incase.buttsmithy.com/comic/([a-zA-Z0-9_-]*)/?$"); + Matcher buttsmithyIncaseMat = buttsmithyIncasePat.matcher(url.toExternalForm()); + if (buttsmithyIncaseMat.matches()) { + return true; + } + Pattern theMonsterUnderTheBedPat = Pattern.compile("https?://themonsterunderthebed.net/\\?comic=([a-zA-Z0-9_-]*)/?$"); Matcher theMonsterUnderTheBedMat = 
theMonsterUnderTheBedPat.matcher(url.toExternalForm()); if (theMonsterUnderTheBedMat.matches()) { @@ -178,6 +185,12 @@ public class WordpressComicRipper extends AbstractHTMLRipper { return getHost() + "_" + prismblushMat.group(1); } + Pattern buttsmithyIncasePat = Pattern.compile("https?://incase.buttsmithy.com/comic/([a-zA-Z0-9_-]*)/?$"); + Matcher buttsmithyIncaseMat = buttsmithyIncasePat.matcher(url.toExternalForm()); + if (buttsmithyIncaseMat.matches()) { + return getHost() + "_" + buttsmithyIncaseMat.group(1).replaceAll("-page-\\d", "").replaceAll("-pg-\\d", ""); + } + Pattern comicsxxxPat = Pattern.compile("https?://comics-xxx.com/([a-zA-Z0-9_\\-]*)/?$"); Matcher comicsxxxMat = comicsxxxPat.matcher(url.toExternalForm()); if (comicsxxxMat.matches()) { diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 98191c33..80282ccb 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.24"; + private static final String DEFAULT_VERSION = "1.7.27"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar";