From 4b7db493d1dbd9f39038b70b822a32bf1965f89c Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 21 Feb 2018 18:38:10 -0500 Subject: [PATCH 01/15] Added support for incase.buttsimthy.com --- .../ripme/ripper/rippers/WordpressComicRipper.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java index 9401297d..dbc44585 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java @@ -37,6 +37,7 @@ public class WordpressComicRipper extends AbstractHTMLRipper { private static List explicit_domains = Arrays.asList( "www.totempole666.com", "buttsmithy.com", + "incase.buttsmithy.com", "themonsterunderthebed.net", "prismblush.com", "www.konradokonski.com", @@ -87,6 +88,12 @@ public class WordpressComicRipper extends AbstractHTMLRipper { return true; } + Pattern buttsmithyIncasePat = Pattern.compile("https?://incase.buttsmithy.com/comic/([a-zA-Z0-9_-]*)/?$"); + Matcher buttsmithyIncaseMat = buttsmithyIncasePat.matcher(url.toExternalForm()); + if (buttsmithyIncaseMat.matches()) { + return true; + } + Pattern theMonsterUnderTheBedPat = Pattern.compile("https?://themonsterunderthebed.net/\\?comic=([a-zA-Z0-9_-]*)/?$"); Matcher theMonsterUnderTheBedMat = theMonsterUnderTheBedPat.matcher(url.toExternalForm()); if (theMonsterUnderTheBedMat.matches()) { @@ -178,6 +185,12 @@ public class WordpressComicRipper extends AbstractHTMLRipper { return getHost() + "_" + prismblushMat.group(1); } + Pattern buttsmithyIncasePat = Pattern.compile("https?://incase.buttsmithy.com/comic/([a-zA-Z0-9_-]*)/?$"); + Matcher buttsmithyIncaseMat = buttsmithyIncasePat.matcher(url.toExternalForm()); + if (buttsmithyIncaseMat.matches()) { + return getHost() + "_" + buttsmithyIncaseMat.group(1).replaceAll("-page-\\d", "").replaceAll("-pg-\\d", ""); + } + Pattern comicsxxxPat = Pattern.compile("https?://comics-xxx.com/([a-zA-Z0-9_\\-]*)/?$"); Matcher comicsxxxMat = comicsxxxPat.matcher(url.toExternalForm()); if (comicsxxxMat.matches()) { From 0aada1fd903f902108e74c0e4053de2a584814d7 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 8 Mar 2018 06:15:27 -0500 Subject: [PATCH 02/15] Added more features to readme --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 390106c2..e56276c1 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,9 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/ * Quickly downloads all images in an online album (see supported sites below) * Easily re-rip albums to fetch new content +* Built in updater +* Can rip images from tumblr in the size they were uploaded in [See here for how to enable](https://github.com/RipMeApp/ripme/wiki/Config-options#tumblrget_raw_image) +* Skips already downloaded images by default ## [List of Supported Sites](https://github.com/ripmeapp/ripme/wiki/Supported-Sites) From e5096736bfa99217bea948dc8c06486ac8409a38 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 09:25:50 -0500 Subject: [PATCH 03/15] Added tsuminoRipper --- .../ripme/ripper/rippers/TsuminoRipper.java | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java new file mode 100644 index 00000000..ff6e8829 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -0,0 +1,108 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class TsuminoRipper extends AbstractHTMLRipper { + private Map cookies = new HashMap<>(); + + public TsuminoRipper(URL url) throws IOException { + super(url); + } + + private JSONArray getPageUrls() { + String postURL = "http://www.tsumino.com/Read/Load"; + try { + // This sessionId will expire and need to be replaced + cookies.put("ASP.NET_SessionId","c4rbzccf0dvy3e0cloolmlkq"); + logger.info(cookies); + Document doc = Jsoup.connect(postURL).data("q", getAlbumID()).userAgent(USER_AGENT).cookies(cookies).referrer("http://www.tsumino.com/Read/View/" + getAlbumID()).post(); + String jsonInfo = doc.html().replaceAll("","").replaceAll("", "").replaceAll("", "").replaceAll("", "") + .replaceAll("", "").replaceAll("\n", ""); + logger.info(jsonInfo); + JSONObject json = new JSONObject(jsonInfo); + logger.info(json.getJSONArray("reader_page_urls")); + return json.getJSONArray("reader_page_urls"); + } catch (IOException e) { + logger.info(e); + return null; + } + } + + @Override + public String getHost() { + return "tsumino"; + } + + @Override + public String getDomain() { + return "tsumino.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/([a-zA-Z0-9_-]*)"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1) + "_" + m.group(2); + } + throw new MalformedURLException("Expected tsumino URL format: " + + "tsumino.com/Book/Info/ID/TITLE - got " + url + " instead"); + } + + private String getAlbumID() { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/\\S*"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + return null; + } + + @Override + public Document getFirstPage() throws IOException { + Connection.Response resp = Http.url(url).response(); + cookies.putAll(resp.cookies()); + // We need to perform a get on http://www.tsumino.com/Read/View/albumID/1 or else the + //www.tsumino.com/Read/Load endpoint 404s + resp = Http.url("http://www.tsumino.com/Book/Info/" + getAlbumID()).response(); + cookies.putAll(resp.cookies()); + return resp.parse(); + } + + @Override + public List getURLsFromPage(Document doc) { + JSONArray imageIds = getPageUrls(); + List result = new ArrayList<>(); + for (int i = 0; i < imageIds.length(); i++) { + result.add("http://www.tsumino.com/Image/Object?name=" + URLEncoder.encode(imageIds.getString(i))); + } + + return result; + } + + @Override + public void downloadURL(URL url, int index) { + sleep(1000); + addURLToDownload(url, getPrefix(index)); + } +} From 6769bbfb7d6c0defa070ffe29dd084b9f7af0894 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 10:39:02 -0500 Subject: [PATCH 04/15] Now tells the user if they need to fill out a captcha --- .../com/rarchives/ripme/ripper/rippers/TsuminoRipper.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java index ff6e8829..9ca91e45 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ui.RipStatusMessage; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Connection; @@ -44,6 +45,8 @@ public class TsuminoRipper extends AbstractHTMLRipper { return json.getJSONArray("reader_page_urls"); } catch (IOException e) { logger.info(e); + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_ERRORED, "Unable to download album, please compete the captcha at http://www.tsumino.com/Read/Auth/" + + getAlbumID() + " and try again"); return null; } } @@ -82,10 +85,7 @@ public class TsuminoRipper extends AbstractHTMLRipper { public Document getFirstPage() throws IOException { Connection.Response resp = Http.url(url).response(); cookies.putAll(resp.cookies()); - // We need to perform a get on http://www.tsumino.com/Read/View/albumID/1 or else the - //www.tsumino.com/Read/Load endpoint 404s - resp = Http.url("http://www.tsumino.com/Book/Info/" + getAlbumID()).response(); - cookies.putAll(resp.cookies()); + logger.info(resp.parse()); return resp.parse(); } From c9c85429456543cd4fedc8083b1b9614e1b84bc8 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 12 Mar 2018 12:40:13 -0400 Subject: [PATCH 05/15] Added another overload to addURLToDownload which allows the ripper to set the name of the file; Fixed 8muses filename length issue --- .../ripme/ripper/AbstractRipper.java | 29 +++++++++++++++---- .../ripper/rippers/EightmusesRipper.java | 4 +-- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index ff6b4102..6068ed18 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -174,7 +174,7 @@ public abstract class AbstractRipper * URL of the file * @param saveAs * Path of the local file to save the content to. - * @return True on success, flase on failure. + * @return True on success, false on failure. */ public abstract boolean addURLToDownload(URL url, File saveAs); @@ -206,11 +206,13 @@ public abstract class AbstractRipper * The HTTP referrer to use while downloading this file. * @param cookies * The cookies to send to the server while downloading this file. + * @param fileName + * The name that file will be written to * @return * True if downloaded successfully * False if failed to download */ - protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies, String fileName) { // Don't re-add the url if it was downloaded in a previous rip if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) { if (hasDownloadedURL(url.toExternalForm())) { @@ -225,9 +227,18 @@ public abstract class AbstractRipper logger.debug("Ripper has been stopped"); return false; } - logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies); - String saveAs = url.toExternalForm(); - saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies + ", fileName: " + fileName); + String saveAs; + if (fileName != null) { + saveAs = fileName; + // Get the extension of the file + String extension = url.toExternalForm().substring(url.toExternalForm().lastIndexOf(".") + 1); + saveAs = saveAs + "." + extension; + } else { + saveAs = url.toExternalForm(); + saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); + } + if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } @@ -274,7 +285,11 @@ public abstract class AbstractRipper * @return True on success, flase on failure. */ protected boolean addURLToDownload(URL url, String prefix, String subdirectory) { - return addURLToDownload(url, prefix, subdirectory, null, null); + return addURLToDownload(url, prefix, subdirectory, null, null, null); + } + + protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map cookies) { + return addURLToDownload(url, prefix, subdirectory, referrer, cookies, null); } /** @@ -290,6 +305,8 @@ public abstract class AbstractRipper // Use empty subdirectory return addURLToDownload(url, prefix, ""); } + + /** * Waits for downloading threads to complete. */ diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 43873cf9..80ac5b93 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -125,7 +125,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.info("Retrieving full-size image location from " + imageHref); image = getFullSizeImage(imageHref); URL imageUrl = new URL(image); - addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies); + addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, ""); // X is our page index x++; @@ -180,6 +180,6 @@ public class EightmusesRipper extends AbstractHTMLRipper { @Override public String getPrefix(int index) { - return String.format("%03d_", index); + return String.format("%03d", index); } } From d4c3d6a025c25c4fbb143d6577d9ac5e5e65be49 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 12 Mar 2018 15:02:34 -0400 Subject: [PATCH 06/15] Added an option to use short names for 8muses --- .../ripme/ripper/rippers/EightmusesRipper.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 80ac5b93..77ca9102 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.utils.Utils; import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -125,7 +126,11 @@ public class EightmusesRipper extends AbstractHTMLRipper { logger.info("Retrieving full-size image location from " + imageHref); image = getFullSizeImage(imageHref); URL imageUrl = new URL(image); - addURLToDownload(imageUrl, getPrefix(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, ""); + if (Utils.getConfigBoolean("8muses.use_short_names", false)) { + addURLToDownload(imageUrl, getPrefixShort(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies, ""); + } else { + addURLToDownload(imageUrl, getPrefixLong(x), getSubdir(page.select("title").text()), this.url.toExternalForm(), cookies); + } // X is our page index x++; @@ -178,8 +183,11 @@ public class EightmusesRipper extends AbstractHTMLRipper { addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies); } - @Override - public String getPrefix(int index) { + public String getPrefixLong(int index) { + return String.format("%03d_", index); + } + + public String getPrefixShort(int index) { return String.format("%03d", index); } } From fd2ff087dd430b4f46b5fcb56ba1e35ffb8d2055 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 13 Mar 2018 20:41:55 -0400 Subject: [PATCH 07/15] Fixed instagram ripper --- .../ripme/ripper/rippers/InstagramRipper.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index ab44edfd..14f78cf2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -212,7 +212,8 @@ public class InstagramRipper extends AbstractHTMLRipper { JSONArray datas = new JSONArray(); try { JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); + datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); } catch (JSONException e) { // Handle hashtag pages datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0) @@ -220,15 +221,16 @@ public class InstagramRipper extends AbstractHTMLRipper { } for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); - Long epoch = data.getLong("date"); + data = data.getJSONObject("node"); + Long epoch = data.getLong("taken_at_timestamp"); Instant instant = Instant.ofEpochSecond(epoch); String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC)); if (data.getString("__typename").equals("GraphSidecar")) { try { - Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("code"))).get(); + Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("shortcode"))).get(); List toAdd = getPostsFromSinglePage(slideShowDoc); for (int slideShowInt=0; slideShowInt Date: Tue, 13 Mar 2018 20:45:28 -0400 Subject: [PATCH 08/15] Ripper no longer supports tags --- .../ripme/ripper/rippers/InstagramRipper.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 14f78cf2..dbadf355 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -127,11 +127,11 @@ public class InstagramRipper extends AbstractHTMLRipper { return m.group(1); } - p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } +// p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); +// m = p.matcher(url.toExternalForm()); +// if (m.matches()) { +// return m.group(1); +// } throw new MalformedURLException("Unable to find user in " + url); } From 7356a13da13cd6dac580aa34ce197a690284aa46 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 14 Mar 2018 12:35:03 -0400 Subject: [PATCH 09/15] Minor code clean up --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index dbadf355..e78834ba 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -227,7 +227,7 @@ public class InstagramRipper extends AbstractHTMLRipper { String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC)); if (data.getString("__typename").equals("GraphSidecar")) { try { - Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("shortcode"))).get(); + Document slideShowDoc = Http.url(new URL("https://www.instagram.com/p/" + data.getString("shortcode"))).get(); List toAdd = getPostsFromSinglePage(slideShowDoc); for (int slideShowInt=0; slideShowInt Date: Wed, 14 Mar 2018 12:58:34 -0400 Subject: [PATCH 10/15] 1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 7c7b831f..a36b52ef 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.24 + 1.7.25 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 3fc7fde5..1e4fb4dc 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.24", + "latestVersion": "1.7.25", "changeList": [ + "1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", "1.7.22: Added func to normalize urls before reading from/writing to url history file; last seen feature now works with instagram", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 98191c33..6c31f378 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.24"; + private static final String DEFAULT_VERSION = "1.7.25"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 545bfce7c9f8b8845c617954b65cd934d7af1a75 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 15 Mar 2018 13:18:29 -0400 Subject: [PATCH 11/15] Insagram ripper now can get all pages of a profile --- .../ripme/ripper/rippers/InstagramRipper.java | 93 ++++++++++++++----- 1 file changed, 69 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index e78834ba..93de0e8c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -1,8 +1,11 @@ package com.rarchives.ripme.ripper.rippers; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; +import java.net.URLConnection; import java.time.*; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -25,6 +28,7 @@ import com.rarchives.ripme.utils.Utils; public class InstagramRipper extends AbstractHTMLRipper { String nextPageID = ""; + private String qHash; private String userID; @@ -136,7 +140,21 @@ public class InstagramRipper extends AbstractHTMLRipper { throw new MalformedURLException("Unable to find user in " + url); } + private String stripHTMLTags(String t) { + t = t.replaceAll("\n" + + " \n" + + " ", ""); + t.replaceAll("\n" + + "", ""); + return t; + } + + private JSONObject getJSONFromPage(Document firstPage) throws IOException { + // Check if this page is HTML + JSON or jsut json + if (!firstPage.html().contains("window._sharedData =")) { + return new JSONObject(stripHTMLTags(firstPage.html())); + } String jsonText = ""; try { for (Element script : firstPage.select("script[type=text/javascript]")) { @@ -153,8 +171,10 @@ public class InstagramRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { - userID = getGID(url); - return Http.url(url).get(); + Document p = Http.url(url).get(); + // Get the query hash so we can download the next page + qHash = getQHash(p); + return p; } private String getVideoFromPage(String videoID) { @@ -210,14 +230,15 @@ public class InstagramRipper extends AbstractHTMLRipper { if (!url.toExternalForm().contains("/p/")) { JSONArray datas = new JSONArray(); + // This first try only works on data from the first page try { JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", ""); datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); } catch (JSONException e) { - // Handle hashtag pages - datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0) - .getJSONObject("tag").getJSONObject("media").getJSONArray("nodes"); + datas = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); } for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); @@ -281,14 +302,11 @@ public class InstagramRipper extends AbstractHTMLRipper { // Sleep for a while to avoid a ban sleep(2500); if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).ignoreContentType().get(); } else { - toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get(); + toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).ignoreContentType().get(); } logger.info(toreturn.html()); - if (!hasImage(toreturn)) { - throw new IOException("No more pages"); - } return toreturn; } catch (IOException e) { @@ -299,8 +317,9 @@ public class InstagramRipper extends AbstractHTMLRipper { try { // Sleep for a while to avoid a ban sleep(2500); - toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get(); - if (!hasImage(toreturn)) { + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + + "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}").ignoreContentType().get(); + if (!pageHasImages(toreturn)) { throw new IOException("No more pages"); } return toreturn; @@ -317,20 +336,46 @@ public class InstagramRipper extends AbstractHTMLRipper { addURLToDownload(url); } - private boolean hasImage(Document doc) { - try { - JSONObject json = getJSONFromPage(doc); - JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - JSONArray datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") - .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); - logger.info(datas.length()); - if (datas.length() == 0) { - return false; - } - return true; - } catch (IOException e) { + private boolean pageHasImages(Document doc) { + JSONObject json = new JSONObject(stripHTMLTags(doc.html())); + int numberOfImages = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length(); + if (numberOfImages == 0) { return false; } + return true; + } + + private String getQHash(Document doc) { + String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href"); + StringBuilder sb = new StringBuilder(); + Document jsPage; + try { + // We can't use Jsoup here because it won't download a non-html file larger than a MB + // even if you set maxBodySize to 0 + URLConnection connection = new URL(jsFileURL).openConnection(); + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream())); + String line; + while ((line = in.readLine()) != null) { + sb.append(line); + } + in.close(); + + } catch (MalformedURLException e) { + logger.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL"); + return null; + } catch (IOException e) { + logger.info("Unable to get query_hash"); + logger.info(e.getMessage()); + return null; + } + Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } + logger.info("Could not find query_hash on " + jsFileURL); + return null; } From da338eb7164e33e37f0b63a02e33ea645eb9e339 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 15 Mar 2018 13:40:11 -0400 Subject: [PATCH 12/15] 1.7.26: fixed instagram ripper --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index a36b52ef..35e1c586 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.25 + 1.7.26 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 1e4fb4dc..6674d63f 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.25", + "latestVersion": "1.7.26", "changeList": [ + "1.7.26: fixed instagram ripper", "1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", "1.7.23: Fixed xvideos ripper; InstagramRipper now works with lastseenfeature", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 6c31f378..8fada879 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.25"; + private static final String DEFAULT_VERSION = "1.7.26"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From 5600b375b4f9595762f980a3c01cbe51e81abcf1 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 15 Mar 2018 16:27:15 -0400 Subject: [PATCH 13/15] IG ripper can now rip from tags --- .../ripme/ripper/rippers/InstagramRipper.java | 93 ++++++++++++------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 93de0e8c..92138861 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -29,6 +29,8 @@ import com.rarchives.ripme.utils.Utils; public class InstagramRipper extends AbstractHTMLRipper { String nextPageID = ""; private String qHash; + private boolean rippingTag = false; + private String tagName; private String userID; @@ -131,11 +133,13 @@ public class InstagramRipper extends AbstractHTMLRipper { return m.group(1); } -// p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); -// m = p.matcher(url.toExternalForm()); -// if (m.matches()) { -// return m.group(1); -// } + p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + rippingTag = true; + tagName = m.group(1); + return m.group(1); + } throw new MalformedURLException("Unable to find user in " + url); } @@ -230,15 +234,26 @@ public class InstagramRipper extends AbstractHTMLRipper { if (!url.toExternalForm().contains("/p/")) { JSONArray datas = new JSONArray(); - // This first try only works on data from the first page - try { - JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); - userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", ""); - datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") - .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); - } catch (JSONException e) { - datas = json.getJSONObject("data").getJSONObject("user") - .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + if (!rippingTag) { + // This first try only works on data from the first page + try { + JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); + userID = profilePage.getJSONObject(0).getString("logging_page_id").replaceAll("profilePage_", ""); + datas = profilePage.getJSONObject(0).getJSONObject("graphql").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + } catch (JSONException e) { + datas = json.getJSONObject("data").getJSONObject("user") + .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges"); + } + } else { + try { + JSONArray tagPage = json.getJSONObject("entry_data").getJSONArray("TagPage"); + datas = tagPage.getJSONObject(0).getJSONObject("graphql").getJSONObject("hashtag") + .getJSONObject("edge_hashtag_to_media").getJSONArray("edges"); + } catch (JSONException e) { + datas = json.getJSONObject("data").getJSONObject("hashtag").getJSONObject("edge_hashtag_to_media") + .getJSONArray("edges"); + } } for (int i = 0; i < datas.length(); i++) { JSONObject data = (JSONObject) datas.get(i); @@ -246,17 +261,20 @@ public class InstagramRipper extends AbstractHTMLRipper { Long epoch = data.getLong("taken_at_timestamp"); Instant instant = Instant.ofEpochSecond(epoch); String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC)); - if (data.getString("__typename").equals("GraphSidecar")) { - try { - Document slideShowDoc = Http.url(new URL("https://www.instagram.com/p/" + data.getString("shortcode"))).get(); - List toAdd = getPostsFromSinglePage(slideShowDoc); - for (int slideShowInt=0; slideShowInt toAdd = getPostsFromSinglePage(slideShowDoc); + for (int slideShowInt = 0; slideShowInt < toAdd.size(); slideShowInt++) { + addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("shortcode")); + } + } catch (MalformedURLException e) { + logger.error("Unable to download slide show, URL was malformed"); + } catch (IOException e) { + logger.error("Unable to download slide show"); } - } catch (MalformedURLException e) { - logger.error("Unable to download slide show, URL was malformed"); - } catch (IOException e) { - logger.error("Unable to download slide show"); } } try { @@ -297,15 +315,12 @@ public class InstagramRipper extends AbstractHTMLRipper { public Document getNextPage(Document doc) throws IOException { Document toreturn; if (!nextPageID.equals("") && !isThisATest()) { - if (url.toExternalForm().contains("/tags/")) { + if (rippingTag) { try { - // Sleep for a while to avoid a ban sleep(2500); - if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { - toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).ignoreContentType().get(); - } else { - toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).ignoreContentType().get(); - } + toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + + "&variables={\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}").ignoreContentType().get(); + // Sleep for a while to avoid a ban logger.info(toreturn.html()); return toreturn; @@ -369,10 +384,18 @@ public class InstagramRipper extends AbstractHTMLRipper { logger.info(e.getMessage()); return null; } - Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); - Matcher m = jsP.matcher(sb.toString()); - if (m.find()) { - return m.group(1); + if (!rippingTag) { + Pattern jsP = Pattern.compile("o},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } + } else { + Pattern jsP = Pattern.compile("return e.tagMedia.byTagName.get\\(t\\).pagination},queryId:.([a-zA-Z0-9]+)."); + Matcher m = jsP.matcher(sb.toString()); + if (m.find()) { + return m.group(1); + } } logger.info("Could not find query_hash on " + jsFileURL); return null; From 5b5e86ed3b869457a26fcb9340c0caf7bbfabc4f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 16 Mar 2018 16:50:29 -0400 Subject: [PATCH 14/15] Fixed json parsing errors --- .../com/rarchives/ripme/ripper/rippers/InstagramRipper.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 92138861..bb2998f8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -150,6 +150,8 @@ public class InstagramRipper extends AbstractHTMLRipper { " ", ""); t.replaceAll("\n" + "", ""); + t = t.replaceAll("\n", ""); + t = t.replaceAll("=\"\"", ""); return t; } @@ -352,6 +354,7 @@ public class InstagramRipper extends AbstractHTMLRipper { } private boolean pageHasImages(Document doc) { + logger.info("BAD DATA: " + stripHTMLTags(doc.html())); JSONObject json = new JSONObject(stripHTMLTags(doc.html())); int numberOfImages = json.getJSONObject("data").getJSONObject("user") .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length(); From 6cea9e3ff75d3378fb1693bee7d54d28a5f1a774 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 17 Mar 2018 16:09:34 -0400 Subject: [PATCH 15/15] 1.7.27: IG ripper can now rip from tags; fixed json parsing issues --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 35e1c586..77bedc9c 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.26 + 1.7.27 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 6674d63f..5f47ad6b 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion": "1.7.26", + "latestVersion": "1.7.27", "changeList": [ + "1.7.27: IG ripper can now rip from tags; fixed json parsing issues", "1.7.26: fixed instagram ripper", "1.7.25: Fixed instagram ripper; Added an option to use short names for 8muses; Added tsuminoRipper; Added support for incase.buttsmithy.com", "1.7.24: Added sta.sh ripper; Added sinfest.com ripper; Added femjoyhunter.com ripper; Disabled flaky unit tests", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 8fada879..80282ccb 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.26"; + private static final String DEFAULT_VERSION = "1.7.27"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar";