Merge pull request #222 from cyian-1756/ig_hashtag

Added support for ripping from Instagram hashtag pages (instagram.com/explore/tags/<tag>)
2017-11-18 09:16:21 -05:00 · 2017-11-18 09:16:21 -05:00 · ecf72e517e
commit ecf72e517e
parent 73b165c645 bcf3f9d2a7
1 changed files with 36 additions and 4 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@ -69,6 +69,13 @@ public class InstagramRipper extends AbstractHTMLRipper {
        if (m.matches()) {
            return m.group(1);
        }
+
+        p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+
        throw new MalformedURLException("Unable to find user in " + url);
    }

@ -141,11 +148,18 @@ public class InstagramRipper extends AbstractHTMLRipper {
            logger.warn("Unable to exact json from page");
        }

-        Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]+)/?");
+        Pattern p = Pattern.compile("^.*instagram.com/p/([a-zA-Z0-9\\-_.]+)/?");
        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
+        if (!m.matches()) {
+            JSONArray datas = new JSONArray();
+            try {
                JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
-            JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
+                datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
+            } catch (JSONException e) {
+                // Handle hashtag pages
+                datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0)
+                        .getJSONObject("tag").getJSONObject("media").getJSONArray("nodes");
+            }
            for (int i = 0; i < datas.length(); i++) {
                JSONObject data = (JSONObject) datas.get(i);
                Long epoch = data.getLong("date");
@ -175,6 +189,21 @@ public class InstagramRipper extends AbstractHTMLRipper {
            }
            // Rip the next page
            if (!nextPageID.equals("") && !isThisATest()) {
+                if (url.toExternalForm().contains("/tags/")) {
+                    try {
+                        // Sleep for a while to avoid a ban
+                        sleep(2500);
+                        if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
+                            getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
+                        } else {
+                            getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
+                        }
+
+                    } catch (IOException e) {
+                        return imageURLs;
+                    }
+
+                }
                try {
                    // Sleep for a while to avoid a ban
                    sleep(2500);
@ -182,8 +211,11 @@ public class InstagramRipper extends AbstractHTMLRipper {
                } catch (IOException e) {
                    return imageURLs;
                }
+            } else {
+                logger.warn("Can't get net page");
            }
        } else { // We're ripping from a single page
+            logger.info("Ripping from single page");
            if (!doc.select("meta[property=og:video]").attr("content").equals("")) {
                String videoURL = doc.select("meta[property=og:video]").attr("content");
                // We're ripping a page with a video on it