From 5c185c05df8789a25c4bd88746c78d8b87a05810 Mon Sep 17 00:00:00 2001
From: cyian-1756 <devnull64@vfemail.net>
Date: Sat, 18 Nov 2017 03:52:17 -0500
Subject: [PATCH] Improved instagram regex and added sanitizeURL

---
 .../rarchives/ripme/ripper/rippers/InstagramRipper.java  | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index c085059c..6a33e71d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -43,6 +43,13 @@ public class InstagramRipper extends AbstractHTMLRipper {
         return (url.getHost().endsWith("instagram.com"));
     }
 
+    @Override // Removes every "?hl=<non-whitespace>" run from the URL text, then logs the result.
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        final URL sanitized = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
+        logger.info("sanitized URL is " + sanitized.toExternalForm());
+        return sanitized;
+    }
+
     @Override
     public String getGID(URL url) throws MalformedURLException {
         Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
@@ -51,7 +58,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
             return m.group(1);
         }
 
-        p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?");
+        p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);