diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index c085059c..6a33e71d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -43,6 +43,26 @@ public class InstagramRipper extends AbstractHTMLRipper {
         return (url.getHost().endsWith("instagram.com"));
     }
 
+    /**
+     * Removes the {@code hl} query string from the given URL.
+     *
+     * <p>Every match of the regex {@code \?hl=\S*} is deleted from the URL's
+     * external form, i.e. a literal {@code ?hl=} together with all
+     * non-whitespace characters that follow it. The resulting URL is logged
+     * at info level.
+     *
+     * @param url the URL to clean up
+     * @return a new URL built from the stripped text
+     * @throws MalformedURLException if the stripped text is not a valid URL
+     */
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        String stripped = url.toExternalForm().replaceAll("\\?hl=\\S*", "");
+        URL sanitized = new URL(stripped);
+        logger.info("sanitized URL is " + sanitized.toExternalForm());
+        return sanitized;
+    }
+
     @Override
     public String getGID(URL url) throws MalformedURLException {
         Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
@@ -51,7 +71,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
             return m.group(1);
         }
 
-        p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?");
+        p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);