From fb6e23e7d6095a9c77803d25efb7af16c355d207 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 29 Jul 2017 21:07:30 -0400 Subject: [PATCH 1/3] Eroshareripper now uses mirror for data --- .../ripme/ripper/rippers/EroShareRipper.java | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java index 75c56044..a2ee74c6 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java @@ -51,13 +51,13 @@ public class EroShareRipper extends AbstractHTMLRipper { } @Override public boolean canRip(URL url) { - Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); + Pattern p = Pattern.compile("^https?://spacescience.tech/([a-zA-Z0-9\\-_]+)/?$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return true; } - Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); + Pattern pa = Pattern.compile("^https?://spacescience.tech/u/([a-zA-Z0-9\\-_]+)/?$"); Matcher ma = pa.matcher(url.toExternalForm()); if (ma.matches()) { return true; @@ -66,7 +66,7 @@ public class EroShareRipper extends AbstractHTMLRipper { } public boolean is_profile(URL url) { - Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); + Pattern pa = Pattern.compile("^https?://spacescience.tech/u/([a-zA-Z0-9\\-_]+)/?$"); Matcher ma = pa.matcher(url.toExternalForm()); if (ma.matches()) { return true; @@ -79,12 +79,14 @@ public class EroShareRipper extends AbstractHTMLRipper { // Find next page String nextUrl = ""; Element elem = doc.select("li.next > a").first(); - logger.info(elem); + if (elem == null) { + throw new IOException("No more pages"); + } nextUrl = elem.attr("href"); if (nextUrl == "") { throw new IOException("No more pages"); } - return Http.url("https://eroshare.com" + nextUrl).get(); + return Http.url("spacescience.tech" + nextUrl).get(); } @Override @@ -124,7 +126,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add(videoURL); + URLs.add("https:" + videoURL); } } // Profile videos @@ -132,7 +134,7 @@ public class EroShareRipper extends AbstractHTMLRipper { for (Element link : links) { Document video_page; try { - video_page = Http.url("https://eroshare.com" + link.attr("href")).get(); + video_page = Http.url("spacescience.tech" + link.attr("href")).get(); } catch (IOException e) { logger.warn("Failed to log link in Jsoup"); video_page = null; @@ -143,7 +145,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add(videoURL); + URLs.add("https:" + videoURL); } } } @@ -164,19 +166,19 @@ public class EroShareRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); + Pattern p = Pattern.compile("^https?://spacescience.tech/([a-zA-Z0-9\\-_]+)/?$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); } - Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); + Pattern pa = Pattern.compile("^https?://spacescience.tech/u/([a-zA-Z0-9\\-_]+)/?$"); Matcher ma = pa.matcher(url.toExternalForm()); if (ma.matches()) { return m.group(1) + "_profile"; } - throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album"); + throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album or spacescience.tech/album"); } public static List getURLs(URL url) throws IOException{ @@ -203,11 +205,10 @@ public class EroShareRipper extends AbstractHTMLRipper { if (vid.hasClass("album-video")) { Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); - URLs.add(new URL(videoURL)); + URLs.add(new URL("https:" + videoURL)); } } return URLs; } } - From fdf82f5126b3dc3f30ac66a7784e54315f113e61 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sat, 29 Jul 2017 21:51:41 -0400 Subject: [PATCH 2/3] changed regex to include eroshare.com --- .../ripme/ripper/rippers/EroShareRipper.java | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java index a2ee74c6..054d6ba2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java @@ -62,6 +62,18 @@ public class EroShareRipper extends AbstractHTMLRipper { if (ma.matches()) { return true; } + + Pattern p_eroshare = Pattern.compile("^https?://eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); + Matcher m_eroshare = p_eroshare.matcher(url.toExternalForm()); + if (m_eroshare.matches()) { + return true; + } + + Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); + Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm()); + if (m_eroshare_profile.matches()) { + return true; + } return false; } @@ -155,7 +167,8 @@ public class EroShareRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { - Response resp = Http.url(this.url) + String urlToDownload = this.url.toExternalForm(); + Response resp = Http.url(urlToDownload.replace("eroshare.com", "spacescience.tech")) .ignoreContentType() .response(); @@ -172,6 +185,18 @@ public class EroShareRipper extends AbstractHTMLRipper { return m.group(1); } + Pattern p_eroshare = Pattern.compile("^https?://eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); + Matcher m_eroshare = p_eroshare.matcher(url.toExternalForm()); + if (m_eroshare.matches()) { + return m_eroshare.group(1); + } + + Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); + Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm()); + if (m_eroshare_profile.matches()) { + return m_eroshare_profile.group(1) + "_profile"; + } + Pattern pa = Pattern.compile("^https?://spacescience.tech/u/([a-zA-Z0-9\\-_]+)/?$"); Matcher ma = pa.matcher(url.toExternalForm()); if (ma.matches()) { From cdbdc995f139df36a42637fd9990a17dd59ce187 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 9 Aug 2017 17:01:10 -0400 Subject: [PATCH 3/3] Added eroshare mirror eroshae --- .../ripme/ripper/rippers/EroShareRipper.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java index 054d6ba2..85839e7a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java @@ -51,13 +51,13 @@ public class EroShareRipper extends AbstractHTMLRipper { } @Override public boolean canRip(URL url) { - Pattern p = Pattern.compile("^https?://spacescience.tech/([a-zA-Z0-9\\-_]+)/?$"); + Pattern p = Pattern.compile("^https?://eroshae.com/([a-zA-Z0-9\\-_]+)/?$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return true; } - Pattern pa = Pattern.compile("^https?://spacescience.tech/u/([a-zA-Z0-9\\-_]+)/?$"); + Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$"); Matcher ma = pa.matcher(url.toExternalForm()); if (ma.matches()) { return true; @@ -78,7 +78,7 @@ public class EroShareRipper extends AbstractHTMLRipper { } public boolean is_profile(URL url) { - Pattern pa = Pattern.compile("^https?://spacescience.tech/u/([a-zA-Z0-9\\-_]+)/?$"); + Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$"); Matcher ma = pa.matcher(url.toExternalForm()); if (ma.matches()) { return true; @@ -98,7 +98,7 @@ public class EroShareRipper extends AbstractHTMLRipper { if (nextUrl == "") { throw new IOException("No more pages"); } - return Http.url("spacescience.tech" + nextUrl).get(); + return Http.url("eroshae.com" + nextUrl).get(); } @Override @@ -146,7 +146,7 @@ public class EroShareRipper extends AbstractHTMLRipper { for (Element link : links) { Document video_page; try { - video_page = Http.url("spacescience.tech" + link.attr("href")).get(); + video_page = Http.url("eroshae.com" + link.attr("href")).get(); } catch (IOException e) { logger.warn("Failed to log link in Jsoup"); video_page = null; @@ -168,7 +168,7 @@ public class EroShareRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { String urlToDownload = this.url.toExternalForm(); - Response resp = Http.url(urlToDownload.replace("eroshare.com", "spacescience.tech")) + Response resp = Http.url(urlToDownload.replace("eroshare.com", "eroshae.com")) .ignoreContentType() .response(); @@ -179,7 +179,7 @@ public class EroShareRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://spacescience.tech/([a-zA-Z0-9\\-_]+)/?$"); + Pattern p = Pattern.compile("^https?://eroshae.com/([a-zA-Z0-9\\-_]+)/?$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); @@ -197,13 +197,13 @@ public class EroShareRipper extends AbstractHTMLRipper { return m_eroshare_profile.group(1) + "_profile"; } - Pattern pa = Pattern.compile("^https?://spacescience.tech/u/([a-zA-Z0-9\\-_]+)/?$"); + Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$"); Matcher ma = pa.matcher(url.toExternalForm()); if (ma.matches()) { return m.group(1) + "_profile"; } - throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album or spacescience.tech/album"); + throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album or eroshae.com/album"); } public static List getURLs(URL url) throws IOException{