Fix #370: Fix NewsfilterRipper.
This commit is contained in:
parent
bacc69dff5
commit
ef206fb7a6
1
.gitignore
vendored
1
.gitignore
vendored
@ -13,3 +13,4 @@ history.json
|
|||||||
.settings/
|
.settings/
|
||||||
.classpath
|
.classpath
|
||||||
*.txt
|
*.txt
|
||||||
|
bin/
|
||||||
|
@ -1,19 +1,19 @@
|
|||||||
package com.rarchives.ripme.ripper.rippers;
|
package com.rarchives.ripme.ripper.rippers;
|
||||||
|
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
|
||||||
import org.jsoup.Connection;
|
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document;
|
|
||||||
import org.jsoup.nodes.Element;
|
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.jsoup.Connection;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||||
|
|
||||||
public class NewsfilterRipper extends AlbumRipper {
|
public class NewsfilterRipper extends AlbumRipper {
|
||||||
private static final String HOST = "newsfilter";
|
private static final String HOST = "newsfilter";
|
||||||
|
|
||||||
@ -33,7 +33,7 @@ public class NewsfilterRipper extends AlbumRipper {
|
|||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
String u = url.toExternalForm();
|
String u = url.toExternalForm();
|
||||||
if (u.indexOf('#') >= 0) {
|
if (u.indexOf('#') >= 0) {
|
||||||
u = u.substring(0, u.indexOf('#'));
|
u = u.substring(0, u.indexOf('#'));
|
||||||
}
|
}
|
||||||
u = u.replace("https?://m\\.newsfilter\\.org", "http://newsfilter.org");
|
u = u.replace("https?://m\\.newsfilter\\.org", "http://newsfilter.org");
|
||||||
return new URL(u);
|
return new URL(u);
|
||||||
@ -41,36 +41,25 @@ public class NewsfilterRipper extends AlbumRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public void rip() throws IOException {
|
||||||
String gid = getGID(this.url),
|
String gid = getGID(this.url);
|
||||||
theurl = "http://newsfilter.org/gallery/" + gid;
|
String theurl = "http://newsfilter.org/gallery/" + gid;
|
||||||
|
|
||||||
Connection.Response resp = null;
|
|
||||||
logger.info("Loading " + theurl);
|
logger.info("Loading " + theurl);
|
||||||
resp = Jsoup.connect(theurl)
|
|
||||||
.timeout(5000)
|
|
||||||
.referrer("")
|
|
||||||
.userAgent(USER_AGENT)
|
|
||||||
.method(Connection.Method.GET)
|
|
||||||
.execute();
|
|
||||||
|
|
||||||
|
Connection.Response resp = Jsoup.connect(theurl)
|
||||||
|
.timeout(5000)
|
||||||
|
.referrer("")
|
||||||
|
.userAgent(USER_AGENT)
|
||||||
|
.method(Connection.Method.GET)
|
||||||
|
.execute();
|
||||||
Document doc = resp.parse();
|
Document doc = resp.parse();
|
||||||
//Element gallery = doc.getElementById("thegalmain");
|
|
||||||
//Elements piclinks = gallery.getElementsByAttributeValue("itemprop","contentURL");
|
Elements thumbnails = doc.select("#galleryImages .inner-block img");
|
||||||
Pattern pat = Pattern.compile(gid+"/\\d+");
|
for (Element thumb : thumbnails) {
|
||||||
Elements piclinks = doc.getElementsByAttributeValueMatching("href", pat);
|
String thumbUrl = thumb.attr("src");
|
||||||
for (Element picelem : piclinks) {
|
String picUrl = thumbUrl.replace("thumbs/", "");
|
||||||
String picurl = "http://newsfilter.org"+picelem.attr("href");
|
addURLToDownload(new URL(picUrl));
|
||||||
logger.info("Getting to picture page: "+picurl);
|
|
||||||
resp = Jsoup.connect(picurl)
|
|
||||||
.timeout(5000)
|
|
||||||
.referrer(theurl)
|
|
||||||
.userAgent(USER_AGENT)
|
|
||||||
.method(Connection.Method.GET)
|
|
||||||
.execute();
|
|
||||||
Document picdoc = resp.parse();
|
|
||||||
String dlurl = picdoc.getElementsByAttributeValue("itemprop","contentURL").first().attr("src");
|
|
||||||
addURLToDownload(new URL(dlurl));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
waitForThreads();
|
waitForThreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,9 +75,8 @@ public class NewsfilterRipper extends AlbumRipper {
|
|||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(2);
|
return m.group(2);
|
||||||
}
|
}
|
||||||
throw new MalformedURLException("Expected newsfilter gallery format: "
|
throw new MalformedURLException(
|
||||||
+ "http://newsfilter.org/gallery/galleryid"
|
"Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
|
||||||
+ " Got: " + url);
|
" Got: " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user