2014-02-25 10:28:22 +01:00
|
|
|
package com.rarchives.ripme.ripper.rippers;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.URL;
|
2014-06-25 04:05:54 +02:00
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
2014-02-25 10:28:22 +01:00
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.nodes.Element;
|
|
|
|
|
2014-06-25 04:05:54 +02:00
|
|
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
2014-06-22 02:08:42 +02:00
|
|
|
import com.rarchives.ripme.utils.Http;
|
2014-02-25 10:28:22 +01:00
|
|
|
|
2014-06-25 04:05:54 +02:00
|
|
|
public class ImagefapRipper extends AbstractHTMLRipper {
|
2014-02-26 08:44:22 +01:00
|
|
|
|
2014-04-11 08:42:14 +02:00
|
|
|
private Document albumDoc = null;
|
2015-02-06 08:58:17 +01:00
|
|
|
private boolean isNewAlbumType = false;
|
2014-04-11 08:42:14 +02:00
|
|
|
|
2014-02-26 08:44:22 +01:00
|
|
|
public ImagefapRipper(URL url) throws IOException {
|
|
|
|
super(url);
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getHost() {
|
2014-06-25 04:05:54 +02:00
|
|
|
return "imagefap";
|
|
|
|
}
|
|
|
|
@Override
|
|
|
|
public String getDomain() {
|
|
|
|
return "imagefap.com";
|
2014-02-26 08:44:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Reformat given URL into the desired format (all images on single page)
|
|
|
|
*/
|
2014-06-25 04:05:54 +02:00
|
|
|
@Override
|
2014-02-27 04:54:44 +01:00
|
|
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
|
|
String gid = getGID(url);
|
2015-02-06 08:58:17 +01:00
|
|
|
String newURL = "http://www.imagefap.com/gallery.php?";
|
|
|
|
if (isNewAlbumType) {
|
|
|
|
newURL += "p";
|
|
|
|
}
|
|
|
|
newURL += "gid=" + gid + "&view=2";
|
2014-06-25 04:05:54 +02:00
|
|
|
logger.debug("Changed URL from " + url + " to " + newURL);
|
2015-02-06 08:58:17 +01:00
|
|
|
return new URL(newURL);
|
2014-02-26 08:44:22 +01:00
|
|
|
}
|
|
|
|
|
2014-04-11 08:42:14 +02:00
|
|
|
@Override
|
2014-02-27 04:54:44 +01:00
|
|
|
public String getGID(URL url) throws MalformedURLException {
|
2014-04-11 08:42:14 +02:00
|
|
|
Pattern p; Matcher m;
|
|
|
|
|
2015-02-06 08:58:17 +01:00
|
|
|
p = Pattern.compile("^.*imagefap.com/gallery.php\\?pgid=([a-f0-9]+).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
isNewAlbumType = true;
|
|
|
|
return m.group(1);
|
|
|
|
}
|
2014-05-26 05:12:18 +02:00
|
|
|
p = Pattern.compile("^.*imagefap.com/gallery.php\\?gid=([0-9]+).*$");
|
2014-04-11 08:42:14 +02:00
|
|
|
m = p.matcher(url.toExternalForm());
|
2014-02-26 08:44:22 +01:00
|
|
|
if (m.matches()) {
|
2014-02-27 04:54:44 +01:00
|
|
|
return m.group(1);
|
2014-02-26 08:44:22 +01:00
|
|
|
}
|
2014-04-11 08:42:14 +02:00
|
|
|
|
2014-05-26 05:12:18 +02:00
|
|
|
p = Pattern.compile("^.*imagefap.com/pictures/([0-9]+).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
return m.group(1);
|
|
|
|
}
|
2015-02-06 08:58:17 +01:00
|
|
|
p = Pattern.compile("^.*imagefap.com/pictures/([a-f0-9]+).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
isNewAlbumType = true;
|
|
|
|
return m.group(1);
|
|
|
|
}
|
2014-05-26 05:12:18 +02:00
|
|
|
|
|
|
|
p = Pattern.compile("^.*imagefap.com/gallery/([0-9]+).*$");
|
2014-02-27 04:54:44 +01:00
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
return m.group(1);
|
2014-02-26 08:44:22 +01:00
|
|
|
}
|
2015-02-06 08:58:17 +01:00
|
|
|
p = Pattern.compile("^.*imagefap.com/gallery/([a-f0-9]+).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
isNewAlbumType = true;
|
|
|
|
return m.group(1);
|
|
|
|
}
|
2014-04-11 08:42:14 +02:00
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
throw new MalformedURLException(
|
|
|
|
"Expected imagefap.com gallery formats: "
|
|
|
|
+ "imagefap.com/gallery.php?gid=####... or "
|
|
|
|
+ "imagefap.com/pictures/####..."
|
|
|
|
+ " Got: " + url);
|
2014-02-26 08:44:22 +01:00
|
|
|
}
|
2014-06-25 04:05:54 +02:00
|
|
|
|
2014-02-26 08:44:22 +01:00
|
|
|
@Override
|
2014-06-25 04:05:54 +02:00
|
|
|
public Document getFirstPage() throws IOException {
|
2014-04-11 08:42:14 +02:00
|
|
|
if (albumDoc == null) {
|
2014-06-25 04:05:54 +02:00
|
|
|
albumDoc = Http.url(url).get();
|
2014-04-11 08:42:14 +02:00
|
|
|
}
|
2014-06-25 04:05:54 +02:00
|
|
|
return albumDoc;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Document getNextPage(Document doc) throws IOException {
|
|
|
|
String nextURL = null;
|
2014-08-16 07:07:59 +02:00
|
|
|
for (Element a : doc.select("a.link3")) {
|
2014-06-25 04:05:54 +02:00
|
|
|
if (a.text().contains("next")) {
|
2014-08-16 07:07:59 +02:00
|
|
|
nextURL = "http://imagefap.com/gallery.php" + a.attr("href");
|
2014-05-11 19:07:58 +02:00
|
|
|
break;
|
|
|
|
}
|
2014-06-25 04:05:54 +02:00
|
|
|
}
|
|
|
|
if (nextURL == null) {
|
|
|
|
throw new IOException("No next page found");
|
|
|
|
}
|
|
|
|
sleep(1000);
|
|
|
|
return Http.url(nextURL).get();
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public List<String> getURLsFromPage(Document doc) {
|
|
|
|
List<String> imageURLs = new ArrayList<String>();
|
2014-08-16 07:07:59 +02:00
|
|
|
for (Element thumb : doc.select("#gallery img")) {
|
2014-06-25 04:05:54 +02:00
|
|
|
if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
|
|
|
|
continue;
|
2014-02-25 10:28:22 +01:00
|
|
|
}
|
2014-06-25 04:05:54 +02:00
|
|
|
String image = thumb.attr("src");
|
|
|
|
image = image.replaceAll(
|
|
|
|
"http://x.*.fap.to/images/thumb/",
|
|
|
|
"http://fap.to/images/full/");
|
2015-12-19 19:08:30 +01:00
|
|
|
image = image.replaceAll("w[0-9]{1,}-h[0-9]{1,}/", "");
|
2014-06-25 04:05:54 +02:00
|
|
|
imageURLs.add(image);
|
2015-02-06 08:58:17 +01:00
|
|
|
if (isThisATest()) {
|
|
|
|
break;
|
|
|
|
}
|
2014-02-25 10:28:22 +01:00
|
|
|
}
|
2014-06-25 04:05:54 +02:00
|
|
|
return imageURLs;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void downloadURL(URL url, int index) {
|
2015-02-18 05:02:08 +01:00
|
|
|
// Send referrer for image downloads
|
|
|
|
addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
|
2014-02-26 08:44:22 +01:00
|
|
|
}
|
2014-02-25 10:28:22 +01:00
|
|
|
|
2014-06-25 04:05:54 +02:00
|
|
|
@Override
|
|
|
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
|
|
|
try {
|
|
|
|
// Attempt to use album title as GID
|
|
|
|
String title = getFirstPage().title();
|
|
|
|
Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$");
|
|
|
|
Matcher m = p.matcher(title);
|
|
|
|
if (m.matches()) {
|
|
|
|
return getHost() + "_" + m.group(1);
|
|
|
|
}
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Fall back to default album naming convention
|
|
|
|
}
|
|
|
|
return super.getAlbumTitle(url);
|
2014-02-26 08:44:22 +01:00
|
|
|
}
|
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|