Added VK ripper
This commit is contained in:
parent
e0275949f7
commit
d51ad485f6
2
.gitignore
vendored
2
.gitignore
vendored
@ -4,3 +4,5 @@ ripme.log
|
|||||||
rips/
|
rips/
|
||||||
.history
|
.history
|
||||||
ripme.jar.update
|
ripme.jar.update
|
||||||
|
*.swp
|
||||||
|
ripme.jar
|
||||||
|
2
pom.xml
2
pom.xml
@ -4,7 +4,7 @@
|
|||||||
<groupId>com.rarchives.ripme</groupId>
|
<groupId>com.rarchives.ripme</groupId>
|
||||||
<artifactId>ripme</artifactId>
|
<artifactId>ripme</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<version>1.0.2</version>
|
<version>1.0.3</version>
|
||||||
<name>ripme</name>
|
<name>ripme</name>
|
||||||
<url>http://rip.rarchives.com</url>
|
<url>http://rip.rarchives.com</url>
|
||||||
<properties>
|
<properties>
|
||||||
|
158
src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java
Normal file
158
src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
package com.rarchives.ripme.ripper.rippers;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ripper.AbstractRipper;
|
||||||
|
|
||||||
|
public class VkRipper extends AbstractRipper {
|
||||||
|
|
||||||
|
private static final String DOMAIN = "vk.com",
|
||||||
|
HOST = "vk";
|
||||||
|
private static final Logger logger = Logger.getLogger(SeeniveRipper.class);
|
||||||
|
|
||||||
|
public VkRipper(URL url) throws IOException {
|
||||||
|
super(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canRip(URL url) {
|
||||||
|
return url.getHost().endsWith(DOMAIN);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void rip() throws IOException {
|
||||||
|
Map<String,String> photoIDsToURLs = new HashMap<String,String>();
|
||||||
|
int offset = 0;
|
||||||
|
while (true) {
|
||||||
|
logger.info(" Retrieving " + this.url);
|
||||||
|
|
||||||
|
// al=1&offset=80&part=1
|
||||||
|
Map<String,String> postData = new HashMap<String,String>();
|
||||||
|
postData.put("al", "1");
|
||||||
|
postData.put("offset", Integer.toString(offset));
|
||||||
|
postData.put("part", "1");
|
||||||
|
Document doc = Jsoup.connect(this.url.toExternalForm())
|
||||||
|
.header("Referer", this.url.toExternalForm())
|
||||||
|
.ignoreContentType(true)
|
||||||
|
.userAgent(USER_AGENT)
|
||||||
|
.timeout(5000)
|
||||||
|
.data(postData)
|
||||||
|
.post();
|
||||||
|
|
||||||
|
String body = doc.toString();
|
||||||
|
if (!body.contains("<div")) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
body = body.substring(body.indexOf("<div"));
|
||||||
|
doc = Jsoup.parseBodyFragment(body);
|
||||||
|
List<Element> elements = doc.select("a");
|
||||||
|
Set<String> photoIDsToGet = new HashSet<String>();
|
||||||
|
for (Element a : elements) {
|
||||||
|
if (!a.attr("onclick").contains("showPhoto('")) {
|
||||||
|
logger.error("a: " + a);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
String photoID = a.attr("onclick");
|
||||||
|
photoID = photoID.substring(photoID.indexOf("showPhoto('") + "showPhoto('".length());
|
||||||
|
photoID = photoID.substring(0, photoID.indexOf("'"));
|
||||||
|
if (!photoIDsToGet.contains(photoID)) {
|
||||||
|
photoIDsToGet.add(photoID);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (String photoID : photoIDsToGet) {
|
||||||
|
if (!photoIDsToURLs.containsKey(photoID)) {
|
||||||
|
try {
|
||||||
|
photoIDsToURLs.putAll(getPhotoIDsToURLs(photoID));
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("Exception while retrieving photo id " + photoID, e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!photoIDsToURLs.containsKey(photoID)) {
|
||||||
|
logger.error("Could not find URL for photo ID: " + photoID);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
String url = photoIDsToURLs.get(photoID);
|
||||||
|
addURLToDownload(new URL(url));
|
||||||
|
}
|
||||||
|
logger.info("Received " + elements.size() + " elements");
|
||||||
|
if (elements.size() < 40) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
offset += elements.size();
|
||||||
|
}
|
||||||
|
waitForThreads();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String,String> getPhotoIDsToURLs(String photoID) throws IOException {
|
||||||
|
Map<String,String> photoIDsToURLs = new HashMap<String,String>();
|
||||||
|
Map<String,String> postData = new HashMap<String,String>();
|
||||||
|
// act=show&al=1&list=album45506334_172415053&module=photos&photo=45506334_304658196
|
||||||
|
postData.put("list", getGID(this.url));
|
||||||
|
postData.put("act", "show");
|
||||||
|
postData.put("al", "1");
|
||||||
|
postData.put("module", "photos");
|
||||||
|
postData.put("photo", photoID);
|
||||||
|
Document doc = Jsoup
|
||||||
|
.connect("https://vk.com/al_photos.php")
|
||||||
|
.header("Referer", this.url.toExternalForm())
|
||||||
|
.ignoreContentType(true)
|
||||||
|
.userAgent(USER_AGENT)
|
||||||
|
.timeout(5000)
|
||||||
|
.data(postData)
|
||||||
|
.post();
|
||||||
|
String jsonString = doc.toString();
|
||||||
|
jsonString = jsonString.substring(jsonString.indexOf("<!json>") + "<!json>".length());
|
||||||
|
jsonString = jsonString.substring(0, jsonString.indexOf("<!>"));
|
||||||
|
JSONArray json = new JSONArray(jsonString);
|
||||||
|
for (int i = 0; i < json.length(); i++) {
|
||||||
|
JSONObject jsonImage = json.getJSONObject(i);
|
||||||
|
for (String key : new String[] {"z_src", "y_src", "x_src"}) {
|
||||||
|
if (!jsonImage.has(key)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
photoIDsToURLs.put(jsonImage.getString("id"), jsonImage.getString(key));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return photoIDsToURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHost() {
|
||||||
|
return HOST;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
|
Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album)([a-zA-Z0-9_]{1,}).*$");
|
||||||
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
|
if (!m.matches()) {
|
||||||
|
throw new MalformedURLException("Expected format: http://vk.com/album#### or vk.com/photos####");
|
||||||
|
}
|
||||||
|
int count = m.groupCount();
|
||||||
|
return m.group(count - 1) + m.group(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,33 @@
|
|||||||
|
package com.rarchives.ripme.tst.ripper.rippers;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ripper.rippers.VkRipper;
|
||||||
|
|
||||||
|
public class VkRipperTest extends RippersTest {
|
||||||
|
|
||||||
|
public void testVkAlbum() throws IOException {
|
||||||
|
if (!DOWNLOAD_CONTENT) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
List<URL> contentURLs = new ArrayList<URL>();
|
||||||
|
contentURLs.add(new URL("https://vk.com/album45506334_172415053"));
|
||||||
|
//contentURLs.add(new URL("https://vk.com/album45506334_0"));
|
||||||
|
//contentURLs.add(new URL("https://vk.com/photos45506334"));
|
||||||
|
for (URL url : contentURLs) {
|
||||||
|
try {
|
||||||
|
VkRipper ripper = new VkRipper(url);
|
||||||
|
ripper.rip();
|
||||||
|
assert(ripper.getWorkingDir().listFiles().length > 1);
|
||||||
|
deleteDir(ripper.getWorkingDir());
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail("Error while ripping URL " + url + ": " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user