package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AbstractRipper; public class VkRipper extends AbstractRipper { private static final String DOMAIN = "vk.com", HOST = "vk"; private static final Logger logger = Logger.getLogger(SeeniveRipper.class); public VkRipper(URL url) throws IOException { super(url); } @Override public boolean canRip(URL url) { return url.getHost().endsWith(DOMAIN); } @Override public URL sanitizeURL(URL url) throws MalformedURLException { return url; } @Override public void rip() throws IOException { Map photoIDsToURLs = new HashMap(); int offset = 0; while (true) { logger.info(" Retrieving " + this.url); // al=1&offset=80&part=1 Map postData = new HashMap(); postData.put("al", "1"); postData.put("offset", Integer.toString(offset)); postData.put("part", "1"); Document doc = Jsoup.connect(this.url.toExternalForm()) .header("Referer", this.url.toExternalForm()) .ignoreContentType(true) .userAgent(USER_AGENT) .timeout(5000) .data(postData) .post(); String body = doc.toString(); if (!body.contains(" elements = doc.select("a"); Set photoIDsToGet = new HashSet(); for (Element a : elements) { if (!a.attr("onclick").contains("showPhoto('")) { logger.error("a: " + a); continue; } String photoID = a.attr("onclick"); photoID = photoID.substring(photoID.indexOf("showPhoto('") + "showPhoto('".length()); photoID = photoID.substring(0, photoID.indexOf("'")); if (!photoIDsToGet.contains(photoID)) { photoIDsToGet.add(photoID); } } for (String photoID : photoIDsToGet) { if (!photoIDsToURLs.containsKey(photoID)) { try { photoIDsToURLs.putAll(getPhotoIDsToURLs(photoID)); } catch (IOException e) { logger.error("Exception while retrieving photo id " + photoID, e); continue; } } if (!photoIDsToURLs.containsKey(photoID)) { logger.error("Could not find URL for photo ID: " + photoID); continue; } String url = photoIDsToURLs.get(photoID); addURLToDownload(new URL(url)); } logger.info("Received " + elements.size() + " elements"); if (elements.size() < 40) { break; } offset += elements.size(); } waitForThreads(); } private Map getPhotoIDsToURLs(String photoID) throws IOException { Map photoIDsToURLs = new HashMap(); Map postData = new HashMap(); // act=show&al=1&list=album45506334_172415053&module=photos&photo=45506334_304658196 postData.put("list", getGID(this.url)); postData.put("act", "show"); postData.put("al", "1"); postData.put("module", "photos"); postData.put("photo", photoID); Document doc = Jsoup .connect("https://vk.com/al_photos.php") .header("Referer", this.url.toExternalForm()) .ignoreContentType(true) .userAgent(USER_AGENT) .timeout(5000) .data(postData) .post(); String jsonString = doc.toString(); jsonString = jsonString.substring(jsonString.indexOf("") + "".length()); jsonString = jsonString.substring(0, jsonString.indexOf("")); JSONArray json = new JSONArray(jsonString); for (int i = 0; i < json.length(); i++) { JSONObject jsonImage = json.getJSONObject(i); for (String key : new String[] {"z_src", "y_src", "x_src"}) { if (!jsonImage.has(key)) { continue; } photoIDsToURLs.put(jsonImage.getString("id"), jsonImage.getString(key)); break; } } return photoIDsToURLs; } @Override public String getHost() { return HOST; } @Override public String getGID(URL url) throws MalformedURLException { Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album)([a-zA-Z0-9_]{1,}).*$"); Matcher m = p.matcher(url.toExternalForm()); if (!m.matches()) { throw new MalformedURLException("Expected format: http://vk.com/album#### or vk.com/photos####"); } int count = m.groupCount(); return m.group(count - 1) + m.group(count); } }