He estado trasteando con una forma de simular un navegador en Java. Hasta ahora he usado JMeter, que es tremendamente potente y configurable, y para pruebas de carga es imprescindible. No obstante, hay dos detalles que no me convencen: primero, a veces uno quiere algo programático en lugar de declarativo; y segundo, JMeter es durillo de entender y configurar. Además, no siempre es necesario tener métricas exóticas o peticiones de Ajax; a veces sólo queremos acceder a algún servicio web o analizar una web para bajar ficheros o automatizar tareas.
Aunque hay un montón de soluciones disponibles, me he propuesto hacer un pequeño simulador de navegador (un web crawler) en Java que permita implementar tareas de forma sencilla. A la criatura le he puesto Hoverkraft. Dejo por aquí el código fuente.
package net.krusher.hoverkraft;

import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Serializable;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Scanner;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Hoverkraft - Das Web Boot
 *
 * <p>A minimal programmatic browser simulator built on {@link HttpURLConnection}.
 * Typical use: call {@link #go(String, Method)}, optionally
 * {@link #setPostVars(Map)} / {@link #setUploads(Map)}, then {@link #execute()},
 * and finally read {@link #getCode()}, {@link #getContent()} or {@link #getXml()}.
 *
 * <p>Cookies received in responses are remembered between executions and sent
 * back on later requests. They are stored by name only, so they are sent to
 * every host this instance visits.
 *
 * <p>Instances hold mutable per-request state and perform no synchronization.
 *
 * @author Axelei
 */
public class Hoverkraft implements Serializable {

    private static final long serialVersionUID = -4846381367781986634L;

    /** Value sent in the {@code user-agent} request header. */
    public static final String USER_AGENT = " Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36";

    /** Value sent in the {@code accept} request header. */
    public static final String ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";

    /** Value sent in the {@code accept-language} request header. */
    public static final String ACCEPT_LANGUAGE = "es,en-US;q=0.8,en;q=0.6";

    /** Maximum number of redirects followed by a single {@link #execute()}. */
    public static final int MAX_TRIES = 5;

    private static final String LINE_FEED = "\r\n";

    /** Charset used for request bodies and as fallback for response bodies. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** HTTP methods supported by this class. */
    public enum Method {
        GET, POST
    }

    private URL url;
    // A live connection cannot be serialized; it is re-created by execute().
    private transient HttpURLConnection connection;
    private String content;
    private int code = -1;
    private Method method;
    private String referer;
    private Map<String, String> postVars = new HashMap<String, String>();
    // NOTE(review): java.net.HttpCookie does not appear to implement Serializable,
    // so serializing an instance that holds cookies would likely fail - confirm.
    private Map<String, HttpCookie> cookies = new HashMap<String, HttpCookie>();
    private Map<String, File> uploads = new HashMap<String, File>();

    /**
     * Parses the content of the last execution with jsoup.
     *
     * @return the parsed document
     */
    public Document getXml() {
        return Jsoup.parse(content);
    }

    public Hoverkraft() {
        super();
    }

    /**
     * Set sail to a destination. Nothing is sent until {@link #execute()}.
     *
     * @param url absolute URL to request
     * @param method HTTP method to use
     * @throws MalformedURLException if {@code url} cannot be parsed
     * @throws IllegalArgumentException if {@code method} is {@code null}
     */
    public void go(String url, Method method) throws MalformedURLException {
        if (method == null) {
            throw new IllegalArgumentException("method must not be null");
        }
        this.url = new URL(url);
        this.method = method;
    }

    /**
     * Set sail to a destination using {@link Method#GET}.
     *
     * @param url absolute URL to request
     * @throws MalformedURLException if {@code url} cannot be parsed
     */
    public void go(String url) throws MalformedURLException {
        go(url, Method.GET);
    }

    /**
     * Disconnects the connection of the last execution, if there is one.
     */
    public void disconnect() {
        if (connection != null) {
            connection.disconnect();
        }
    }

    /**
     * Sets the form variables sent with the next POST. They are consumed by
     * that request. The given map itself is never modified.
     *
     * @param vars form variables; {@code null} means none
     */
    public void setPostVars(Map<String, String> vars) {
        this.postVars = (vars != null) ? vars : new HashMap<String, String>();
    }

    /**
     * Sets the files uploaded with the next POST (form field name to file).
     * They are consumed by that request. The given map itself is never modified.
     *
     * @param uploads files to upload; {@code null} means none
     */
    public void setUploads(Map<String, File> uploads) {
        this.uploads = (uploads != null) ? uploads : new HashMap<String, File>();
    }

    /**
     * Executes the web petition.
     *
     * <p>Redirect responses (301, 302, 303) that reach this method are followed,
     * up to {@link #MAX_TRIES} times, resolving relative {@code Location} values
     * against the current URL. A redirected POST continues as a GET, since its
     * body has already been consumed. Cookies are collected from every response,
     * including the redirect responses handled here.
     *
     * @throws IOException if the request fails or the response body cannot be read
     * @throws IllegalStateException if no destination was set with {@code go}
     */
    public void execute() throws IOException {
        if (url == null) {
            throw new IllegalStateException("No destination set: call go() before execute()");
        }

        for (int hops = 0;; hops++) {
            connection = (HttpURLConnection) url.openConnection();
            setProperties(connection);
            connection.connect();
            code = connection.getResponseCode();
            storeCookies(connection);

            // The redirect decision is recomputed for every response; a redirect
            // without a Location header is treated as the final response.
            String location = isRedirect(code) ? connection.getHeaderField("Location") : null;
            if (location == null || hops >= MAX_TRIES) {
                break;
            }
            url = new URL(url, location);
            if (method == Method.POST) {
                method = Method.GET;
            }
        }

        // getInputStream() always yields a stream, whereas getContent() may
        // return a non-stream object depending on the content type.
        content = stream2string(connection.getInputStream(), charsetOf(connection));
        referer = url.toString();
    }

    /** Tells whether the status code is one of the redirects followed by execute(). */
    private static boolean isRedirect(int status) {
        return status == HttpURLConnection.HTTP_MOVED_TEMP
                || status == HttpURLConnection.HTTP_MOVED_PERM
                || status == HttpURLConnection.HTTP_SEE_OTHER;
    }

    /** Remembers the cookies of a response, dropping those the server expired. */
    private void storeCookies(HttpURLConnection connection) {
        Map<String, List<String>> headers = connection.getHeaderFields();
        for (Entry<String, List<String>> header : headers.entrySet()) {
            // Header names are case-insensitive; the status line has a null key.
            if (!"Set-Cookie".equalsIgnoreCase(header.getKey())) {
                continue;
            }
            for (String rawCookie : header.getValue()) {
                List<HttpCookie> parsedCookies;
                try {
                    parsedCookies = HttpCookie.parse(rawCookie);
                } catch (IllegalArgumentException ignored) {
                    // A malformed cookie should not abort the whole request.
                    continue;
                }
                for (HttpCookie parsedCookie : parsedCookies) {
                    if (parsedCookie.hasExpired()) {
                        cookies.remove(parsedCookie.getName());
                    } else {
                        cookies.put(parsedCookie.getName(), parsedCookie);
                    }
                }
            }
        }
    }

    /**
     * Configures the request (headers, method, cookies) and, for POST requests
     * that carry variables or uploads, writes the request body.
     *
     * @param connection freshly opened connection to configure
     * @throws IOException if the request body cannot be written
     */
    private void setProperties(HttpURLConnection connection) throws IOException {
        // Headers
        connection.setRequestProperty("user-agent", USER_AGENT);
        connection.setRequestProperty("accept", ACCEPT);
        connection.setRequestProperty("accept-language", ACCEPT_LANGUAGE);
        if (referer != null) {
            connection.setRequestProperty("referer", referer);
        }
        connection.setRequestMethod(method.toString());

        // Cookies: all of them go into ONE header, because setRequestProperty
        // replaces any previous value set for the same header name.
        StringBuilder cookieHeader = new StringBuilder();
        for (HttpCookie cookie : cookies.values()) {
            if (cookie.hasExpired()) {
                continue;
            }
            if (cookieHeader.length() > 0) {
                cookieHeader.append("; ");
            }
            cookieHeader.append(cookie.getName()).append('=').append(cookie.getValue());
        }
        if (cookieHeader.length() > 0) {
            connection.setRequestProperty("Cookie", cookieHeader.toString());
        }

        connection.setDoOutput(false);

        // Post variables and uploads
        if (method == Method.POST && !uploads.isEmpty()) {
            writeMultipartBody(connection);
        } else if (method == Method.POST && !postVars.isEmpty()) {
            writeFormBody(connection);
        }
    }

    /** Sends the post variables as an application/x-www-form-urlencoded body. */
    private void writeFormBody(HttpURLConnection connection) throws IOException {
        connection.setDoOutput(true);

        StringBuilder urlParameters = new StringBuilder();
        for (Entry<String, String> var : postVars.entrySet()) {
            if (urlParameters.length() > 0) {
                urlParameters.append('&');
            }
            urlParameters.append(encode(var.getKey())).append('=').append(encode(var.getValue()));
        }
        byte[] payload = urlParameters.toString().getBytes(DEFAULT_CHARSET);

        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        connection.setRequestProperty("charset", "utf-8");
        connection.setRequestProperty("Content-Length", Integer.toString(payload.length));

        DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
        try {
            wr.write(payload);
            wr.flush();
        } finally {
            wr.close();
        }

        // Consumed: start over with a fresh map instead of clearing the caller's.
        postVars = new HashMap<String, String>();
    }

    /** URL-encodes a form key or value, treating {@code null} as empty. */
    private static String encode(String text) throws IOException {
        return URLEncoder.encode(text == null ? "" : text, DEFAULT_CHARSET);
    }

    /** Sends the post variables and the uploads as a multipart/form-data body. */
    private void writeMultipartBody(HttpURLConnection connection) throws IOException {
        String boundary = "===" + System.currentTimeMillis() + "===";
        connection.setUseCaches(false);
        connection.setDoOutput(true);
        connection.setDoInput(true);
        connection.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
        connection.setRequestProperty("charset", "UTF-8");

        OutputStream outputStream = connection.getOutputStream();
        PrintWriter writer = new PrintWriter(new OutputStreamWriter(outputStream, DEFAULT_CHARSET), true);
        try {
            for (Entry<String, String> var : postVars.entrySet()) {
                String value = (var.getValue() != null) ? var.getValue() : "";
                writer.append("--" + boundary).append(LINE_FEED);
                writer.append("Content-Disposition: form-data; name=\"" + var.getKey() + "\"").append(LINE_FEED);
                writer.append("Content-Type: text/plain; charset=UTF-8").append(LINE_FEED);
                writer.append(LINE_FEED);
                writer.append(value).append(LINE_FEED);
                writer.flush();
            }

            for (Entry<String, File> upload : uploads.entrySet()) {
                String fileName = upload.getValue().getName();
                String contentType = URLConnection.guessContentTypeFromName(fileName);
                if (contentType == null) {
                    // Unknown extension: declare generic binary data.
                    contentType = "application/octet-stream";
                }
                writer.append("--" + boundary).append(LINE_FEED);
                writer.append("Content-Disposition: form-data; name=\"" + upload.getKey()
                        + "\"; filename=\"" + fileName + "\"").append(LINE_FEED);
                writer.append("Content-Type: " + contentType).append(LINE_FEED);
                writer.append("Content-Transfer-Encoding: binary").append(LINE_FEED);
                writer.append(LINE_FEED);
                // Flush the text headers before raw bytes go to the underlying stream.
                writer.flush();

                FileInputStream inputStream = new FileInputStream(upload.getValue());
                try {
                    byte[] buffer = new byte[4096];
                    int bytesRead;
                    while ((bytesRead = inputStream.read(buffer)) != -1) {
                        outputStream.write(buffer, 0, bytesRead);
                    }
                    outputStream.flush();
                } finally {
                    inputStream.close();
                }

                // This line feed belongs to the next boundary delimiter.
                writer.append(LINE_FEED);
                writer.flush();
            }

            // The closing delimiter follows directly: any extra line feed here
            // would become part of the last uploaded file.
            writer.append("--" + boundary + "--").append(LINE_FEED);
        } finally {
            writer.close();
        }

        // Consumed: start over with fresh maps instead of clearing the caller's.
        postVars = new HashMap<String, String>();
        uploads = new HashMap<String, File>();
    }

    /**
     * Resets the browser: forgets destination, last response, referer, pending
     * post variables and uploads, and all cookies.
     */
    public void reset() {
        url = null;
        connection = null;
        content = null;
        code = -1;
        referer = null;
        postVars = new HashMap<String, String>();
        cookies.clear();
        uploads = new HashMap<String, File>();
    }

    /**
     * Get contents of last execution.
     *
     * @return the response body, or {@code null} if nothing has been read yet
     */
    public String getContent() {
        return content;
    }

    /**
     * Gets HTTP code of last execution.
     *
     * @return the status code, or {@code -1} if nothing has been executed yet
     */
    public int getCode() {
        return code;
    }

    /**
     * Extracts the charset parameter of the response Content-Type header,
     * falling back to UTF-8 when the header or the parameter is missing.
     */
    private static String charsetOf(URLConnection connection) {
        String contentType = connection.getContentType();
        if (contentType != null) {
            String prefix = "charset=";
            for (String parameter : contentType.split(";")) {
                String trimmed = parameter.trim();
                if (!trimmed.regionMatches(true, 0, prefix, 0, prefix.length())) {
                    continue;
                }
                String name = trimmed.substring(prefix.length()).trim();
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                if (name.length() > 0) {
                    return name;
                }
            }
        }
        return DEFAULT_CHARSET;
    }

    /**
     * Reads a whole stream into a string and closes it.
     *
     * @param is stream to drain
     * @param charsetName charset used to decode it; unknown names fall back to UTF-8
     * @return the decoded text, empty if the stream has no data
     * @throws IOException if reading the stream fails
     */
    private static String stream2string(InputStream is, String charsetName) throws IOException {
        Scanner scanner;
        try {
            scanner = new Scanner(is, charsetName);
        } catch (IllegalArgumentException ignored) {
            // The server announced a charset this JVM does not know.
            scanner = new Scanner(is, DEFAULT_CHARSET);
        }
        try {
            scanner.useDelimiter("\\A");
            StringBuilder salida = new StringBuilder();
            while (scanner.hasNext()) {
                salida.append(scanner.next());
            }
            // Scanner hides read errors; surface them instead of returning
            // a silently truncated body.
            IOException readError = scanner.ioException();
            if (readError != null) {
                throw readError;
            }
            return salida.toString();
        } finally {
            scanner.close();
        }
    }
}
Este código depende de la librería jsoup, que naturalmente está disponible como software libre por ahí y funciona desde Maven perfectamente.
Por supuesto estaría encantado de poder leer cualquier mejora o crítica. ¡Comenta, comenta!