|
|
|
|
|
|
Java Coding Part One: Fetching URLs
If you are familiar with C, you will quickly recognize Java. When a Java application is executed, the Java virtual machine looks for the function main, just as in C. Main calls an initialization function, and then calls a function which fetches a URL and returns the text in a string. C programmers will notice that strings are handled with much less hassle in Java. Note that we have provided a way to get through a proxy server. In part two we will add authentication for password-protected sites, and for proxy servers that require it. We will also add an object to contain much of this information that we can pass more easily. The code for this file can also be downloaded on the last page of this article.
Main
|
static String proxyServer=192.168.34.44; // put the IP or name of your proxy server here
static int proxyPort=8080; // put in the port number on which your proxy server expects connections
// make this false if you dont have a proxy server between you and the Internet
boolean useProxy=true;
public static void main(String[ ] args)
throws IOException, UnknownHostException {
init();
static String someURL=new String (http://www.warnertechnology.com);
// for now just spit the output to the terminal window
System.out.println(myGetURL(someURL));
}
} // end main |
Initialization Routine
|
/**
 * One-time setup for https support: registers Sun's JSSE security provider
 * and adds the JSSE protocol-handler package to the
 * "java.protocol.handler.pkgs" system property.
 *
 * Any failure is reported on the console and otherwise ignored, so the
 * caller can still fetch non-https URLs.
 */
static public void init() {
    try {
        // these lines essentially tell the Java VM where to look when it sees
        // the https protocol, since it isn't part of standard Java
        Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider());

        String jssePackage = "com.sun.net.ssl.internal.www.protocol";
        String existing = System.getProperty("java.protocol.handler.pkgs");
        if (existing == null || existing.length() == 0) {
            System.setProperty("java.protocol.handler.pkgs", jssePackage);
        }
        else if (("|" + existing + "|").indexOf("|" + jssePackage + "|") == -1) {
            // the property is a '|'-separated list: put the JSSE package first
            // but keep whatever handler packages were already configured
            System.setProperty("java.protocol.handler.pkgs", jssePackage + "|" + existing);
        }
    }
    catch (Exception e) {
        System.out.println("Unable to load a driver during init phase.");
        System.out.println(e);
        e.printStackTrace();
    }
} // init
myGetURL
|
/**
 * Fetches the content behind a URL and returns it as text.
 *
 * For every protocol except https the JDK's URLConnection does the work.
 * For https the request is written by hand over an SSLSocket, so the text
 * returned is whatever the server sends back on that socket (presumably
 * including the HTTP status line and headers -- confirm against the
 * SSLSocket implementation in use).
 *
 * At most 65,000 characters are returned; anything beyond that is dropped.
 * Failures while connecting or reading are reported on the console and the
 * text read up to that point (possibly the empty string) is returned.
 *
 * @param theURL the URL to fetch
 * @return the characters read, never null
 * @throws UnknownHostException kept for compatibility with existing callers
 * @throws IOException if theURL is not a well-formed URL
 */
public static String myGetURL(String theURL)
        throws UnknownHostException, IOException {
    int result = 0;   // number of characters stored in cbuf so far
    int fetched;      // characters returned by the most recent read
    URL destination = new URL(theURL);
    char[] cbuf = new char[65000];
    try {
        // are we using a proxy server? If so, Java provides a method for using it
        if (useProxy) {
            // system properties must be Strings; a non-String value is
            // invisible to System.getProperty
            String proxyPortText = String.valueOf(proxyPort);
            System.setProperty("proxySet", "true");
            System.setProperty("ftpProxySet", "true");
            // the next two lines should say just "proxyHost" and "proxyPort" for
            // use with JSSE instead of "http.proxyHost" and "http.proxyPort," respectively.
            System.setProperty("http.proxyHost", proxyServer);
            System.setProperty("http.proxyPort", proxyPortText);
            System.setProperty("https.proxyHost", proxyServer);
            System.setProperty("https.proxyPort", proxyPortText);
            System.setProperty("ftpProxyHost", proxyServer);
            System.setProperty("ftpProxyPort", proxyPortText);
        } // fi use proxy
        // this section for https is obsolete with Sun's JSSE, but JSSE only works with JDK 1.2!
        // It will also be unnecessary if we can get the OpenSSL Wrapper and HTTPClient to play
        // nicely.
        if (destination.getProtocol().equalsIgnoreCase("https")) {
            // NOTE(review): SSLSocket is not declared in this excerpt; its
            // constructors are called exactly as in the original code
            SSLSocket s = null;
            BufferedReader in = null;
            try {
                // getPort() returns -1 when the URL names no port; https defaults to 443
                int port = destination.getPort();
                if (port == -1) {
                    port = 443;
                }
                // this next part is necessary since we are handling this protocol "by hand."
                if (useProxy) {
                    InetAddress theAddress = InetAddress.getByName(proxyServer);
                    s = new SSLSocket(theAddress, proxyPort, null, 0,
                            "CONNECT " + destination.getHost() + ":" + port + " HTTP/1.0\r\n\r\n");
                }
                else {
                    s = new SSLSocket(destination.getHost(), port);
                }
                s.setSoLinger(true, 30);
                OutputStream dos = s.getOutputStream();
                InputStream is = s.getInputStream();
                in = new BufferedReader(new InputStreamReader(is));
                // request the resource named in the URL, not always the site root
                String path = destination.getFile();
                if (path == null || path.length() == 0) {
                    path = "/";
                }
                String themsg = "GET " + path + " HTTP/1.0\r\n\r\n";
                dos.write(themsg.getBytes());
                dos.flush();
                while (result < cbuf.length) {
                    fetched = in.read(cbuf, result, cbuf.length - result);
                    if (fetched == -1) {
                        break; // end of stream: the -1 marker must not be added to the count
                    }
                    result += fetched;
                } // while
            } // try
            catch (Exception e) {
                System.out.println("\nUh oh! " + e);
                e.printStackTrace();
            }
            finally {
                // release the reader and the socket even when the fetch failed
                if (in != null) {
                    try { in.close(); } catch (Exception ignored) { /* nothing useful to do */ }
                }
                if (s != null) {
                    try { s.close(); } catch (Exception ignored) { /* nothing useful to do */ }
                }
            }
        } // if https
        else { // not https
            BufferedReader in = null;
            try {
                // let Java handle all the negotiation with the HTTP site
                URLConnection conn = destination.openConnection();
                in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
                // due to the nature of sockets, you probably won't get all the text back with one call
                // so we continue to call read until we get -1, which means we're done,
                // or until we run out of space in our array of characters
                while (result < cbuf.length) {
                    fetched = in.read(cbuf, result, cbuf.length - result);
                    if (fetched == -1) {
                        break; // end of stream: the -1 marker must not be added to the count
                    }
                    result += fetched;
                } // while
            }
            finally {
                if (in != null) {
                    try { in.close(); } catch (IOException ignored) { /* data is already in cbuf */ }
                }
            }
        } // else not https
    } // try
    catch (UnknownHostException e) {
        System.err.println("Don't know about host: " + e);
    }
    catch (IOException e) {
        System.err.println("Couldn't get I/O for the connection: " + e);
    }
    catch (Exception e) {
        System.err.println(e);
    }
    // convert the array of characters to a String
    // being sure to convert only the characters that have
    // data, not the entire 65,000 character array
    return new String(cbuf, 0, result);
} // end myGetURL
Java's URL classes take care of a lot of the unnecessary detail involved with connecting to a URL. You can perform the same task on a lower level in Java by using Sockets and sending HTTP requests, but you shouldn't generally need to.
Previous Section Next Section
|