|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  org.niocchi.core.Crawler
public class Crawler
Field Summary | |
---|---|
int |
address_total_time
|
int |
connection_total_time
|
int |
incomplete_count
|
int |
internal_error_count
|
int |
processed_count
|
int |
read_total_time
|
int |
redirected_count
|
int |
select_total_time
|
long |
start_time
|
int |
status_200
|
int |
status_other
|
int |
timeout_count
|
int |
write_total_time
|
Constructor Summary | |
---|---|
Crawler(ResourceFactoryInt res_factory_,
int max_channels_)
Create a new Crawler instance. |
Method Summary | |
---|---|
int |
getConnectionTimeout()
Return the current connection timeout. |
int |
getReadTimeout()
Return the current read timeout. |
RedirectionController |
getRedirectionController()
Returns the current redirection filter that the crawler is using, or null if there is no redirection filter. |
int |
getSelectTimeout()
Returns the current select timeout. |
java.lang.String |
getUserAgent()
Returns the user agent. |
void |
interruptCrawling()
Interrupts the crawling in a clean and relatively immediate way. |
void |
printMonitoredState(java.io.PrintStream out_)
Writes some crawl statistics. |
void |
run(URLPool url_pool_)
Start the crawl. |
void |
setAllowCompression(boolean allowCompresion)
Set content compression on/off. |
void |
setConnectionTimeout(int timeout_)
Set the connection timeout. |
void |
setNegativeResolutionTTL(int ttl_)
|
void |
setReadTimeout(int timeout_)
Set the read (data reception) timeout. |
void |
setRedirectionController(RedirectionController controller_)
Sets the new RedirectionController. |
void |
setSelectTimeout(int timeout_)
Set the timeout for the selection of ready channels. |
void |
setTimeout(int timeout_)
Set the connection timeout and the read (data reception) timeout. |
void |
setUserAgent(java.lang.String ua_)
Set the user agent. |
void |
setVerbose()
|
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public long start_time
public int processed_count
public int status_200
public int status_other
public int redirected_count
public int incomplete_count
public int internal_error_count
public int connection_total_time
public int read_total_time
public int write_total_time
public int select_total_time
public int address_total_time
public int timeout_count
Constructor Detail |
---|
public Crawler(ResourceFactoryInt res_factory_, int max_channels_) throws java.io.IOException
res_factory_
- the resources factory
max_channels_
- the maximum number of used channels (therefore of used resources).
java.io.IOException
Method Detail |
---|
public void setNegativeResolutionTTL(int ttl_)
public void setVerbose()
public void setUserAgent(java.lang.String ua_)
ua_
- the user agent.
public java.lang.String getUserAgent()
public void setAllowCompression(boolean allowCompresion)
allowCompresion
-
public void run(URLPool url_pool_) throws java.io.IOException
url_pool_
-
java.io.IOException
public RedirectionController getRedirectionController()
HostRedirectionController
.
HostRedirectionController
to configure the redirections of the crawler, or you can
implement your own RedirectionController
and use the
method setRedirectionController(RedirectionController)
to
give your own policy. It is recommended that you include the old policy in your
own by checking the method RedirectionController#filter(Query, URL)
of the old redirection filter before your own filtering code.
setRedirectionController(RedirectionController)
public void setRedirectionController(RedirectionController controller_)
getRedirectionController()
public void interruptCrawling()
public void setTimeout(int timeout_)
timeout_
- the time in milliseconds.
public void setSelectTimeout(int timeout_)
timeout_
- the time in milliseconds.
public void setConnectionTimeout(int timeout_)
timeout_
- the time in milliseconds.
public void setReadTimeout(int timeout_)
timeout_
- the time in milliseconds.
public int getSelectTimeout()
public int getConnectionTimeout()
public int getReadTimeout()
public void printMonitoredState(java.io.PrintStream out_)
out_
-
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |