| Method from org.apache.nutch.crawl.CrawlDatum Detail: |
/**
 * Creates a copy of this datum by delegating to {@code super.clone()}.
 *
 * @return the object produced by {@code super.clone()}
 * @throws RuntimeException wrapping the {@link CloneNotSupportedException}
 *         if cloning is not supported
 */
public Object clone() {
  final Object copy;
  try {
    copy = super.clone();
  } catch (CloneNotSupportedException e) {
    // Surface the failure as unchecked, keeping the original cause.
    throw new RuntimeException(e);
  }
  return copy;
}
|
/**
 * Orders datums by decreasing score, then by increasing status, then by
 * decreasing fetch time, retries, fetch interval and modified time, and
 * finally by signature.
 *
 * @param that the datum to compare against
 * @return a negative value, zero, or a positive value as this datum sorts
 *         before, equal to, or after {@code that}
 */
public int compareTo(CrawlDatum that) {
  if (that.score != this.score) {
    return (that.score - this.score) > 0 ? 1 : -1;
  }
  if (that.status != this.status) {
    return this.status - that.status;
  }
  if (that.fetchTime != this.fetchTime) {
    // Compare directly: subtracting two longs can overflow and flip the sign.
    return that.fetchTime > this.fetchTime ? 1 : -1;
  }
  if (that.retries != this.retries) {
    return that.retries - this.retries;
  }
  if (that.fetchInterval != this.fetchInterval) {
    // Same overflow concern for int subtraction.
    return that.fetchInterval > this.fetchInterval ? 1 : -1;
  }
  if (that.modifiedTime != this.modifiedTime) {
    return that.modifiedTime > this.modifiedTime ? 1 : -1;
  }
  // Compare the signatures (the operands equals() hands to this helper)
  // rather than the datum objects themselves.
  return SignatureComparator._compare(this.signature, that.signature);
}
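Sorts by decreasing score; ties are broken by status, fetch time, retries, fetch interval, modified time and finally signature. |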
/**
 * Tests field-by-field equality with another {@code CrawlDatum}: status,
 * fetch time, modified time, retries, fetch interval, signature, score and
 * finally metadata.
 *
 * @param o the object to compare with
 * @return {@code true} only if {@code o} is a {@code CrawlDatum} whose
 *         fields all match this one
 */
public boolean equals(Object o) {
  if (!(o instanceof CrawlDatum)) {
    return false;
  }
  final CrawlDatum other = (CrawlDatum) o;
  if (this.status != other.status) {
    return false;
  }
  if (this.fetchTime != other.fetchTime) {
    return false;
  }
  if (this.modifiedTime != other.modifiedTime) {
    return false;
  }
  if (this.retries != other.retries) {
    return false;
  }
  if (this.fetchInterval != other.fetchInterval) {
    return false;
  }
  if (SignatureComparator._compare(this.signature, other.signature) != 0) {
    return false;
  }
  if (!(this.score == other.score)) {
    return false;
  }
  // All scalar fields and the signature agree; metadata decides.
  return metadataEquals(other.metaData);
}
|
/**
 * @return the fetch interval stored on this datum
 */
public int getFetchInterval() {
  return this.fetchInterval;
}
|
/**
 * @return the fetch time stored on this datum
 */
public long getFetchTime() {
  return this.fetchTime;
}
Returns either the time of the last fetch, or the next fetch time,
depending on whether Fetcher or CrawlDbReducer set the time. |
/**
 * Returns this datum's metadata map, creating an empty one on first access
 * if none is present yet.
 *
 * @return the metadata map held by this datum, never {@code null}
 */
public MapWritable getMetaData() {
  if (this.metaData != null) {
    return this.metaData;
  }
  // Lazily instantiate so callers never see null.
  this.metaData = new org.apache.hadoop.io.MapWritable();
  return this.metaData;
}
Returns the metadata MapWritable if one was set or read in by readFields(DataInput);
otherwise lazily creates and returns an empty map (e.g. for a freshly created CrawlDatum). |
/**
 * @return the modified time stored on this datum
 */
public long getModifiedTime() {
  return this.modifiedTime;
}
|
/**
 * @return the retry counter stored on this datum
 */
public byte getRetriesSinceFetch() {
  return this.retries;
}
|
/**
 * @return the score stored on this datum
 */
public float getScore() {
  return this.score;
}
|
/**
 * @return the signature array held by this datum (the internal reference,
 *         not a copy); may be {@code null}
 */
public byte[] getSignature() {
  return this.signature;
}
|
/**
 * @return the status code stored on this datum
 */
public byte getStatus() {
  return this.status;
}
|
/**
 * Looks up the display name registered for a status code.
 *
 * @param value the status code
 * @return the name found in {@code statNames}, or {@code "unknown"} when
 *         there is no entry for {@code value}
 */
public static String getStatusName(byte value) {
  final String name = statNames.get(value);
  return (name != null) ? name : "unknown";
}
|
public static boolean hasDbStatus(CrawlDatum datum) {
if (datum.status < = STATUS_DB_MAX) return true;
return false;
}
|
public static boolean hasFetchStatus(CrawlDatum datum) {
if (datum.status > STATUS_DB_MAX && datum.status < = STATUS_FETCH_MAX) return true;
return false;
}
|
public int hashCode() {
int res = 0;
if (signature != null) {
for (int i = 0; i < signature.length / 4; i += 4) {
res ^= (int)(signature[i] < < 24 + signature[i+1] < < 16 +
signature[i+2] < < 8 + signature[i+3]);
}
}
res ^= metaData.entrySet().hashCode();
return
res ^ status ^
((int)fetchTime) ^
((int)modifiedTime) ^
retries ^
fetchInterval ^
Float.floatToIntBits(score);
}
|
/**
 * Copies every metadata entry of {@code other} into this datum's metadata,
 * replacing entries stored under the same key.
 *
 * @param other the datum whose metadata entries are added
 */
public void putAllMetaData(CrawlDatum other) {
  // This datum's map may not exist yet; create it the same way getMetaData() does.
  if (this.metaData == null) {
    this.metaData = new org.apache.hadoop.io.MapWritable();
  }
  for (Entry<Writable, Writable> e : other.getMetaData().entrySet()) {
    this.metaData.put(e.getKey(), e.getValue());
  }
}
Add all metadata from other CrawlDatum to this CrawlDatum. |
/**
 * Creates a new {@code CrawlDatum} and populates it from the given input
 * via {@code readFields(DataInput)}.
 *
 * @param in the input to deserialize from
 * @return the newly populated datum
 * @throws IOException if reading from {@code in} fails
 */
public static CrawlDatum read(DataInput in) throws IOException {
  final CrawlDatum datum = new CrawlDatum();
  datum.readFields(in);
  return datum;
}
|
/**
 * Populates this datum from its serialized form. The first byte is a format
 * version; older versions are read with the layout they used and converted:
 * versions up to 5 store the fetch interval as a float, versions below 7
 * store metadata in the older map type, and versions below 5 use status
 * codes that are translated through {@code oldToNew}.
 *
 * @param in the input to read from
 * @throws IOException if reading fails
 * @throws VersionMismatchException if the stored version is newer than
 *         {@code CUR_VERSION}
 */
public void readFields(DataInput in) throws IOException {
  byte version = in.readByte();                 // read version
  if (version > CUR_VERSION) {                  // check version
    throw new VersionMismatchException(CUR_VERSION, version);
  }

  status = in.readByte();
  fetchTime = in.readLong();
  retries = in.readByte();
  if (version > 5) {
    fetchInterval = in.readInt();
  } else {
    fetchInterval = Math.round(in.readFloat());
  }
  score = in.readFloat();
  if (version > 2) {
    modifiedTime = in.readLong();
    // write() emits the length as a single byte; read it unsigned so that
    // lengths 128..255 are not seen as negative (which would drop the
    // signature and leave its bytes unread in the stream).
    int cnt = in.readUnsignedByte();
    if (cnt > 0) {
      signature = new byte[cnt];
      in.readFully(signature);
    } else {
      signature = null;
    }
  }
  metaData = new org.apache.hadoop.io.MapWritable();
  if (version > 3) {
    if (version < 7) {
      // Older records: read into the legacy map type, then copy the entries over.
      MapWritable oldMetaData = new MapWritable();
      if (in.readBoolean()) {
        oldMetaData.readFields(in);
      }
      for (Writable key : oldMetaData.keySet()) {
        metaData.put(key, oldMetaData.get(key));
      }
    } else {
      if (in.readBoolean()) {
        metaData.readFields(in);
      }
    }
  }
  // translate status codes
  if (version < 5) {
    if (oldToNew.containsKey(status)) {
      status = oldToNew.get(status);
    } else {
      status = STATUS_DB_UNFETCHED;
    }
  }
}
|
/**
 * Copies the contents of another instance into this instance. Scalar fields
 * are copied by value, the signature array reference is shared, and the
 * metadata is placed in a newly constructed map.
 *
 * @param that the datum to copy from
 */
public void set(CrawlDatum that) {
  this.status = that.status;
  this.fetchTime = that.fetchTime;
  this.retries = that.retries;
  this.fetchInterval = that.fetchInterval;
  this.score = that.score;
  this.modifiedTime = that.modifiedTime;
  this.signature = that.signature;   // same array reference, not a copy
  if (that.metaData != null) {
    // Build a new map from the other datum's entries.
    this.metaData = new org.apache.hadoop.io.MapWritable(that.metaData);
  } else {
    // The source map may not exist yet (getMetaData() creates it lazily);
    // never hand null to the copy constructor.
    this.metaData = new org.apache.hadoop.io.MapWritable();
  }
}
Copy the contents of another instance into this instance. |
/**
 * Stores the given fetch interval on this datum.
 *
 * @param fetchInterval the new fetch interval
 */
public void setFetchInterval(int fetchInterval) {
  final int interval = fetchInterval;
  this.fetchInterval = interval;
}
|
/**
 * Stores the given fetch interval after rounding it to the nearest int
 * with {@link Math#round(float)}.
 *
 * @param fetchInterval the new fetch interval, rounded before storing
 */
public void setFetchInterval(float fetchInterval) {
  final int rounded = Math.round(fetchInterval);
  this.fetchInterval = rounded;
}
|
/**
 * Stores the given fetch time on this datum.
 *
 * @param fetchTime the new fetch time
 */
public void setFetchTime(long fetchTime) {
  final long time = fetchTime;
  this.fetchTime = time;
}
Sets either the time of the last fetch or the next fetch time,
depending on whether Fetcher or CrawlDbReducer set the time. |
/**
 * Replaces this datum's metadata with a new map constructed from the
 * given one.
 *
 * @param mapWritable the map to build the new metadata from
 */
public void setMetaData(MapWritable mapWritable) {
  final org.apache.hadoop.io.MapWritable copy =
      new org.apache.hadoop.io.MapWritable(mapWritable);
  this.metaData = copy;
}
|
/**
 * Stores the given modified time on this datum.
 *
 * @param modifiedTime the new modified time
 */
public void setModifiedTime(long modifiedTime) {
  final long time = modifiedTime;
  this.modifiedTime = time;
}
|
/**
 * Stores the retry counter, narrowed to a {@code byte}.
 *
 * @param retries the new retry count; only its low 8 bits are kept
 */
public void setRetriesSinceFetch(int retries) {
  final byte narrowed = (byte) retries;
  this.retries = narrowed;
}
|
/**
 * Stores the given score on this datum.
 *
 * @param score the new score
 */
public void setScore(float score) {
  final float value = score;
  this.score = value;
}
|
/**
 * Stores the given signature array (by reference) after checking its length.
 *
 * @param signature the new signature; may be {@code null}
 * @throws RuntimeException if the signature is longer than 256 bytes
 */
public void setSignature(byte[] signature) {
  // NOTE(review): write() stores the length with a single writeByte(), so a
  // 256-byte signature would not fit that prefix — confirm the intended limit.
  final boolean tooLong = signature != null && signature.length > 256;
  if (tooLong) {
    throw new RuntimeException("Max signature length (256) exceeded: " + signature.length);
  }
  this.signature = signature;
}
|
/**
 * Stores the status code, narrowed to a {@code byte}.
 *
 * @param status the new status code; only its low 8 bits are kept
 */
public void setStatus(int status) {
  final byte narrowed = (byte) status;
  this.status = narrowed;
}
|
public String toString() {
// register this comparator
WritableComparator.define(CrawlDatum.class, new Comparator());
StringBuilder buf = new StringBuilder();
buf.append("Version: " + CUR_VERSION + "\n");
buf.append("Status: " + getStatus() + " (" + getStatusName(getStatus()) + ")\n");
buf.append("Fetch time: " + new Date(getFetchTime()) + "\n");
buf.append("Modified time: " + new Date(getModifiedTime()) + "\n");
buf.append("Retries since fetch: " + getRetriesSinceFetch() + "\n");
buf.append("Retry interval: " + getFetchInterval() + " seconds (" +
(getFetchInterval() / FetchSchedule.SECONDS_PER_DAY) + " days)\n");
buf.append("Score: " + getScore() + "\n");
buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n");
buf.append("Metadata: ");
for (Entry< Writable, Writable > e : metaData.entrySet()) {
buf.append(e.getKey());
buf.append(": ");
buf.append(e.getValue());
}
buf.append('\n");
return buf.toString();
}
|
/**
 * Serializes this datum in the current format: version byte, status, fetch
 * time, retries, fetch interval, score, modified time, a one-byte signature
 * length followed by the signature bytes, then a boolean flag followed by
 * the metadata map when it is non-empty.
 *
 * @param out the output to write to
 * @throws IOException if writing fails
 */
public void write(DataOutput out) throws IOException {
  out.writeByte(CUR_VERSION);                   // store current version
  out.writeByte(status);
  out.writeLong(fetchTime);
  out.writeByte(retries);
  out.writeInt(fetchInterval);
  out.writeFloat(score);
  out.writeLong(modifiedTime);
  if (signature == null) {
    out.writeByte(0);
  } else {
    out.writeByte(signature.length);
    out.write(signature);
  }
  // metaData may not have been created yet (getMetaData() instantiates it
  // lazily); a missing map is written the same way as an empty one.
  if (metaData != null && metaData.size() > 0) {
    out.writeBoolean(true);
    metaData.write(out);
  } else {
    out.writeBoolean(false);
  }
}
|