| Method from org.apache.nutch.collection.CollectionManager Detail: |
/**
 * Creates a new subcollection and registers it in {@code collectionMap}
 * under the given id.
 *
 * @param id   key the new subcollection is stored under
 * @param name name handed to the {@code Subcollection} constructor
 * @return the newly created subcollection, or {@code null} when
 *         {@code collectionMap} already contains an entry for {@code id}
 */
public Subcollection createSubCollection(String id,
    String name) {
  if (collectionMap.containsKey(id)) {
    // An entry with this id already exists: nothing is created or replaced.
    return null;
  }
  final Subcollection created = new Subcollection(id, name, getConf());
  collectionMap.put(id, created);
  return created;
}
Create a new subcollection. |
/**
 * Removes the subcollection stored under the given id, if there is one.
 *
 * The entry is removed only when {@link #getSubColection(String)} returns a
 * non-null value for {@code id}; otherwise the map is left untouched.
 *
 * @param id key of the subcollection to remove
 * @throws IOException declared by the signature; no statement in this
 *         method body throws it directly
 */
public void deleteSubCollection(String id) throws IOException {
  if (getSubColection(id) == null) {
    // Nothing is registered under this id.
    return;
  }
  collectionMap.remove(id);
}
Delete named subcollection |
/**
 * Returns all registered subcollections.
 *
 * The result is whatever {@code collectionMap.values()} returns; no copy is
 * made here.
 * NOTE(review): if collectionMap is a java.util.Map this is presumably a
 * live view backed by the map - confirm before mutating it in callers.
 *
 * @return the values of {@code collectionMap}
 */
public Collection getAll() {
  final Collection subcollections = collectionMap.values();
  return subcollections;
}
|
/**
 * Returns the CollectionManager cached for the given configuration, creating
 * and caching a new one when the cache has none.
 *
 * The instance is looked up in the {@code ObjectCache} obtained from
 * {@code conf} under the key {@code "collectionmanager"}.
 *
 * @param conf configuration used to locate the cache and to construct a new
 *             manager when needed
 * @return the cached or newly created manager
 * @throws RuntimeException wrapping any exception raised while creating or
 *         caching a new manager
 */
public static CollectionManager getCollectionManager(Configuration conf) {
  final String cacheKey = "collectionmanager";
  final ObjectCache cache = ObjectCache.get(conf);
  final CollectionManager cached = (CollectionManager) cache.getObject(cacheKey);
  if (cached != null) {
    return cached;
  }
  try {
    if (LOG.isInfoEnabled()) {
      LOG.info("Instantiating CollectionManager");
    }
    final CollectionManager created = new CollectionManager(conf);
    cache.setObject(cacheKey, created);
    return created;
  } catch (Exception e) {
    throw new RuntimeException("Couldn't create CollectionManager", e);
  }
}
|
/**
 * Looks up the subcollection stored under the given id.
 *
 * @param id key to look up in {@code collectionMap}
 * @return the value {@code collectionMap.get(id)} yields, cast to
 *         {@code Subcollection}
 */
public Subcollection getSubColection(String id) {
  final Object entry = collectionMap.get(id);
  return (Subcollection) entry;
}
Returns named subcollection |
/**
 * Returns the names of the subcollections the given url is part of.
 *
 * Every subcollection in {@code collectionMap} whose {@code filter(url)}
 * returns a non-null value contributes its name, each name preceded by a
 * single space (so a non-empty result starts with a space).
 *
 * @param url url to test against every registered subcollection
 * @return the space-prefixed names of the matching subcollections, or an
 *         empty string when none match
 */
public String getSubCollections(String url) {
  // A StringBuilder avoids re-copying the accumulated text on every match.
  final StringBuilder names = new StringBuilder();
  for (final Iterator iterator = collectionMap.values().iterator(); iterator.hasNext();) {
    final Subcollection subCol = (Subcollection) iterator.next();
    if (subCol.filter(url) != null) {
      names.append(' ').append(subCol.name);
    }
  }
  final String collections = names.toString();
  if (LOG.isTraceEnabled()) { LOG.trace("subcollections:" + collections); }
  return collections;
}
Return the names of the collections that the url is part of |
/**
 * Loads the known subcollections from the configured resource.
 *
 * The resource name is read from the {@code subcollections.config} property,
 * falling back to {@code DEFAULT_FILE_NAME}. Its location is remembered in
 * {@code configfile} and its content is handed to {@link #parse(InputStream)}.
 * Any exception raised along the way is logged at warn level and not
 * propagated.
 */
protected void init() {
  try {
    if (LOG.isInfoEnabled()) { LOG.info("initializing CollectionManager"); }
    // initialize known subcollections
    // Resolve the resource name once so both lookups use the same value.
    final String resourceName =
        getConf().get("subcollections.config", DEFAULT_FILE_NAME);
    configfile = getConf().getResource(resourceName);
    final InputStream input = getConf().getConfResourceAsInputStream(resourceName);
    try {
      parse(input);
    } finally {
      // Release the stream even when parsing fails.
      if (input != null) {
        input.close();
      }
    }
  } catch (Exception e) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Error occured:" + e);
      e.printStackTrace(LogUtil.getWarnStream(LOG));
    }
  }
}
|
/**
 * Reads subcollection definitions from the given stream and registers them.
 *
 * The stream is turned into a DOM root element via {@code DomUtil.getDom}.
 * For every {@code Subcollection.TAG_COLLECTION} element below it a new
 * {@code Subcollection} is created, initialized from that element and stored
 * in {@code collectionMap} under its {@code name}. When no root element is
 * obtained, only an info message is logged.
 *
 * NOTE(review): entries are keyed by name here, while createSubCollection
 * keys by id - confirm the two are meant to coincide.
 *
 * @param input stream containing the subcollection definitions
 */
protected void parse(InputStream input) {
  final Element collections = DomUtil.getDom(input);
  if (collections == null) {
    if (LOG.isInfoEnabled()) {
      LOG.info("Cannot find collections");
    }
    return;
  }

  final NodeList nodeList = collections
      .getElementsByTagName(Subcollection.TAG_COLLECTION);
  if (LOG.isInfoEnabled()) {
    // Space after "has" so the count does not run into the preceding word.
    LOG.info("file has " + nodeList.getLength() + " elements");
  }
  for (int i = 0; i < nodeList.getLength(); i++) {
    final Element scElem = (Element) nodeList.item(i);
    final Subcollection subCol = new Subcollection(getConf());
    subCol.initialize(scElem);
    collectionMap.put(subCol.name, subCol);
  }
}
|
/**
 * Saves all registered subcollections into the file {@code configfile}
 * points to.
 *
 * A {@code Subcollection.TAG_COLLECTIONS} root element is built with one
 * {@code Subcollection.TAG_COLLECTION} child per subcollection, each holding
 * name, whitelist and blacklist child elements, and is then written with
 * {@code DomUtil.saveDom}.
 *
 * @throws IOException if {@code configfile} is not set, the target file
 *         cannot be opened, or flushing/closing the stream fails
 */
public void save() throws IOException {
  if (configfile == null) {
    throw new IOException(
        "Cannot save collections: configuration file location is not set");
  }

  // Build the whole document before opening the file: opening truncates it,
  // so a failure while building must not leave an empty file behind.
  final Document doc = new DocumentImpl();
  final Element collections = doc
      .createElement(Subcollection.TAG_COLLECTIONS);
  final Iterator iterator = collectionMap.values().iterator();
  while (iterator.hasNext()) {
    final Subcollection subCol = (Subcollection) iterator.next();
    final Element collection = doc
        .createElement(Subcollection.TAG_COLLECTION);
    collections.appendChild(collection);
    appendTextElement(doc, collection, Subcollection.TAG_NAME,
        subCol.getName());
    appendTextElement(doc, collection, Subcollection.TAG_WHITELIST,
        subCol.getWhiteListString());
    appendTextElement(doc, collection, Subcollection.TAG_BLACKLIST,
        subCol.getBlackListString());
  }

  FileOutputStream fos = null;
  try {
    fos = new FileOutputStream(new File(configfile.getFile()));
    DomUtil.saveDom(fos, collections);
    fos.flush();
  } catch (FileNotFoundException e) {
    // Keep the original message format but preserve the cause for diagnosis.
    final IOException wrapped = new IOException(e.toString());
    wrapped.initCause(e);
    throw wrapped;
  } finally {
    // Close the stream on every path so the file handle is never leaked.
    if (fos != null) {
      fos.close();
    }
  }
}

/**
 * Appends a child element named {@code tag} to {@code parent}, with
 * {@code text} as its text content.
 *
 * A text node is used because setting the node value of an element node has
 * no effect in the DOM API. A {@code null} text yields an empty element.
 */
private static void appendTextElement(Document doc, Element parent,
    String tag, String text) {
  final Element child = doc.createElement(tag);
  if (text != null) {
    child.appendChild(doc.createTextNode(text));
  }
  parent.appendChild(child);
}
Save collections into file |