Source code: com/opencms/linkmanagement/LinkChecker.java
1 /*
2 * File : $Source: /usr/local/cvs/opencms/src/com/opencms/linkmanagement/LinkChecker.java,v $
3 * Date : $Date: 2003/04/01 15:20:18 $
4 * Version: $Revision: 1.6 $
5 *
6 * This library is part of OpenCms -
7 * the Open Source Content Management System
8 *
9 * Copyright (C) 2001 The OpenCms Group
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * For further information about OpenCms, please see the
22 * OpenCms Website: http://www.opencms.org
23 *
24 * You should have received a copy of the GNU Lesser General Public
25 * License along with this library; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 */
28 package com.opencms.linkmanagement;
29
30 import java.util.*;
31 import com.opencms.file.*;
32 import com.opencms.core.*;
33 import com.opencms.template.*;
34 import com.opencms.report.*;
35
36 /**
37 * Extracts all links (anchors) out of a OpenCms page.
38 *
39 * @author Hanjo Riege
40 * @version 1.0
41 */
42
43 public class LinkChecker {
44
45 public LinkChecker() {
46 }
47
48 /**
49 * Checks the content of the page and extracts all links that are on this page.
50 * Used by the linkmanagement to save the links in the database.
51 *
52 * @param cms The CmsObject.
53 * @param page The name(getAbsolutePath) of the page
54 * @return The compleate CmsPageLinks object for this page.
55 */
56 public CmsPageLinks extractLinks(CmsObject cms, String page) throws CmsException{
57
58 // first lets get the prefix of the page name (we need it later)
59 String rootName = cms.readFileHeader(page).getRootName();
60 // get the pages content
61 String bodyFileName = null;
62 String bodyClassName = null;
63 CmsXmlControlFile pageControlFile = new CmsXmlControlFile(cms, page);
64 if(pageControlFile.isElementTemplateDefined(I_CmsConstants.C_TYPE_BODY_NAME)){
65 bodyFileName = pageControlFile.getElementTemplate(I_CmsConstants.C_TYPE_BODY_NAME);
66 }
67 if(pageControlFile.isElementClassDefined(I_CmsConstants.C_TYPE_BODY_NAME)){
68 bodyClassName = pageControlFile.getElementClass(I_CmsConstants.C_TYPE_BODY_NAME);
69 }
70 CmsXmlTemplate bodyClassObject = (CmsXmlTemplate)CmsTemplateClassManager.getClassInstance(cms, bodyClassName);
71 CmsXmlTemplateFile bodyTemplateFile = bodyClassObject.getOwnTemplateFile(cms,
72 bodyFileName, I_CmsConstants.C_TYPE_BODY_NAME, null, null);
73 Vector result = bodyTemplateFile.getAllLinkTagValues();
74 // we have to cleanup the result. Relative links will be inserted as absolute links
75 // and we cut the parameters.
76 Vector cleanResult = new Vector(result.size());
77 for(int i=0; i<result.size(); i++){
78 String work = (String)result.elementAt(i);
79 // first the parameters
80 int paraStart = work.indexOf("?");
81 if(paraStart >= 0){
82 work = work.substring(0, paraStart);
83 }
84 // dont forget the anker links
85 int ankerStart = work.indexOf("#");
86 if(ankerStart >= 0){
87 work = work.substring(0, ankerStart);
88 }
89 // here is something for the future: if link starts with /// it is the full name of the resource
90 if(work.startsWith("///")){
91 work = work.substring(2);
92 }else{
93 // now for relative links
94 work = com.opencms.util.Utils.mergeAbsolutePath(page, work);
95 // now we need the site prefix (lets take it from the page itself)
96 work = rootName +work;
97 }
98 cleanResult.add(work);
99 }
100 return new CmsPageLinks(cms.readFileHeader(page).getResourceId(), cleanResult);
101 }
102
103 /**
104 * This Method checks if the online project has broken links when the project with
105 * the projectId is published (if projectId = onlineProjectId it simply checks the
106 * online project).
107 *
108 * @param cms The CmsObject.
109 * @param projectId The id of the project to be published.
110 * @param report A cmsReport object for logging while the method is still running.
111 *
112 * The report is filled with a CmsPageLinks object for each page containing broken links
113 * this CmsPageLinks object contains all links on the page withouth a valid target.
114 */
115 public void checkProject(CmsObject cms, int projectId, I_CmsReport report)throws CmsException{
116 if(projectId == CmsObject.C_PROJECT_ONLINE_ID){
117 // lets check only the online project
118 Vector result = cms.getOnlineBrokenLinks();
119 for(int i=0; i<result.size(); i++){
120 report.println((CmsPageLinks)result.elementAt(i));
121 }
122 }else{
123 // we are in a project. First get the changed, new ,deleted resources
124 Vector deleted = cms.readProjectView(projectId, "deleted");
125 Vector changed = cms.readProjectView(projectId, "changed");
126 Vector newRes = cms.readProjectView(projectId, "new");
127 cms.getBrokenLinks(projectId, report, changed, deleted, newRes);
128 }
129 }
130 }