View Javadoc

1   /* Copyright (2006-2007) Schibsted Søk AS
2    * This file is part of SESAT.
3    *
4    *   SESAT is free software: you can redistribute it and/or modify
5    *   it under the terms of the GNU Affero General Public License as published by
6    *   the Free Software Foundation, either version 3 of the License, or
7    *   (at your option) any later version.
8    *
9    *   SESAT is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   *   GNU Affero General Public License for more details.
13   *
14   *   You should have received a copy of the GNU Affero General Public License
15   *   along with SESAT.  If not, see <http://www.gnu.org/licenses/>.
16  
17   * SyndicationGenerator.java
18   *
19   * Created on June 7, 2006, 2:39 PM
20   */
21  
22  package no.sesat.search.view.output;
23  
24  import com.sun.syndication.feed.synd.SyndContent;
25  import com.sun.syndication.feed.synd.SyndContentImpl;
26  import com.sun.syndication.feed.synd.SyndEnclosure;
27  import com.sun.syndication.feed.synd.SyndEnclosureImpl;
28  import com.sun.syndication.feed.synd.SyndEntry;
29  import com.sun.syndication.feed.synd.SyndEntryImpl;
30  import com.sun.syndication.feed.synd.SyndFeed;
31  import com.sun.syndication.feed.synd.SyndFeedImpl;
32  import com.sun.syndication.io.FeedException;
33  import com.sun.syndication.io.SyndFeedOutput;
34  import no.sesat.search.InfrastructureException;
35  import no.sesat.search.datamodel.DataModelContext;
36  import no.sesat.search.datamodel.generic.StringDataObject;
37  import no.sesat.search.result.ResultItem;
38  import no.sesat.search.result.ResultList;
39  import no.sesat.search.site.Site;
40  import no.sesat.search.site.SiteContext;
41  import no.sesat.search.site.config.PropertiesLoader;
42  import no.sesat.search.site.config.ResourceContext;
43  import no.sesat.search.view.config.SearchTab;
44  import no.sesat.search.site.config.TextMessages;
45  import no.sesat.search.view.output.syndication.modules.SearchResultModule;
46  import no.sesat.search.view.output.syndication.modules.SearchResultModuleImpl;
47  import no.sesat.search.view.velocity.VelocityEngineFactory;
48  import org.apache.commons.lang.StringEscapeUtils;
49  import org.apache.log4j.Logger;
50  import org.apache.velocity.Template;
51  import org.apache.velocity.VelocityContext;
52  import org.apache.velocity.app.VelocityEngine;
53  import org.apache.velocity.exception.MethodInvocationException;
54  import org.apache.velocity.exception.ParseErrorException;
55  import org.apache.velocity.exception.ResourceNotFoundException;
56  
57  import java.io.StringWriter;
58  import java.text.DateFormat;
59  import java.text.ParseException;
60  import java.text.SimpleDateFormat;
61  import java.util.ArrayList;
62  import java.util.Date;
63  import java.util.List;
64  import java.util.Properties;
65  import java.util.TimeZone;
66  import javax.resource.NotSupportedException;
67  import no.sesat.search.result.BasicResultList;
68  
69  /**
70   * Used by the rssDecorator.jsp to print out the results in rss format.
71   *
72   *
73   */
74  public final class SyndicationGenerator {
75  
76      /**
77       * The context this class needs to do its job.
78       */
79      public interface Context extends SiteContext, DataModelContext, ResourceContext {
80          /**
81           * The tab to generate rss for.
82           *
83           * @return The search tab to generate rss for.
84           */
85          SearchTab getTab();
86  
87          /**
88           * The complete URL of the original page the rss represents.
89           *
90           * @return the url of the original page.
91           */
92          String getURL();
93      }
94  
95      // Constants -----------------------------------------------------
96  
97      // Any other way to get rid of the dc:date tags that ROME generates.
98      private static final Logger LOG = Logger.getLogger(SyndicationGenerator.class);
99  
100     private static final String DCDATE_PATTERN = "<dc:date>[^<]+</dc:date>";
101 
102     private static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'";
103     private static final String ERR_TEMPLATE_NOT_FOUND = " Unable to find template for rss field: ";
104     private static final String ERR_TEMPLATE_ERR = " Parse error in template: ";
105     private static final String DEBUG_USING_DEFAULT_DATE_FORMAT = "Using default date format";
106 
107     // Attributes ----------------------------------------------------
108 
109     private final Context context;
110 
111     private final ResultList<ResultItem> result;
112     private final Site site;
113     private final TextMessages text;
114     private String feedType = "rss_2.0";
115     private final String templateDir;
116     private final VelocityEngine engine;
117     private final String uri;
118     //private final Channels channels;
119     private String encoding = "UTF-8";
120     private String nowStringUTC;
121 
122     // Static --------------------------------------------------------
123 
124     // Constructors --------------------------------------------------
125 
126     /**
127      * Creates a new instance.
128      *
129      * @param context The context this class needs to do its work.
130      * @throws SyndicationNotSupportedException
131      */
132     public SyndicationGenerator(final Context context) throws SyndicationNotSupportedException{
133 
134         if(null == context.getTab().getRssResultName()){ throw new SyndicationNotSupportedException(); }
135 
136         this.context = context;
137 
138         this.result = null != context.getDataModel().getSearch(context.getTab().getRssResultName())
139                 ? context.getDataModel().getSearch(context.getTab().getRssResultName()).getResults()
140                 : new BasicResultList<ResultItem>();
141 
142         this.site = context.getSite();
143 
144         this.text = TextMessages.valueOf(getTextMessagesContext());
145         this.uri = context.getURL();
146 
147         final String type = getParameter("feedType");
148 
149         if (! "".equals(type)) {
150             this.feedType = type;
151         }
152 
153         final String enc = getParameter("encoding");
154         if (! "".equals(enc)) {
155             if (encoding.equalsIgnoreCase("iso-8859-1")) {
156                 this.encoding = "iso-8859-1";
157             }
158         }
159 
160         templateDir = "rss/" + context.getTab().getId() + "/";
161 
162         engine = VelocityEngineFactory.valueOf(site).getEngine();
163     }
164 
165     // Public --------------------------------------------------------
166 
167     /**
168      * Returns the generated rss content.
169      *
170      * @return the rss document.
171      */
172 
173     public String generate() {
174 
175         String dfString = DEFAULT_DATE_FORMAT;
176 
177         try {
178             dfString = render("dateFormat_publishedDate", null, 0);
179         } catch (ResourceNotFoundException ex) {
180             LOG.trace(DEBUG_USING_DEFAULT_DATE_FORMAT);
181         }
182 
183         final DateFormat df = new SimpleDateFormat(dfString);
184 
185         // Zulu time is UTC. But java doesn't know that.
186         if (dfString.endsWith("'Z'")) {
187             df.setTimeZone(TimeZone.getTimeZone("UTC"));
188         }
189 
190         nowStringUTC = df.format(new Date());
191 
192         try {
193             final SyndFeed feed = new SyndFeedImpl();
194             final SearchResultModule m = new SearchResultModuleImpl();
195 
196             m.setNumberOfHits(Integer.toString(result.getHitCount()));
197 
198             final List<SearchResultModule> modules = new ArrayList<SearchResultModule>();
199 
200             modules.add(m);
201 
202             feed.setModules(modules);
203 
204             feed.setEncoding(this.encoding);
205             feed.setFeedType(feedType);
206             feed.setDescription(StringEscapeUtils.unescapeXml(render("description", null, 0)));
207             feed.setTitle(StringEscapeUtils.unescapeXml(render("title", null, 0)));
208             feed.setPublishedDate(new Date());
209             feed.setLink(render("link", null, 0));
210 
211             final List<SyndEntry> entries = new ArrayList<SyndEntry>();
212 
213             int idx = 0;
214             for (ResultItem item : result.getResults()) {
215                 ++idx;
216 
217                 final SyndEntry entry = new SyndEntryImpl();
218 
219                 final SearchResultModule entryModule = new SearchResultModuleImpl();
220 
221                 if (item.getField("age") != null && !"".equals(item.getField("age"))) {
222                     entryModule.setArticleAge(item.getField("age"));
223                 }
224 
225                 if (item.getField("newssource") != null && !"".equals(item.getField("newssource"))) {
226                     entryModule.setNewsSource(item.getField("newssource"));
227                 }
228 
229                 final List<SearchResultModule> sModules = new ArrayList<SearchResultModule>();
230                 sModules.add(entryModule);
231                 entry.setModules(sModules);
232                 final SyndContent content = new SyndContentImpl();
233 
234                 content.setType("text/html");
235                 final String entryDescription = render("entryDescription", item, idx);
236 
237                 content.setValue(StringEscapeUtils.unescapeHtml(entryDescription));
238 
239                 final String publishedDate = render("entryPublishedDate", item, idx);
240 
241                 try {
242                     final Date date = df.parse(publishedDate);
243 
244                     if (date.getTime() > 0) {
245                         entry.setPublishedDate(df.parse(publishedDate));
246                     } else {
247                         LOG.debug("Publish date set to epoch. Ignoring");
248                     }
249                 } catch (ParseException ex) {
250                     if (!(publishedDate == null || publishedDate.trim().equals(""))) {
251                         LOG.error("Cannot parse " + publishedDate + " using df " + dfString);
252                     } else {
253                         LOG.debug("Publish date is empty. Using current time");
254                     }
255 
256                     entry.setPublishedDate(new Date());
257                 }
258 
259                 entry.setTitle(render("entryTitle", item, idx));
260                 entry.setLink(render("entryUri", item, idx));
261 
262 
263                 try {
264                     final SyndEnclosure enclosure = new SyndEnclosureImpl();
265 
266                     enclosure.setUrl(render("entryEnclosure", item, idx));
267 
268                     final List<SyndEnclosure> enclosures = new ArrayList<SyndEnclosure>();
269                     enclosures.add(enclosure);
270                     entry.setEnclosures(enclosures);
271 
272                     // @todo. specific to sesam.no. put somewhere else...
273                     if ("swip".equals(context.getTab().getKey())) {
274                         enclosure.setType("image/gif");
275                     } else {
276                         enclosure.setType("image/png");
277                     }
278                 } catch (ResourceNotFoundException ex) {
279                     LOG.debug("Template for enclosure not found. Skipping.");
280                 }
281 
282 
283                 final List<SyndContent> contents = new ArrayList<SyndContent>();
284 
285                 contents.add(content);
286 
287                 entry.setContents(contents);
288                 entry.setDescription(content);
289 
290                 entries.add(entry);
291             }
292 
293             feed.setEntries(entries);
294 
295             final SyndFeedOutput output = new SyndFeedOutput();
296             return output.outputString(feed).replaceAll(DCDATE_PATTERN, "");
297         } catch (ResourceNotFoundException ex) {
298             throw new RuntimeException(ex);
299         } catch (FeedException ex) {
300             throw new RuntimeException(ex);
301         }
302     }
303 
304     // Package protected ---------------------------------------------
305 
306     // Protected -----------------------------------------------------
307 
308     // Private -------------------------------------------------------
309 
310     private String render(
311             final String name,
312             final ResultItem item,
313             final int itemIdx) throws ResourceNotFoundException {
314 
315         final String templateUri = templateDir + name;
316 
317         try {
318             final VelocityContext cxt = VelocityEngineFactory.newContextInstance(engine);
319 
320             cxt.put("text", text);
321             cxt.put("now", nowStringUTC);
322 
323             if (item != null) {
324                 cxt.put("item", item);
325                 cxt.put("itemIdx", itemIdx);
326             }
327 
328             cxt.put("datamodel", context.getDataModel());
329 
330             final String origUri = uri.replaceAll("&?output=[^&]+", "").replaceAll("&?feedtype=[^&]+", "");
331             cxt.put("uri", origUri);
332 
333             final Template tpl = VelocityEngineFactory.getTemplate(engine, site, templateUri);
334 
335             final StringWriter writer = new StringWriter();
336             tpl.merge(cxt, writer);
337 
338             return writer.toString();
339 
340         } catch (ParseErrorException ex) {
341             LOG.error(ERR_TEMPLATE_ERR + templateUri);
342             throw new InfrastructureException(ex);
343 
344         } catch (MethodInvocationException ex) {
345             throw new InfrastructureException(ex);
346 
347         } catch (ResourceNotFoundException ex) {
348             LOG.debug(ERR_TEMPLATE_NOT_FOUND + templateUri);
349             throw ex;
350 
351         } catch (Exception ex) {
352             throw new InfrastructureException(ex);
353         }
354     }
355 
356     private String getParameter(final String parameterName) {
357         final StringDataObject value = context.getDataModel().getParameters().getValue(parameterName);
358 
359         if (value != null) {
360             return value.toString();
361         } else {
362             return "";
363         }
364     }
365 
366     private TextMessages.Context getTextMessagesContext() {
367         return new TextMessages.Context() {
368             public Site getSite() {
369                 return context.getSite();
370             }
371 
372             public PropertiesLoader newPropertiesLoader(
373                     final SiteContext siteCxt,
374                     final String resource,
375                     final Properties properties) {
376             return context.newPropertiesLoader(siteCxt, resource, properties);
377             }
378         };
379     }
380 
381 //    private Channels.Context getChannelContext() {
382 //        return new Channels.Context() {
383 //            public Site getSite() {
384 //                return context.getSite();
385 //            }
386 //            public DocumentLoader newDocumentLoader(
387 //                    final SiteContext cxt,
388 //                    final String resource,
389 //                    final DocumentBuilder builder) {
390 //                return context.newDocumentLoader(cxt, resource, builder);
391 //            }
392 //        };
393 //    }
394 
395     // Inner classes -------------------------------------------------
396 
397     public static final class SyndicationNotSupportedException extends Exception{
398 
399     }
400 }