Manage your torrents from your Android device
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

302 lines
11 KiB

/*
* Taken from the 'Learning Android' project, released as Public Domain software at
* http://github.com/digitalspaghetti/learning-android and modified heavily for Transdroid
*/
package org.transdroid.core.rssparser;
import android.text.TextUtils;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.transdroid.daemon.util.HttpHelper;
import org.transdroid.daemon.util.TlsSniSocketFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
@SuppressWarnings("deprecation")
public class RssParser extends DefaultHandler {
private final String urlString;
private final String[] excludeFilters;
private final String[] includeFilters;
private Channel channel;
private StringBuilder text;
private Item item;
private boolean imageStatus;
/**
* The constructor for the RSS parser; call {@link #parse()} to synchronously create an HTTP connection and parse
* the RSS feed contents. The results can be retrieved with {@link #getChannel()}.
*
* @param url The url of the feed to retrieve
* @param excludeFilter A |-separated list of words that may not be included in the item title or they are excluded
* @param includeFilter A |-separated list of words that need to be included in the item title or they are excluded
*/
public RssParser(String url, String excludeFilter, String includeFilter) {
this.urlString = url;
if (!TextUtils.isEmpty(excludeFilter)) {
this.excludeFilters = excludeFilter.split("\\|");
for (int i = 0; i < excludeFilters.length; i++) {
excludeFilters[i] = excludeFilters[i].toUpperCase(Locale.getDefault());
}
} else {
this.excludeFilters = null;
}
if (!TextUtils.isEmpty(includeFilter)) {
this.includeFilters = includeFilter.split("\\|");
for (int i = 0; i < includeFilters.length; i++) {
includeFilters[i] = includeFilters[i].toUpperCase(Locale.getDefault());
}
} else {
this.includeFilters = null;
}
this.text = new StringBuilder();
}
/**
* Returns the loaded RSS feed as channel which contains the individual {@link Item}s
*
* @return A channel object that contains the feed details and individual items
*/
public Channel getChannel() {
return this.channel;
}
/**
* Initialises an HTTP connection, retrieves the content and parses the RSS feed as standard XML.
*
* @throws ParserConfigurationException Thrown if the SX parser is not working corectly
* @throws SAXException Thrown if the SAX parser can encounters non-standard XML content
* @throws IOException Thrown if the RSS feed content can not be retrieved, such as when no connection is available
*/
public void parse() throws ParserConfigurationException, SAXException, IOException {
DefaultHttpClient httpclient = initialise();
HttpResponse result = httpclient.execute(new HttpGet(urlString));
SAXParserFactory spf = SAXParserFactory.newInstance();
if (spf != null) {
SAXParser sp = spf.newSAXParser();
sp.parse(result.getEntity().getContent(), this);
}
// Apply filters
if (channel != null && (includeFilters != null || excludeFilters != null)) {
Iterator<Item> i = channel.getItems().iterator();
while (i.hasNext()) {
if (!matchesFilters(i.next()))
i.remove();
}
}
}
private boolean matchesFilters(Item next) {
String title = next.getTitle().toUpperCase();
if (includeFilters != null) {
boolean include = false;
for (String includeWord : includeFilters) {
if (includeWord.equals("") || title.contains(includeWord)) {
include = true;
break;
}
}
if (!include)
return false;
}
if (excludeFilters != null) {
for (String excludeWord : excludeFilters) {
if (!excludeWord.equals("") && title.contains(excludeWord))
return false;
}
}
return true;
}
private DefaultHttpClient initialise() {
SchemeRegistry registry = new SchemeRegistry();
registry.register(new Scheme("http", new PlainSocketFactory(), 80));
registry.register(new Scheme("https", new TlsSniSocketFactory(), 443));
HttpParams httpparams = new BasicHttpParams();
HttpConnectionParams.setConnectionTimeout(httpparams, 5000);
HttpConnectionParams.setSoTimeout(httpparams, 5000);
HttpProtocolParams.setUserAgent(httpparams, HttpHelper.userAgent);
DefaultHttpClient httpclient = new DefaultHttpClient(new ThreadSafeClientConnManager(httpparams, registry),
httpparams);
httpclient.addRequestInterceptor(HttpHelper.gzipRequestInterceptor);
httpclient.addResponseInterceptor(HttpHelper.gzipResponseInterceptor);
return httpclient;
}
/**
* By default creates a standard Item (with title, description and links), which may to overridden to add more data
* (i.e. custom tags that a feed may supply).
*
* @return A possibly decorated Item instance
*/
protected Item createNewItem() {
return new Item();
}
@Override
public final void startElement(String uri, String localName, String qName, Attributes attributes) {
/** First lets check for the channel */
if (localName.equalsIgnoreCase("channel")) {
this.channel = new Channel();
}
/** Now lets check for an item */
if (localName.equalsIgnoreCase("item") && (this.channel != null)) {
this.item = createNewItem();
this.channel.addItem(this.item);
}
/** Now lets check for an image */
if (localName.equalsIgnoreCase("image") && (this.channel != null)) {
this.imageStatus = true;
}
/** Checking for a enclosure */
if (localName.equalsIgnoreCase("enclosure")) {
/** Lets check we are in an item */
if (this.item != null && attributes != null && attributes.getLength() > 0) {
if (attributes.getValue("url") != null) {
this.item.setEnclosureUrl(attributes.getValue("url").trim());
}
if (attributes.getValue("type") != null) {
this.item.setEnclosureType(attributes.getValue("type"));
}
if (attributes.getValue("length") != null) {
this.item.setEnclosureLength(Long.parseLong(attributes.getValue("length")));
}
}
}
}
/**
* This is where we actually parse for the elements contents
*/
@SuppressWarnings("deprecation")
public final void endElement(String uri, String localName, String qName) {
/** Check we have an RSS Feed */
if (this.channel == null) {
return;
}
/** Check are at the end of an item */
if (localName.equalsIgnoreCase("item")) {
this.item = null;
}
/** Check we are at the end of an image */
if (localName.equalsIgnoreCase("image"))
this.imageStatus = false;
/** Now we need to parse which title we are in */
if (localName.equalsIgnoreCase("title")) {
/** We are an item, so we set the item title */
if (this.item != null) {
this.item.setTitle(this.text.toString().trim());
/** We are in an image */
} else {
this.channel.setTitle(this.text.toString().trim());
}
}
/** Now we are checking for a link */
if (localName.equalsIgnoreCase("link")) {
/** Check we are in an item **/
if (this.item != null) {
this.item.setLink(this.text.toString().trim());
/** Check we are in an image */
} else if (this.imageStatus) {
this.channel.setImage(this.text.toString().trim());
/** Check we are in a channel */
} else {
this.channel.setLink(this.text.toString().trim());
}
}
/** Checking for a description */
if (localName.equalsIgnoreCase("description")) {
/** Lets check we are in an item */
if (this.item != null) {
this.item.setDescription(this.text.toString().trim());
/** Lets check we are in the channel */
} else {
this.channel.setDescription(this.text.toString().trim());
}
}
/** Checking for a pubdate */
if (localName.equalsIgnoreCase("pubDate")) {
/** Lets check we are in an item */
if (this.item != null) {
try {
this.item.setPubdate(new Date(Date.parse(this.text.toString().trim())));
} catch (Exception e) {
// Date is malformed (not parsable by Date.parse)
}
/** Lets check we are in the channel */
} else {
try {
this.channel.setPubDate(new Date(Date.parse(this.text.toString().trim())));
} catch (Exception e) {
// Date is malformed (not parsable by Date.parse)
}
}
}
/** Check for the category */
if (localName.equalsIgnoreCase("category") && (this.item != null)) {
this.channel.addCategory(this.text.toString().trim());
}
addAdditionalData(localName, this.item, this.text.toString());
this.text.setLength(0);
}
/**
* May be overridden to add additional data from tags that are not standard in RSS. Not used by this default RSS
* style parser. Usually used in conjunction with {@link #createNewItem()}.
*
* @param localName The tag name
* @param item The Item we are currently parsing
* @param text The new text content
*/
protected void addAdditionalData(String localName, Item item, String text) {
}
@Override
public final void characters(char[] ch, int start, int length) {
this.text.append(ch, start, length);
}
}