# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
from urllib2 import urlopen
from xml.sax import make_parser, ContentHandler
import sys
__author__ = "Suvash Sedhain"
__date__ = "Fri Sep 12 16:46:44 NPT 2008"
class RSSHandler (ContentHandler):
    """SAX content handler that prints the title of every RSS item.

    Text found inside a <title> element that is itself nested inside an
    <item> element is written to standard output, followed by a newline
    when the title closes. A <title> outside any <item> is ignored.
    """

    def __init__(self):
        """Start outside of any <item> and outside of any <title>."""
        ContentHandler.__init__(self)
        self.__item_open = False
        self.__title_open = False

    def startElement(self, tag, attrs):
        """Record entry into an <item>, or into a <title> within an item."""
        if tag == "item":
            self.__item_open = True
        elif tag == "title" and self.__item_open:
            self.__title_open = True

    def endElement(self, tag):
        """Terminate the printed title line, or record leaving an <item>."""
        if tag == "item":
            self.__item_open = False
        elif tag == "title" and self.__title_open:
            sys.stdout.write("\n")
            self.__title_open = False

    def characters(self, data):
        """Echo character data to stdout while inside an item's title."""
        if not self.__title_open:
            return
        # Each chunk is written as it arrives; no buffering is done here.
        sys.stdout.write(data)
def listFeedTitle(url):
    """Fetch the RSS document at *url* and print the title of each item.

    The response is streamed through a SAX parser whose content handler is
    an RSSHandler, which writes the item titles to standard output.

    url -- address of the RSS document; passed unchanged to urlopen().

    Exceptions raised by urlopen() or by the parser propagate to the caller.
    """
    infile = urlopen(url)
    try:
        parser = make_parser()
        parser.setContentHandler(RSSHandler())
        parser.parse(infile)
    finally:
        # Release the connection even when parsing raises (e.g. on a
        # malformed feed); previously the handle was never closed.
        infile.close()
# Print the titles of the Python-related links from the DZone search feed.
_DZONE_PYTHON_FEED = "http://www.dzone.com/links/feed/search/python/rss.xml"
listFeedTitle(_DZONE_PYTHON_FEED)
Code Explanation:
The class RSSHandler is derived from the class ContentHandler, which is an interface that provides the functionality for parsing XML. We override the methods characters, startElement and endElement to customize the parser to our needs.
The method invocation can be summarized as:
- The "characters" method is called when a stream of text is encountered
- The "startElement" method is called when any starting tag is encountered
- The "endElement" method is called when any ending tag is encountered