BeautifulSoup 使用
import os
import re  # not referenced in this snippet; kept from the original note
import socket
from urllib import urlopen  # Python 2 location of urlopen

from BeautifulSoup import BeautifulSoup

import db  # not referenced in this snippet; kept from the original note


def mkdir(path=''):
    """Create directory *path* under the current directory if it is missing.

    Args:
        path: Name of the directory to create, relative to '.'.
            With the default empty string nothing is created, because the
            resulting path is the current directory itself.

    Raises:
        OSError: If ``os.mkdir`` fails (for example, when a parent
            directory does not exist).
    """
    base = '.'
    # os.path.join picks the right separator for the platform; a hard-coded
    # backslash only works on Windows.
    newpath = os.path.join(base, path)
    if not os.path.exists(newpath):
        os.mkdir(newpath)


# Default timeout (in seconds) for sockets created from here on, so a stalled
# download cannot block forever.
socket.setdefaulttimeout(600)

url = "http://www.example.com"

# Option 1: fetch the page and keep it as a pretty-printed string.
text2 = urlopen(url).read()
text = BeautifulSoup(text2).prettify()

# or

# Option 2: keep the parsed tree so unwanted tags can be stripped before
# printing.
text2 = urlopen('http://www.example.com/').read()
text = BeautifulSoup(text2)
for s in text('script'):  # remove dangerous <script> tags
    s.extract()
for s in text('style'):  # remove <style> blocks
    s.extract()
print(text.prettify())
# ==========================
安装
wget http://www.crummy.com/software/BeautifulSoup/download/3.x/BeautifulSoup-3.2.1.tar.gz
tar zxf BeautifulSoup-3.2.1.tar.gz
cd BeautifulSoup-3.2.1
python setup.py install