读书人

house365的房源信息的采集

发布时间: 2012-11-17 11:14:14 作者: rapoo

house365的房源信息的采集

自己以前写的一个采集程序,比较简单。

<?phpinclude('global.php');$conn = new db();if(isset($_GET['company'])){$company = $_GET['company'];}else{$company = '';}if(isset($_GET['memberid'])){$memberid = $_GET['memberid'];}else{$memberid = '';}if(isset($_GET['zj_num'])){$zj_num = $_GET['zj_num'];}else{$zj_num = '';}if(isset($_GET['zj_code'])){$zj_code = $_GET['zj_code'];}else{$zj_code = '';}$url="http://zsb.house365.com/main.php?infotype=0&price=0&buildarea=0&district=0&keyword=&order_=1&page=".$zj_num."&agentcode=".$zj_code."&pkind=selllist&roomtype=&topic=&order=";$text = @file_get_contents($url);preg_match_all('/<td align="center" valign="middle"><a href=\'(.*?)\' target=\'_blank\' title="(.*?)"><img src="http:\/\/sell.house365.com\/images\/sellesflist_12.gif" width="77" height="18" border="0" \/><\/a><\/td>/i',$text,$row);$num_all = 0;$len = count($row[1]);//$len = 1;for($i=0;$i<$len;$i++){        $mrent = array();    $url1 = $row[1][$i];          $text1 = @file_get_contents($url1);        $mrent['coltype']    = 'second';        $mrent['menuid']     = '10';    $mrent['memberid']   = $memberid;    $mrent['memberprop'] = '2';    $mrent['shangquan']  = '0';     $mrent['infotype']   = 'sale';     $mrent['jz']         = '1';     $mrent['menuid']     ='10';    $mrent['author']     = $company;//iconv("UTF-8", "GBK", $company);        //房源名称    preg_match_all('/<td colspan="2"  => "住宅" ,        "31" =>"写字楼" ,        "32" =>"商铺" ,        "33" =>"别墅" ,        );    if(in_array($housetype,$cat_arr)){        $catid = array_search($housetype,$cat_arr);     }else{        $catid = 0;    }    $mrent['catid'] = $catid;             //更新时间     preg_match_all('/<td => "玄武区",            "45" => "鼓楼区",            "48" => "白下区",            "49" => "建邺区",            "47" => "秦淮区",            "44" => "下关区",            "51" => "雨花台区",            "50" => "栖霞区",            "52" => "江宁区",            "53" => "浦口区",            "54" => "六合区",            "55" => "溧水县",            "56" => "高淳县",            "60" => "其它",            );   
// ---------------------------------------------------------------------------
// REVIEW NOTES (cover the whole script: the line above and the two lines below)
//
// State of this listing
//   The script has been collapsed onto three physical lines, and parts of it
//   are missing: several preg_match_all() patterns break off mid-expression,
//   the first key of two array literals is gone, and the start of the
//   facility switch is gone. As written, the opening tag has no whitespace
//   after it and every inline "//" comment swallows the rest of its physical
//   line, so the listing must be re-split into statements before it can run.
//   The notes below describe only what is still visible.
//
// Purpose
//   Fetches one house365 listing page, follows every detail link matched on
//   it, fills the $mrent array for each detail page and inserts it into the
//   `hou_mrent` table through $conn (an instance of db from global.php).
//
// Inputs
//   $_GET['company'], $_GET['memberid'], $_GET['zj_num'], $_GET['zj_code'];
//   each falls back to '' when absent. zj_num and zj_code are concatenated
//   into the list URL as its "page" and "agentcode" query parameters.
//
// Visible steps
//   1. The list page is fetched with @file_get_contents(); the result is not
//      checked, so a failed fetch goes unnoticed.
//   2. preg_match_all() captures each detail href (group 1) and title
//      (group 2) into $row; count($row[1]) bounds the for loop.
//   3. For each link the detail page is fetched the same way into $text1 and
//      fixed fields are written to $mrent ('menuid' is assigned '10' twice).
//      'author' receives $company unchanged; the iconv() call after it is
//      commented out.
//   4. Inline labels, in order, read "listing name", "update time",
//      "sub-district" and "details". The patterns that follow the first,
//      second and fourth labels are truncated, and the code that assigns
//      $housetype, $cat, $cat_arr, $district and $mrent['loupan'] is not
//      visible.
//   5. catid is the key of $housetype in $cat_arr, or 0 when in_array() finds
//      no match; zoneid is array_search($cat, $district). Both lookups are
//      non-strict (no third argument).
//   6. The pattern after the "sub-district" label actually captures the phone
//      markup (group 1) and the contact name (group 2). The name is converted
//      from GBK to UTF-8, the span wrapper is stripped from the phone text
//      and the remainder is split on "-".
//   7. The fac* flags are copied into $mrent. A row is inserted only when
//      $mrent['loupan'] is non-empty and the SELECT returns an empty id;
//      $num_all counts inserts whose GetQueryAffectedRows() is > 0.
//   8. A summary message is built, converted from GBK to UTF-8 and written
//      by an inline script into the parent document's 'note' element; the
//      'btn_sc' and 'btn_zq' controls have their disabled state cleared.
//
// NOTE(review) - defects visible in the code, left unchanged here
//   * The SELECT reads "where memberid =$memberid loupan ='...'" with no
//     AND/OR between the two conditions, so the statement is malformed.
//   * $memberid (from $_GET) and $mrent['loupan'] are interpolated straight
//     into that SQL string; nothing visible escapes or binds them.
//   * None of the visible case labels in the facility switch is followed by
//     a break, so once a label matches every later flag is also set to 1.
//     Presumably unintended - confirm against the complete source.
//   * In the phone loop both branches assign $mrent['lxdh'], so the 11-byte
//     length test has no effect and the last "-" separated part always wins
//     (the 'lxrshouji' assignment is commented out).
//   * The third argument to insert() is written as "$debug = true", which
//     passes true and also assigns the variable $debug.
//   * $note is echoed inside a double-quoted JavaScript string without any
//     escaping.
// ---------------------------------------------------------------------------
 $zoneid = array_search($cat,$district);     $mrent['zoneid'] = $zoneid;        //板块    preg_match_all('/<td width="150" align="center"><strong><span>(.*?)<\/span><\/strong><\/td>\s*<td width="185">联系人:<span>(.*?)<\/span><\/td>/i',$text1,$lxr_arr);      $lxr_tel = trim($lxr_arr[1][0]);    $lxr_name = trim($lxr_arr[2][0]);    $mrent['lxr'] = iconv( "GBK","UTF-8", $lxr_name);    $th_tel = array('<span style="font-size:14px">','</span>');    $lxr_tel = str_replace($th_tel,"",$lxr_tel);    $tel_arr = explode("-",$lxr_tel);      for($tj=0;$tj<count($tel_arr);$tj++)    {                $tel_len = strlen($tel_arr[$tj]);             if($tel_len==11)        {$mrent['lxdh'] = $tel_arr[$tj];                    //$mrent['lxrshouji'] = $tel_arr[$tj];                   }else{            $mrent['lxdh'] = $tel_arr[$tj];                   }    }    //详细信息    preg_match_all('/<div    :            $facnet = 1;        case "管道煤气":            $facgas = 1;        case "有线电视":            $factvnet = 1;        case "电话"    :             $facphone = 1;        case "冰箱"    :            $facfridge = 1;        case "电视机"  :             $factv = 1;        case "洗衣机"  :              $facwasher = 1;        case "热水器"  :              $facwheater = 1;         case "空调"    :             $facaircon = 1;           case "家具"    :              $facfurniture = 1;                }    }        $mrent['facfurniture'] = $facfurniture;    $mrent['factvnet']     = $factvnet;    $mrent['factv']        = $factv;    $mrent['facnet']       = $facnet;    $mrent['facphone']     = $facphone;    $mrent['facwheater']   = $facwheater;    $mrent['facaircon']    = $facaircon;    $mrent['facwasher']    = $facwasher;    $mrent['facfridge']    = $facfridge;    $mrent['facgas']       = $facgas;    if($mrent['loupan']!='')    {        $sql = "SELECT id FROM `hou_mrent` where memberid =$memberid loupan ='".$mrent['loupan']."'";         $chk = $conn-> Query2SingleRowArray($sql);        if($chk['id']==''){            $conn 
->insert('hou_mrent',$mrent,$debug = true);  $rows = $conn->GetQueryAffectedRows();if($rows>0){$num_all++;}        }    }}$note = "抓取完成,本页一共抓取".$num_all."条房源";$note = iconv("GBK", "UTF-8", $note);    ?>    <script type="text/javascript">parent.document.all('note').innerHTML="<?=$note?>";parent.document.all('btn_sc').disabled="";parent.document.all('btn_zq').disabled="";</script>

?

读书人网 >互联网

热点推荐