@ -53,8 +53,9 @@ class Collector extends Command
$this -> line ( " 供应商 $supplier_id 导入完毕 " );
} else {
// $this->exists_update = false;
$this -> tourist_line ();
$this -> hotel ();
// $this->tourist_line();
// $this->hotel();
$this -> scenic ();
$this -> line ( '全部采集完毕' );
}
return Command :: SUCCESS ;
@ -100,6 +101,84 @@ class Collector extends Command
}
}
/**
 * Scenic-spot collector.
 *
 * Walks the mobile listing pages at m.mafengwo.cn/jd/10030/gonglve.html,
 * follows every `data-url` link found on each page, scrapes the detail page
 * with regular expressions and upserts one CollectProduct row per spot
 * (keyed by unique_id + site).
 *
 * NOTE(review): 10030 is presumably the Sanya destination id (the hotel
 * collector in this file annotates it that way) — confirm.
 *
 * @return void
 */
private function scenic()
{
    $http = Http::withOptions(['verify' => false])->withHeaders(['User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1']);

    // NOTE(review): only listing pages 16-20 are visited. The other collectors
    // in this file loop over 0..9, so this window looks like a leftover from a
    // resumed/debug run — confirm before relying on a full crawl.
    for ($i = 15; $i < 20; $i++) {
        $this->line('开始采集景区:第 ' . ($i + 1) . ' 页');

        // Read the body once instead of relying on implicit Response-to-string casts.
        $html = $http->get('https://m.mafengwo.cn/jd/10030/gonglve.html?page=' . ($i + 1))->body();

        if (preg_match_all('/data-url="(.*?)"/', $html, $matches)) {
            foreach ($matches[1] as $url) {
                $this->line('开始采集 ' . $url);

                // The numeric POI id in the URL is the product's unique key.
                // Validate it before spending a request on the detail page.
                if (!preg_match('/\/(\d+)\.html/', $url, $match)) {
                    continue;
                }
                $id = $match[1];

                $response = $http->get('https://m.mafengwo.cn' . $url);
                if ($response->failed()) {
                    // Skip 4xx/5xx responses so an error page cannot overwrite an
                    // already collected product with blank fields.
                    continue;
                }
                $res = $response->body();

                // The ticket price comes from a separate endpoint whose JSON
                // payload carries an HTML fragment under "html".
                $inc_sales = $http->post('https://m.mafengwo.cn/poi/poi/inc_sales', ['poiid' => $id]);
                preg_match('#<strong><em>¥</em>(\d+)</strong>#', $inc_sales['html'] ?? '', $match_price);

                preg_match('#<h1>(.*?)</h1>#', $res, $match_title);
                preg_match_all('#<div class="swiper-slide">[\s\S]*?<img src="(.*?)"#', $res, $match_pictures);
                preg_match('#蜂蜂点评<br/><strong>(\d+)条#', $res, $match_sale);
                preg_match('#"row h1" data-jump="ticket">\s*([\w\W]*?)\s*</div>\s*<div class="row h1"#s', $res, $match_know);
                preg_match('#<div class="desc" style="display: none;">\s*([\w\W]*?)\s*</div>#s', $res, $match_content);
                preg_match_all('#时间:</dt>\s*<dd>\s*(.*?)\s*</dd>#', $res, $match_open_time);

                // Extended fields. Start from a clean array for every product so
                // nothing can leak over from the previous iteration.
                $extends = [];
                preg_match('#<p class="t2">地址:(.*?)<i></i></p>#', $res, $match_address);

                // Keyword tags live in the first <div class="keywords">…</div>.
                // strpos() returns false when a marker is missing; using that as
                // an offset/length would slice an unrelated part of the page, so
                // only extract when both markers were found.
                $projects = [];
                $pos_start = strpos($res, '<div class="keywords">');
                if ($pos_start !== false) {
                    $pos_end = strpos($res, '</div>', $pos_start);
                    if ($pos_end !== false) {
                        preg_match_all('#<span>([^<>]*?)</span>#s', substr($res, $pos_start, $pos_end - $pos_start), $match_project);
                        $projects = $match_project[1] ?? [];
                    }
                }
                $extends['field_2_project'] = array_map(function ($v) {
                    return [
                        'name' => $v,
                        'num' => '',
                        'price' => '',
                    ];
                }, $projects);

                $extends['field_2_address'] = $match_address[1] ?? '';

                // Row 0 is the header row of the opening-time table; row 1 holds
                // the first three scraped values (missing ones become '').
                $extends['field_2_open_time'][0]['node'] = '营业时间';
                $extends['field_2_open_time'][0]['summer'] = '上岛时间';
                $extends['field_2_open_time'][0]['winter'] = '下岛时间';
                if (isset($match_open_time[1]) && is_array($match_open_time[1])) {
                    $extends['field_2_open_time'][1]['node'] = $match_open_time[1][0] ?? '';
                    $extends['field_2_open_time'][1]['summer'] = $match_open_time[1][1] ?? '';
                    $extends['field_2_open_time'][1]['winter'] = $match_open_time[1][2] ?? '';
                }

                CollectProduct::updateOrCreate(['unique_id' => $id, 'site' => 1], [
                    'unique_id' => $id,
                    'site' => 1,
                    'type' => 2, // 0: tour route, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                    'title' => $match_title[1] ?? '',
                    'price' => $match_price[1] ?? 0,
                    'original_price' => ($match_price[1] ?? 0) * 1.58,
                    'pictures' => $match_pictures[1] ?? [],
                    'stock' => mt_rand(1000, 9999),
                    'sale' => $match_sale[1] ?? 0,
                    'status' => -2, // -2: off the shelf
                    'know' => $match_know[1] ?? '',
                    'content' => $match_content[1] ?? '',
                    'extends' => $extends,
                    'longitude' => 0,
                    'latitude' => 0,
                    'address' => $match_address[1] ?? '',
                ]);
                $this->line(" 采集 $url 完毕 " . PHP_EOL);
            }
        }
        $this->line('第 ' . ($i + 1) . ' 页采集结束' . PHP_EOL);
    }
}
//酒店采集
private function hotel ()
{
@ -107,7 +186,7 @@ class Collector extends Command
$mddid = 10030 ; //10030==三亚
for ( $i = 0 ; $i < 10 ; $i ++ ) {
$this -> line ( '开始采集:第 ' . ( $i + 1 ) . ' 页' );
$this -> line ( '开始采集酒店 :第 ' . ( $i + 1 ) . ' 页' );
$data = $http -> get ( 'https://m.mafengwo.cn/rest/hotel/hotels/' , [
'filter' => [
'mddid' => $mddid
@ -201,6 +280,7 @@ class Collector extends Command
$http = Http :: withOptions ([ 'verify' => false ]);
for ( $i = 0 ; $i < 10 ; $i ++ ) {
$this -> line ( '开始采集旅游线路:第 ' . ( $i + 1 ) . ' 页' );
$data = $http -> get ( 'https://m.mafengwo.cn/sales/ajax.php' , [
'sF' => 'search_new_list' ,
'offset' => $i * 10 , //分页参数