Browse Source

景区采集

master
李可松 4 years ago
parent
commit
d43afad517
  1. 86
      app/Console/Commands/Collector.php

86
app/Console/Commands/Collector.php

@ -53,8 +53,9 @@ class Collector extends Command
$this->line("供应商 $supplier_id 导入完毕");
} else {
// $this->exists_update = false;
$this->tourist_line();
$this->hotel();
// $this->tourist_line();
// $this->hotel();
$this->scenic();
$this->line('全部采集完毕');
}
return Command::SUCCESS;
@ -100,6 +101,84 @@ class Collector extends Command
}
}
/**
 * Collect scenic-spot products from the mafengwo mobile site.
 *
 * For every listing page in the requested range, each `data-url` link on the
 * page is followed, the detail page is scraped with regular expressions, and
 * the result is upserted into CollectProduct keyed by (unique_id, site = 1).
 *
 * The defaults reproduce the page range that was previously hard-coded
 * (pages 16 to 20).
 *
 * @param int $first_page First listing page to fetch (1-based, inclusive).
 * @param int $last_page  Last listing page to fetch (1-based, inclusive).
 */
private function scenic(int $first_page = 16, int $last_page = 20): void
{
    $http = Http::withOptions(['verify' => false])->withHeaders([
        'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
    ]);

    for ($page = $first_page; $page <= $last_page; $page++) {
        $this->line('开始采集景区:第 ' . $page . ' 页');

        // Work on the response body explicitly instead of relying on the
        // Response object being coerced to a string by preg_* / strpos.
        $html = $http->get('https://m.mafengwo.cn/jd/10030/gonglve.html?page=' . $page)->body();

        if (preg_match_all('/data-url="(.*?)"/', $html, $matches)) {
            foreach ($matches[1] as $url) {
                $this->line('开始采集 ' . $url);

                // The numeric id in the link is the upsert key; check it
                // before spending HTTP requests on a link we cannot store.
                if (!preg_match('/\/(\d+)\.html/', $url, $match)) {
                    continue;
                }
                $id = $match[1];

                $response = $http->get('https://m.mafengwo.cn' . $url);
                if (!$response->successful()) {
                    // Parsing an error page would overwrite an existing row
                    // with blank fields, so skip this product instead.
                    $this->warn("采集 $url 失败 (HTTP " . $response->status() . '),已跳过');
                    continue;
                }
                $res = $response->body();

                $inc_sales = $http->post('https://m.mafengwo.cn/poi/poi/inc_sales', ['poiid' => $id]);
                preg_match('#<strong><em>¥</em>(\d+)</strong>#', $inc_sales['html'] ?? '', $match_price);
                preg_match('#<h1>(.*?)</h1>#', $res, $match_title);
                preg_match_all('#<div class="swiper-slide">[\s\S]*?<img src="(.*?)"#', $res, $match_pictures);
                preg_match('#蜂蜂点评<br/><strong>(\d+)条#', $res, $match_sale);
                preg_match('#"row h1" data-jump="ticket">\s*([\w\W]*?)\s*</div>\s*<div class="row h1"#s', $res, $match_know);
                preg_match('#<div class="desc" style="display: none;">\s*([\w\W]*?)\s*</div>#s', $res, $match_content);
                preg_match_all('#时间:</dt>\s*<dd>\s*(.*?)\s*</dd>#', $res, $match_open_time);

                // Extension fields
                preg_match('#<p class="t2">地址:(.*?)<i></i></p>#', $res, $match_address);
                $address = $match_address[1] ?? '';

                // Only read <span> entries from inside the keywords block.
                // strpos() returns false when a marker is missing; using that
                // as an offset would silently scan from the top of the page
                // and pick up unrelated spans.
                $project_names = [];
                $pos_start = strpos($res, '<div class="keywords">');
                if ($pos_start !== false) {
                    $pos_end = strpos($res, '</div>', $pos_start);
                    if ($pos_end !== false) {
                        preg_match_all(
                            '#<span>([^<>]*?)</span>#s',
                            substr($res, $pos_start, $pos_end - $pos_start),
                            $match_project
                        );
                        $project_names = $match_project[1] ?? [];
                    }
                }

                // Built fresh for every product so nothing carries over from
                // the previous loop iteration.
                $extends = [
                    'field_2_project' => array_map(static function ($name) {
                        return [
                            'name' => $name,
                            'num' => '',
                            'price' => '',
                        ];
                    }, $project_names),
                    'field_2_address' => $address,
                    'field_2_open_time' => [
                        // Row 0 holds the fixed column labels.
                        [
                            'node' => '营业时间',
                            'summer' => '上岛时间',
                            'winter' => '下岛时间',
                        ],
                        // Row 1 holds the first three "时间:" values found
                        // on the page, in document order.
                        [
                            'node' => $match_open_time[1][0] ?? '',
                            'summer' => $match_open_time[1][1] ?? '',
                            'winter' => $match_open_time[1][2] ?? '',
                        ],
                    ],
                ];

                $price = $match_price[1] ?? 0;

                CollectProduct::updateOrCreate(['unique_id' => $id, 'site' => 1], [
                    'unique_id' => $id,
                    'site' => 1,
                    'type' => 2, // 0: tour route, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                    'title' => $match_title[1] ?? '',
                    'price' => $price,
                    'original_price' => $price * 1.58,
                    'pictures' => $match_pictures[1] ?? [],
                    'stock' => mt_rand(1000, 9999), // placeholder stock; the page exposes none
                    'sale' => $match_sale[1] ?? 0,
                    'status' => -2, // -2 = off the shelf
                    'know' => $match_know[1] ?? '',
                    'content' => $match_content[1] ?? '',
                    'extends' => $extends,
                    'longitude' => 0,
                    'latitude' => 0,
                    'address' => $address,
                ]);

                $this->line("采集 $url 完毕" . PHP_EOL);
            }
        }

        $this->line('第 ' . $page . ' 页采集结束' . PHP_EOL);
    }
}
//酒店采集
private function hotel()
{
@ -107,7 +186,7 @@ class Collector extends Command
$mddid = 10030; //10030==三亚
for ($i=0; $i<10; $i++) {
$this->line('开始采集:第 ' . ($i + 1) . ' 页');
$this->line('开始采集酒店:第 ' . ($i + 1) . ' 页');
$data = $http->get('https://m.mafengwo.cn/rest/hotel/hotels/', [
'filter' => [
'mddid' => $mddid
@ -201,6 +280,7 @@ class Collector extends Command
$http = Http::withOptions(['verify' => false]);
for($i=0; $i<10; $i++) {
$this->line('开始采集旅游线路:第 ' . ($i + 1) . ' 页');
$data = $http->get('https://m.mafengwo.cn/sales/ajax.php', [
'sF' => 'search_new_list',
'offset' => $i * 10, //分页参数

Loading…
Cancel
Save