option('import');
        // $supplier_id is assigned by the statement that ends just above (its start is outside this view).
        if (!empty($supplier_id)) {
            $this->import($supplier_id);
            $this->line("供应商 $supplier_id 导入完毕");
        } else {
            // $this->exists_update = false;
            // $this->tourist_line();
            // $this->hotel();
            $this->scenic();
            $this->line('全部采集完毕');
        }
        return Command::SUCCESS;
    }

    /**
     * Copy collected products into the product table for one supplier or for all suppliers.
     *
     * The `limit` option selects which collected rows are copied (newest id first):
     * "N" takes N rows, "OFFSET,N" skips OFFSET rows and then takes N; an empty value or
     * any other shape copies every row.
     *
     * @param mixed $supplier_id A string of digits for a single supplier, or 'all' for every
     *                           supplier whose id is greater than 1. Any other value makes the
     *                           method return without doing anything.
     * @return void
     */
    private function import($supplier_id)
    {
        // Resolve the target suppliers first so an unusable argument never loads the collected table.
        if (ctype_digit($supplier_id)) {
            $supplierIds = [$supplier_id];
        } elseif ($supplier_id == 'all') {
            $supplierIds = Supplier::query()->where('id', '>', 1)->pluck('id');
        } else {
            return;
        }

        $limit = $this->option('limit');
        $query = CollectProduct::query()->orderBy('id', 'desc');
        $parts = empty($limit) ? [] : explode(',', $limit);
        if (count($parts) === 1) {
            $query->limit($parts[0]);
        } elseif (count($parts) === 2) {
            $query->offset($parts[0])->limit($parts[1]);
        }
        $rows = $query->get()->toArray();

        foreach ($supplierIds as $targetId) {
            $this->line('正在导入 ' . $targetId);
            foreach ($rows as $row) {
                $row['supplier_id'] = $targetId;
                // The collector's own key columns are dropped before the row is written.
                unset($row['unique_id'], $row['site']);
                // NOTE(review): $row still carries every other collected column (presumably
                // including `id` and timestamps) - confirm Product's fillable list keeps them out.
                Product::query()->updateOrCreate(
                    ['supplier_id' => $targetId, 'title' => $row['title']],
                    $row
                );
            }
            $this->line("导入 $targetId 结束");
        }
    }

    /**
     * Crawl scenic-spot list pages 16 to 20 and upsert each spot as a collected product (type 2).
     *
     * For every `data-url` found on a list page, the numeric id is taken from the URL, the
     * detail page and the `inc_sales` endpoint are requested, and the scraped values are
     * written with `CollectProduct::updateOrCreate` keyed by unique_id + site.
     *
     * NOTE(review): several pattern and needle literals below consist only of line breaks around
     * the capture group. They look as if markup was lost from them; they are reproduced
     * unchanged here and must be checked against the original file.
     *
     * @return void
     */
    private function scenic()
    {
        $http = Http::withOptions(['verify' => false])->withHeaders(['User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1']);

        for ($i = 15; $i < 20; $i++) {
            $page = $i + 1;
            $this->line('开始采集景区:第 ' . $page . ' 页');
            $html = $http->get('https://m.mafengwo.cn/jd/10030/gonglve.html?page=' . $page);
            if (preg_match_all('/data-url="(.*?)"/', $html, $matches)) {
                if (empty($matches[1])) continue;

                foreach ($matches[1] as $url) {
                    $this->line('开始采集 ' . $url);
                    if (!preg_match('/\/(\d+)\.html/', $url, $match)) continue;
                    $id = $match[1];
                    if ($this->exists_update == false && CollectProduct::where(['unique_id' => $id, 'site' => 1])->exists()) {
                        continue;
                    }

                    // Requested only after the skip checks, so skipped spots cost no detail download.
                    $res = $http->get('https://m.mafengwo.cn' . $url);
                    $inc_sales = $http->post('https://m.mafengwo.cn/poi/poi/inc_sales', ['poiid' => $id]);

                    preg_match('#(\d+)#', $inc_sales['html'] ?? '', $match_price);
                    preg_match('#

(.*?)

#', $res, $match_title);
                    preg_match_all('#
[\s\S]*?(\d+)条#', $res, $match_sale);
                    preg_match('#"row h1" data-jump="ticket">\s*([\w\W]*?)\s*
\s*#s', $res, $match_content);
                    preg_match_all('#时间:\s*
\s*(.*?)\s*
#', $res, $match_open_time);

                    // Extended fields
                    preg_match('#

地址:(.*?)

#', $res, $match_address);

                    // Cut out the section between the two markers; when either marker is missing
                    // there is no section, so no project entries are produced.
                    $pos_start = strpos($res, '
');
                    $pos_end = $pos_start === false ? false : strpos($res, '
', $pos_start);
                    $match_project = [1 => []];
                    if ($pos_start !== false && $pos_end !== false) {
                        preg_match_all('#([^<>]*?)#s', substr($res, $pos_start, $pos_end - $pos_start), $match_project);
                    }

                    // Start from an empty array so nothing leaks over from the previous spot.
                    $extends = [];
                    if (isset($match_project[1]) && is_array($match_project[1])) {
                        $extends['field_2_project'] = array_map(function ($name) {
                            return [
                                'name' => $name,
                                'num' => '',
                                'price' => '',
                            ];
                        }, $match_project[1]);
                    }
                    $extends['field_2_address'] = $match_address[1] ?? '';
                    // Row 0 is the header row of the opening-time table, row 1 the scraped values.
                    $extends['field_2_open_time'] = [
                        ['node' => '营业时间', 'summer' => '上岛时间', 'winter' => '下岛时间'],
                    ];
                    if (isset($match_open_time[1]) && is_array($match_open_time[1])) {
                        $extends['field_2_open_time'][1] = [
                            'node' => $match_open_time[1][0] ?? '',
                            'summer' => $match_open_time[1][1] ?? '',
                            'winter' => $match_open_time[1][2] ?? '',
                        ];
                    }

                    CollectProduct::updateOrCreate(['unique_id' => $id, 'site' => 1], [
                        'unique_id' => $id,
                        'site' => 1,
                        'type' => 2, // 0: tour route, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                        'title' => $match_title[1] ?? '',
                        'price' => $match_price[1] ?? 0,
                        'original_price' => ($match_price[1] ?? 0) * 1.58,
                        // NOTE(review): $match_pictures and $match_know are not assigned anywhere in
                        // the visible code, so these two always fall back to their defaults.
                        'pictures' => $match_pictures[1] ?? [],
                        'stock' => mt_rand(1000, 9999),
                        // preg_match_all puts a list of captures in [1]; store the first count,
                        // not the whole list.
                        'sale' => $match_sale[1][0] ?? 0,
                        'status' => -2, // -2 = delisted
                        'know' => $match_know[1] ?? '',
                        'content' => $match_content[1] ?? '',
                        'extends' => $extends,
                        'longitude' => 0,
                        'latitude' => 0,
                        'address' => $match_address[1] ?? '',
                    ]);
                    $this->line("采集 $url 完毕" . PHP_EOL);
                }
            }
            $this->line('第 ' . $page . ' 页采集结束' . PHP_EOL);
        }
    }

    /**
     * Crawl ten pages of the hotel list for destination 10030 and upsert each hotel as a
     * collected product (type 1).
     *
     * For every listed hotel the signed `base_info` endpoint and the `guide_info` endpoint are
     * requested. Price, original price and stock are random placeholders.
     *
     * NOTE(review): the text fragments used to build `$know` contain only line breaks around
     * the labels; they look as if markup was lost and are reproduced unchanged.
     *
     * @return void
     */
    private function hotel()
    {
        $http = Http::withOptions(['verify' => false]);
        $mddid = 10030; // 10030 == Sanya

        for ($i = 0; $i < 10; $i++) {
            $this->line('开始采集酒店:第 ' . ($i + 1) . ' 页');
            $listing = $http->get('https://m.mafengwo.cn/rest/hotel/hotels/', [
                'filter' => [
                    'mddid' => $mddid
                ],
                'page' => [
                    'mode' => 'sequential',
                    'boundary' => $i * 20, // paging parameter
                    'num' => 20
                ],
            ]);
            if (empty($listing['data']['list'])) {
                continue;
            }

            foreach ($listing['data']['list'] as $hotel) {
                if (empty($hotel['id'])) continue;
                if ($this->exists_update == false && CollectProduct::where(['unique_id' => $hotel['id'], 'site' => 1])->exists()) {
                    continue;
                }
                $this->line('采集详情:' . $hotel['id']);

                // Basic information (this endpoint takes a signed parameter set)
                $params = [
                    '_ts' => time() . '123',
                    'hotel_id' => (string)$hotel['id'],
                    'lat' => '',
                    'lng' => '',
                    'rmdd_id' => (string)$mddid,
                ];
                $params['_sn'] = $this->_sn($params);
                $baseResponse = $http->get('https://m.mafengwo.cn/hservice/detail/info/base_info', $params);
                if (empty($baseResponse['data']['info'])) {
                    continue;
                }
                $base_info = $baseResponse['data']['info'];

                // Hotel details
                $guideResponse = $http->get('https://m.mafengwo.cn/hservice/detail/info/guide_info', ['hotel_id' => $hotel['id']]);
                $guide_info = $guideResponse['data']['info'] ?? [];

                // Travel notice
                $know = "

入住时间:" . (!empty($guide_info['check_in']['title']) ? $guide_info['check_in']['title'] : '') . "

";
                $know .= "

离店时间:" . (!empty($guide_info['check_out']['title']) ? $guide_info['check_out']['title'] : '') . "

";
                // `.` binds tighter than `??`, so the fallback has to be parenthesised to apply
                // to the title alone.
                $facilities = array_reduce(
                    $base_info['facility_sort'] ?? [],
                    fn($carry, $item) => $carry . ($item['title'] ?? ''),
                    ''
                );
                $know .= '

' . $facilities . '

';

                // Extended fields
                $extends = [];
                $extends['field_1_tags'] = array_map(fn($v) => $v['title'] ?? '', $guide_info['facility'] ?? []);
                $extends['field_1_name'] = $base_info['name'] ?? '';
                $extends['field_1_address'] = $base_info['address'] ?? '';
                $extends['field_1_latitude'] = $base_info['lat'] ?? 0;
                $extends['field_1_longitude'] = $base_info['lng'] ?? 0;

                // Title is name followed by level. Without the parentheses the level was never
                // appended, because `$a ?? '' . $b ?? ''` parses as `$a ?? (('' . $b) ?? '')`.
                // NOTE(review): titles of newly collected hotels now include the level; import()
                // matches products by title, so check previously imported rows.
                $level = $base_info['level'] ?? '';
                $title = ($base_info['name'] ?? '') . (is_scalar($level) ? $level : '');

                CollectProduct::updateOrCreate(['unique_id' => $hotel['id'], 'site' => 1], [
                    'unique_id' => $hotel['id'],
                    'site' => 1,
                    'type' => 1, // 0: tour route, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                    'title' => mb_substr($title, 0, 255),
                    'price' => mt_rand(150, 350),
                    'original_price' => mt_rand(350, 550),
                    'pictures' => array_map(fn($v) => $v['url'] ?? '', $base_info['album'] ?? []),
                    'stock' => mt_rand(1000, 9999),
                    'sale' => $base_info['num_collect'] ?? 0,
                    'status' => -2, // -2 = delisted
                    'know' => $know,
                    'content' => $guide_info['intro'] ?? '',
                    'extends' => $extends,
                    'longitude' => $base_info['lng'] ?? 0,
                    'latitude' => $base_info['lat'] ?? 0,
                    'address' => $base_info['address'] ?? '',
                ]);
                $this->line("{$hotel['id']} 采集结束" . PHP_EOL);
            }
            $this->line('第 ' . ($i + 1) . ' 页采集结束' . PHP_EOL);
        }
    }

    /**
     * Compute the `_sn` value sent with the hotel base_info request.
     *
     * The parameters are sorted by key, JSON-encoded, suffixed with a fixed salt and hashed
     * with md5; the result is the 10 hex characters starting at offset 2 of that digest.
     *
     * @param array $params Request parameters to sign.
     * @return string 10-character signature.
     */
    private function _sn($params): string
    {
        ksort($params);
        $digest = md5(json_encode($params) . 'c9d6618dbc657b41a66eb0af952906f1');

        return substr($digest, 2, 10);
    }

    /**
     * Crawl ten pages of the tour-route list and upsert each route as a collected product (type 0).
     *
     * NOTE(review): the `preg_match_all(` statement below is truncated in this copy of the file:
     * the rest of its pattern, its remaining arguments and the loop header that defines `$id`
     * are missing. The code is left exactly as found; restore it from the original before
     * running this method.
     *
     * @return void
     */
    private function tourist_line()
    {
        $http = Http::withOptions(['verify' => false]);
        for($i=0; $i<10; $i++) {
            $this->line('开始采集旅游线路:第 ' . ($i + 1) . ' 页');
            $data = $http->get('https://m.mafengwo.cn/sales/ajax.php', [
                'sF' => 'search_new_list',
                'offset' => $i * 10, // paging parameter
            ]);
            if (empty($data['data'])) {
                continue;
            }
            $data = $data['data'];
            preg_match_all('/exists_update == false && CollectProduct::where(['unique_id' => $id, 'site' => 1])->exists()) { continue; }
                $this->line('开始采集:' . $id);
                $info = $http->get('https://m.mafengwo.cn/sales/detail/index/info?id=' . $id);

                // Travel notice: the section named '购买须知', flattened to one string.
                // NOTE(review): when that section is missing or has no array content, $know is
                // left as an array or false rather than a string.
                $know = $info['data']['list']['content'][0]['content'] ?? [];
                if (isset($info['data']['list']['content'][0]['content']) && is_array($info['data']['list']['content'][0]['content'])) {
                    $know = current(array_filter($info['data']['list']['content'][0]['content'], fn($v) => isset($v['name']) && $v['name'] == '购买须知'));
                    if (isset($know['content']) && is_array($know['content'])) {
                        $know = array_reduce(
                            $know['content'],
                            fn($v1, $v2) => $v1 . (isset($v2['name']) && is_string($v2['name']) ? "

{$v2['name']}

" : '') . (isset($v2['content']) && is_string($v2['content']) ? $v2['content'] : '')
                        );
                    }
                }

                // Product details: the 'introduction' entry inside the 'introduce' section.
                $content = '';
                if (isset($info['data']['list']['content'][0]['content']) && is_array($info['data']['list']['content'][0]['content'])) {
                    $content = current(array_filter($info['data']['list']['content'][0]['content'], fn($v) => isset($v['key']) && $v['key'] == 'introduce'));
                    if (isset($content['content']) && is_array($content['content'])) {
                        $content = current(array_filter($content['content'], fn($v) => isset($v['key']) && $v['key'] == 'introduction'));
                        // NOTE(review): current() yields false when no entry matched, and this
                        // line then indexes into false.
                        $content = is_string($content['content']) ? $content['content'] : '';
                    } else {
                        $content = '';
                    }
                }

                // Extended fields: one project entry per tag.
                $extends = [];
                if (isset($info['data']['list']['base']['tags']) && is_array($info['data']['list']['base']['tags'])) {
                    foreach ($info['data']['list']['base']['tags'] as $tag) {
                        $extends['field_0_project'][] = ['name' => $tag, 'num' => '', 'price' => ''];
                    }
                }

                CollectProduct::updateOrCreate(['unique_id' => $id, 'site' => 1], [
                    'unique_id' => $id,
                    'site' => 1,
                    'type' => 0, // 0: tour route, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                    'title' => mb_substr($info['data']['list']['base']['title'] ?? '', 0, 255),
                    'price' => $info['data']['list']['base']['price_zhanshi'] ?? 0,
                    'original_price' => ($info['data']['list']['base']['price_zhanshi'] ?? 0) * 1.58,
                    'pictures' => $info['data']['list']['base']['imgList'] ?? [],
                    'stock' => mt_rand(1000, 9999),
                    'sale' => $info['data']['list']['base']['sold']['num'] ?? 0,
                    'status' => -2, // -2 = delisted
                    'know' => $know,
                    'content' => $content,
                    'extends' => $extends,
                    'longitude' => 0,
                    'latitude' => 0,
                    'address' => '',
                ]);
                $this->line($id . ' 采集完毕!' . PHP_EOL);
            }
        }
    }
}