option('import');
if (!empty($supplier_id)) {
$this->import($supplier_id);
$this->line("供应商 $supplier_id 导入完毕");
} else {
// $this->exists_update = false;
$this->tourist_line();
$this->hotel();
$this->scenic();
// $this->db_img_replace();
$this->line('全部采集完毕');
}
return Command::SUCCESS;
}
//导入
/**
 * Copy collected products into the Product table for one supplier or for all of them.
 *
 * Source rows are read newest-id first. The --limit option narrows them:
 * "N" takes the first N rows, "offset,N" skips `offset` rows and takes N,
 * any other shape (or no option) loads every row.
 *
 * @param string $supplier_id A digit-only supplier id, or 'all' for every supplier
 *                            whose id is greater than 1. Any other value makes the
 *                            method return without importing anything.
 */
private function import($supplier_id)
{
    $limit = $this->option('limit');
    $cpModel = CollectProduct::query()->orderBy('id', 'desc');

    // Split the option once; an empty option behaves like "no parts".
    $parts = empty($limit) ? [] : explode(',', $limit);
    $source = match (count($parts)) {
        1 => $cpModel->limit($parts[0]),
        2 => $cpModel->offset($parts[0])->limit($parts[1]),
        default => $cpModel,
    };
    $import_data = $source->get()->toArray();

    // Resolve the target suppliers; match arms are evaluated lazily, top to bottom.
    $ids = match (true) {
        ctype_digit($supplier_id) => [$supplier_id],
        $supplier_id == 'all' => Supplier::query()->where('id', '>', 1)->pluck('id'),
        default => null,
    };
    if ($ids === null) {
        return;
    }

    foreach ($ids as $supplier_id) {
        $this->line('正在导入 ' . $supplier_id);
        foreach ($import_data as $row) {
            // Re-home the row under the current supplier and drop the collector-only keys.
            $row['supplier_id'] = $supplier_id;
            unset($row['unique_id'], $row['site']);
            // A product is identified by (supplier_id, title).
            Product::updateOrCreate(['supplier_id' => $supplier_id, 'title' => $row['title']], $row);
        }
        $this->line("导入 $supplier_id 结束");
    }
}
//景区采集
/**
 * Scrape scenic-spot pages from m.mafengwo.cn and store each spot through save_to_db()
 * as a collected product with type 2.
 *
 * For every listing page it extracts the `data-url` attributes, fetches each detail page,
 * queries the inc_sales endpoint for a price, and assembles the record from regex matches.
 * Spots already stored (unique_id + site 1) are skipped while $this->exists_update is false.
 *
 * NOTE(review): several patterns and needles below consist only of line breaks or are empty
 * (e.g. the title pattern, the strpos() needles). They look like they lost their markup —
 * confirm against version control before relying on them.
 */
private function scenic()
{
// Requests use a mobile Safari User-Agent and skip TLS certificate verification.
$http = Http::withOptions(['verify' => false])->withHeaders(['User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1']);
// $i runs 15..19, so listing pages 16..20 are requested.
for ($i=15; $i<20; $i++) {
$this->line('开始采集景区:第 ' . ($i + 1) . ' 页');
$html = $http->get('https://m.mafengwo.cn/jd/10030/gonglve.html?page=' . ($i + 1));
if (preg_match_all('/data-url="(.*?)"/', $html, $matches)) {
if(empty($matches[1])) continue;
foreach ($matches[1] as $url) {
$this->line('开始采集 ' . $url);
// The detail page is fetched before the id/exists checks, so skipped entries still cost one request.
$res = $http->get('https://m.mafengwo.cn' . $url);
// The numeric part of ".../<digits>.html" is used as the unique id.
if (!preg_match('/\/(\d+)\.html/', $url, $match)) continue;
$id = $match[1];
if ($this->exists_update == false && CollectProduct::where(['unique_id' => $id, 'site' => 1])->exists()) {
continue;
}
// Price comes from the 'html' field of the inc_sales response (first "¥<digits>" match).
$inc_sales = $http->post('https://m.mafengwo.cn/poi/poi/inc_sales', ['poiid' => $id]);
preg_match('#¥(\d+)#', $inc_sales['html'] ?? '', $match_price);
preg_match('#
(.*?)
#', $res, $match_title);
preg_match_all('#[\s\S]*?
(\d+)条#', $res, $match_sale);
preg_match('#"row h1" data-jump="ticket">\s*([\w\W]*?)\s* \s*\s*([\w\W]*?)\s*
#s', $res, $match_content);
preg_match_all('#时间:\s*\s*(.*?)\s*#', $res, $match_open_time);
//extension fields
preg_match('#地址:(.*?)
#', $res, $match_address);
// Cut a slice of the page between two markers and collect project names from it.
$pos_start = strpos($res, '');
$pos_end = strpos($res, '
', $pos_start);
preg_match_all('#([^<>]*?)#s', substr($res, $pos_start, $pos_end - $pos_start), $match_project);
// NOTE(review): $extends is never reset inside the loop; every key written below is
// reassigned per spot, so nothing leaks today, but new conditional keys would.
if (isset($match_project[1]) && is_array($match_project[1])) {
$extends['field_2_project'] = array_map(function($v) {
return [
'name' => $v,
'num' => '',
'price' => '',
];
}, $match_project[1]);
}
$extends['field_2_address'] = $match_address[1] ?? '';
// Row 0 is a fixed header row; row 1 carries the first three scraped time values.
$extends['field_2_open_time'][0]['node'] = '营业时间';
$extends['field_2_open_time'][0]['summer'] = '上岛时间';
$extends['field_2_open_time'][0]['winter'] = '下岛时间';
if (isset($match_open_time[1]) && is_array($match_open_time[1])) {
$extends['field_2_open_time'][1]['node'] = $match_open_time[1][0] ?? '';
$extends['field_2_open_time'][1]['summer'] = $match_open_time[1][1] ?? '';
$extends['field_2_open_time'][1]['winter'] = $match_open_time[1][2] ?? '';
}
// NOTE(review): $match_pictures and $match_know are never assigned in this method, so
// 'pictures' and 'know' always take their defaults. $match_sale comes from
// preg_match_all(), so $match_sale[1] is an array rather than a single number.
$this->save_to_db(['unique_id' => $id, 'site' => 1], [
'unique_id' => $id,
'site' => 1,
'type' => 2, //0: tour route, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
'title' => $match_title[1] ?? '',
'price' => $match_price[1] ?? 0,
'original_price' => ($match_price[1] ?? 0) * 1.58,
'pictures' => $match_pictures[1] ?? [],
'stock' => mt_rand(1000, 9999),
'sale' => $match_sale[1] ?? 0,
'status' => -2, //-2 = off the shelf
'know' => $match_know[1] ?? '',
'content' => $match_content[1] ?? '',
'extends' => $extends,
'longitude' => 0,
'latitude' => 0,
'address' => $match_address[1] ?? '',
]);
$this->line("采集 $url 完毕" . PHP_EOL);
}
}
$this->line('第 ' . ($i + 1) . ' 页采集结束' . PHP_EOL);
}
}
//酒店采集
/**
 * Scrape hotel data from m.mafengwo.cn and store each hotel through save_to_db()
 * as a collected product with type 1.
 *
 * Ten list pages of 20 hotels are requested for destination $mddid. For every hotel the
 * base_info endpoint (signed with _sn()) and the guide_info endpoint are queried.
 * Hotels already stored (unique_id + site 1) are skipped while $this->exists_update is false.
 * Price, original price and stock are random placeholders.
 *
 * NOTE(review): the "know" literals contain raw line breaks, possibly where markup used
 * to be — confirm against version control.
 */
private function hotel()
{
    $http = Http::withOptions(['verify' => false]);
    $mddid = 10030; //10030 == Sanya
    for ($i = 0; $i < 10; $i++) {
        $this->line('开始采集酒店:第 ' . ($i + 1) . ' 页');
        $list = $http->get('https://m.mafengwo.cn/rest/hotel/hotels/', [
            'filter' => [
                'mddid' => $mddid,
            ],
            'page' => [
                'mode' => 'sequential',
                'boundary' => $i * 20, //pagination offset
                'num' => 20,
            ],
        ]);
        if (empty($list['data']['list'])) {
            continue;
        }
        foreach ($list['data']['list'] as $v) {
            if (empty($v['id'])) continue;
            if ($this->exists_update == false && CollectProduct::where(['unique_id' => $v['id'], 'site' => 1])->exists()) {
                continue;
            }
            $this->line('采集详情:' . $v['id']);

            //basic information (request must carry the _sn signature of its own parameters)
            $params = [
                '_ts' => time() . '123',
                'hotel_id' => (string)$v['id'],
                'lat' => '',
                'lng' => '',
                'rmdd_id' => (string)$mddid,
            ];
            $params['_sn'] = $this->_sn($params);
            $base = $http->get('https://m.mafengwo.cn/hservice/detail/info/base_info', $params);
            if (empty($base['data']['info'])) {
                continue;
            }
            $base_info = $base['data']['info'];

            //hotel details
            $guide = $http->get('https://m.mafengwo.cn/hservice/detail/info/guide_info', ['hotel_id' => $v['id']]);
            $guide_info = $guide['data']['info'] ?? [];

            //travel notes: check-in / check-out times followed by the facility titles
            $know = "入住时间:" . (!empty($guide_info['check_in']['title']) ? $guide_info['check_in']['title'] : '') . "
";
            $know .= "离店时间:" . (!empty($guide_info['check_out']['title']) ? $guide_info['check_out']['title'] : '') . "
";
            // Parenthesised so the fallback applies to a missing title, not to the whole concatenation.
            $know .= '' . array_reduce($base_info['facility_sort'] ?? [], fn($carry, $item) => $carry . ($item['title'] ?? ''), '') . '
';

            //extension fields — built fresh per hotel, with the same fallbacks as the top-level columns
            $extends = [
                'field_1_tags' => array_map(fn($tag) => $tag['title'] ?? '', $guide_info['facility'] ?? []),
                'field_1_name' => $base_info['name'] ?? '',
                'field_1_address' => $base_info['address'] ?? '',
                'field_1_latitude' => $base_info['lat'] ?? 0,
                'field_1_longitude' => $base_info['lng'] ?? 0,
            ];

            // Title is name followed by level. Each part gets its own fallback: "." binds tighter
            // than "??", so the unparenthesised form never appended the level when a name was present.
            $level = $base_info['level'] ?? '';
            $title = ($base_info['name'] ?? '') . (is_scalar($level) ? (string)$level : '');

            $this->save_to_db(['unique_id' => $v['id'], 'site' => 1], [
                'unique_id' => $v['id'],
                'site' => 1,
                'type' => 1, //0: tour route, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                'title' => mb_substr($title, 0, 255),
                'price' => mt_rand(150, 350),
                'original_price' => mt_rand(350, 550),
                'pictures' => array_map(fn($img) => $img['url'] ?? '', $base_info['album'] ?? []),
                'stock' => mt_rand(1000, 9999),
                'sale' => $base_info['num_collect'] ?? 0,
                'status' => -2, //-2 = off the shelf
                'know' => $know,
                'content' => $guide_info['intro'] ?? '',
                'extends' => $extends,
                'longitude' => $base_info['lng'] ?? 0,
                'latitude' => $base_info['lat'] ?? 0,
                'address' => $base_info['address'] ?? '',
            ]);
            $this->line("{$v['id']} 采集结束" . PHP_EOL);
        }
        $this->line('第 ' . ($i + 1) . ' 页采集结束' . PHP_EOL);
    }
}
//酒店详情计算_sn
/**
 * Compute the `_sn` signature sent with hotel detail requests.
 *
 * The parameters are sorted by key, JSON-encoded, suffixed with a fixed salt and hashed
 * with MD5; the signature is the 10 characters of the hex digest starting at offset 2.
 *
 * @param array $params Request parameters to sign.
 * @return string 10-character signature.
 */
private function _sn($params): string
{
    // Sort by key so the same parameter set always serialises identically.
    ksort($params);

    $payload = json_encode($params) . 'c9d6618dbc657b41a66eb0af952906f1';
    $digest = md5($payload);

    return substr($digest, 2, 10);
}
//旅游线路采集
/**
 * Scrape tour-route products from m.mafengwo.cn and store each one through save_to_db()
 * as a collected product with type 0.
 *
 * Ten result pages are requested from sales/ajax.php (offset advances by 10 per page).
 * For each product id the detail endpoint is queried; the "购买须知" section becomes `know`,
 * the introduce/introduction section becomes `content`, and the base tags become
 * `extends.field_0_project`.
 */
private function tourist_line()
{
$http = Http::withOptions(['verify' => false]);
for($i=0; $i<10; $i++) {
$this->line('开始采集旅游线路:第 ' . ($i + 1) . ' 页');
$data = $http->get('https://m.mafengwo.cn/sales/ajax.php', [
'sF' => 'search_new_list',
'offset' => $i * 10, //pagination parameter
]);
// A page without a 'data' payload is skipped.
if (empty($data['data'])) {
continue;
}
$data = $data['data'];
// NOTE(review): the next line is truncated in this copy — the regex pattern, the loop over
// the matched ids and the start of the "already collected" check are missing, which also
// leaves the braces of this method unbalanced. Restore it from version control.
preg_match_all('/exists_update == false && CollectProduct::where(['unique_id' => $id, 'site' => 1])->exists()) {
continue;
}
$this->line('开始采集:' . $id);
$info = $http->get('https://m.mafengwo.cn/sales/detail/index/info?id=' . $id);
//旅游须知
$know = $info['data']['list']['content'][0]['content'] ?? [];
if (isset($info['data']['list']['content'][0]['content']) && is_array($info['data']['list']['content'][0]['content'])) {
$know = current(array_filter($info['data']['list']['content'][0]['content'], fn($v) => isset($v['name']) && $v['name'] == '购买须知'));
if (isset($know['content']) && is_array($know['content'])) {
$know = array_reduce(
$know['content'], fn($v1, $v2) => $v1 .
(isset($v2['name']) && is_string($v2['name']) ? "{$v2['name']}
" : '') .
(isset($v2['content']) && is_string($v2['content']) ? $v2['content'] : '')
);
}
}
//产品详情
$content = '';
if (isset($info['data']['list']['content'][0]['content']) && is_array($info['data']['list']['content'][0]['content'])) {
$content = current(array_filter($info['data']['list']['content'][0]['content'], fn($v) => isset($v['key']) && $v['key'] == 'introduce'));
if (isset($content['content']) && is_array($content['content'])) {
$content = current(array_filter($content['content'], fn($v) => isset($v['key']) && $v['key'] == 'introduction'));
$content = is_string($content['content']) ? $content['content'] : '';
} else {
$content = '';
}
}
//扩展字段
$extends = [];
if (isset($info['data']['list']['base']['tags']) && is_array($info['data']['list']['base']['tags'])) {
foreach ($info['data']['list']['base']['tags'] as $tag) {
$extends['field_0_project'][] = ['name' => $tag, 'num' => '', 'price' => ''];
}
}
$this->save_to_db(['unique_id' => $id, 'site' => 1], [
'unique_id' => $id,
'site' => 1,
'type' => 0, //0:旅游线路、1:酒店、2:景区、3:餐厅、4:车队、5:单项
'title' => mb_substr($info['data']['list']['base']['title'] ?? '', 0, 255),
'price' => $info['data']['list']['base']['price_zhanshi'] ?? 0,
'original_price' => ($info['data']['list']['base']['price_zhanshi'] ?? 0) * 1.58,
'pictures' => $info['data']['list']['base']['imgList'] ?? [],
'stock' => mt_rand(1000, 9999),
'sale' => $info['data']['list']['base']['sold']['num'] ?? 0,
'status' => -2, //-2下架
'know' => $know,
'content' => $content,
'extends' => $extends,
'longitude' => 0,
'latitude' => 0,
'address' => '',
]);
$this->line($id . ' 采集完毕!' . PHP_EOL);
}
}
}
//保存到数据库
/**
 * Persist one scraped record into CollectProduct.
 *
 * Image URLs inside the record are processed by search_img() first, then the row
 * matching $unique_flag is updated, or created when none exists.
 *
 * @param array $unique_flag Attributes that identify the row.
 * @param array $data        Full set of values to store.
 */
private function save_to_db($unique_flag, $data)
{
    // search_img() receives $data by reference and rewrites it in place.
    $this->search_img($data);

    CollectProduct::query()->updateOrCreate($unique_flag, $data);
}
//Re-process every stored collected product: run its pictures/know/content through search_img() and save the result back
/**
 * Run search_img() over every stored CollectProduct row and save the rewritten
 * pictures, know and content values back to the same row.
 */
private function db_img_replace()
{
    // Columns that search_img() may rewrite and that are copied back onto the model.
    $fields = ['pictures', 'know', 'content'];

    foreach (CollectProduct::all(['id', 'pictures', 'know', 'content']) as $model) {
        $this->line('当前处理数据:' . $model->id);

        $data = $model->toArray();
        $this->search_img($data);

        foreach ($fields as $field) {
            $model->{$field} = $data[$field];
        }
        $model->save();
    }
}
//从数据中搜索图片并下载
/**
 * Download the remote images referenced by a product record and rewrite the record in place.
 *
 * Two places are handled: the `pictures` list (each remote URL is replaced by the path
 * returned from download_img()) and the `know` / `content` rich-text fields (each remote
 * image URL is replaced by $img_host plus that path, and ' data-src' becomes ' src').
 * URLs that do not start with "http", or that already contain $img_host, are left alone;
 * so are URLs whose download fails.
 *
 * @param array $data Product record, modified by reference.
 */
private function search_img(&$data)
{
    // Fixed host on Windows, otherwise derived from APP_URL.
    $img_host = PHP_OS == 'WINNT' ? 'https://yytx.eugyl.com/storage/' : env('APP_URL') . '/storage/';

    //product picture list
    if (!empty($data['pictures']) && is_array($data['pictures'])) {
        foreach ($data['pictures'] as &$picture) {
            if (substr($picture, 0, 4) != 'http' || str_contains($picture, $img_host)) continue;
            $this->line('正在下载图片:' . $picture);
            $src = $this->download_img($picture);
            if (!$src) continue;
            $picture = $src;
        }
        // Break the reference: otherwise a later loop reusing the variable would
        // overwrite the last entry of $data['pictures'].
        unset($picture);
    }

    //images inside rich text
    foreach ($data as $key => &$rich_text) {
        //only the know and content fields are rewritten
        if ($key == 'know' || $key == 'content') {
            //strip href links
            // NOTE(review): this pattern and the one below look damaged — the first is an
            // alternation of two empty branches, the second contains only a line break yet
            // capture group 2 is read from it. Restore both from version control.
            $rich_text = preg_replace('#|#', '', $rich_text);
            if (!empty($rich_text) && preg_match_all('/
/i', $rich_text, $image_url) && !empty($image_url[2]) && is_array($image_url[2])) {
                foreach (array_unique($image_url[2]) as $remote) {
                    if (substr($remote, 0, 4) != 'http' || str_contains($remote, $img_host)) continue;
                    $this->line('正在下载图片:' . $remote);
                    $src = $this->download_img($remote);
                    if (!$src) continue;
                    $rich_text = str_replace([$remote, ' data-src'], [$img_host . $src, ' src'], $rich_text);
                }
            }
        }
    }
    unset($rich_text);
}
//下载图片
/**
 * Download a remote image and store it through the Storage facade.
 *
 * The file is written to `public/collect/images/<Y-m-d>/<md5 of contents>.<ext>`.
 * The extension is read from the path component of the URL only, lower-cased, and must be
 * one of a small set of image extensions; anything else falls back to "jpg". The URL comes
 * from scraped pages, so it must not be able to choose an arbitrary extension for a file
 * written under the public directory.
 *
 * @param string $url Absolute URL of the image.
 * @return string The stored path without the leading `public/`, or '' when the download
 *                or the write fails.
 */
private function download_img($url): string
{
    // Look at the path only, so dots or slashes inside a query string are not mistaken for an extension.
    $path = parse_url($url, PHP_URL_PATH);
    $ext = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';
    if (!in_array($ext, ['jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp'], true)) {
        $ext = 'jpg';
    }

    try {
        $raw = file_get_contents($url);
    } catch (\Exception $e) {
        return '';
    }
    if (empty($raw)) {
        return '';
    }

    // Content-addressed name: identical bytes fetched on the same day map to the same file.
    $filename = 'collect/images/' . date('Y-m-d') . '/' . md5($raw) . '.' . $ext;
    $stored = 'public/' . $filename;

    // Check existence on the same disk and path that put() writes to.
    if (Storage::exists($stored) || Storage::put($stored, $raw)) {
        return $filename;
    }

    return '';
}
}