海南旅游SAAS
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

456 lines
15 KiB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
  1. <?php
  2. namespace App\Console\Commands;
  3. use App\Models\CollectProduct;
  4. use App\Models\Product;
  5. use App\Models\Supplier;
  6. use Illuminate\Console\Command;
  7. use Illuminate\Support\Facades\Http;
  8. use Illuminate\Support\Facades\Storage;
  9. class Collector extends Command
  10. {
  11. /**
  12. * The name and signature of the console command.
  13. * Options: --import=<numeric supplier id | all>, --limit=<count | offset,count>.
  14. * @var string
  15. */
  16. protected $signature = 'collector {--import=} {--limit=}'; //php artisan collector --import=$supplier_id
  17. /**
  18. * The console command description.
  19. *
  20. * @var string
  21. */
  22. protected $description = '马蜂窝产品采集';
  23. /**
  24. * Whether a record that was already collected is refreshed: true = update it, false = skip it.
  25. * @var bool
  26. */
  27. private bool $exists_update = true;
  /**
   * Build the command instance.
   *
   * Nothing is configured here; construction is delegated entirely to the
   * parent command class.
   *
   * @return void
   */
  public function __construct()
  {
      parent::__construct();
  }
  /**
   * Entry point of the command.
   *
   * Without --import the three collectors run in order (tourist lines,
   * hotels, scenic spots). With --import the collected rows are copied to
   * the given supplier(s) instead and nothing is scraped.
   *
   * @return int Always Command::SUCCESS.
   */
  public function handle()
  {
      $supplier_id = $this->option('import');

      // Collection mode: no import target was given.
      if (empty($supplier_id)) {
          $this->tourist_line();
          $this->hotel();
          $this->scenic();
          // db_img_replace() is intentionally not part of this run.
          $this->line('全部采集完毕');

          return Command::SUCCESS;
      }

      // Import mode.
      $this->import($supplier_id);
      $this->line("供应商 $supplier_id 导入完毕");

      return Command::SUCCESS;
  }
  /**
   * Copy collected products into the product table of one or all suppliers.
   *
   * Source rows come from CollectProduct, newest id first, optionally
   * windowed by the --limit option:
   *   "N"        -> the first N rows
   *   "offset,N" -> N rows after skipping `offset`
   *   otherwise  -> every row
   * Each row is upserted into Product keyed by (supplier_id, title).
   *
   * @param string $supplier_id Numeric supplier id, or "all" for every supplier whose id is > 1.
   * @return void
   */
  private function import($supplier_id)
  {
      // Resolve the targets first so a bad argument is reported before the
      // (potentially large) collect table is loaded into memory.
      if (ctype_digit((string) $supplier_id)) {
          $ids = [$supplier_id];
      } elseif ($supplier_id === 'all') {
          $ids = Supplier::query()->where('id', '>', 1)->pluck('id');
      } else {
          $this->line("无效的 --import 参数:$supplier_id");
          return;
      }

      $query = CollectProduct::query()->orderBy('id', 'desc');
      $limit = $this->option('limit');
      if (!empty($limit)) {
          // Cast so that stray whitespace or text never reaches the query builder.
          $parts = array_map('intval', explode(',', $limit));
          if (count($parts) === 1) {
              $query->limit($parts[0]);
          } elseif (count($parts) === 2) {
              $query->offset($parts[0])->limit($parts[1]);
          }
      }
      $import_data = $query->get()->toArray();

      foreach ($ids as $id) {
          $this->line('正在导入 ' . $id);
          foreach ($import_data as $row) {
              $row['supplier_id'] = $id;
              // Collector-only bookkeeping columns are not copied to Product.
              // NOTE(review): any other column of the collected row (e.g. its
              // primary key) is still passed along — confirm Product guards it.
              unset($row['unique_id'], $row['site']);
              Product::updateOrCreate(['supplier_id' => $id, 'title' => $row['title']], $row);
          }
          $this->line("导入 $id 结束");
      }
  }
  /**
   * Collect scenic-spot products from the Mafengwo mobile site.
   *
   * Walks list pages 16-20 of destination 10030, follows every `data-url`
   * link, scrapes the detail page with regular expressions and stores the
   * result through save_to_db() as a type-2 (scenic spot) product with
   * status -2 (off the shelf).
   *
   * @return void
   */
  private function scenic()
  {
      $http = Http::withOptions(['verify' => false])->withHeaders(['User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1']);

      for ($i = 15; $i < 20; $i++) {
          $page = $i + 1;
          $this->line('开始采集景区:第 ' . $page . ' 页');
          // The response is used as text from here on, so convert it once.
          $html = (string) $http->get('https://m.mafengwo.cn/jd/10030/gonglve.html?page=' . $page);

          if (preg_match_all('/data-url="(.*?)"/', $html, $matches)) {
              foreach ($matches[1] as $url) {
                  $this->line('开始采集 ' . $url);

                  // Decide whether this entry is wanted BEFORE downloading its
                  // detail page, so skipped entries cost no extra request.
                  if (!preg_match('/\/(\d+)\.html/', $url, $match)) {
                      continue;
                  }
                  $id = $match[1];
                  if ($this->exists_update == false && CollectProduct::where(['unique_id' => $id, 'site' => 1])->exists()) {
                      continue;
                  }

                  $res = (string) $http->get('https://m.mafengwo.cn' . $url);
                  $inc_sales = $http->post('https://m.mafengwo.cn/poi/poi/inc_sales', ['poiid' => $id]);
                  $sales_html = $inc_sales['html'] ?? '';

                  preg_match('#<strong><em>¥</em>(\d+)</strong>#', is_string($sales_html) ? $sales_html : '', $match_price);
                  preg_match('#<h1>(.*?)</h1>#', $res, $match_title);
                  preg_match_all('#<div class="swiper-slide">[\s\S]*?<img src="(.*?)"#', $res, $match_pictures);
                  preg_match('#蜂蜂点评<br/><strong>(\d+)条#', $res, $match_sale);
                  preg_match('#"row h1" data-jump="ticket">\s*([\w\W]*?)\s*</div>\s*<div class="row h1"#s', $res, $match_know);
                  preg_match('#<div class="desc" style="display: none;">\s*([\w\W]*?)\s*</div>#s', $res, $match_content);
                  preg_match_all('#时间:</dt>\s*<dd>\s*(.*?)\s*</dd>#', $res, $match_open_time);
                  preg_match('#<p class="t2">地址:(.*?)<i></i></p>#', $res, $match_address);

                  // Keyword chips live inside <div class="keywords">…</div>.
                  // strpos() returns false when the div is absent; using that
                  // as an offset would scan from the top of the page and pick
                  // up unrelated <span>s, so the section is only parsed when
                  // it is really there.
                  $projects = [];
                  $pos_start = strpos($res, '<div class="keywords">');
                  if ($pos_start !== false) {
                      $pos_end = strpos($res, '</div>', $pos_start);
                      $keywords = $pos_end === false
                          ? substr($res, $pos_start)
                          : substr($res, $pos_start, $pos_end - $pos_start);
                      preg_match_all('#<span>([^<>]*?)</span>#s', $keywords, $match_project);
                      $projects = $match_project[1];
                  }

                  // Extension fields, rebuilt from scratch for every product.
                  $extends = [
                      'field_2_project' => array_map(
                          fn($name) => ['name' => $name, 'num' => '', 'price' => ''],
                          $projects
                      ),
                      'field_2_address' => $match_address[1] ?? '',
                      'field_2_open_time' => [
                          // Row 0 holds the column captions, row 1 the scraped values.
                          ['node' => '营业时间', 'summer' => '上岛时间', 'winter' => '下岛时间'],
                          [
                              'node' => $match_open_time[1][0] ?? '',
                              'summer' => $match_open_time[1][1] ?? '',
                              'winter' => $match_open_time[1][2] ?? '',
                          ],
                      ],
                  ];

                  $this->save_to_db(['unique_id' => $id, 'site' => 1], [
                      'unique_id' => $id,
                      'site' => 1,
                      'type' => 2, // 0: tourist line, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                      'title' => $match_title[1] ?? '',
                      'price' => $match_price[1] ?? 0,
                      'original_price' => ($match_price[1] ?? 0) * 1.58,
                      'pictures' => $match_pictures[1] ?? [],
                      'stock' => mt_rand(1000, 9999),
                      'sale' => $match_sale[1] ?? 0,
                      'status' => -2, // -2 = off the shelf
                      'know' => $match_know[1] ?? '',
                      'content' => $match_content[1] ?? '',
                      'extends' => $extends,
                      'longitude' => 0,
                      'latitude' => 0,
                      'address' => $match_address[1] ?? '',
                  ]);
                  $this->line("采集 $url 完毕" . PHP_EOL);
              }
          }
          $this->line('第 ' . $page . ' 页采集结束' . PHP_EOL);
      }
  }
  /**
   * Collect hotel products from the Mafengwo mobile REST endpoints.
   *
   * Reads 10 list pages of 20 hotels for destination 10030, then for each
   * hotel fetches the signed `base_info` and the `guide_info` endpoints and
   * stores the result through save_to_db() as a type-1 (hotel) product with
   * status -2 (off the shelf). Prices and stock are random placeholders.
   *
   * @return void
   */
  private function hotel()
  {
      $http = Http::withOptions(['verify' => false]);
      $mddid = 10030; // destination id; 10030 == Sanya

      for ($i = 0; $i < 10; $i++) {
          $page = $i + 1;
          $this->line('开始采集酒店:第 ' . $page . ' 页');
          $listing = $http->get('https://m.mafengwo.cn/rest/hotel/hotels/', [
              'filter' => [
                  'mddid' => $mddid,
              ],
              'page' => [
                  'mode' => 'sequential',
                  'boundary' => $i * 20, // paging offset
                  'num' => 20,
              ],
          ]);
          if (empty($listing['data']['list'])) {
              continue;
          }

          foreach ($listing['data']['list'] as $v) {
              if (empty($v['id'])) {
                  continue;
              }
              if ($this->exists_update == false && CollectProduct::where(['unique_id' => $v['id'], 'site' => 1])->exists()) {
                  continue;
              }
              $this->line('采集详情:' . $v['id']);

              // Basic information (this endpoint requires the _sn signature).
              $params = [
                  '_ts' => time() . '123',
                  'hotel_id' => (string) $v['id'],
                  'lat' => '',
                  'lng' => '',
                  'rmdd_id' => (string) $mddid,
              ];
              $params['_sn'] = $this->_sn($params);
              $base = $http->get('https://m.mafengwo.cn/hservice/detail/info/base_info', $params);
              if (empty($base['data']['info'])) {
                  continue;
              }
              $base_info = $base['data']['info'];

              // Hotel guide / detail information.
              $guide = $http->get('https://m.mafengwo.cn/hservice/detail/info/guide_info', ['hotel_id' => $v['id']]);
              $guide_info = $guide['data']['info'] ?? [];

              // "Things to know": check-in / check-out times plus facility names.
              $know = "<p>入住时间:" . (!empty($guide_info['check_in']['title']) ? $guide_info['check_in']['title'] : '') . "</p>";
              $know .= "<p>离店时间:" . (!empty($guide_info['check_out']['title']) ? $guide_info['check_out']['title'] : '') . "</p>";
              // `.` binds tighter than `??`, so the fallback has to be
              // parenthesised to apply to the title alone.
              $know .= '<p>' . array_reduce(
                  $base_info['facility_sort'] ?? [],
                  fn($carry, $facility) => $carry . ($facility['title'] ?? ''),
                  ''
              ) . '</p>';

              // Extension fields, rebuilt from scratch for every hotel.
              $extends = [
                  'field_1_tags' => array_map(fn($facility) => $facility['title'] ?? '', $guide_info['facility'] ?? []),
                  'field_1_name' => $base_info['name'] ?? '',
                  'field_1_address' => $base_info['address'] ?? '',
                  'field_1_latitude' => $base_info['lat'] ?? '',
                  'field_1_longitude' => $base_info['lng'] ?? '',
              ];

              // Title = name followed by level. The previous expression
              // `$name ?? '' . $level ?? ''` parsed as
              // `$name ?? (('' . $level) ?? '')` and therefore never appended
              // the level. Only scalar levels are appended.
              $level = $base_info['level'] ?? '';
              $title = ($base_info['name'] ?? '') . (is_scalar($level) ? $level : '');

              $this->save_to_db(['unique_id' => $v['id'], 'site' => 1], [
                  'unique_id' => $v['id'],
                  'site' => 1,
                  'type' => 1, // 0: tourist line, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                  'title' => mb_substr($title, 0, 255),
                  'price' => mt_rand(150, 350),
                  'original_price' => mt_rand(350, 550),
                  'pictures' => array_map(fn($picture) => $picture['url'] ?? '', $base_info['album'] ?? []),
                  'stock' => mt_rand(1000, 9999),
                  'sale' => $base_info['num_collect'] ?? 0,
                  'status' => -2, // -2 = off the shelf
                  'know' => $know,
                  'content' => $guide_info['intro'] ?? '',
                  'extends' => $extends,
                  'longitude' => $base_info['lng'] ?? 0,
                  'latitude' => $base_info['lat'] ?? 0,
                  'address' => $base_info['address'] ?? '',
              ]);
              $this->line("{$v['id']} 采集结束" . PHP_EOL);
          }
          $this->line('第 ' . $page . ' 页采集结束' . PHP_EOL);
      }
  }
  /**
   * Compute the `_sn` request signature used by the hotel detail endpoint.
   *
   * The parameters are sorted by key, JSON-encoded, suffixed with a fixed
   * salt and hashed with MD5; the signature is the 10 hex characters that
   * start at offset 2 of that digest.
   *
   * @param array $params Request parameters to sign.
   * @return string 10-character signature.
   */
  private function _sn($params): string
  {
      ksort($params);

      $payload = json_encode($params) . 'c9d6618dbc657b41a66eb0af952906f1';
      $digest = md5($payload);

      return substr($digest, 2, 10);
  }
  /**
   * Collect tourist-line products from the Mafengwo mobile site.
   *
   * Reads 10 list pages (10 items each), extracts the product ids from the
   * returned HTML fragment, fetches each product's detail JSON and stores it
   * through save_to_db() as a type-0 (tourist line) product with status -2
   * (off the shelf).
   *
   * @return void
   */
  private function tourist_line()
  {
      $http = Http::withOptions(['verify' => false]);

      // Returns the first array entry whose $field equals $value, or [] when
      // there is none, so callers can index the result without extra checks.
      $find = static function (array $items, string $field, string $value): array {
          foreach ($items as $item) {
              if (is_array($item) && isset($item[$field]) && $item[$field] === $value) {
                  return $item;
              }
          }
          return [];
      };

      for ($i = 0; $i < 10; $i++) {
          $this->line('开始采集旅游线路:第 ' . ($i + 1) . ' 页');
          $listing = $http->get('https://m.mafengwo.cn/sales/ajax.php', [
              'sF' => 'search_new_list',
              'offset' => $i * 10, // paging offset
          ]);
          $list_html = $listing['data'] ?? '';
          // The ids are parsed out of an HTML fragment; anything else cannot be scanned.
          if (empty($list_html) || !is_string($list_html)) {
              continue;
          }
          preg_match_all('/<a href="\/sales\/(\d+)\.html"/', $list_html, $matches);
          if (empty($matches[1])) {
              continue;
          }

          // A product may be linked more than once on a page; scrape it once.
          foreach (array_unique($matches[1]) as $id) {
              if ($this->exists_update == false && CollectProduct::where(['unique_id' => $id, 'site' => 1])->exists()) {
                  continue;
              }
              $this->line('开始采集:' . $id);
              $info = $http->get('https://m.mafengwo.cn/sales/detail/index/info?id=' . $id);

              $base = $info['data']['list']['base'] ?? [];
              if (!is_array($base)) {
                  $base = [];
              }
              $sections = $info['data']['list']['content'][0]['content'] ?? [];
              if (!is_array($sections)) {
                  $sections = [];
              }

              // "Things to know": the section named 购买须知, flattened to HTML.
              // Always a string, so the rich-text post-processing never
              // receives false or an array.
              $know = '';
              $notice = $find($sections, 'name', '购买须知');
              if (is_array($notice['content'] ?? null)) {
                  $know = array_reduce(
                      $notice['content'],
                      fn($html, $part) => $html
                          . (isset($part['name']) && is_string($part['name']) ? "<h3>{$part['name']}</h3>" : '')
                          . (isset($part['content']) && is_string($part['content']) ? $part['content'] : ''),
                      ''
                  );
              } elseif (is_string($notice['content'] ?? null)) {
                  $know = $notice['content'];
              }

              // Product detail: section key "introduce" -> entry key "introduction".
              $content = '';
              $introduce = $find($sections, 'key', 'introduce');
              if (is_array($introduce['content'] ?? null)) {
                  $introduction = $find($introduce['content'], 'key', 'introduction');
                  if (is_string($introduction['content'] ?? null)) {
                      $content = $introduction['content'];
                  }
              }

              // Extension fields: one project entry per tag.
              $extends = [];
              if (isset($base['tags']) && is_array($base['tags'])) {
                  foreach ($base['tags'] as $tag) {
                      $extends['field_0_project'][] = ['name' => $tag, 'num' => '', 'price' => ''];
                  }
              }

              $this->save_to_db(['unique_id' => $id, 'site' => 1], [
                  'unique_id' => $id,
                  'site' => 1,
                  'type' => 0, // 0: tourist line, 1: hotel, 2: scenic spot, 3: restaurant, 4: fleet, 5: single item
                  'title' => mb_substr($base['title'] ?? '', 0, 255),
                  'price' => $base['price_zhanshi'] ?? 0,
                  'original_price' => ($base['price_zhanshi'] ?? 0) * 1.58,
                  'pictures' => $base['imgList'] ?? [],
                  'stock' => mt_rand(1000, 9999),
                  'sale' => $base['sold']['num'] ?? 0,
                  'status' => -2, // -2 = off the shelf
                  'know' => $know,
                  'content' => $content,
                  'extends' => $extends,
                  'longitude' => 0,
                  'latitude' => 0,
                  'address' => '',
              ]);
              $this->line($id . ' 采集完毕!' . PHP_EOL);
          }
      }
  }
  /**
   * Persist one collected product.
   *
   * Remote images referenced by the record are localised first (see
   * search_img()), then the row matching $unique_flag is updated, or created
   * when no such row exists.
   *
   * @param array $unique_flag Attributes that identify the row.
   * @param array $data        Full set of column values to store.
   */
  private function save_to_db($unique_flag, $data)
  {
      // search_img() rewrites $data in place.
      $this->search_img($data);

      CollectProduct::query()->updateOrCreate($unique_flag, $data);
  }
  /**
   * Re-process every stored collected product: download any image its
   * pictures / know / content fields reference and write the rewritten
   * values back to the row.
   */
  private function db_img_replace()
  {
      $records = CollectProduct::all(['id', 'pictures', 'know', 'content']);

      foreach ($records as $record) {
          $this->line('当前处理数据:' . $record->id);

          // search_img() works on a plain array and edits it in place.
          $fields = $record->toArray();
          $this->search_img($fields);

          foreach (['pictures', 'know', 'content'] as $column) {
              $record->{$column} = $fields[$column];
          }
          $record->save();
      }
  }
  /**
   * Localise the images referenced by a product record, in place.
   *
   * - `pictures`: every remote (http…) URL that is not already on the local
   *   image host is downloaded and replaced by the relative storage path
   *   returned by download_img().
   * - `know` / `content`: anchor tags are stripped, then every remote
   *   `<img>` source is downloaded and replaced by the image host + storage
   *   path; ` data-src` attributes are turned into ` src`.
   *
   * Entries whose download fails are left unchanged.
   *
   * @param array $data Product record; modified by reference.
   * @return void
   */
  private function search_img(&$data)
  {
      $img_host = PHP_OS == 'WINNT' ? 'https://yytx.eugyl.com/storage/' : env('APP_URL') . '/storage/';

      // Product picture list. Written back by index rather than through a
      // by-reference loop variable: a leftover reference named $url used to
      // be overwritten by the rich-text loop below, silently replacing the
      // LAST picture with whatever image URL that loop visited last.
      if (!empty($data['pictures']) && is_array($data['pictures'])) {
          foreach ($data['pictures'] as $index => $picture) {
              if (!is_string($picture) || substr($picture, 0, 4) != 'http' || str_contains($picture, $img_host)) {
                  continue;
              }
              $this->line('正在下载图片:' . $picture);
              $src = $this->download_img($picture);
              if (!$src) {
                  continue;
              }
              $data['pictures'][$index] = $src;
          }
      }

      // Rich-text fields; only know and content are rewritten.
      foreach (['know', 'content'] as $key) {
          if (!array_key_exists($key, $data)) {
              continue;
          }
          $value = $data[$key];
          // Structured values cannot be scanned as HTML; leave them alone
          // instead of failing inside preg_match_all().
          if (is_array($value) || is_object($value)) {
              continue;
          }

          // Drop hyperlinks but keep their inner content. The pattern is
          // limited to real <a> tags so <article>, <abbr>, <audio>, … stay intact.
          $original = (string) $value;
          $rich_text = preg_replace('#<a(?:\s[^>]*)?>|</a>#i', '', $original) ?? $original;

          if (!empty($rich_text) && preg_match_all('/<img\s+.*?src=([\'"])(.*?)\1.*?>/i', $rich_text, $image_url)) {
              foreach (array_unique($image_url[2]) as $remote) {
                  if (substr($remote, 0, 4) != 'http' || str_contains($remote, $img_host)) {
                      continue;
                  }
                  $this->line('正在下载图片:' . $remote);
                  $src = $this->download_img($remote);
                  if (!$src) {
                      continue;
                  }
                  $rich_text = str_replace([$remote, ' data-src'], [$img_host . $src, ' src'], $rich_text);
              }
          }

          $data[$key] = $rich_text;
      }
  }
  /**
   * Download one remote image into public storage.
   *
   * The file is stored as `collect/images/<Y-m-d>/<md5 of content>.<ext>`
   * below the `public/` directory of the default storage disk.
   *
   * @param string $url Absolute URL of the image.
   * @return string Storage path relative to `public/`, or '' when the
   *                download or the write fails.
   */
  private function download_img($url): string
  {
      // Take the extension from the URL *path* only, so query strings and
      // host names never end up in the file name; fall back to jpg when the
      // path has no plain alphanumeric extension.
      $path = parse_url($url, PHP_URL_PATH);
      $ext = is_string($path) ? pathinfo($path, PATHINFO_EXTENSION) : '';
      if (!preg_match('/^[A-Za-z0-9]{1,5}$/', $ext)) {
          $ext = 'jpg';
      }

      // file_get_contents() reports failure by returning false (plus a
      // warning); the catch covers setups where that warning is promoted to
      // an exception by the error handler.
      try {
          $raw = file_get_contents($url);
      } catch (\Exception $e) {
          return '';
      }
      if ($raw === false || $raw === '') {
          return '';
      }

      $filename = 'collect/images/' . date('Y-m-d') . '/' . md5($raw) . '.' . $ext;
      $stored_as = 'public/' . $filename;

      // Check for an existing copy on the same disk and path that put()
      // writes to; the earlier storage_path($filename) check pointed at a
      // different location and so never found the file.
      if (Storage::exists($stored_as) || Storage::put($stored_as, $raw)) {
          return $filename;
      }

      return '';
  }
  398. }