<?php
/**
 * MODX snippet: imports news items from https://site.ru/type/news as resources.
 *
 * Steps:
 *  1. Load the listing page and collect link, title and date of every ".teaser".
 *  2. For each item load the article page, take the ".body" element, download
 *     every <img> into assets/images/archive/ and rewrite its src to the local copy.
 *  3. Create a resource under parent 15 via the "resource/create" processor unless
 *     a resource with the same parent, pagetitle and publishedon already exists.
 *
 * Expects $modx to be in scope (provided by MODX when run as a snippet).
 * Echoes a short HTML status line per item; on a processor error returns
 * $modx->error->failure() with the processor message and stops.
 */

$baseUrl = 'https://site.ru';
$url = $baseUrl . '/type/news';
require_once(MODX_CORE_PATH . 'components/simplehtmldom/simple-html-dom.php');

$html = file_get_html($url);
if (!$html) {
    // file_get_html() returns false when the page cannot be fetched or parsed.
    return $modx->error->failure('Could not load ' . $url);
}

// --- 1. Collect teasers from the listing page --------------------------------
$news = [];
foreach ($html->find('.teaser') as $teaser) {
    $link = $teaser->find('a', 0);
    $date = $teaser->find('.date', 0);
    if (!$link || !$date) {
        // Teaser without a link or a date cannot be imported.
        continue;
    }

    $parser = date_create_from_format('d.m.y', trim($date->plaintext));
    if (!$parser) {
        // Date text is not in the expected dd.mm.yy form.
        continue;
    }

    $news[] = [
        'link' => $baseUrl . $link->href,
        'publishedon' => date_format($parser, 'Y-m-d'),
        'pagetitle' => $link->plaintext,
    ];
}
// simple_html_dom keeps circular references; release the document explicitly.
$html->clear();
unset($html);

// --- 2./3. Import every article ----------------------------------------------
foreach ($news as $page) {
    $page['parent'] = 15;

    $html = file_get_html($page['link']);
    if (!$html) {
        echo '<p>Could not load ' . $page['link'] . '</p>';
        continue;
    }

    $content = $html->find('.body', 0);
    if (!$content) {
        echo '<p>No .body element on ' . $page['link'] . '</p>';
        $html->clear();
        unset($html);
        continue;
    }

    foreach ($content->find('img') as $img) {
        $src = $img->src;

        // Resolve the image source to an absolute URL.
        if (preg_match('#^https?://#i', $src)) {
            $imgUrl = $src;
        } elseif (strpos($src, '//') === 0) {
            // Protocol-relative URL.
            $imgUrl = 'https:' . $src;
        } else {
            $imgUrl = $baseUrl . '/' . ltrim($src, '/');
        }

        // Take the extension from the URL path only, so a query string
        // ("pic.jpg?itok=abc") does not leak into the file name.
        $ext = pathinfo((string) parse_url($imgUrl, PHP_URL_PATH), PATHINFO_EXTENSION);
        $name = md5(uniqid()) . ($ext !== '' ? '.' . $ext : '');
        $path = MODX_BASE_PATH . 'assets/images/archive/' . $name;

        $data = file_get_contents($imgUrl);
        if ($data === false || file_put_contents($path, $data) === false) {
            // Leave the original src untouched rather than pointing at an
            // empty or missing local file.
            echo '<p>Could not save image ' . $imgUrl . '</p>';
            continue;
        }

        $img->src = '/assets/images/archive/' . $name;

        // Only the FIRST successfully saved image goes into tv1
        // (presumably the TV with id 1 — confirm against the site's TVs).
        if (empty($page['tv1'])) {
            $page['tv1'] = $img->src;
        }
    }

    // Drop the legacy file-name field from the imported markup.
    if ($olds = $content->find('.field-field-old-filename', 0)) {
        $olds->outertext = '';
    }

    $page['content'] = $content->innertext;

    // The markup has been copied into a string; the DOM is no longer needed.
    $html->clear();
    unset($html, $content);

    $existing = $modx->getObject('modResource', [
        'parent' => $page['parent'],
        'pagetitle' => $page['pagetitle'],
        'publishedon' => strtotime($page['publishedon']),
    ]);
    if ($existing) {
        echo '<p>Resource found</p>';
        continue;
    }

    // Same title under the same parent but another date: make the alias unique.
    $count = $modx->getCount('modResource', [
        'parent' => $page['parent'],
        'pagetitle' => $page['pagetitle'],
    ]);
    if ($count) {
        $page['alias'] = $page['pagetitle'] . '-' . $count;
        print $page['alias'] . ' - ';
    }

    $response = $modx->runProcessor('resource/create', $page);
    if ($response->isError()) {
        echo '<p>' . $page['pagetitle'] . '</p>';
        return $modx->error->failure($response->getMessage());
    }

    echo '<p>Resource created</p>';
}
Парсинг новостей с картинками для сайта
17 июня 2019, 16:07
А это сниппет или плагин создавать?
Если плагин, то прошу подсказать, на какое событие его вешать.