<?php
// Stage 1: read the remote news index and collect, for every teaser on it,
// the absolute article link, the publication date (Y-m-d) and the title.
// The result is accumulated in $news for the import loop that follows.
$url = 'https://site.ru/type/news';
require_once(MODX_CORE_PATH . 'components/simplehtmldom/simple-html-dom.php');
$html = file_get_html($url);
$news = [];
if (!$html) {
    // file_get_html() yields false when the page cannot be fetched or parsed;
    // calling ->find() on that would be a fatal error.
    echo '<p>Failed to load ' . $url . '</p>';
    $teasers = [];
} else {
    $teasers = $html->find('.teaser');
}
foreach ($teasers as $teaser) {
    $link = $teaser->find('a', 0);
    $date = $teaser->find('.date', 0);
    if (!$link || !$date) {
        // find(..., 0) gives null when nothing matches: skip incomplete teasers.
        continue;
    }
    $parser = date_create_from_format('d.m.y', trim($date->plaintext));
    if (!$parser) {
        // The date text did not match d.m.y; date_format() cannot take false.
        continue;
    }
    $publishedon = date_format($parser, 'Y-m-d');
    $news[] = [
        'link' => 'https://site.ru' . $link->href,
        'publishedon' => $publishedon,
        'pagetitle' => $link->plaintext,
    ];
}
// Stage 2: for every entry in $news, fetch the article page, copy its images
// into assets/images/archive/, rewrite the <img> sources to the local copies
// and create a MODX resource unless one with the same parent, title and
// publication date already exists.
foreach ($news as $page) {
    $page['parent'] = 15;
    $html = file_get_html($page['link']);
    if (!$html) {
        // file_get_html() yields false on a failed fetch/parse.
        echo '<p>Failed to load ' . htmlspecialchars($page['link']) . '</p>';
        continue;
    }
    $content = $html->find('.body', 0);
    if (!$content) {
        echo '<p>No .body element in ' . htmlspecialchars($page['link']) . '</p>';
        $html->clear();
        continue;
    }
    foreach ($content->find('img') as $img) {
        $src = trim((string) $img->src);
        if ($src === '' || strpos($src, 'data:') === 0) {
            // Nothing to download for a missing src or an inline data URI.
            continue;
        }
        // Only a leading scheme marks an absolute URL; "http" appearing
        // somewhere inside a relative path must not be mistaken for one.
        if (preg_match('#^https?://#i', $src)) {
            $url = $src;
        } elseif (strpos($src, '//') === 0) {
            $url = 'https:' . $src;
        } else {
            $url = 'https://site.ru/' . ltrim($src, '/');
        }
        $data = file_get_contents($url);
        if ($data === false) {
            // Keep the image pointing at the remote original instead of
            // saving an empty file and linking to it.
            $img->src = $url;
            continue;
        }
        // Take the extension from the URL path only, so a query string
        // ("pic.jpg?v=2") does not leak into the file name.
        $ext = pathinfo((string) parse_url($url, PHP_URL_PATH), PATHINFO_EXTENSION);
        $name = md5(uniqid()) . '.' . $ext;
        $path = MODX_BASE_PATH . 'assets/images/archive/' . $name;
        if (file_put_contents($path, $data) === false) {
            $img->src = $url;
            continue;
        }
        $local = '/assets/images/archive/' . $name;
        $img->src = $local;
        // Remember only the FIRST stored image. The original guard tested
        // $page['img'], which is never set, so every image overwrote tv1.
        // NOTE(review): tv1 is presumably the lead-image TV — confirm.
        if (empty($page['tv1'])) {
            $page['tv1'] = $local;
        }
    }
    if ($olds = $content->find('.field-field-old-filename', 0)) {
        $olds->outertext = '';
    }
    $page['content'] = $content->innertext;
    // The markup is now copied into a string; release the DOM tree so memory
    // does not accumulate across articles.
    $html->clear();
    $exists = $modx->getObject('modResource', [
        'parent' => $page['parent'],
        'pagetitle' => $page['pagetitle'],
        'publishedon' => strtotime($page['publishedon']),
    ]);
    if ($exists) {
        echo '<p>Resource found</p>';
        continue;
    }
    // Same title under the same parent but another date: suffix the alias
    // with the number of existing namesakes.
    $count = $modx->getCount('modResource', [
        'parent' => $page['parent'],
        'pagetitle' => $page['pagetitle'],
    ]);
    if ($count) {
        $page['alias'] = $page['pagetitle'] . '-' . $count;
        print $page['alias'] . ' - ';
    }
    $response = $modx->runProcessor('resource/create', $page);
    if ($response->isError()) {
        echo '<p>' . $page['pagetitle'] . '</p>';
        return $modx->error->failure($response->getMessage());
    }
    echo '<p>Resource created</p>';
}
Парсинг новостей с картинками для сайта
17 июня 2019, 16:07
Объектная
А это сниппет или плагин создавать?
Если плагин, то прошу инструкцию, на какое событие