<?php

namespace ExternalImporter\application\components;

// Stop immediately unless the ABSPATH constant is defined, so the file does nothing when requested directly.
defined('\ABSPATH') || exit;

use ExternalImporter\application\helpers\TextHelper;
use ExternalImporter\application\admin\ExtractorApi;
use ExternalImporter\application\components\ParserTask;
use ExternalImporter\application\exceptions\ParserWarning;
use ExternalImporter\application\models\TaskModel;
use ExternalImporter\application\helpers\ParserHelper;
use ExternalImporter\application\Plugin;

/**
 * TaskProcessor class file
 *
 * Advances a parsing task by one step per run() call: either one listing
 * page is parsed (collecting product links and, optionally, further listing
 * pages) or one product URL is parsed. The outcome of the step is handed to
 * ExtractorApi as a JSON payload.
 *
 * @author keywordrush.com <support@keywordrush.com>
 * @link https://www.keywordrush.com
 * @copyright Copyright &copy; 2025 keywordrush.com
 */
class TaskProcessor
{

    /** Default number of consecutive errors after which a task is stopped (filterable via 'ei_stop_task_error_count'). */
    const STOP_TASK_ERROR_COUNT = 5;

    /** @var array Validated task input; 'urls' (when present) is the normalized list of product URLs. */
    protected $init_data;

    /**
     * Validates the task input.
     *
     * @param array $init_data Must contain a non-empty 'url' (listing URL) and/or 'urls'
     *                         (product URLs as an array or a newline-separated string).
     *
     * @throws \InvalidArgumentException When both inputs are missing, the listing URL is
     *                                   invalid, or no valid product URL remains.
     */
    public function __construct(array $init_data)
    {
        if (empty($init_data['url']) && empty($init_data['urls']))
        {
            throw new \InvalidArgumentException('Missing input: specify a listing URL or at least one product URL.');
        }

        if (isset($init_data['url']) && !TextHelper::isValidUrl($init_data['url']))
        {
            throw new \InvalidArgumentException(
                __('Invalid listing URL. Check the address and try again.', 'external-importer')
            );
        }

        if (isset($init_data['urls']))
        {
            // Normalize first, then validate: an empty result means nothing usable was supplied.
            $init_data['urls'] = self::prepareProductUrls($init_data['urls']);

            if (!$init_data['urls'])
            {
                throw new \InvalidArgumentException(
                    __('Invalid product URL(s). Check the addresses and try again.', 'external-importer')
                );
            }
        }

        $this->init_data = $init_data;
    }

    /**
     * Loads the task matching the init data (creating it when absent) and executes its next step.
     *
     * @throws \Exception When the task cannot be created/loaded or its step is unknown.
     */
    public function run()
    {
        $task = TaskModel::model()->getTask($this->init_data);
        if (!$task)
        {
            $task = $this->createTask();
        }

        $parserTask = $task['data'];

        // The explicit returns below guarantee that nothing else is processed
        // should the ExtractorApi responder hand control back to us.
        if ($parserTask->setNextStep() === false)
        {
            ExtractorApi::jsonSuccess(__('Parsing complete — all products processed.', 'external-importer'));
            return;
        }

        if ($parserTask->isLimitProductReached())
        {
            ExtractorApi::jsonSuccess(__('Configured product limit reached.', 'external-importer'));
            return;
        }

        if ($parserTask->getStep() == ParserTask::STEP_PARSE_LISING)
            $this->parseListing($task);
        elseif ($parserTask->getStep() == ParserTask::STEP_PARSE_PRODUCTS)
            $this->parseProducts($task);
        else
            throw new \Exception("Unknown task step.");
    }

    /**
     * Parses the next pending listing page and reports the discovered links.
     *
     * @param array $task Task record; 'data' holds the ParserTask, 'init_data' the original input.
     *
     * @throws \Exception When the task has no pagination URL left.
     */
    private function parseListing(array $task)
    {
        $parserTask = $task['data'];
        if (!$url = $parserTask->getNextPaginationUri())
            throw new \Exception("No pagination URL found.");

        // Remembered so the number of newly discovered listing pages can be reported afterwards.
        $pagination_count = $parserTask->getPaginationCount();

        // Track the exception itself rather than its message: an exception
        // with an empty message must still be handled as a failure.
        $exception = null;
        $listing = null;
        try
        {
            $listing = ParserHelper::parseListing($url);
        }
        catch (\Exception $e)
        {
            $exception = $e;
        }

        // Read before the pagination status is updated below; only the success message uses it.
        if ($parsed = $parserTask->getPaginationParsedCount())
            $page_prefix = sprintf(__('Page #%d:', 'external-importer'), $parsed + 1) . ' ';
        else
            $page_prefix = '';

        if ($exception !== null)
        {
            // update task
            $parserTask->setPaginationStatusError();
            TaskModel::model()->save($task);

            $reason = (string) $exception->getMessage();
            if ($reason === '')
                $reason = __('Unknown error', 'external-importer');

            $message = wp_kses(
                sprintf(
                    /* translators: 1: listing URL, 2: error detail */
                    '%1$s <a href="%2$s" target="_blank" rel="noopener noreferrer">%3$s</a>. <em>%4$s</em>.',
                    esc_html__('Unable to parse the', 'external-importer'),
                    esc_url($url),
                    esc_html__('listing URL ↗', 'external-importer'),
                    esc_html($reason)
                ),
                self::allowedMessageTags(true)
            );
            $this->maybeStopTask($parserTask, $message);

            // Never fall through to the success path: there is no listing to read.
            return;
        }

        // success
        $automatic_pagination = !empty($task['init_data']['automatic_pagination']);

        $parserTask->setPaginationStatusSuccess();
        $parserTask->addLinks($listing->links);

        if ($automatic_pagination)
            $parserTask->addPagination($listing->pagination);

        TaskModel::model()->save($task);

        $product_count = is_countable($listing->links) ? count($listing->links) : 0;

        $pagination_note = '';
        if ($automatic_pagination)
        {
            $found = max(0, (int) $parserTask->getPaginationCount() - (int) $pagination_count);
            $pagination_note = ' · ' . sprintf(
                esc_html(_n('%d listing URL discovered', '%d listing URLs discovered', $found, 'external-importer')),
                $found
            );
        }

        $message = $page_prefix . wp_kses(
            sprintf(
                /* translators: 1: "Parsed", 2: listing link, 3: product URLs found text, 4: optional pagination note */
                '%1$s %2$s — %3$s%4$s.',
                esc_html__('Parsed', 'external-importer'),
                sprintf(
                    '<a href="%s" target="_blank" rel="noopener noreferrer">%s</a>',
                    esc_url($url),
                    esc_html__('listing URL ↗', 'external-importer')
                ),
                sprintf(
                    esc_html(_n('%d product URL found', '%d product URLs found', $product_count, 'external-importer')),
                    $product_count
                ),
                $pagination_note
            ),
            self::allowedMessageTags(false)
        );

        $return = array(
            'products' => array(),
            'cmd'      => 'next',
            'log'      => array(
                'message' => $message,
                'type' => 'info',
            ),
        );

        $extractor = ParserHelper::getLastExtractor();

        if (Plugin::isDevEnvironment())
        {
            // Array casts keep join() from failing when a parser returned no list at all.
            $return['debug'] = 'Used parsers for listing: ' . join(', ', $extractor->getLastUsedParsers());
            $return['debug'] .= "\r\n";
            $return['debug'] .= "Product URLs:\r\n";
            $return['debug'] .= join("\r\n", (array) $listing->links);
            $return['debug'] .= "\r\n";
            $return['debug'] .= "Listing URLs:\r\n";
            $return['debug'] .= join("\r\n", (array) $listing->pagination);
        }

        $return['parsers'] = $extractor->getLastUsedParsers();
        $return['provider'] = $extractor->getLastUsedProvider();

        ExtractorApi::formatJsonData($return, $parserTask);
    }

    /**
     * Parses the next pending product URL and reports the product or the failure.
     *
     * A ParserWarning marks the link as a warning ("Skipped"); any other
     * exception marks it as an error ("Unable to parse").
     *
     * @param array $task Task record; 'data' holds the ParserTask.
     *
     * @throws \Exception When the task has no product URL left.
     */
    private function parseProducts(array $task)
    {
        $parserTask = $task['data'];
        if (!$url = $parserTask->getNextLinkUri())
        {
            throw new \Exception("No product URL found.");
        }

        // Keep the exception object: deciding by message truthiness would let
        // an exception with an empty message pass as a successful parse.
        $exception = null;
        $product = null;
        try
        {
            $product = ParserHelper::maybeParseProduct($url);
        }
        catch (\Exception $e)
        {
            $exception = $e;
        }

        if ($exception !== null)
        {
            $is_warning = $exception instanceof ParserWarning;

            if ($is_warning)
            {
                $parserTask->setLinkStatusWarning();
            }
            else
            {
                $parserTask->setLinkStatusError();
            }

            TaskModel::model()->save($task);

            $error_code = $exception->getCode();
            $code       = is_numeric($error_code) ? (int) $error_code : 0;

            $reason = (string) $exception->getMessage();
            if ($reason === '')
                $reason = __('Unknown error', 'external-importer');

            $block_note = '';
            if (!$is_warning && in_array($code, array(403, 503), true))
            {
                // These codes are reported to the user as an IP blocking issue, with a link to the docs.
                $block_note = wp_kses(
                    sprintf(
                        '%1$s <a href="%2$s" target="_blank" rel="noopener noreferrer">%3$s</a>.',
                        esc_html__('IP blocking issue on the source site.', 'external-importer'),
                        esc_url('https://ei-docs.keywordrush.com/extracting-products/crawling-services'),
                        esc_html__('Try activating a crawling service ↗', 'external-importer')
                    ),
                    self::allowedMessageTags(false)
                );
            }

            $lead = $is_warning
                ? esc_html__('Skipped', 'external-importer')
                : esc_html__('Unable to parse', 'external-importer');

            $message = wp_kses(
                sprintf(
                    /* translators: 1: lead text, 2: URL, 3: 'product URL ↗' label, 4: reason text, 5: optional guidance note */
                    '%1$s <a href="%2$s" target="_blank" rel="noopener noreferrer">%3$s</a>. <em>%4$s</em>%5$s',
                    $lead,
                    esc_url($url),
                    esc_html__('product URL ↗', 'external-importer'),
                    esc_html($reason),
                    $block_note ? ' ' . $block_note : ''
                ),
                self::allowedMessageTags(true)
            );

            $this->maybeStopTask($parserTask, $message, $is_warning);

            // Never fall through to the success path: there is no product to report.
            return;
        }

        // success
        $parserTask->setLinkStatusSuccess();
        TaskModel::model()->save($task);

        $return = array(
            'products' => array($product),
            'log'      => array(
                array(
                    'message' => wp_kses(
                        sprintf(
                            /* translators: 1: 'Parsed', 2: product URL, 3: truncated product title */
                            '%1$s <a href="%2$s" target="_blank" rel="noopener noreferrer"><em>%3$s</em> ↗</a>',
                            esc_html__('Parsed', 'external-importer'),
                            esc_url($url),
                            esc_html(TextHelper::truncate((string) $product->title, 85))
                        ),
                        self::allowedMessageTags(true)
                    ),
                    'type' => 'success',
                ),
            ),
        );

        if ($parserTask->setNextStep() === false)
        {
            $return['log'][] = array(
                'message' => esc_html__('Parsing complete — all products processed.', 'external-importer'),
                'type'    => 'info',
            );
            $return['cmd'] = 'stop';
        }
        elseif ($parserTask->isLimitProductReached())
        {
            $return['log'][] = array(
                'message' => esc_html__('Configured product limit reached — stopping.', 'external-importer'),
                'type'    => 'info',
            );
            $return['cmd'] = 'stop';
        }
        else
            $return['cmd'] = 'next';

        $extractor = ParserHelper::getLastExtractor();

        if (Plugin::isDevEnvironment())
        {
            $return['debug'] = 'Used parsers for product: ' . join(', ', $extractor->getLastUsedParsers());
        }

        $return['parsers'] = $extractor->getLastUsedParsers();
        $return['provider'] = $extractor->getLastUsedProvider();

        ExtractorApi::formatJsonData($return, $parserTask);
    }

    /**
     * Creates and stores a new task from the init data, then reloads it from the model.
     *
     * @return array Task record as returned by TaskModel::getTask().
     *
     * @throws \Exception When the task type is unknown or the task cannot be stored or reloaded.
     */
    private function createTask()
    {
        if (!empty($this->init_data['url']))
            $parserTask = $this->createListingParserTask();
        elseif (!empty($this->init_data['urls']))
            $parserTask = $this->createProductsParserTask();
        else
            throw new \Exception("Unknown task type.");

        if (!TaskModel::model()->createOrUpdate($this->init_data, $parserTask))
            throw new \Exception("Task cannot be created. Unknown error.");

        // Fail here with a clear message instead of letting run() dereference a missing task.
        $task = TaskModel::model()->getTask($this->init_data);
        if (!$task)
            throw new \Exception("Task cannot be created. Unknown error.");

        return $task;
    }

    /**
     * Builds a ParserTask whose pagination queue starts with the listing URL.
     *
     * @return ParserTask
     */
    private function createListingParserTask()
    {
        $parserTask = new ParserTask($this->init_data);
        // init pagination with listing URL
        $parserTask->addPagination($this->init_data['url']);
        return $parserTask;
    }

    /**
     * Builds a ParserTask whose link queue holds the supplied product URLs.
     *
     * @return ParserTask
     */
    private function createProductsParserTask()
    {
        $parserTask = new ParserTask($this->init_data);
        $parserTask->addLinks($this->init_data['urls']);
        return $parserTask;
    }

    /**
     * Reports a failed step and decides whether the client should continue or stop.
     *
     * The command is 'stop' when the task has no further step or when the number
     * of consecutive errors reaches the (filterable) threshold; otherwise 'next'.
     * A threshold of 0 (or any falsy filter result) disables the error-count stop.
     *
     * @param ParserTask $parserTask Task state to advance.
     * @param string     $message    Already-escaped HTML message for the log.
     * @param bool       $is_warning Report through jsonWarning instead of jsonError.
     */
    private function maybeStopTask(ParserTask $parserTask, $message, $is_warning = false)
    {
        $stop_error_count = \apply_filters('ei_stop_task_error_count', self::STOP_TASK_ERROR_COUNT);

        if ($parserTask->setNextStep() === false)
        {
            $cmd = 'stop';
            $message .= ' ' . esc_html__('Task stopped — no products to parse.', 'external-importer');
        }
        elseif ($stop_error_count && $parserTask->getConsecutiveErrors() >= $stop_error_count)
        {
            $cmd = 'stop';
            $message .= ' ' . esc_html__('Task stopped — too many consecutive errors.', 'external-importer');
        }
        else
            $cmd = 'next';

        if ($is_warning)
            ExtractorApi::jsonWarning($message, $cmd);
        else
            ExtractorApi::jsonError($message, $cmd);
    }

    /**
     * Returns the wp_kses rule set used for log messages.
     *
     * @param bool $with_em Whether <em> is allowed in addition to <a href target rel>.
     *
     * @return array Allowed tags/attributes in the format expected by wp_kses().
     */
    private static function allowedMessageTags($with_em = false)
    {
        $rules = array(
            'a' => array(
                'href'   => true,
                'target' => true,
                'rel'    => true,
            ),
        );

        if ($with_em)
            $rules['em'] = array();

        return $rules;
    }

    /**
     * Normalizes product URLs: trims each entry, drops empty/invalid ones and removes duplicates.
     *
     * @param array|string $urls List of URLs or a newline-separated string.
     *
     * @return array Unique valid URLs in their original order (empty when none qualify).
     */
    public static function prepareProductUrls($urls)
    {
        if (!is_array($urls))
        {
            // null, objects and the like cannot contain URLs.
            if (!is_scalar($urls))
                return array();

            $urls = explode("\n", (string) $urls);
        }

        $results = array();
        $seen = array();
        foreach ($urls as $url)
        {
            // Nested arrays/objects/null would make trim() fail on PHP 8.
            if (!is_scalar($url))
                continue;

            // trim() also strips the "\r" left over from "\r\n" line endings.
            $url = trim((string) $url);

            if (!$url || !TextHelper::isValidUrl($url))
                continue;

            // Lookup map: constant-time, exact-string duplicate detection.
            if (isset($seen[$url]))
                continue;

            $seen[$url] = true;
            $results[] = $url;
        }

        return $results;
    }
}
