<?php

namespace ExternalImporter\application\admin;

defined('\ABSPATH') || exit;

use ExternalImporter\application\Plugin;
use ExternalImporter\application\components\Config;
use ExternalImporter\application\components\Throttler;
use ExternalImporter\application\helpers\TextHelper;

/**
 * ParserConfig class file
 *
 * @author keywordrush.com <support@keywordrush.com>
 * @link https://www.keywordrush.com
 * @copyright Copyright &copy; 2025 keywordrush.com
 */
class ParserConfig extends Config
{

    /**
     * In-memory cache of cookie strings already resolved by getCookie(), keyed by domain.
     * An empty string is stored for domains that have no configured cookie.
     *
     * @var array
     */
    public static $cookies = array();

    /**
     * Slug of the admin page that hosts these settings.
     *
     * @return string Plugin slug followed by "-settings-parser".
     */
    public function page_slug()
    {
        $plugin_slug = Plugin::getSlug();

        return $plugin_slug . '-settings-parser';
    }

    /**
     * Name of the option under which these settings are stored.
     *
     * @return string Plugin slug followed by "-settings-parser".
     */
    public function option_name()
    {
        $plugin_slug = Plugin::getSlug();

        return $plugin_slug . '-settings-parser';
    }

    /**
     * Translated heading used for this settings section.
     *
     * @return string
     */
    public function header_name()
    {
        $heading = __('Extractor', 'external-importer');

        return $heading;
    }

    /**
     * Registers the extractor settings page as a submenu page under "options.php".
     *
     * The page is rendered by settings_page() and requires the "manage_options" capability.
     *
     * @return void
     */
    public function add_admin_menu()
    {
        $page_title = __('Extractor settings', 'external-importer') . ' &lsaquo; ' . Plugin::getName();
        $menu_title = __('Extractor settings', 'external-importer');

        \add_submenu_page(
            'options.php',
            $page_title,
            $menu_title,
            'manage_options',
            $this->page_slug(),
            array($this, 'settings_page')
        );
    }

    /**
     * Lists the supported scraping providers.
     *
     * @return array Map of provider id => array with 'id', 'name' (translated) and 'link' keys.
     */
    public static function get_scraping_providers()
    {
        $providers = array();

        $providers['scraperapi'] = array(
            'id'   => 'scraperapi',
            'name' => __('ScraperAPI', 'external-importer'),
            'link' => 'https://www.keywordrush.com/go/scraperapi',
        );

        $providers['scrapingdog'] = array(
            'id'   => 'scrapingdog',
            'name' => __('Scrapingdog', 'external-importer'),
            'link' => 'https://www.keywordrush.com/go/scrapingdog',
        );

        $providers['scrapeowl'] = array(
            'id'   => 'scrapeowl',
            'name' => __('ScrapeOwl', 'external-importer'),
            'link' => 'https://scrapeowl.com/',
        );

        $providers['crawlbase'] = array(
            'id'   => 'crawlbase',
            'name' => __('Crawlbase', 'external-importer'),
            'link' => 'https://crawlbase.com/',
        );

        return $providers;
    }

    /**
     * Builds the definition of every setting shown on the extractor settings page.
     *
     * The result is ordered as: general extractor options, the per-provider options
     * returned by get_scraping_provider_options() (API tokens followed by the
     * routing rules), then the proxy and custom cookie options. On admin pages the
     * throttling options additionally list the currently throttled domains.
     *
     * @return array Map of option key => option definition.
     */
    protected function options()
    {
        $session_reset_url = \get_admin_url(\get_current_blog_id(), 'admin.php?page=external-importer-tools&action=session_destroy');

        $general_options = array(
            'respect_robots' => array(
                'title' => __('Respect robots.txt', 'external-importer'),
                'label' => __('Read and respect robots.txt rules', 'external-importer'),
                'callback' => array($this, 'render_checkbox'),
                'default' => false,
            ),
            'use_sessions' => array(
                'title' => __('Sessions', 'external-importer'),
                'label' => __('Keep session alive', 'external-importer'),
                // The URL is escaped because it is printed inside an href attribute.
                'description' => __('Save cookies between requests.', 'external-importer') . '<br />' .
                    sprintf('<a href="%s">', \esc_url($session_reset_url)) . __('Clear session variables', 'external-importer') . ' </a>',
                'callback' => array($this, 'render_checkbox'),
                'default' => true,
            ),
            'daily_limit' => array(
                'title' => __('Daily limit', 'external-importer'),
                'description' => __('The maximum number of requests for each store. 0 - unlimited.', 'external-importer') . '<br />' .
                    __('If the limit is reached, then all automatic requests to the store will be throttled until the end of the day.', 'external-importer'),
                'callback' => array($this, 'render_input'),
                'class' => 'small-text',
                'type' => 'number',
                'validator' => array(
                    'trim',
                ),
                'default' => 500,
            ),
            'throttle_1' => array(
                'title' => __('Throttle for 1 hour', 'external-importer'),
                'label' => sprintf(__('Throttle all automatic requests to the store for 1 hour if %d errors occur', 'external-importer'), Throttler::ERRORS_COUNT_1HOUR),
                'description' => '',
                'callback' => array($this, 'render_checkbox'),
                'default' => true,
            ),
            'throttle_24' => array(
                'title' => __('Throttle for 24 hours', 'external-importer'),
                'label' => sprintf(__('Throttle all automatic requests to the store for 24 hours if %d errors occur', 'external-importer'), Throttler::ERRORS_COUNT_24HOURS),
                'description' => '',
                'callback' => array($this, 'render_checkbox'),
                'default' => true,
            ),
            'ae_integration' => array(
                'title' => __('Affiliate Egg integration', 'external-importer'),
                'description' => sprintf(__('Use <a target="_blank" href="%s">Affiliate Egg</a> parsers if possible.', 'external-importer'), 'https://www.keywordrush.com/affiliateegg'),
                'callback' => array($this, 'render_dropdown'),
                'dropdown_options' => array(
                    'enabled' => __('Enabled', 'external-importer'),
                    'disabled' => __('Disabled', 'external-importer'),
                ),
                'default' => 'enabled',
            ),
        );

        // get_scraping_provider_options() supplies the provider tokens and the
        // 'routing_rules' option, so that option is not declared a second time here.
        $options = array_merge($general_options, $this->get_scraping_provider_options());

        $options += array(
            'proxy_list' => array(
                'title' => __('Proxy list', 'external-importer'),
                'description' => sprintf(__('Comma-separated list of proxies in the form of %s, eg: %s', 'external-importer'), 'user:password@proxyserver:proxyport', 'socks4://11.22.33.44:1080,http://10.20.30.40:8080'),
                'callback' => array($this, 'render_textarea'),
                'default' => '',
                'validator' => array(
                    array(
                        'call' => array($this, 'proxyListFilter'),
                        'type' => 'filter',
                    ),
                ),
            ),
            'proxy_domains' => array(
                'title' => __('Domains routed via Proxy', 'external-importer'),
                'description' => __('Comma-separated list of domains should send requests through the Proxy list.', 'external-importer'),
                'callback' => array($this, 'render_input'),
                'default' => '',
                'validator' => array(
                    array(
                        'call' => array($this, 'proxyDomainsFilter'),
                        'type' => 'filter',
                    ),
                ),
            ),
            'cookies' => array(
                'title' => __('Custom cookies', 'external-importer'),
                'callback' => array($this, 'render_cookies_fields_block'),
                'validator' => array(
                    array(
                        'call' => array($this, 'formatCookiesFields'),
                        'type' => 'filter',
                    ),
                ),
            ),
        );

        if ($this->isAdminPage())
        {
            // Show which domains are currently throttled next to the matching setting.
            $by_daily_limit = Throttler::getThrottledByDailyLimit();
            $by_errors_1h = Throttler::getThrottledByErrors1();
            $by_errors_24h = Throttler::getThrottledByErrors24();

            $options['daily_limit']['description'] .= '<br /><em>' . sprintf(__('Currently throttled domains: %s.', 'external-importer'), TextHelper::formatMoreList($by_daily_limit, 10, '0')) . '</em>';
            $options['throttle_1']['description'] .= '<em>' . sprintf(__('Currently throttled domains: %s.', 'external-importer'), TextHelper::formatMoreList($by_errors_1h, 10, '0')) . '</em>';
            $options['throttle_24']['description'] .= '<em>' . sprintf(__('Currently throttled domains: %s.', 'external-importer'), TextHelper::formatMoreList($by_errors_24h, 10, '0')) . '</em>';
        }

        return $options;
    }

    /**
     * Prints one "domain › cookies" pair of text inputs for the custom cookies option.
     *
     * @param array $args Field arguments; reads 'option_name', 'name', 'value' and the
     *                    row index in '_field' (defaults to 0).
     * @return void
     */
    public function render_cookies_fields_line($args)
    {
        $i = isset($args['_field']) ? $args['_field'] : 0;

        $domain = '';
        if (isset($args['value'][$i]['name']))
        {
            $domain = $args['value'][$i]['name'];
        }

        $cookie = '';
        if (isset($args['value'][$i]['value']))
        {
            $cookie = $args['value'][$i]['value'];
        }

        // Shared "option[field][index]" part of both input names.
        $input_base = \esc_attr($args['option_name']) . '[' . \esc_attr($args['name']) . '][' . $i . ']';

        $domain_input = '<input name="' . $input_base . '[name]" value="' . \esc_attr($domain)
            . '" class="text" placeholder="' . \esc_attr(__('Domain name', 'external-importer')) . '"  type="text"/>';

        $cookie_input = '<input name="' . $input_base . '[value]" value="' . \esc_attr($cookie)
            . '" class="regular-text ltr" placeholder="' . \esc_attr(__('Cookies', 'external-importer')) . '"  type="text"/>';

        echo $domain_input . ' &#x203A; ' . $cookie_input;
    }

    /**
     * Prints the custom cookies editor: one input pair per stored cookie plus
     * three spare empty rows, followed by the option description when one is set.
     *
     * @param array $args Field arguments; 'value' holds the stored cookie rows and
     *                    'description' is optional.
     * @return void
     */
    public function render_cookies_fields_block($args)
    {
        // 'value' may be missing or not an array when nothing has been saved yet.
        $stored = (isset($args['value']) && is_array($args['value'])) ? count($args['value']) : 0;
        $total = $stored + 3;

        for ($i = 0; $i < $total; $i++)
        {
            echo '<div style="padding-bottom: 5px;">';
            $args['_field'] = $i;
            $this->render_cookies_fields_line($args);
            echo '</div>';
        }

        // The 'cookies' option defined in this class declares no description,
        // so the key must not be read unconditionally.
        if (!empty($args['description']))
            echo '<p class="description">' . $args['description'] . '</p>';
    }

    /**
     * Renders the 'settings' view for this page through PluginAdmin.
     *
     * @return void
     */
    public function settings_page()
    {
        $view_data = array('page_slug' => $this->page_slug());

        PluginAdmin::getInstance()->render('settings', $view_data);
    }

    /**
     * Collects the subset of settings exposed as the extractor configuration.
     *
     * @return array Array with the 'respect_robots' and 'use_sessions' option values.
     */
    public function getExtractorConfig()
    {
        $config = array();
        $config['respect_robots'] = $this->option('respect_robots');
        $config['use_sessions'] = $this->option('use_sessions');

        return $config;
    }

    /**
     * Filter for the proxy list option; delegates normalisation to TextHelper::commaList().
     *
     * @param mixed $value Raw submitted proxy list.
     * @return mixed Whatever TextHelper::commaList() returns for the input.
     */
    public function proxyListFilter($value)
    {
        $normalized = TextHelper::commaList($value);

        return $normalized;
    }

    /**
     * Filter for the "domains routed via proxy" option.
     *
     * Each comma-separated entry is reduced to a host name (via TextHelper::getHostName(),
     * or by lower-casing and stripping a leading "www." when that yields nothing);
     * entries that are empty or fail TextHelper::isValidDomainName() are dropped.
     *
     * @param mixed $value Raw comma-separated list of domains or URLs.
     * @return string Unique valid host names joined with commas.
     */
    public function proxyDomainsFilter($value)
    {
        $hosts = array();

        foreach (TextHelper::commaListArray($value) as $entry)
        {
            $host = TextHelper::getHostName($entry);

            if (!$host)
            {
                $host = preg_replace('/^www\./', '', strtolower(trim(\sanitize_text_field($entry))));
            }

            if (!$host || !TextHelper::isValidDomainName($host))
            {
                continue;
            }

            $hosts[] = $host;
        }

        return implode(',', array_unique($hosts));
    }

    /**
     * Filter for the custom cookies option.
     *
     * Normalises each submitted row to array('name' => host, 'value' => cookie string).
     * Rows are dropped when the cookie value or domain is empty, the domain fails
     * TextHelper::isValidDomainName(), or the domain was already seen (first one wins).
     *
     * @param mixed $values Submitted rows, each expected to hold 'name' and 'value'.
     * @return array List of cleaned cookie rows; empty when the input is not an array.
     */
    public function formatCookiesFields($values)
    {
        if (!is_array($values))
            return array();

        $results = array();
        $seen = array(); // domain => true, for constant-time duplicate detection

        foreach ($values as $row)
        {
            if (!is_array($row))
                continue;

            // Non-scalar fields (e.g. nested arrays from a crafted request) are
            // treated as empty so that trim() never receives an array.
            $raw_name = (isset($row['name']) && is_scalar($row['name'])) ? (string) $row['name'] : '';
            $raw_value = (isset($row['value']) && is_scalar($row['value'])) ? (string) $row['value'] : '';

            if ($host = TextHelper::getHostName($raw_name))
                $name = $host;
            else
                $name = preg_replace('/^www\./', '', strtolower(trim(\sanitize_text_field($raw_name))));

            $cookie = trim($raw_value);
            if (!$cookie || !$name || isset($seen[$name]) || !TextHelper::isValidDomainName($name))
                continue;

            $seen[$name] = true;
            $results[] = array('name' => $name, 'value' => $cookie);
        }

        return $results;
    }

    /**
     * Returns the custom cookie string configured for a domain.
     *
     * Results, including misses, are cached in self::$cookies, so the stored
     * option is only scanned the first time a given domain is requested.
     *
     * @param string $domain Domain to look up among the configured cookie rows.
     * @return string The configured cookie string, or '' when none matches.
     */
    public function getCookie($domain)
    {
        if (isset(self::$cookies[$domain]))
            return self::$cookies[$domain];

        // Fetch the option once and make sure it is iterable before scanning it.
        $cookies = $this->option('cookies');

        if ($cookies && is_array($cookies))
        {
            foreach ($cookies as $cookie)
            {
                if (isset($cookie['name'], $cookie['value']) && $cookie['name'] == $domain)
                {
                    self::$cookies[$domain] = $cookie['value'];
                    return self::$cookies[$domain];
                }
            }
        }

        // Remember the miss so the option is not scanned again for this domain.
        self::$cookies[$domain] = '';
        return self::$cookies[$domain];
    }

    /**
     * Looks up the custom cookie for the host that TextHelper::getHostName() extracts from a URL.
     *
     * @param string $url URL whose host is used for the lookup.
     * @return string Result of getCookie() for that host.
     */
    public function getCookieByUrl($url)
    {
        $host = TextHelper::getHostName($url);

        return $this->getCookie($host);
    }

    /**
     * Prints the routing rules editor.
     *
     * Output consists of a table with one row per stored rule (a single empty row
     * when none are stored), a pattern filter input in the table header, an
     * "Add rule" button, static help text with pattern examples, the option
     * description (if any) and finally the supporting JavaScript.
     *
     * @param array $args Field arguments; 'value' holds the stored rules and
     *                    'description' is optional.
     * @return void
     */
    public function render_routing_rules_block($args)
    {
        $rules = array();

        if (!empty($args['value']) && is_array($args['value']))
        {
            $rules = $args['value'];
        }

        // Always render at least one (empty) row so there is something to fill in or clone.
        if (empty($rules))
        {
            $rules[] = array();
        }

        // With more than 15 rules the wrapper receives the "has-scroll" CSS class.
        $total_rules = is_array($rules) ? count($rules) : 0;
        $has_many_rules = $total_rules > 15;
?>

        <div class="external-importer-routing-rules-wrapper<?php echo $has_many_rules ? ' has-scroll' : ''; ?>">
            <table class="widefat striped" id="external-importer-routing-rules-table">
                <thead>
                    <tr>
                        <th style="width:40%;padding-left:10px;">
                            <div class="ei-routing-header">
                                <span><?php echo \esc_html__('Pattern', 'external-importer'); ?></span>
                                <input type="search"
                                    id="external-importer-routing-rules-filter"
                                    class="ei-routing-filter-input"
                                    style="padding:2px 5px;margin-left:10px;font-weight:normal;"
                                    placeholder="<?php esc_attr_e('Type to filter…', 'external-importer'); ?>">
                            </div>
                        </th>

                        <th style="width:15%;padding-left:10px;"><?php echo \esc_html__('Crawler', 'external-importer'); ?></th>
                        <th style="width:40%;padding-left:10px;"><?php echo \esc_html__('Additional parameters', 'external-importer'); ?></th>
                        <th style="width:5%;"></th>
                    </tr>
                </thead>

                <tbody>
                    <?php
                    // The row renderer reads the rule from $args['value'] using the index in '_field'.
                    foreach ($rules as $i => $rule)
                    {
                        $args['_field'] = $i;
                        $this->render_routing_rules_row($args);
                    }
                    ?>
                </tbody>
            </table>
        </div>

        <p>
            <button type="button" class="button" id="external-importer-add-routing-rule">
                <?php echo \esc_html__('Add rule', 'external-importer'); ?>
            </button>
        </p>

        <p class="description">
            <?php echo \esc_html__('Pattern examples:', 'external-importer'); ?>
            <code>example.com</code>,
            <code>*.example.com</code>,
            <code>example.com/path/*</code><br />

            <?php echo \esc_html__('Rules are checked from top to bottom. The first matching rule will be used.', 'external-importer'); ?><br />

            <?php echo \esc_html__('Additional parameters will be appended to the provider API request, for example:', 'external-importer'); ?>
            <code>premium=true&amp;country_code=us</code>
        </p>

        <?php
        // The option description may contain markup, so it is filtered with wp_kses_post().
        if (!empty($args['description']))
        {
            echo '<p class="description">' . \wp_kses_post($args['description']) . '</p>';
        }

        $this->render_routing_rules_js();
    }

    /**
     * Prints a single table row of the routing rules editor.
     *
     * The row holds a pattern input, a provider dropdown built from
     * get_scraping_providers(), an "additional parameters" input and a remove button.
     *
     * @param array $args Field arguments; reads 'option_name', 'name' (defaults to
     *                    'routing_rules'), 'value' and the row index in '_field'.
     * @return void
     */
    public function render_routing_rules_row($args)
    {
        $i = isset($args['_field']) ? (int) $args['_field'] : 0;

        $rule = array();
        if (isset($args['value'][$i]) && is_array($args['value'][$i]))
        {
            $rule = $args['value'][$i];
        }

        $pattern  = isset($rule['pattern']) ? $rule['pattern'] : '';
        $provider = isset($rule['provider']) ? $rule['provider'] : '';
        $params   = isset($rule['params']) ? $rule['params'] : '';

        $option_name = isset($args['option_name']) ? $args['option_name'] : '';
        $field_name  = isset($args['name']) ? $args['name'] : 'routing_rules';

        // Shared "option[field][index]" part of every field name in this row.
        $input_base = \esc_attr($option_name) . '[' . \esc_attr($field_name) . '][' . $i . ']';
        $text_input_open = '<input type="text" style="width:100%;max-width:100%;" class="regular-text" name="';

        echo '<tr data-index="' . \esc_attr($i) . '">';

        // Pattern cell
        echo '<td>'
            . $text_input_open . $input_base . '[pattern]" value="' . \esc_attr($pattern)
            . '" placeholder="' . \esc_attr(__('Domain or pattern, e.g. example.com', 'external-importer')) . '"/>'
            . '</td>';

        // Provider cell
        $choices = '<option value="">' . \esc_html__('— Select —', 'external-importer') . '</option>';
        foreach (self::get_scraping_providers() as $provider_id => $provider_data)
        {
            $choices .= '<option value="' . \esc_attr($provider_id) . '"'
                . \selected($provider, $provider_id, false) . '>'
                . \esc_html($provider_data['name']) . '</option>';
        }

        echo '<td>'
            . '<select name="' . $input_base . '[provider]">' . $choices . '</select>'
            . '</td>';

        // Additional parameters cell
        echo '<td>'
            . $text_input_open . $input_base . '[params]" value="' . \esc_attr($params)
            . '" placeholder="' . \esc_attr(__('Additional query string, e.g. premium=true&country_code=us', 'external-importer')) . '"/>'
            . '</td>';

        // Remove button cell
        echo '<td style="text-align:center;">'
            . '<button type="button" class="button-link external-importer-routing-rule-remove" title="'
            . \esc_attr__('Remove rule', 'external-importer') . '">×</button>'
            . '</td>';

        echo '</tr>';
    }

    /**
     * Prints the jQuery script that drives the routing rules table: adding a row,
     * removing a row (the last remaining row is cleared instead of removed),
     * re-indexing field names and filtering rows by pattern.
     *
     * The script is printed at most once per request. It is emitted as a single
     * <script> block so that every handler is bound exactly once; binding the
     * add/remove handlers twice made each "Add rule" click insert two rows.
     *
     * @return void
     */
    protected function render_routing_rules_js()
    {
        static $printed = false;

        if ($printed)
        {
            return;
        }

        $printed = true;
        ?>
        <script type="text/javascript">
            jQuery(document).ready(function($) {
                var $table = $('#external-importer-routing-rules-table');
                var $tbody = $table.find('tbody');

                function reindexRows() {
                    $tbody.find('tr').each(function(index) {
                        var $row = $(this);
                        $row.attr('data-index', index);

                        $row.find('input, select').each(function() {
                            var $field = $(this);
                            var name = $field.attr('name');

                            if (!name) {
                                return;
                            }

                            // Replace the numeric index in: [routing_rules][0][pattern]
                            name = name.replace(/\[\d+\]\[(pattern|provider|params)\]/, '[' + index + '][$1]');
                            $field.attr('name', name);
                        });
                    });
                }

                $('#external-importer-add-routing-rule').on('click', function(e) {
                    e.preventDefault();

                    var $lastRow = $tbody.find('tr:last');
                    var $newRow;

                    if ($lastRow.length) {
                        $newRow = $lastRow.clone();

                        // Clear values in cloned row
                        $newRow.find('input').val('');
                        $newRow.find('select').prop('selectedIndex', 0);

                        // The source row may be hidden by the filter; the new row must be visible.
                        $newRow.appendTo($tbody).show();
                        reindexRows();
                    }
                });

                $tbody.on('click', '.external-importer-routing-rule-remove', function(e) {
                    e.preventDefault();

                    var $rows = $tbody.find('tr');

                    if ($rows.length <= 1) {
                        // If only one row left, just clear its values instead of removing.
                        var $row = $(this).closest('tr');
                        $row.find('input').val('');
                        $row.find('select').prop('selectedIndex', 0);
                        return;
                    }

                    $(this).closest('tr').remove();
                    reindexRows();
                });

                // Simple filter by pattern
                var $filter = $('#external-importer-routing-rules-filter');

                $filter.on('keyup change', function() {
                    var term = $.trim($(this).val()).toLowerCase();

                    $tbody.find('tr').each(function() {
                        var $row = $(this);

                        // Look at the Pattern input specifically:
                        var patternVal = $row.find('input[name*="[pattern]"]').val() || '';

                        if (term === '') {
                            $row.show();
                        } else if (patternVal.toLowerCase().indexOf(term) !== -1) {
                            $row.show();
                        } else {
                            $row.hide();
                        }
                    });
                });
            });
        </script>
<?php
    }

    /**
     * Filter for the routing rules option.
     *
     * Each submitted rule is reduced to array('pattern', 'provider', 'params').
     * A pattern that starts with a URL scheme is rewritten to "host" or "host/path"
     * with the host lower-cased. Rules are dropped when the pattern or provider is
     * empty or the provider is not one of get_scraping_providers().
     *
     * @param mixed $rules Submitted rules; anything other than an array yields array().
     * @return array Sequentially indexed list of cleaned rules, in submission order.
     */
    public static function formatRoutingRules($rules)
    {
        if (!is_array($rules))
        {
            return array();
        }

        $allowed_providers = array_keys(self::get_scraping_providers());
        $clean             = array();

        foreach ($rules as $rule)
        {
            if (!is_array($rule))
            {
                continue;
            }

            // Only string fields are accepted. A nested array (possible with a crafted
            // request) would otherwise reach trim() and raise a TypeError on PHP 8.
            $pattern_raw  = (isset($rule['pattern']) && is_string($rule['pattern']))   ? wp_unslash($rule['pattern'])  : '';
            $provider_raw = (isset($rule['provider']) && is_string($rule['provider'])) ? wp_unslash($rule['provider']) : '';
            $params_raw   = (isset($rule['params']) && is_string($rule['params']))     ? wp_unslash($rule['params'])   : '';

            // Normalize pattern
            $pattern = trim($pattern_raw);

            if ($pattern !== '')
            {
                // If user pasted a full URL (has scheme), extract host + path
                if (preg_match('~^[a-z][a-z0-9+\-.]*://~i', $pattern))
                {
                    $parsed = wp_parse_url($pattern);

                    if (!empty($parsed['host']))
                    {
                        $host = strtolower($parsed['host']); // normalize host
                        $path = isset($parsed['path']) ? $parsed['path'] : '';

                        // Build pattern as "host" or "host/path..."
                        $pattern = $host . $path;
                    }
                }

                // Final sanitization (keeps *, ., /, etc. but strips weird stuff)
                $pattern = sanitize_text_field($pattern);
            }

            $provider = $provider_raw !== '' ? sanitize_key($provider_raw) : '';
            $params   = $params_raw !== '' ? sanitize_text_field($params_raw) : '';

            // Skip rows without required fields
            if ($pattern === '' || $provider === '')
            {
                continue;
            }

            // Skip rows that reference an unknown provider
            if (!in_array($provider, $allowed_providers, true))
            {
                continue;
            }

            $clean[] = array(
                'pattern'  => $pattern,
                'provider' => $provider,
                'params'   => $params,
            );
        }

        // $clean is only ever appended to, so it is already sequentially indexed.
        return $clean;
    }

    /**
     * Builds the option definitions that relate to scraping providers.
     *
     * One "<provider id>_token" password field is produced for every provider
     * returned by get_scraping_providers(), followed by the 'routing_rules' option.
     *
     * @return array Map of option key => option definition.
     */
    protected function get_scraping_provider_options()
    {
        $fields = array();

        foreach (self::get_scraping_providers() as $provider_id => $provider)
        {
            $title = sprintf(
                /* translators: %s: provider name */
                __('%s API key', 'external-importer'),
                $provider['name']
            );

            if (empty($provider['link']))
            {
                $description = sprintf(
                    /* translators: %s: provider name */
                    __('Your %s token.', 'external-importer'),
                    esc_html(wp_strip_all_tags($provider['name']))
                );
            }
            else
            {
                $description = sprintf(
                    /* translators: 1: provider URL, 2: provider name */
                    __('Your <a target="_blank" href="%1$s">%2$s</a> token.', 'external-importer'),
                    esc_url($provider['link']),
                    esc_html(wp_strip_all_tags($provider['name']))
                );
            }

            $fields[$provider_id . '_token'] = array(
                'title'       => $title,
                'description' => $description,
                'callback'    => array($this, 'render_password'),
                'default'     => '',
                'validator'   => array(
                    'trim',
                ),
            );
        }

        // Routing rules come right after the provider tokens.
        $routing_description = __(
            'Define which scraping provider should be used for particular domains or URL patterns. Rules are processed from top to bottom; the first match wins.',
            'external-importer'
        );

        $fields['routing_rules'] = array(
            'title'       => __('Routing rules', 'external-importer'),
            'description' => $routing_description,
            'callback'    => array($this, 'render_routing_rules_block'),
            'default'     => array(),
            'validator'   => array(
                array(
                    'call' => array(self::class, 'formatRoutingRules'),
                    'type' => 'filter',
                ),
            ),
        );

        return $fields;
    }
}
