<?php
namespace FindStr;
use WP_Error;
/**
 * Search-index representation of a single WordPress post.
 *
 * The constructor loads the post, copies the post fields it needs, resolves
 * the featured image, language and sticky flag, then builds the `$to_index`
 * payload from the plugin's indexable-fields and weight settings.
 */
class Document {
	/** @var int Post ID. */
	public int $ID;

	/** @var \WP_Post The underlying post object. */
	private \WP_Post $post;

	/** @var string Post title with HTML entities decoded. */
	public string $post_title;

	/** @var string Rendered post content reduced to plain text. */
	public string $post_content;

	/** @var string Post slug. */
	public string $post_name;

	/** @var string Path component of the post permalink. */
	public string $permalink;

	/** @var mixed Language code (string or array, depending on filters). */
	public $language;

	/** @var array Taxonomy names registered for the post type. */
	public array $taxonomies;

	/** @var string Post creation date as stored on the post. */
	public string $post_date;

	/** @var string Post modification date as stored on the post. */
	public string $post_modified;

	/** @var string Title of the parent post, or an empty string. */
	public string $post_parent;

	/** @var string Post type slug. */
	public string $post_type;

	/** @var array Featured image data as returned by get_image_data(). */
	public array $featured_image;

	/** @var int Document weight, mirrored from `$to_index['weight']` when numeric. */
	public int $weight;

	/** @var int 1 when the post is sticky, 0 otherwise. */
	public int $sticky = 0;

	/** @var array Final payload sent to the index. */
	public array $to_index;

	/** @var string Display name of the post author. */
	private string $post_author;

	/**
	 * Build the document for a post.
	 *
	 * @param int|\WP_Post $post_id Post ID or post object, passed to get_post().
	 *
	 * @throws \Exception When the post cannot be loaded.
	 */
	public function __construct( $post_id ) {
		$post = get_post( $post_id );

		if ( ! $post instanceof \WP_Post ) {
			throw new \Exception( 'Post not found' );
		}

		$this->post          = $post;
		$this->ID            = $post->ID;
		$this->post_title    = html_entity_decode( get_the_title( $post ), ENT_QUOTES | ENT_HTML5, 'UTF-8' );
		$this->post_content  = $this->prepare_content( $post->post_content );
		$this->post_date     = $post->post_date;
		$this->post_name     = $post->post_name;
		$this->taxonomies    = $this->post_add_taxonomies();
		$this->post_modified = $post->post_modified;

		// wp_parse_url() yields null/false when the URL has no path or cannot be
		// parsed; cast so the string-typed property never receives a non-string.
		$this->permalink = (string) wp_parse_url( (string) get_permalink( $post->ID ), PHP_URL_PATH );

		$this->post_type   = $post->post_type;
		$this->post_parent = empty( $post->post_parent ) ? '' : get_the_title( $post->post_parent );
		$this->post_author = get_the_author_meta( 'display_name', $post->post_author );

		/**
		 * Filter the featured image ID
		 *
		 * @hook findstr_featured_image_id
		 *
		 * @param {int} $featured_image_id the featured image ID
		 * @param {WP_Post} $post the post object
		 *
		 * @returns {int} $featured_image_id the featured image ID
		 *
		 */
		$featured_image_id    = apply_filters( 'findstr_featured_image_id', get_post_thumbnail_id( $post ), $post );
		$this->featured_image = $this->get_image_data( $featured_image_id );

		$this->language = $this->get_language_code();
		$this->sticky   = (int) is_sticky( $post->ID );
		$this->to_index = $this->get_fields_to_index();

		$this->update_document_weight();

		/**
		 * Filter the data to index
		 *
		 * @hook findstr_data_to_index
		 *
		 * @param {array} $fields the fields to index
		 * @param {WP_Post} $post the post object
		 *
		 * @returns {array} $data the data to index
		 *
		 */
		$this->to_index = apply_filters( 'findstr_data_to_index', $this->to_index, $this->post );
	}

	/**
	 * Get the "name" label of a post type.
	 *
	 * @param string $post_type Post type slug.
	 *
	 * @return string The post type's name label, or the slug itself when the
	 *                post type is not registered.
	 */
	public function get_post_type( $post_type ): string {
		$post_type_object = get_post_type_object( $post_type );

		// get_post_type_object() returns null for unknown post types; do not
		// hand that to get_post_type_labels(), which expects an object.
		if ( null === $post_type_object ) {
			return is_string( $post_type ) ? $post_type : '';
		}

		$labels = get_post_type_labels( $post_type_object );

		return (string) $labels->name;
	}

	/**
	 * Compute the document weight and store it in the index payload.
	 *
	 * Every weight rule whose `name` field is present (non-empty) in
	 * `$to_index` and whose `value` is strictly equal to that field adds its
	 * `weight`. When no rule matches, the weight defaults to 10.
	 *
	 * @return void
	 */
	public function update_document_weight(): void {
		$rules   = (array) ( new \FindStr\SettingsWeightManagement() )->get();
		$weights = array();

		foreach ( $rules as $rule ) {
			if ( empty( $this->to_index[ $rule->name ] ) ) {
				continue;
			}

			if ( $rule->value === $this->to_index[ $rule->name ] ) {
				$weights[] = $rule->weight;
			}
		}

		if ( empty( $weights ) ) {
			$weights[] = 10;
		}

		/**
		 * Filter the document weight
		 *
		 * @hook findstr_document_weight
		 *
		 * @param {int} $weight the weight of the document
		 * @param {WP_Post} $post the post object
		 *
		 * @returns {int} $weight the weight of the document
		 *
		 */
		$weight = apply_filters( 'findstr_document_weight', array_sum( $weights ), $this->post );

		$this->to_index['weight'] = $weight;

		// Keep the typed public property readable; it was otherwise never assigned.
		if ( is_numeric( $weight ) ) {
			$this->weight = (int) $weight;
		}
	}

	/**
	 * Error handler installed while rendering post content.
	 *
	 * Records the message through Log with the document ID as context.
	 * It returns nothing, so PHP treats the error as handled.
	 *
	 * @param int    $error_no   Error level.
	 * @param string $error_str  Error message.
	 * @param string $error_file File in which the error was raised.
	 * @param int    $error_line Line on which the error was raised.
	 */
	public function warning_handler( $error_no, $error_str, $error_file, $error_line ) {
		$context = array(
			'function'   => 'Document->prepare_content',
			'ID'         => $this->ID,
			'error_no'   => $error_no,
			'error_file' => $error_file,
			'error_line' => $error_line,
		);

		new Log( $error_str, 'warning', $context );
	}

	/**
	 * Render post content through `the_content` and reduce it to plain text.
	 *
	 * Warnings, notices and deprecations raised during rendering are logged
	 * through warning_handler(); a thrown \Error is logged and whatever was
	 * rendered before it is kept.
	 *
	 * @param string $content Raw post content.
	 *
	 * @return string
	 */
	public function prepare_content( $content ) : string {
		//set error handler to catch errors
		//phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_set_error_handler
		set_error_handler( array( $this, 'warning_handler' ), E_WARNING | E_NOTICE | E_USER_WARNING | E_USER_NOTICE | E_STRICT | E_DEPRECATED | E_USER_DEPRECATED );
		ob_start();

		try {
			echo apply_filters( 'the_content', $content );//phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped
		} catch ( \Error $e ) {
			new Log(
				$e->getMessage(),
				'error',
				array(
					'function' => 'Document->prepare_content',
					'ID'       => $this->ID,
				)
			);
		} finally {
			// Always close the buffer and restore the previous handler, even when
			// an exception that is not caught here propagates to the caller.
			$rendered = ob_get_clean();
			restore_error_handler();
		}

		$text = $this->strip_all_tags( $rendered );
		$text = $this->strip_invisibles( $text );
		// The strip_invisibles filter may hand back something other than a string.
		$text = is_scalar( $text ) ? (string) $text : '';

		//remove repeating consecutive words
		$text = $this->replace_or_keep( '/\b(\S+)(?:\s+\1\b)+/i', '$1', $text );

		return wp_kses_post( trim( $text ) );
	}

	/**
	 * Get fields to index
	 *
	 * Field IDs from the indexable-fields settings are resolved by prefix:
	 * `cf/` for post meta, `acf/` for Advanced Custom Fields, `tax/` for
	 * taxonomy term names; any other ID is read from the property of the same
	 * name on this object.
	 *
	 * @return array
	 */
	public function get_fields_to_index() : array {
		$indexable_fields = (array) ( new SettingsIndexableFields() )->get();
		$fields           = array(
			'ID' => $this->ID,
		);

		foreach ( $indexable_fields as $field ) {
			if ( strpos( $field->id, 'cf/' ) === 0 ) { // custom fields
				// Strip only the leading prefix; the key itself may contain "cf/".
				$meta_key = (string) substr( $field->id, strlen( 'cf/' ) );
				if ( metadata_exists( 'post', $this->ID, $meta_key ) ) {
					$fields[ $meta_key ] = get_post_meta( $this->ID, $meta_key, true ); //todo : add multiple values
				}
			} elseif ( strpos( $field->id, 'acf/' ) === 0 ) { //advanced custom fields
				$meta_key = (string) substr( $field->id, strlen( 'acf/' ) );
				if ( function_exists( 'get_field' ) ) {
					$fields[ $meta_key ] = get_field( $meta_key, $this->ID, true );
				}
			} elseif ( strpos( $field->id, 'tax/' ) === 0 ) { //taxonomies
				$taxonomy = (string) substr( $field->id, strlen( 'tax/' ) );

				//check if post is associated with taxonomy
				if ( ! is_object_in_taxonomy( $this->post_type, $taxonomy ) ) {
					continue;
				}

				$post_terms = wp_get_post_terms( $this->ID, $taxonomy, array( 'fields' => 'names' ) );
				if ( is_wp_error( $post_terms ) ) {
					// array_map() below requires an array.
					$post_terms = array();
				}

				$post_terms = array_map(
					static function ( $term ) {
						return html_entity_decode( $term, ENT_QUOTES | ENT_HTML5, 'UTF-8' );
					},
					$post_terms
				);

				/**
				 * Filter the post terms.
				 * This filter runs for each taxonomy to index based on indexable fields settings.
				 *
				 * @hook findstr_post_terms
				 *
				 * @param {array} $post_terms the terms to index
				 * @param {string} $taxonomy the taxonomy name
				 * @param {WP_Post} $post the post object
				 *
				 * @returns {array} $post_terms the terms to index
				 *
				 */
				$fields[ $taxonomy ] = apply_filters( 'findstr_post_terms', $post_terms, $taxonomy, $this->post );
			} else {
				// get_object_vars() omits typed properties that are not initialised
				// yet (e.g. $weight, $to_index), which would throw when read.
				$properties = get_object_vars( $this );
				if ( array_key_exists( $field->id, $properties ) ) {
					$fields[ $field->id ] = $properties[ $field->id ];
				}
			}

			/**
			 *
			 * This filter runs for each field to index based on indexable fields settings.
			 * It allows to modify the fields to index.
			 * Note: this filter applies inside the loop, so $fields is not fully built yet;
			 * the permalink, language, featured_image, sticky and menu_order defaults
			 * are applied after the loop.
			 *
			 * @hook findstr_indexable_fields_loop_to_index
			 *
			 * @param {array} $fields the fields to index
			 * @param {object} $field the field object
			 * @param {WP_Post} $post the post object
			 *
			 * @returns {array} $fields the fields to index
			 */
			$fields = apply_filters( 'findstr_indexable_fields_loop_to_index', $fields, $field, $this->post );
		}

		$fields['permalink'] = ! empty( $fields['permalink'] ) ? $fields['permalink'] : $this->permalink;
		$fields['language']  = ! empty( $fields['language'] ) ? $fields['language'] : $this->language;

		// A document available in several languages gets one permalink per language.
		if ( is_array( $fields['language'] ) ) {
			$fields['language']   = array_values( $fields['language'] );
			$fields['permalinks'] = array();
			$base_permalink       = get_permalink( $this->post );

			foreach ( $fields['language'] as $language ) {
				$fields['permalinks'][ $language ] = apply_filters( 'wpml_permalink', $base_permalink, $language );
			}
		}

		$fields['featured_image'] = ! empty( $fields['featured_image'] ) ? $fields['featured_image'] : $this->featured_image;

		//apply filters for each field
		foreach ( $fields as $key => $value ) {
			/**
			 * Filter the indexable field.
			 * This filter runs for each field to index based on indexable fields settings.
			 *
			 * @hook findstr_indexable_field
			 *
			 * @param {mixed} $field the field value
			 * @param {string} $key the field key
			 * @param {WP_Post} $post the post object
			 *
			 * @returns {mixed} $field the field value to index
			 */
			$fields[ $key ] = apply_filters( 'findstr_indexable_field', $value, $key, $this->post );
		}

		$fields['sticky']     = $this->sticky;
		$fields['menu_order'] = ! empty( $fields['menu_order'] ) ? $fields['menu_order'] : $this->post->menu_order;

		return $fields;
	}

	/**
	 * Get language code of a post.
	 *
	 * @return mixed|string
	 */
	public function get_language_code() {
		$language_code = Helpers::get_language_code_by_post_id( $this->post->ID );

		/**
		 * Filter the document language.
		 * with this filter you can change the language code of a post.
		 *
		 * @hook findstr_document_language
		 *
		 * @param {string|array} $language_code the language code
		 * @param {WP_Post} $post the post object
		 *
		 * @returns {string|array} $language_code the language code
		 */
		return apply_filters( 'findstr_document_language', $language_code, $this->post );
	}

	/**
	 * Decode HTML entities and collapse whitespace-like characters.
	 *
	 * Each run of tabs, line breaks, spaces and the listed special space
	 * byte sequences is replaced by a single space.
	 *
	 * @param mixed $text Text to clean; non-strings are converted with strval().
	 *
	 * @return mixed The cleaned text after the `findstr_strip_invisible` filter.
	 */
	public function strip_invisibles( $text ) {
		if ( ! is_string( $text ) ) {
			$text = strval( $text );
		}

		// decode html entities
		$text = html_entity_decode( $text, ENT_QUOTES, 'UTF-8' );

		// remove multiple spaces
		$text = $this->replace_or_keep(
			"/(\t|\n|\v|\f|\r| |\xC2\x85|\xc2\xa0|\xe1\xa0\x8e|\xe2\x80[\x80-\x8D]|\xe2\x80\xa8|\xe2\x80\xa9|\xe2\x80\xaF|\xe2\x81\x9f|\xe2\x81\xa0|\xe3\x80\x80|\xef\xbb\xbf)+/",
			' ',
			$text
		);

		/**
		 * Filter the text after stripping invisible tags
		 *
		 * @hook findstr_strip_invisible
		 *
		 * @param {string} $text already stripped text
		 *
		 * @returns {string} $text
		 */
		return apply_filters( 'findstr_strip_invisible', $text );
	}

	/**
	 * Strip all tags
	 *
	 * Removes non-visible elements together with their contents, strips the
	 * remaining markup and collapses whitespace. If a pattern cannot be
	 * applied, that step is skipped and the text is kept as it was.
	 *
	 * @param string $content the post content.
	 *
	 * @return string
	 */
	public function strip_all_tags( $content ) : string {
		if ( ! is_string( $content ) ) {
			$content = '';
		}

		// remove invisible tags
		$content = $this->replace_or_keep(
			array(
				'@<style[^>]*?>.*?</style>@siu',
				'@<script[^>]*?.*?</script>@siu',
				'@<object[^>]*?.*?</object>@siu',
				'@<embed[^>]*?.*?</embed>@siu',
				'@<applet[^>]*?.*?</applet>@siu',
				'@<noscript[^>]*?.*?</noscript>@siu',
				'@<noembed[^>]*?.*?</noembed>@siu',
				'@<iframe[^>]*?.*?</iframe>@siu',
				'@<del[^>]*?.*?</del>@siu',
				'@<!--.*?-->@siu',
			),
			' ',
			$content
		);

		$content = str_replace( '<', ' <', $content ); //this helps to keep white spaces between tags
		$content = (string) wp_strip_all_tags( $content );
		$content = $this->replace_or_keep( '/<!--.*?-->/ms', ' ', $content );
		$content = $this->replace_or_keep( '/<[!a-zA-Z\/][^>].*?>/ms', ' ', $content );

		return $this->replace_or_keep( '/\s+/', ' ', $content );
	}

	/**
	 * Get the image data for a given attachment ID
	 *
	 * @param int|false $attachment_id Attachment ID.
	 *
	 * @return array Title, caption, alt and URL, plus url/width/height for each
	 *               registered size that resolves; empty when the ID is empty or
	 *               the attachment has no metadata.
	 */
	public function get_image_data( $attachment_id ): array {
		// WordPress helpers fall back to the current global post when given an
		// empty ID, so bail out instead of describing an unrelated post.
		if ( empty( $attachment_id ) ) {
			return array();
		}

		$image_meta = wp_get_attachment_metadata( $attachment_id );
		if ( ! $image_meta ) {
			return array();
		}

		$image_data = array(
			'title'   => get_the_title( $attachment_id ),
			'caption' => get_post_field( 'post_excerpt', $attachment_id ),
			'alt'     => get_post_meta( $attachment_id, '_wp_attachment_image_alt', true ),
			'url'     => wp_get_attachment_url( $attachment_id ),
		);

		if ( empty( $image_meta['sizes'] ) || ! is_array( $image_meta['sizes'] ) ) {
			return $image_data;
		}

		foreach ( array_keys( $image_meta['sizes'] ) as $size ) {
			// Get the image URL and dimensions at this size.
			$image_src = wp_get_attachment_image_src( $attachment_id, $size );

			// wp_get_attachment_image_src() returns false when no image is available.
			if ( ! is_array( $image_src ) ) {
				continue;
			}

			$image_data[ $size ] = array(
				'url'    => $image_src[0],
				'width'  => $image_src[1],
				'height' => $image_src[2],
			);
		}

		return $image_data;
	}

	/**
	 * Add taxonomies to the document
	 *
	 * Lists the taxonomies registered for the post type, leaving out
	 * `translation_priority`.
	 *
	 * @return array
	 */
	private function post_add_taxonomies(): array {
		$taxonomies = array_diff(
			get_object_taxonomies( $this->post->post_type ),
			array( 'translation_priority' )
		);

		/**
		 * Filter the document taxonomies.
		 * List of taxonomies added to document.
		 *
		 * @hook findstr_document_taxonomies
		 *
		 * @param {array} $taxonomies the taxonomies
		 * @param {WP_Post} $post the post object
		 *
		 * @returns {array} $taxonomies the taxonomies
		 */
		return (array) apply_filters( 'findstr_document_taxonomies', $taxonomies, $this->post );
	}

	/**
	 * Run preg_replace() on a string and keep the input when it fails.
	 *
	 * preg_replace() returns null on failure (for instance a `/u` pattern
	 * applied to invalid UTF-8); in that case the subject is returned as is
	 * instead of losing the text.
	 *
	 * @param string|array $pattern     Pattern or list of patterns.
	 * @param string       $replacement Replacement string.
	 * @param string       $subject     Text to process.
	 *
	 * @return string
	 */
	private function replace_or_keep( $pattern, string $replacement, string $subject ): string {
		$result = preg_replace( $pattern, $replacement, $subject );

		return is_string( $result ) ? $result : $subject;
	}
}