diff --git a/README.md b/README.md index f8d57895..fbfd1195 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,11 @@ local site. 4. To test the functionality, you should use the Mailpit running on your local in the `https://mailpit.docker.so/` url to view the emails being sent by the feature. +#### Google Indexing API automation (helfi_google_api module) + +Job listing URLs are automatically sent to the Google Indexing API. +On publish and unpublish events, a request is sent to Google to either index or deindex the URL. + ## Customizations ### Not part of global navigation diff --git a/composer.json b/composer.json index ce96ab23..878ac438 100644 --- a/composer.json +++ b/composer.json @@ -24,7 +24,8 @@ "drupal/migrate_plus": "^6.0", "drupal/raven": "^5.0", "drupal/redis": "^1.5", - "drush/drush": "^12" + "drush/drush": "^12", + "google/apiclient": "^2.17" }, "require-dev": { "donatj/mock-webserver": "^2.4", @@ -99,7 +100,10 @@ }, "patchLevel": { "drupal/core": "-p2" - } + }, + "google/apiclient-services": [ + "Indexing" + ] }, "repositories": [ { @@ -121,6 +125,7 @@ "copy-commit-message-script": "make copy-commit-message-script", "post-install-cmd": [ "@copy-commit-message-script" - ] + ], + "pre-autoload-dump": "Google\\Task\\Composer::cleanup" } } diff --git a/composer.lock b/composer.lock index 263b4baf..9d517da6 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "5289a22b79ddfac67b8b4ab51b94ab68", + "content-hash": "e1ab31840c7db8f1728b13d29a396d38", "packages": [ { "name": "asm89/stack-cors", @@ -8383,6 +8383,179 @@ }, "time": "2022-08-18T13:55:30+00:00" }, + { + "name": "google/apiclient", + "version": "v2.17.0", + "source": { + "type": "git", + "url": "https://github.com/googleapis/google-api-php-client.git", + "reference": "b1f63d72c44307ec8ef7bf18f1012de35d8944ed" + }, + "dist": { + "type":
"zip", + "url": "https://api.github.com/repos/googleapis/google-api-php-client/zipball/b1f63d72c44307ec8ef7bf18f1012de35d8944ed", + "reference": "b1f63d72c44307ec8ef7bf18f1012de35d8944ed", + "shasum": "" + }, + "require": { + "firebase/php-jwt": "^6.0", + "google/apiclient-services": "~0.350", + "google/auth": "^1.37", + "guzzlehttp/guzzle": "^7.4.5", + "guzzlehttp/psr7": "^2.6", + "monolog/monolog": "^2.9||^3.0", + "php": "^8.0", + "phpseclib/phpseclib": "^3.0.36" + }, + "require-dev": { + "cache/filesystem-adapter": "^1.1", + "composer/composer": "^1.10.23", + "phpcompatibility/php-compatibility": "^9.2", + "phpspec/prophecy-phpunit": "^2.1", + "phpunit/phpunit": "^9.6", + "squizlabs/php_codesniffer": "^3.8", + "symfony/css-selector": "~2.1", + "symfony/dom-crawler": "~2.1" + }, + "suggest": { + "cache/filesystem-adapter": "For caching certs and tokens (using Google\\Client::setCache)" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "2.x-dev" + } + }, + "autoload": { + "files": [ + "src/aliases.php" + ], + "psr-4": { + "Google\\": "src/" + }, + "classmap": [ + "src/aliases.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "description": "Client library for Google APIs", + "homepage": "http://developers.google.com/api-client-library/php", + "keywords": [ + "google" + ], + "support": { + "issues": "https://github.com/googleapis/google-api-php-client/issues", + "source": "https://github.com/googleapis/google-api-php-client/tree/v2.17.0" + }, + "time": "2024-07-10T14:57:54+00:00" + }, + { + "name": "google/apiclient-services", + "version": "v0.370.0", + "source": { + "type": "git", + "url": "https://github.com/googleapis/google-api-php-client-services.git", + "reference": "25ad8515701dd832313d0f5f0a828670d60e541a" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/googleapis/google-api-php-client-services/zipball/25ad8515701dd832313d0f5f0a828670d60e541a", + 
"reference": "25ad8515701dd832313d0f5f0a828670d60e541a", + "shasum": "" + }, + "require": { + "php": "^8.0" + }, + "require-dev": { + "phpunit/phpunit": "^9.6" + }, + "type": "library", + "autoload": { + "files": [ + "autoload.php" + ], + "psr-4": { + "Google\\Service\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "description": "Client library for Google APIs", + "homepage": "http://developers.google.com/api-client-library/php", + "keywords": [ + "google" + ], + "support": { + "issues": "https://github.com/googleapis/google-api-php-client-services/issues", + "source": "https://github.com/googleapis/google-api-php-client-services/tree/v0.370.0" + }, + "time": "2024-08-26T01:04:18+00:00" + }, + { + "name": "google/auth", + "version": "v1.42.0", + "source": { + "type": "git", + "url": "https://github.com/googleapis/google-auth-library-php.git", + "reference": "0c25599a91530b5847f129b271c536f75a7563f5" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/googleapis/google-auth-library-php/zipball/0c25599a91530b5847f129b271c536f75a7563f5", + "reference": "0c25599a91530b5847f129b271c536f75a7563f5", + "shasum": "" + }, + "require": { + "firebase/php-jwt": "^6.0", + "guzzlehttp/guzzle": "^7.4.5", + "guzzlehttp/psr7": "^2.4.5", + "php": "^8.0", + "psr/cache": "^2.0||^3.0", + "psr/http-message": "^1.1||^2.0" + }, + "require-dev": { + "guzzlehttp/promises": "^2.0", + "kelvinmo/simplejwt": "0.7.1", + "phpseclib/phpseclib": "^3.0.35", + "phpspec/prophecy-phpunit": "^2.1", + "phpunit/phpunit": "^9.6", + "sebastian/comparator": ">=1.2.3", + "squizlabs/php_codesniffer": "^3.5", + "symfony/process": "^6.0||^7.0", + "webmozart/assert": "^1.11" + }, + "suggest": { + "phpseclib/phpseclib": "May be used in place of OpenSSL for signing strings or for token management. Please require version ^2." 
+ }, + "type": "library", + "autoload": { + "psr-4": { + "Google\\Auth\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "description": "Google Auth Library for PHP", + "homepage": "http://github.com/google/google-auth-library-php", + "keywords": [ + "Authentication", + "google", + "oauth2" + ], + "support": { + "docs": "https://googleapis.github.io/google-auth-library-php/main/", + "issues": "https://github.com/googleapis/google-auth-library-php/issues", + "source": "https://github.com/googleapis/google-auth-library-php/tree/v1.42.0" + }, + "time": "2024-08-26T18:33:48+00:00" + }, { "name": "grasmash/expander", "version": "3.0.0", @@ -10412,6 +10585,116 @@ }, "time": "2021-09-22T16:57:06+00:00" }, + { + "name": "phpseclib/phpseclib", + "version": "3.0.41", + "source": { + "type": "git", + "url": "https://github.com/phpseclib/phpseclib.git", + "reference": "621c73f7dcb310b61de34d1da4c4204e8ace6ceb" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpseclib/phpseclib/zipball/621c73f7dcb310b61de34d1da4c4204e8ace6ceb", + "reference": "621c73f7dcb310b61de34d1da4c4204e8ace6ceb", + "shasum": "" + }, + "require": { + "paragonie/constant_time_encoding": "^1|^2|^3", + "paragonie/random_compat": "^1.4|^2.0|^9.99.99", + "php": ">=5.6.1" + }, + "require-dev": { + "phpunit/phpunit": "*" + }, + "suggest": { + "ext-dom": "Install the DOM extension to load XML formatted public keys.", + "ext-gmp": "Install the GMP (GNU Multiple Precision) extension in order to speed up arbitrary precision integer arithmetic operations.", + "ext-libsodium": "SSH2/SFTP can make use of some algorithms provided by the libsodium-php extension.", + "ext-mcrypt": "Install the Mcrypt extension in order to speed up a few other cryptographic operations.", + "ext-openssl": "Install the OpenSSL extension in order to speed up a wide variety of cryptographic operations." 
+ }, + "type": "library", + "autoload": { + "files": [ + "phpseclib/bootstrap.php" + ], + "psr-4": { + "phpseclib3\\": "phpseclib/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jim Wigginton", + "email": "terrafrost@php.net", + "role": "Lead Developer" + }, + { + "name": "Patrick Monnerat", + "email": "pm@datasphere.ch", + "role": "Developer" + }, + { + "name": "Andreas Fischer", + "email": "bantu@phpbb.com", + "role": "Developer" + }, + { + "name": "Hans-Jürgen Petrich", + "email": "petrich@tronic-media.com", + "role": "Developer" + }, + { + "name": "Graham Campbell", + "email": "graham@alt-three.com", + "role": "Developer" + } + ], + "description": "PHP Secure Communications Library - Pure-PHP implementations of RSA, AES, SSH2, SFTP, X.509 etc.", + "homepage": "http://phpseclib.sourceforge.net", + "keywords": [ + "BigInteger", + "aes", + "asn.1", + "asn1", + "blowfish", + "crypto", + "cryptography", + "encryption", + "rsa", + "security", + "sftp", + "signature", + "signing", + "ssh", + "twofish", + "x.509", + "x509" + ], + "support": { + "issues": "https://github.com/phpseclib/phpseclib/issues", + "source": "https://github.com/phpseclib/phpseclib/tree/3.0.41" + }, + "funding": [ + { + "url": "https://github.com/terrafrost", + "type": "github" + }, + { + "url": "https://www.patreon.com/phpseclib", + "type": "patreon" + }, + { + "url": "https://tidelift.com/funding/github/packagist/phpseclib/phpseclib", + "type": "tidelift" + } + ], + "time": "2024-08-12T00:13:54+00:00" + }, { "name": "phrity/net-uri", "version": "1.3.0", diff --git a/conf/cmi/core.extension.yml b/conf/cmi/core.extension.yml index c1ad84e9..13281915 100644 --- a/conf/cmi/core.extension.yml +++ b/conf/cmi/core.extension.yml @@ -46,6 +46,7 @@ module: helfi_ckeditor: 0 helfi_etusivu_entities: 0 helfi_eu_cookie_compliance: 0 + helfi_google_api: 0 helfi_hakuvahti: 0 helfi_image_styles: 0 helfi_media: 0 diff --git 
a/conf/cmi/helfi_google_api.settings.yml b/conf/cmi/helfi_google_api.settings.yml new file mode 100644 index 00000000..f070b2dd --- /dev/null +++ b/conf/cmi/helfi_google_api.settings.yml @@ -0,0 +1,2 @@ +dry_run: true +indexing_api_key: '' diff --git a/public/modules/custom/helfi_google_api/README.md b/public/modules/custom/helfi_google_api/README.md new file mode 100644 index 00000000..7a7f11ae --- /dev/null +++ b/public/modules/custom/helfi_google_api/README.md @@ -0,0 +1,47 @@ +# Google Indexing API + +The module handles job listing indexing and deindexing using the Indexing API provided by Google. The module requires an API key to work. +The API key is only set in the production environment, since Google doesn't provide a testing environment. + +* `drupal/scheduler` module events are used to trigger the indexing requests. +* `google/apiclient` library is used to handle the communication with the Google API. + +API documentation: https://developers.google.com/search/apis/indexing-api/v3/quickstart +Google library: https://github.com/googleapis/google-api-php-client +API key: Check keyvault + +## Development / local testing + +### API key +You must set the API key in local.settings.php in order to use the module. Without it, the feature won't do anything at all. +The API key is set in local.settings.php like this: `$config['helfi_google_api.settings']['indexing_api_key'] = '{}'` +Replace the empty JSON object with the real key from Keyvault. Requests are only sent when the `dry_run` setting is also set to `FALSE`. + +### Indexing requests + +The indexing API only accepts urls pointing to the hel.fi domain. Therefore the features can only be tested properly +with production urls. You can send production urls to the indexing API from a local environment if you have +the auth key set properly. + +Sending a local url to the Google indexing API results in a 4xx error. + + +## Requests + +### Indexing + +* Indexing request is tied to scheduler event +* A temporary redirect is created for the entity, and the redirect url is the one sent for indexing.
+ +### Deindexing + +* Deindexing request is tied to scheduler event +* The temporary redirect is removed when deindexing is done. + +### Status check + +* You can send a status request to the Google API to find out if a URL has been indexed or deleted through the API + + + + diff --git a/public/modules/custom/helfi_google_api/helfi_google_api.info.yml b/public/modules/custom/helfi_google_api/helfi_google_api.info.yml new file mode 100644 index 00000000..32fe007c --- /dev/null +++ b/public/modules/custom/helfi_google_api/helfi_google_api.info.yml @@ -0,0 +1,9 @@ +name: 'Helfi Google api' +type: module +description: 'Google indexing api integration' +package: HELfi +core_version_requirement: ^10 || ^11 +dependencies: + - 'publication_date:publication_date' + - redirect:redirect + - scheduler:scheduler diff --git a/public/modules/custom/helfi_google_api/helfi_google_api.services.yml b/public/modules/custom/helfi_google_api/helfi_google_api.services.yml new file mode 100644 index 00000000..9f389276 --- /dev/null +++ b/public/modules/custom/helfi_google_api/helfi_google_api.services.yml @@ -0,0 +1,34 @@ +services: + _defaults: + autoconfigure: true + autowire: true + + logger.channel.helfi_google_api: + parent: logger.channel_base + arguments: ['helfi_google_api'] + + Drupal\helfi_google_api\GoogleApi: + class: Drupal\helfi_google_api\GoogleApi + arguments: + - '@config.factory' + - '@helfi_google_api.google_service' + + Drupal\helfi_google_api\JobIndexingService: + class: Drupal\helfi_google_api\JobIndexingService + autowire: true + arguments: + $logger: '@logger.channel.helfi_google_api' + + Drupal\helfi_google_api\EventSubscriber\JobPublishStateSubscriber: ~ + + helfi_google_api.google_service: + public: false + class: \Google\Service\Indexing + factory: [ '@helfi_google_api.google_service_factory', 'create'] + arguments: + - '@config.factory' + + helfi_google_api.google_service_factory: + class: Drupal\helfi_google_api\GoogleServiceFactory + arguments: + -
'@config.factory'
diff --git a/public/modules/custom/helfi_google_api/src/Drush/Commands/GoogleIndexingApiCommands.php b/public/modules/custom/helfi_google_api/src/Drush/Commands/GoogleIndexingApiCommands.php
new file mode 100644
index 00000000..6edb6576
--- /dev/null
+++ b/public/modules/custom/helfi_google_api/src/Drush/Commands/GoogleIndexingApiCommands.php
@@ -0,0 +1,202 @@
+io()->writeln('Entity not found or not instance of JobListing');
+      return DrushCommands::EXIT_FAILURE;
+    }
+
+    if (!$entity->hasTranslation($langcode)) {
+      $this->io()->writeln('Translation does not exist.');
+      return DrushCommands::EXIT_FAILURE;
+    }
+    $entity = $entity->getTranslation($langcode);
+
+    try {
+      $response = $this->jobIndexingService->indexEntity($entity);
+    }
+    catch (\Exception $e) {
+      $this->io()->error($e->getMessage());
+      return DrushCommands::EXIT_FAILURE;
+    }
+
+    return $this->handleResponse($response);
+  }
+
+  /**
+   * Deindex a single job listing translation by node id.
+   *
+   * Loads the node, switches to the requested translation and hands it to
+   * the job indexing service for deindexing.
+   *
+   * @param int $entity_id
+   *   The entity id.
+   * @param string $langcode
+   *   The entity langcode.
+   *
+   * @return int
+   *   The exit code.
+   */
+  #[Command(name: 'helfi:google-single-entity-deindex')]
+  public function deindexSingleItem(
+    int $entity_id,
+    string $langcode = 'fi',
+  ) : int {
+    $node = Node::load($entity_id);
+
+    // Guard: only job listings can be deindexed.
+    if (!($node instanceof JobListing)) {
+      $this->io()->writeln('Entity not found or not instance of JobListing');
+      return DrushCommands::EXIT_FAILURE;
+    }
+
+    // Guard: the requested translation has to exist.
+    if (!$node->hasTranslation($langcode)) {
+      $this->io()->writeln('Translation does not exist.');
+      return DrushCommands::EXIT_FAILURE;
+    }
+
+    $translation = $node->getTranslation($langcode);
+
+    try {
+      $result = $this->jobIndexingService->deindexEntity($translation);
+    }
+    catch (\Exception $exception) {
+      $this->io()->error($exception->getMessage());
+      return DrushCommands::EXIT_FAILURE;
+    }
+
+    return $this->handleResponse($result);
+  }
+
+  /**
+   * Request url indexing status from Google api.
+   *
+   * @param string $url
+   *   The url to check.
+   *
+   * @return int
+   *   The exit code.
+   */
+  #[Command(name: 'helfi:google-url-index-status')]
+  public function checkUrlIndexStatus(string $url): int {
+    try {
+      $response = $this->jobIndexingService->checkItemIndexStatus($url);
+    }
+    catch (\Exception $e) {
+      $this->io()->writeln($e->getMessage());
+      return DrushCommands::EXIT_FAILURE;
+    }
+
+    $this->io()->writeln($response);
+    return DrushCommands::EXIT_SUCCESS;
+  }
+
+  /**
+   * Check entity indexing status.
+   *
+   * @param int $entity_id
+   *   The entity id.
+   * @param string $langcode
+   *   The language code.
+   *
+   * @return int
+   *   The exit code.
+   */
+  #[Command(name: 'helfi:google-entity-index-status')]
+  public function checkEntityIndexStatus(int $entity_id, string $langcode = 'fi'): int {
+    $entity = Node::load($entity_id);
+
+    if (!$entity instanceof JobListing) {
+      $this->io()->writeln('Entity not found or not instance of JobListing');
+      return DrushCommands::EXIT_FAILURE;
+    }
+
+    if (!$entity->hasTranslation($langcode)) {
+      $this->io()->writeln('Translation does not exist.');
+      return DrushCommands::EXIT_FAILURE;
+    }
+    $entity = $entity->getTranslation($langcode);
+
+    try {
+      $response = $this->jobIndexingService->checkEntityIndexStatus($entity);
+    }
+    catch (\Exception $e) {
+      $this->io()->writeln($e->getMessage());
+      return DrushCommands::EXIT_FAILURE;
+    }
+
+    $this->io()->writeln($response);
+    return DrushCommands::EXIT_SUCCESS;
+  }
+
+  /**
+   * Print the outcome of an index or deindex request.
+   *
+   * @param \Drupal\helfi_google_api\Response $response
+   *   The response object.
+   *
+   * @return int
+   *   Exit code: failure when the api returned errors, success otherwise.
+   */
+  private function handleResponse(Response $response): int {
+    if ($response->getErrors()) {
+      $this->io()->writeln('Request successful. Errors returned: ' . json_encode($response->getErrors()));
+      return DrushCommands::EXIT_FAILURE_WITH_CLARITY;
+    }
+
+    if ($response->isDryRun()) {
+      $urls = $response->getUrls();
+      $this->io()->writeln('The api request would have sent following data: ' . json_encode($urls));
+      return DrushCommands::EXIT_SUCCESS;
+    }
+
+    // Shared by the index and the deindex command, so the wording is neutral.
+    $this->io()->writeln('Request sent successfully.');
+    return DrushCommands::EXIT_SUCCESS;
+  }
+
+}
diff --git a/public/modules/custom/helfi_google_api/src/EventSubscriber/JobPublishStateSubscriber.php b/public/modules/custom/helfi_google_api/src/EventSubscriber/JobPublishStateSubscriber.php
new file mode 100644
index 00000000..3710c84a
--- /dev/null
+++ b/public/modules/custom/helfi_google_api/src/EventSubscriber/JobPublishStateSubscriber.php
@@ -0,0 +1,80 @@
+ 'sendIndexingRequest',
+      // Immediate publishing uses the same handler as scheduled publishing.
+      SchedulerEvents::PUBLISH_IMMEDIATELY => 'sendIndexingRequest',
+      SchedulerEvents::UNPUBLISH => 'sendDeindexingRequest',
+    ];
+  }
+
+  /**
+   * Send indexing request to google.
+   *
+   * Does nothing for nodes that are not job listings.
+   *
+   * @param \Drupal\scheduler\SchedulerEvent $event
+   *   The scheduler event.
+   */
+  public function sendIndexingRequest(SchedulerEvent $event): void {
+    $entity = $event->getNode();
+    if (!$entity instanceof JobListing) {
+      return;
+    }
+
+    try {
+      $this->jobIndexingService->indexEntity($entity);
+    }
+    catch (\Exception) {
+      // Best effort: an indexing failure must not interrupt publishing.
+      // Failed api requests are logged by the indexing service.
+    }
+  }
+
+  /**
+   * Send deindexing request to google.
+   *
+   * Does nothing for nodes that are not job listings.
+   *
+   * @param \Drupal\scheduler\SchedulerEvent $event
+   *   The scheduler event.
+   */
+  public function sendDeindexingRequest(SchedulerEvent $event): void {
+    $entity = $event->getNode();
+    if (!$entity instanceof JobListing) {
+      return;
+    }
+
+    try {
+      $this->jobIndexingService->deindexEntity($entity);
+    }
+    catch (\Exception) {
+      // Best effort: a deindexing failure must not interrupt unpublishing.
+      // Failed api requests are logged by the indexing service.
+    }
+  }
+
+}
diff --git a/public/modules/custom/helfi_google_api/src/GoogleApi.php b/public/modules/custom/helfi_google_api/src/GoogleApi.php
new file mode 100644
index 00000000..bbcd2534
--- /dev/null
+++ b/public/modules/custom/helfi_google_api/src/GoogleApi.php
@@ -0,0 +1,142 @@
+configFactory->get('helfi_google_api.settings');
+    $key = $config->get('indexing_api_key') ?: '';
+    // Fall back to a dry run only when the setting is missing. The elvis
+    // operator would also replace an explicit FALSE with TRUE, which made it
+    // impossible to ever disable the dry run.
+    $dryRun = $config->get('dry_run') ?? TRUE;
+
+    return !$key || $dryRun;
+  }
+
+  /**
+   * Send indexing or deindexing request for urls.
+   *
+   * In dry run mode nothing is sent and the returned response is flagged
+   * as a dry run.
+   *
+   * @param array $urls
+   *   Array of urls to index or deindex.
+   * @param bool $update
+   *   TRUE to index the urls, FALSE for deindexing.
+   *
+   * @return Response
+   *   Object which holds the handled urls and request errors.
+   */
+  public function indexBatch(array $urls, bool $update): Response {
+    if ($this->isDryRun()) {
+      return new Response($urls, dryRun: TRUE);
+    }
+
+    $batch = $this->indexingService->createBatch();
+    $operation = $update ? self::UPDATE : self::DELETE;
+
+    foreach ($urls as $url) {
+      $content = [
+        'type' => $operation,
+        'url' => $url,
+      ];
+
+      // NOTE(review): the body is JSON while the header says multipart/mixed;
+      // Google's batch examples use application/json for the nested request.
+      // Confirm this header is intended.
+      $request = new Request(
+        method: 'POST',
+        uri: self::PUBLISH_ENDPOINT,
+        headers: ['Content-Type' => 'multipart/mixed'],
+        body: json_encode($content)
+      );
+
+      $batch->add($request);
+    }
+
+    $responses = $batch->execute();
+
+    // Failed sub-requests come back as exception objects in the result list.
+    $errors = [];
+    foreach ($responses as $key => $response) {
+      if ($response instanceof Exception) {
+        $errors[] = "$key: {$response->getMessage()}";
+      }
+    }
+
+    return new Response($urls, $errors);
+  }
+
+  /**
+   * Request url indexing status.
+   *
+   * Returns the dates of last update and delete requests.
+   * For debugging purposes only, since it spends the quota.
+   *
+   * @param string $url
+   *   The url which indexing status you want to request.
+   *
+   * @return string
+   *   The response as a string.
+   */
+  public function checkIndexingStatus(string $url): string {
+    // Decide about the dry run first, so a dry run never touches the shared
+    // api client.
+    if ($this->isDryRun()) {
+      return "Dry running index status query with url: $url";
+    }
+
+    $client = $this->indexingService->getClient();
+    // The status lookup is sent as a single request, not as part of a batch.
+    $client->setUseBatch(FALSE);
+
+    // authorize() returns the http client used for the actual request.
+    $httpClient = $client->authorize();
+    $statusUrl = self::METADATA_ENDPOINT . '?url=' . urlencode($url);
+
+    $result = $httpClient->request('GET', $statusUrl);
+    return $result->getBody()->getContents();
+  }
+
+}
diff --git a/public/modules/custom/helfi_google_api/src/GoogleServiceFactory.php b/public/modules/custom/helfi_google_api/src/GoogleServiceFactory.php
new file mode 100644
index 00000000..fd9e00ef
--- /dev/null
+++ b/public/modules/custom/helfi_google_api/src/GoogleServiceFactory.php
@@ -0,0 +1,47 @@
+get('helfi_google_api.settings');
+    $key = $config->get('indexing_api_key') ?: '';
+
+    $client = new GoogleClient();
+    $client->setApplicationName('Helfi_Rekry');
+    $client->addScope(self::SCOPES);
+    $client->setUseBatch(TRUE);
+
+    if ($key) {
+      $client->setAuthConfig(json_decode($key, TRUE));
+      $client->authorize();
+    }
+
+    return new Indexing($client);
+  }
+
+}
diff --git a/public/modules/custom/helfi_google_api/src/JobIndexingService.php b/public/modules/custom/helfi_google_api/src/JobIndexingService.php
new file mode 100644
index 00000000..99bf0bf6
--- /dev/null
+++ b/public/modules/custom/helfi_google_api/src/JobIndexingService.php
@@ -0,0 +1,365 @@
+googleApi->indexBatch($urls, $update);
+      $this->handleDebugMessage($response);
+      return $response;
+    }
+    catch (GuzzleException $e) {
+      $message = "Request failed with code {$e->getCode()}: {$e->getMessage()}";
+      $this->logger->error($message);
+      // Chain the original exception so its type and trace are not lost.
+      throw new \Exception($message, previous: $e);
+    }
+  }
+
+  /**
+   * Send indexing request to google.
+   *
+   * @param \Drupal\helfi_rekry_content\Entity\JobListing $entity
+   *   Entity which indexing should be requested.
+   *
+   * @return \Drupal\helfi_google_api\Response
+   *   The response object.
+   */
+  public function indexEntity(JobListing $entity): Response {
+    $langcode = $entity->language()->getId();
+
+    $hasRedirect = $this->hasTemporaryRedirect($entity, $langcode);
+    if ($hasRedirect) {
+      throw new \Exception('Already indexed.');
+    }
+
+    // Create temporary redirect for the entity.
+    $redirectArray = $this->createTemporaryRedirectUrl($entity, $langcode);
+    $indexing_url = $redirectArray['indexing_url'];
+    $redirect = $redirectArray['redirect'];
+
+    try {
+      $result = $this->handleIndexingRequest([$indexing_url], TRUE);
+    }
+    catch (\Exception $e) {
+      // If the request fails, remove the redirect.
+      $redirect->delete();
+      throw $e;
+    }
+
+    if ($result->getErrors()) {
+      $total = count($result->getUrls());
+      $errorCount = count($result->getErrors());
+      $errorsString = json_encode($result->getErrors());
+      $this->logger->error("Unable to index $errorCount/$total items: $errorsString");
+    }
+
+    return $result;
+  }
+
+  /**
+   * Handle entity deindexing request.
+   *
+   * The url sent for deindexing is the temporary redirect of the entity, not
+   * the entity's own url.
+   *
+   * @param \Drupal\helfi_rekry_content\Entity\JobListing $entity
+   *   Entity to request deindexing for.
+   *
+   * @return \Drupal\helfi_google_api\Response
+   *   The response object.
+   *
+   * @throws \Exception
+   *   When the entity has no temporary redirect or the request fails.
+   */
+  public function deindexEntity(JobListing $entity): Response {
+    $language = $entity->language();
+    $redirect = $this->getExistingTemporaryRedirect($entity, $language->getId());
+    if (!$redirect) {
+      $message = "Entity of id {$entity->id()} doesn't have the required temporary redirect.";
+      $this->logger->error($message);
+      throw new \Exception($message);
+    }
+
+    // An empty route name cannot be resolved; the front page route gives the
+    // language aware base url.
+    $base_url = $this->urlGenerator->generateFromRoute(
+      '<front>',
+      [],
+      [
+        'absolute' => TRUE,
+        'language' => $language,
+      ]
+    );
+
+    $url_to_deindex = $base_url . $redirect->getSourceUrl();
+    // Request failures are logged by handleIndexingRequest() and propagate.
+    $result = $this->handleIndexingRequest([$url_to_deindex], FALSE);
+
+    // No need to delete redirects on debug run.
+    if (!$result->isDryRun()) {
+      $redirect->delete();
+    }
+
+    if ($result->getErrors()) {
+      $total = count($result->getUrls());
+      $errorCount = count($result->getErrors());
+      $errorsString = json_encode($result->getErrors());
+      $this->logger->error("Unable to deindex $errorCount/$total items: $errorsString");
+    }
+
+    return $result;
+  }
+
+  /**
+   * Check url indexing status.
+   *
+   * Status check request uses the api quota.
+   *
+   * @param string $url
+   *   An url to check.
+   *
+   * @return string
+   *   Status as a string.
+   */
+  public function checkItemIndexStatus(string $url): string {
+    return $this->googleApi->checkIndexingStatus($url);
+  }
+
+  /**
+   * If entity seems to be indexed, send a status query.
+   *
+   * Status check request uses the api quota.
+   *
+   * @param \Drupal\helfi_rekry_content\Entity\JobListing $entity
+   *   Entity to check.
+   *
+   * @return string
+   *   The url index status as a string.
+   *
+   * @throws \Exception
+   *   When the entity has no temporary redirect or the request fails.
+   */
+  public function checkEntityIndexStatus(JobListing $entity): string {
+    $language = $entity->language();
+
+    // Get the indexed redirect.
+    $redirect = $this->findTemporaryRedirect($entity, $language->getId());
+    if (!$redirect) {
+      throw new \Exception('Entity doesn\'t have temporary redirect.');
+    }
+
+    // An empty route name cannot be resolved; the front page route gives the
+    // language aware base url.
+    $baseUrl = $this->urlGenerator->generateFromRoute('<front>', [], ['absolute' => TRUE, 'language' => $language]);
+    $url_to_check = $baseUrl . $redirect->getSourceUrl();
+    try {
+      return $this->googleApi->checkIndexingStatus($url_to_check);
+    }
+    catch (GuzzleException $e) {
+      $this->logger->error("Request failed with code {$e->getCode()}: {$e->getMessage()}");
+      // Chain the original exception so its type and trace are not lost.
+      throw new \Exception($e->getMessage(), previous: $e);
+    }
+    catch (\Exception $e) {
+      $this->logger->error('Error while checking indexing status: ' . $e->getMessage());
+      throw $e;
+    }
+
+  }
+
+  /**
+   * Does the entity have a temporary redirect.
+   *
+   * Temporary redirect is created for all entities before requesting indexing.
+   * Once delete-request is sent, the url cannot be indexed again using the api.
+   * Hence we should not use the original url.
+   *
+   * @param \Drupal\helfi_rekry_content\Entity\JobListing $entity
+   *   The entity to check.
+   * @param string $langcode
+   *   The language code.
+   *
+   * @return bool
+   *   Has temporary redirect.
+   */
+  public function hasTemporaryRedirect(JobListing $entity, string $langcode): bool {
+    // In case of dry run, we can always say FALSE.
+    if ($this->googleApi->isDryRun()) {
+      return FALSE;
+    }
+
+    return $this->findTemporaryRedirect($entity, $langcode) !== NULL;
+  }
+
+  /**
+   * Find the saved temporary redirect of an entity translation.
+   *
+   * Looks through the 301 redirects that point to the node in the given
+   * language and returns the first one whose source contains the entity
+   * alias followed by a dash.
+   *
+   * @param \Drupal\helfi_rekry_content\Entity\JobListing $entity
+   *   The entity whose redirect is looked up.
+   * @param string $langcode
+   *   The language code.
+   *
+   * @return \Drupal\redirect\Entity\Redirect|null
+   *   The matching redirect or NULL when there is none.
+   */
+  private function findTemporaryRedirect(JobListing $entity, string $langcode): Redirect|null {
+    $job_alias = $this->getEntityAlias($entity, $langcode);
+
+    $redirectIds = $this->entityTypeManager->getStorage('redirect')
+      ->getQuery()
+      ->condition('redirect_redirect__uri', "internal:/node/{$entity->id()}")
+      ->condition('status_code', 301)
+      ->condition('language', $langcode)
+      ->accessCheck(FALSE)
+      ->execute();
+
+    foreach (Redirect::loadMultiple($redirectIds) as $redirect) {
+      if (str_contains($redirect->getSourceUrl(), "$job_alias-")) {
+        return $redirect;
+      }
+    }
+
+    return NULL;
+  }
+
+  /**
+   * Create a redirect for the indexing request.
+   *
+   * Temporary redirect is created for all entities before requesting indexing.
+   * Once delete request is sent, the url cannot be indexed again using the api.
+   * Hence we should not use the original url.
+   *
+   * @param \Drupal\helfi_rekry_content\Entity\JobListing $entity
+   *   The entity to index.
+   * @param string $langcode
+   *   The language code.
+   *
+   * @return array
+   *   Array with the keys 'indexing_url' (the absolute url) and 'redirect'
+   *   (the redirect object).
+   */
+  public function createTemporaryRedirectUrl(JobListing $entity, string $langcode): array {
+    // The same timestamp suffix is used for the redirect and the indexed url.
+    $timestamp = time();
+    $aliasWithSuffix = "{$this->getEntityAlias($entity, $langcode)}-$timestamp";
+    $absoluteUrl = $entity->toUrl()->setAbsolute()->toString();
+
+    $redirect = Redirect::create([
+      'redirect_source' => ltrim($aliasWithSuffix, '/'),
+      'redirect_redirect' => "internal:/node/{$entity->id()}",
+      'language' => $langcode,
+      'status_code' => 301,
+    ]);
+
+    // A dry run must not leave redirects behind, so persist only real runs.
+    $isRealRun = !$this->googleApi->isDryRun();
+    if ($isRealRun) {
+      $redirect->save();
+    }
+
+    return [
+      'indexing_url' => "$absoluteUrl-$timestamp",
+      'redirect' => $redirect,
+    ];
+  }
+
+  /**
+   * Look up the temporary redirect created for indexing.
+   *
+   * During a dry run redirects are never saved, so when none is found an
+   * unsaved redirect is built and returned instead.
+   *
+   * @param \Drupal\helfi_rekry_content\Entity\JobListing $entity
+   *   The entity to index.
+   * @param string $langcode
+   *   The language code.
+   *
+   * @return \Drupal\redirect\Entity\Redirect|null
+   *   The redirect object, or NULL when no matching redirect exists.
+   */
+  public function getExistingTemporaryRedirect(JobListing $entity, string $langcode): Redirect|null {
+    $alias = $this->getEntityAlias($entity, $langcode);
+
+    $ids = $this->entityTypeManager
+      ->getStorage('redirect')
+      ->getQuery()
+      ->condition('redirect_redirect__uri', "internal:/node/{$entity->id()}")
+      ->condition('status_code', 301)
+      ->condition('language', $langcode)
+      ->accessCheck(FALSE)
+      ->execute();
+    $candidates = Redirect::loadMultiple($ids);
+
+    if (!$candidates) {
+      // Nothing stored: only a dry run gets a stand-in redirect.
+      return $this->googleApi->isDryRun()
+        ? $this->createTemporaryRedirectUrl($entity, $langcode)['redirect']
+        : NULL;
+    }
+
+    // The temporary redirect source is the entity alias plus a dash suffix.
+    foreach ($candidates as $candidate) {
+      if (str_contains($candidate->getSourceUrl(), "$alias-")) {
+        return $candidate;
+      }
+    }
+
+    return NULL;
+  }
+
+  /**
+   * Resolve the path alias of an entity in the given language.
+   *
+   * @param \Drupal\helfi_rekry_content\Entity\JobListing $entity
+   *   The entity.
+   * @param string $langcode
+   *   The language code.
+   *
+   * @return string
+   *   Alias for the entity.
+   */
+  private function getEntityAlias(JobListing $entity, string $langcode): string {
+    $internalPath = "/node/{$entity->id()}";
+    return $this->aliasManager->getAliasByPath($internalPath, $langcode);
+  }
+
+  /**
+   * Log the urls of a dry run; real runs are not logged here.
+   *
+   * @param Response $response
+   *   The response.
+   */
+  private function handleDebugMessage(Response $response): void {
+    if (!$response->isDryRun()) {
+      return;
+    }
+
+    $encodedUrls = json_encode($response->getUrls());
+    $this->logger->debug('Request would have sent following urls to api: ' . $encodedUrls);
+  }
+
+}
diff --git a/public/modules/custom/helfi_google_api/src/Response.php b/public/modules/custom/helfi_google_api/src/Response.php
new file mode 100644
index 00000000..648c595d
--- /dev/null
+++ b/public/modules/custom/helfi_google_api/src/Response.php
@@ -0,0 +1,62 @@
+urls;
+  }
+
+  /**
+   * Get the errors.
+   *
+   * @return array
+   *   Errors for each url.
+   */
+  public function getErrors(): array {
+    return $this->errors;
+  }
+
+  /**
+   * The request was not actually sent.
+   *
+   * Either the api key is not set or
+   * dry_run is enabled in the settings config.
+   *
+   * @return bool
+   *   This is a debug run.
+ */ + public function isDryRun(): bool { + return $this->dryRun; + } + +} diff --git a/public/modules/custom/helfi_google_api/tests/src/Kernel/IndexingTest.php b/public/modules/custom/helfi_google_api/tests/src/Kernel/IndexingTest.php new file mode 100644 index 00000000..e69de29b diff --git a/public/modules/custom/helfi_rekry_content/src/Entity/JobListing.php b/public/modules/custom/helfi_rekry_content/src/Entity/JobListing.php index f4e5efd1..f466a7bb 100644 --- a/public/modules/custom/helfi_rekry_content/src/Entity/JobListing.php +++ b/public/modules/custom/helfi_rekry_content/src/Entity/JobListing.php @@ -7,7 +7,7 @@ use Drupal\node\Entity\Node; /** - * Bundle class for hel_map paragraph. + * Bundle class for job listing nodes. */ class JobListing extends Node { diff --git a/public/sites/default/production.settings.php b/public/sites/default/production.settings.php index 7a1ea057..ef524b4f 100644 --- a/public/sites/default/production.settings.php +++ b/public/sites/default/production.settings.php @@ -2,3 +2,7 @@ $config['openid_connect.client.tunnistamo']['settings']['is_production'] = TRUE; $config['helfi_proxy.settings']['tunnistamo_return_url'] = '/fi/avoimet-tyopaikat/openid-connect/tunnistamo'; +$config['helfi_google_api.settings']['indexing_api_key'] = getenv('GOOGLE_INDEXING_API_KEY'); + +// Uncomment the line below when it's time to enable the feature on production. +// $config['helfi_google_api.settings']['dry_run'] = FALSE;