diff --git a/CHANGELOG.md b/CHANGELOG.md
index dbbb02b..e3420ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,10 @@
 All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/).
 In order to read more about upgrading and BC breaks have a look at the [UPGRADE Document](UPGRADE.md).
 
+## 3.4.1 (28. April 2022)
+
++ []() Use a transaction to sync the index table when the crawler finishes the process.
+
 ## 3.4.0 (5. April 2022)
 
 + Updated deps to latest version of `smalot/pdfparser` parser which now requires at least version php 7.1. Therefore raise php version requirements for luya module crawler to version 7.1 to (which is outdated for a long time already: https://www.php.net/supported-versions.php)
diff --git a/src/crawler/ResultHandler.php b/src/crawler/ResultHandler.php
index b3538c5..52e00af 100644
--- a/src/crawler/ResultHandler.php
+++ b/src/crawler/ResultHandler.php
@@ -9,6 +9,7 @@
 use Nadar\Crawler\Crawler;
 use Nadar\Crawler\Interfaces\HandlerInterface;
 use Nadar\Crawler\Result;
+use Yii;
 use yii\helpers\Console;
 
 /**
@@ -85,48 +86,58 @@ public function afterRun(Result $result)
      */
     public function onEnd(Crawler $crawler)
     {
-        $keepIndexIds = [];
-
-        $total = (int) Builderindex::find()->count();
-        $i = 0;
-        if ($this->controller->verbose) {
-            Console::startProgress(0, $total, 'synchronize index: ', false);
-        }
-        foreach (Builderindex::find()->batch() as $batch) {
-            foreach ($batch as $builderIndex) {
-                $index = Index::findOne(['url' => $builderIndex->url]);
-
-                if (!$index) {
-                    $index = new Index();
-                    $index->added_to_index = time();
-                }
-
-                $index->url = $builderIndex->url;
-                $index->title = $builderIndex->title;
-                $index->description = $builderIndex->description;
-                $index->content = $builderIndex->content;
-                $index->language_info = $builderIndex->language_info;
-                $index->last_update = time();
-                $index->url_found_on_page = $builderIndex->url_found_on_page;
-                $index->group = $builderIndex->group;
-                $index->save();
-
-                $keepIndexIds[] = $index->id;
-                unset($index, $builderIndex);
-                $i++;
-
-                if ($this->controller->verbose) {
-                    Console::updateProgress($i, $total);
+        // Sync inside a transaction: any exception thrown below rolls it back before being rethrown.
+        $transaction = Yii::$app->db->beginTransaction();
+        try {
+            $keepIndexIds = [];
+
+            $total = (int) Builderindex::find()->count();
+            $i = 0;
+            if ($this->controller->verbose) {
+                Console::startProgress(0, $total, 'synchronize index: ', false);
+            }
+            foreach (Builderindex::find()->batch() as $batch) {
+                foreach ($batch as $builderIndex) {
+                    // Reuse the index entry that already has this URL, otherwise create a new one.
+                    $index = Index::findOne(['url' => $builderIndex->url]);
+
+                    if (!$index) {
+                        $index = new Index();
+                        $index->added_to_index = time();
+                    }
+
+                    $index->url = $builderIndex->url;
+                    $index->title = $builderIndex->title;
+                    $index->description = $builderIndex->description;
+                    $index->content = $builderIndex->content;
+                    $index->language_info = $builderIndex->language_info;
+                    $index->last_update = time();
+                    $index->url_found_on_page = $builderIndex->url_found_on_page;
+                    $index->group = $builderIndex->group;
+                    $index->save();
+
+                    $keepIndexIds[] = $index->id;
+                    unset($index, $builderIndex);
+                    $i++;
+
+                    if ($this->controller->verbose) {
+                        Console::updateProgress($i, $total);
+                    }
                 }
             }
-        }
 
-        Index::deleteAll(['not in', 'id', $keepIndexIds]);
+            // Remove all index entries that were not matched to a builder index row in this run.
+            Index::deleteAll(['not in', 'id', $keepIndexIds]);
 
-        if ($this->controller->verbose) {
-            Console::endProgress("done." . PHP_EOL);
+            if ($this->controller->verbose) {
+                Console::endProgress("done." . PHP_EOL);
+            }
+            $transaction->commit();
+            unset($batch);
+        } catch (\Throwable $e) {
+            // \Throwable covers both \Exception and \Error on PHP >= 7, so a single handler is sufficient.
+            $transaction->rollBack();
+            throw $e;
         }
-
-        unset($batch);
     }
 }