Skip to content

Commit

Permalink
Merge pull request #45 from luyadev/trans
Browse files Browse the repository at this point in the history
transaction
  • Loading branch information
nadar authored Apr 28, 2022
2 parents c4e39d0 + f520fa6 commit 2f94a84
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 38 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/).
In order to read more about upgrading and BC breaks have a look at the [UPGRADE Document](UPGRADE.md).

## 3.4.1 (28. April 2022)

+ [#45](https://github.com/luyadev/luya-module-crawler/pull/45) Use a transaction to sync the index table when the crawler finishes the process.

## 3.4.0 (5. April 2022)

+ Updated deps to the latest version of the `smalot/pdfparser` parser, which now requires at least PHP 7.1. Therefore the PHP version requirement for the LUYA crawler module is raised to 7.1 as well (a version which has itself been outdated for a long time already: https://www.php.net/supported-versions.php).
Expand Down
86 changes: 48 additions & 38 deletions src/crawler/ResultHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use Nadar\Crawler\Crawler;
use Nadar\Crawler\Interfaces\HandlerInterface;
use Nadar\Crawler\Result;
use Yii;
use yii\helpers\Console;

/**
Expand Down Expand Up @@ -85,48 +86,57 @@ public function afterRun(Result $result)
*/
public function onEnd(Crawler $crawler)
{
$keepIndexIds = [];

$total = (int) Builderindex::find()->count();
$i = 0;
if ($this->controller->verbose) {
Console::startProgress(0, $total, 'synchronize index: ', false);
}
foreach (Builderindex::find()->batch() as $batch) {
foreach ($batch as $builderIndex) {
$index = Index::findOne(['url' => $builderIndex->url]);

if (!$index) {
$index = new Index();
$index->added_to_index = time();
}

$index->url = $builderIndex->url;
$index->title = $builderIndex->title;
$index->description = $builderIndex->description;
$index->content = $builderIndex->content;
$index->language_info = $builderIndex->language_info;
$index->last_update = time();
$index->url_found_on_page = $builderIndex->url_found_on_page;
$index->group = $builderIndex->group;
$index->save();

$keepIndexIds[] = $index->id;
unset($index, $builderIndex);
$i++;

if ($this->controller->verbose) {
Console::updateProgress($i, $total);
$transaction = Yii::$app->db->beginTransaction();
try {
$keepIndexIds = [];

$total = (int) Builderindex::find()->count();
$i = 0;
if ($this->controller->verbose) {
Console::startProgress(0, $total, 'synchronize index: ', false);
}
foreach (Builderindex::find()->batch() as $batch) {
foreach ($batch as $builderIndex) {
$index = Index::findOne(['url' => $builderIndex->url]);

if (!$index) {
$index = new Index();
$index->added_to_index = time();
}

$index->url = $builderIndex->url;
$index->title = $builderIndex->title;
$index->description = $builderIndex->description;
$index->content = $builderIndex->content;
$index->language_info = $builderIndex->language_info;
$index->last_update = time();
$index->url_found_on_page = $builderIndex->url_found_on_page;
$index->group = $builderIndex->group;
$index->save();

$keepIndexIds[] = $index->id;
unset($index, $builderIndex);
$i++;

if ($this->controller->verbose) {
Console::updateProgress($i, $total);
}
}
}
}

Index::deleteAll(['not in', 'id', $keepIndexIds]);
Index::deleteAll(['not in', 'id', $keepIndexIds]);

if ($this->controller->verbose) {
Console::endProgress("done." . PHP_EOL);
if ($this->controller->verbose) {
Console::endProgress("done." . PHP_EOL);
}
$transaction->commit();
unset($batch);
} catch (\Exception $e) {
$transaction->rollBack();
throw $e;
} catch (\Throwable $e) {
$transaction->rollBack();
throw $e;
}

unset($batch);
}
}

0 comments on commit 2f94a84

Please sign in to comment.