Skip to content

Commit

Permalink
Refactoring: reduce duplication (DRY). ToDo: fix counting of the last items "while …
Browse files Browse the repository at this point in the history
…($cursor < $this->total)" stops at 2400 of 2412 items
  • Loading branch information
thomas-sc committed May 15, 2024
1 parent f7f95e0 commit a1715dd
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 110 deletions.
206 changes: 97 additions & 109 deletions Classes/Command/IndexCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use Slub\LisztCommon\Common\ElasticClientBuilder;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
Expand All @@ -43,29 +44,37 @@ class IndexCommand extends Command
protected int $total;
protected Collection $locales;
protected Collection $localizedCitations;
protected InputInterface $input;

/**
 * Builds the command and collects the hreflang code of every language of
 * every site known to the given site finder into $this->locales.
 *
 * @param SiteFinder $siteFinder source of all configured sites
 */
public function __construct(SiteFinder $siteFinder)
{
    parent::__construct();

    // sites -> languages of each site -> one flat list -> hreflang strings
    $this->locales = Collection::wrap($siteFinder->getAllSites())
        ->map(function (Site $site): array {
            return $site->getLanguages();
        })
        ->flatten()
        ->map(function (SiteLanguage $language): string {
            return $language->getHreflang();
        });
}

/**
 * Returns the request object stored under $GLOBALS['TYPO3_REQUEST'].
 *
 * ToDo: $GLOBALS['TYPO3_REQUEST'] was deprecated in TYPO3 v9.2 and will be
 * removed in a future version.
 *
 * @return ServerRequestInterface the globally registered request
 */
protected function getRequest(): ServerRequestInterface
{
    $request = $GLOBALS['TYPO3_REQUEST'];

    return $request;
}

/**
 * Declares the command description and its options.
 *
 * --fetch-citations is a value-less flag; execute() only runs the
 * localized-citation fetch when it is present.
 */
protected function configure(): void
{
    $this
        ->setDescription('Create elasticsearch index from zotero bibliography')
        ->addOption(
            'fetch-citations',
            null,
            InputOption::VALUE_NONE,
            'Run with fetch localized citations'
        );
}

protected function initialize(InputInterface $input, OutputInterface $output) {
$this->extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('liszt_bibliography');
protected function initialize(InputInterface $input, OutputInterface $output)
{
$this->extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('liszt_bibliography');
$this->client = ElasticClientBuilder::getClient();
$this->apiKey = $this->extConf['zoteroApiKey'];
$this->io = new SymfonyStyle($input, $output);
Expand All @@ -75,12 +84,14 @@ protected function initialize(InputInterface $input, OutputInterface $output) {
protected function execute(InputInterface $input, OutputInterface $output): int
{
// get bulk size and total size
$this->bulkSize = (int) $this->extConf['zoteroBulkSize'];

$this->bulkSize = (int)$this->extConf['zoteroBulkSize'];
$this->input = $input;
$this->io->section('Fetching Bibliography Data');
$this->fetchBibliography();
$this->io->section('Fetching Localized Citations');
$this->fetchCitations();
if ($input->getOption('fetch-citations')) {
$this->io->section('Fetching Localized Citations');
$this->fetchCitations();
}
$this->io->section('Fetching TEI Data');
$this->fetchTeiData();
$this->io->section('Building Datasets');
Expand All @@ -94,23 +105,25 @@ protected function buildDataSets(): void
{
$this->io->progressStart($this->total);
$this->dataSets = $this->bibliographyItems->
map(function($bibliographyItem) {
$this->io->progressAdvance();
return self::buildDataSet($bibliographyItem, $this->localizedCitations, $this->teiDataSets);
});
map(function ($bibliographyItem) {
$this->io->progressAdvance();
return self::buildDataSet($bibliographyItem, $this->input->getOption('fetch-citations') ? $this->localizedCitations : null, $this->teiDataSets);
});
$this->io->progressFinish();
}

protected static function buildDataSet(
array $bibliographyItem,
Collection $localizedCitations,
Collection $teiDataSets
array $bibliographyItem,
?Collection $localizedCitations,
Collection $teiDataSets
)
{
$key = $bibliographyItem['key'];
$bibliographyItem['localizedCitations'] = [];
foreach ($localizedCitations as $locale => $localizedCitation) {
$bibliographyItem['localizedCitations'][$locale] = $localizedCitation->get($key)['citation'];
if ($localizedCitations) {
foreach ($localizedCitations as $locale => $localizedCitation) {
$bibliographyItem['localizedCitations'][$locale] = $localizedCitation->get($key)['citation'];
}
}
$bibliographyItem['tei'] = $teiDataSets->get($key);
return $bibliographyItem;
Expand All @@ -119,33 +132,26 @@ protected static function buildDataSet(
protected function fetchBibliography(): void
{
$client = new ZoteroApi($this->extConf['zoteroApiKey']);
$response = $client->
$this->bibliographyItems = new Collection();
$this->total = 1;
$cursor = 0;
// fetch bibliography items bulkwise
while ($cursor < $this->total) {
$response = $client->
group($this->extConf['zoteroGroupId'])->
items()->
top()->
limit(1)->
start($cursor)->
limit($this->bulkSize)->
send();
$this->total = (int) $response->getHeaders()['Total-Results'][0];

// fetch bibliography items bulkwise
$this->io->progressStart($this->total);
$collection = new Collection($response->getBody());
$this->bibliographyItems = $collection->
pluck('data');

$cursor = $this->bulkSize;
while ($cursor < $this->total) {
$this->io->progressAdvance($this->bulkSize);
$response = $client->
group($this->extConf['zoteroGroupId'])->
items()->
top()->
start($cursor)->
limit($this->bulkSize)->
send();
$this->total = (int)$response->getHeaders()['Total-Results'][0];
if ($cursor === 0) {
$this->io->progressStart($this->total);
}
$collection = new Collection($response->getBody());
$this->bibliographyItems = $this->bibliographyItems->
concat($collection->pluck('data'));
concat($collection->pluck('data'));
$this->io->progressAdvance($this->bulkSize);
$cursor += $this->bulkSize;
}
$this->io->progressFinish();
Expand All @@ -154,97 +160,79 @@ protected function fetchBibliography(): void
/**
 * Resets $this->localizedCitations and fills it by fetching the citations
 * once for every locale in $this->locales.
 */
protected function fetchCitations(): void
{
    $this->localizedCitations = new Collection();

    // One pass only: each call walks the whole bibliography for that locale.
    $this->locales->each(function ($locale): void {
        $this->fetchCitationLocale($locale);
    });
}

/**
 * Fetches the formatted citations of all top-level items for one locale,
 * page by page, and stores them in $this->localizedCitations under the
 * locale, keyed by item key.
 *
 * Relies on $this->total and $this->bulkSize having been set beforehand
 * (execute() sets the bulk size, fetchBibliography() the total).
 *
 * A failed request is retried at the same cursor position. After
 * $maxFailures consecutive failures the last exception is rethrown so the
 * command cannot spin forever on a persistent error.
 *
 * @param string $locale locale passed to the Zotero API via setLocale()
 *
 * @throws \Exception the last API error once the retry budget is used up
 */
protected function fetchCitationLocale(string $locale): void
{
    $client = new ZoteroApi($this->extConf['zoteroApiKey']);
    $style = $this->extConf['zoteroStyle'];
    $maxFailures = 10;
    $failures = 0;

    $this->io->text($locale);
    // Start the bar once, outside the loop: a retry at cursor 0 must not
    // restart it, and progressFinish() below needs a started bar even when
    // there is nothing to fetch.
    $this->io->progressStart($this->total);

    $result = new Collection();
    $cursor = 0;
    // fetch bibliography items bulkwise
    while ($cursor < $this->total) {
        try {
            $response = $client->
                group($this->extConf['zoteroGroupId'])->
                items()->
                top()->
                start($cursor)->
                limit($this->bulkSize)->
                setInclude('citation')->
                setStyle($style)->
                setLinkwrap()->
                setLocale($locale)->
                send();
            $result = $result->merge(Collection::wrap($response->getBody())->keyBy('key'));
            // The last page may be shorter than a full bulk; do not overshoot.
            $this->io->progressAdvance(min($this->bulkSize, $this->total - $cursor));
            $cursor += $this->bulkSize;
            $failures = 0;
        } catch (\Exception $e) {
            $this->io->newline(2);
            $this->io->caution($e->getMessage());
            if (++$failures >= $maxFailures) {
                throw $e;
            }
            $this->io->note('Stay calm. This is normal for Zotero\'s API. I\'m trying it again. fetchCitationLocale');
        }
    }

    $this->localizedCitations = $this->localizedCitations->merge(
        new Collection([$locale => $result])
    );
    $this->io->progressFinish();
}

/**
 * Fetches the TEI representation of all top-level items, page by page,
 * into $this->teiDataSets, keyed by item key so that buildDataSet() can
 * look an entry up with get($key).
 *
 * Relies on $this->total and $this->bulkSize having been set beforehand.
 *
 * A failed request is retried at the same cursor position. After
 * $maxFailures consecutive failures the last exception is rethrown so the
 * command cannot spin forever on a persistent error.
 *
 * @throws \Exception the last API error once the retry budget is used up
 */
protected function fetchTeiData(): void
{
    $client = new ZoteroApi($this->extConf['zoteroApiKey']);
    $maxFailures = 10;
    $failures = 0;

    $this->teiDataSets = new Collection();
    // Start the bar once, outside the loop: a retry at cursor 0 must not
    // restart it, and progressFinish() below needs a started bar even when
    // there is nothing to fetch.
    $this->io->progressStart($this->total);

    $cursor = 0;
    // fetch bibliography items bulkwise
    while ($cursor < $this->total) {
        try {
            $response = $client->
                group($this->extConf['zoteroGroupId'])->
                items()->
                top()->
                start($cursor)->
                limit($this->bulkSize)->
                setInclude('tei')->
                send();
            $collection = new Collection($response->getBody());
            // merge(), not concat(): concat() appends values under fresh
            // numeric keys, which would discard the item keys set by keyBy().
            // NOTE(review): assumes Collection is Illuminate\Support\Collection — confirm.
            $this->teiDataSets = $this->teiDataSets->
                merge($collection->keyBy('key'));
            // The last page may be shorter than a full bulk; do not overshoot.
            $this->io->progressAdvance(min($this->bulkSize, $this->total - $cursor));
            $cursor += $this->bulkSize;
            $failures = 0;
        } catch (\Exception $e) {
            $this->io->newline(2);
            $this->io->caution($e->getMessage());
            if (++$failures >= $maxFailures) {
                throw $e;
            }
            $this->io->note('Stay calm. This is normal for Zotero\'s API. I\'m trying it again. fetchTeiData');
        }
    }
    $this->io->progressFinish();
}
Expand All @@ -259,18 +247,18 @@ protected function commitBibliography(): void

// index params -> mapping fields for facetting in index
// Todo: optimize with synthetic _source and copy fields?
/* $elasticIndexMappings = [
'index' => ['index' => $index],
'body' => [
'mappings' => [
'properties' => [
'itemType' => [
'type' => 'keyword',
/* $elasticIndexMappings = [
'index' => ['index' => $index],
'body' => [
'mappings' => [
'properties' => [
'itemType' => [
'type' => 'keyword',
]
]
]
]
]
]
];*/
];*/

/* For more recent versions of Elasticsearch (8.x),
a call to $client->indices()->exists($indexParams) no longer returns a boolean,
Expand All @@ -283,20 +271,20 @@ protected function commitBibliography(): void
}
} catch (\Exception $e) {
if ($e->getCode() === 404) {
echo 'code='.$e->getCode();
$this->io->note("Index: " .$index. " not exist. Try to create new index");
echo 'code=' . $e->getCode();
$this->io->note("Index: " . $index . " not exist. Try to create new index");
$this->client->indices()->create(['index' => $index]);
} else {
$this->io->error("Exception: " . $e->getMessage());
exit; // or die(); // eventually clean memory
}
}

$params = [ 'body' => [] ];
$params = ['body' => []];
$bulkCount = 0;
foreach ($this->dataSets as $document) {
$this->io->progressAdvance();
$params['body'][] = [ 'index' =>
$params['body'][] = ['index' =>
[
'_index' => $index,
'_id' => $document['key']
Expand All @@ -306,11 +294,11 @@ protected function commitBibliography(): void

if (!(++$bulkCount % $this->extConf['elasticBulkSize'])) {
$this->client->bulk($params);
$params = [ 'body' => [] ];
$params = ['body' => []];
}
}
$this->io->progressFinish();
$this->client->bulk($params);
// $this->client->bulk($params);

$this->io->text('done');
}
Expand Down
2 changes: 1 addition & 1 deletion Configuration/Services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ services:
-
name: console.command
command: 'liszt-bibliography:index'
description: 'Create elasticsearch index from zotero bibliography'
description: 'Create elasticsearch index from zotero bibliography, options: --fetch-citations'
schedulable: true

0 comments on commit a1715dd

Please sign in to comment.