From f7a97f5134d36845c5c0ad120b9d444236fef45f Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Wed, 26 Jun 2024 13:08:49 +0200 Subject: [PATCH 1/3] Model TaggerStore after TagsetStore --- .../org/ivdnt/galahad/FileBackedValue.kt | 1 - .../org/ivdnt/galahad/data/corpus/Corpus.kt | 8 +- .../galahad/jobs/InternalJobController.kt | 6 +- .../main/kotlin/org/ivdnt/galahad/jobs/Job.kt | 615 +++++++++--------- .../kotlin/org/ivdnt/galahad/jobs/Jobs.kt | 6 +- .../kotlin/org/ivdnt/galahad/jobs/State.kt | 4 +- .../ivdnt/galahad/port/LayerTransformer.kt | 6 +- .../org/ivdnt/galahad/taggers/Tagger.kt | 34 + .../org/ivdnt/galahad/taggers/TaggerStore.kt | 59 ++ .../org/ivdnt/galahad/taggers/Taggers.kt | 89 --- .../galahad/taggers/TaggersController.kt | 24 +- .../kotlin/org/ivdnt/galahad/jobs/JobsTest.kt | 4 +- ...erTest.kt => TaggerStoreControllerTest.kt} | 2 +- 13 files changed, 428 insertions(+), 430 deletions(-) create mode 100644 server/src/main/kotlin/org/ivdnt/galahad/taggers/Tagger.kt create mode 100644 server/src/main/kotlin/org/ivdnt/galahad/taggers/TaggerStore.kt delete mode 100644 server/src/main/kotlin/org/ivdnt/galahad/taggers/Taggers.kt rename server/src/test/kotlin/org/ivdnt/galahad/taggers/{TaggersControllerTest.kt => TaggerStoreControllerTest.kt} (97%) diff --git a/server/src/main/kotlin/org/ivdnt/galahad/FileBackedValue.kt b/server/src/main/kotlin/org/ivdnt/galahad/FileBackedValue.kt index 8dccae2..03bfe0c 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/FileBackedValue.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/FileBackedValue.kt @@ -9,7 +9,6 @@ import java.io.File val mapper: ObjectMapper by lazy { ObjectMapper() } -const val LOCK_SLEEP_TIME = 100L // ms to sleep before retrying to access locked file. 
abstract class FileBackedCache( file: File, diff --git a/server/src/main/kotlin/org/ivdnt/galahad/data/corpus/Corpus.kt b/server/src/main/kotlin/org/ivdnt/galahad/data/corpus/Corpus.kt index ba70eb3..ea22361 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/data/corpus/Corpus.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/data/corpus/Corpus.kt @@ -13,7 +13,7 @@ import org.ivdnt.galahad.data.document.SOURCE_LAYER_NAME import org.ivdnt.galahad.jobs.Jobs import org.ivdnt.galahad.port.CmdiMetadata import org.ivdnt.galahad.port.CorpusTransformMetadata -import org.ivdnt.galahad.taggers.Taggers +import org.ivdnt.galahad.taggers.Tagger import org.ivdnt.galahad.util.createZipFile import java.io.File import java.io.OutputStream @@ -94,10 +94,10 @@ class Corpus( override fun expensiveGet() = metadataCache.get() } - val sourceTagger: ExpensiveGettable = object : ExpensiveGettable { - override fun expensiveGet(): Taggers.Summary { + val sourceTagger: ExpensiveGettable = object : ExpensiveGettable { + override fun expensiveGet(): Tagger { val metadata = metadata.expensiveGet() - return Taggers.Summary( + return Tagger( id = SOURCE_LAYER_NAME, description = "uploaded annotations", tagset = metadata.tagset, diff --git a/server/src/main/kotlin/org/ivdnt/galahad/jobs/InternalJobController.kt b/server/src/main/kotlin/org/ivdnt/galahad/jobs/InternalJobController.kt index ff93245..a5e5f2c 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/jobs/InternalJobController.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/jobs/InternalJobController.kt @@ -11,7 +11,7 @@ import org.ivdnt.galahad.data.layer.Layer import org.ivdnt.galahad.port.InternalFile import org.ivdnt.galahad.port.SourceLayerableFile import org.ivdnt.galahad.port.tsv.TSVFile -import org.ivdnt.galahad.taggers.Taggers +import org.ivdnt.galahad.taggers.Tagger import org.ivdnt.galahad.tagset.Tagset import org.ivdnt.galahad.tagset.TagsetStore import org.springframework.web.bind.annotation.* @@ -61,8 +61,8 @@ class 
InternalJobController ( val (corpusID, jobName, documentName) = dataForProcessingID( fileId ) ?: throw Exception("Processing ID not found, was this file uploaded by me?") val original: Document = corpora.getUncheckedCorpusAccess( corpusID ).documents.readOrThrow( documentName ) val job: Job = corpora.getUncheckedCorpusAccess( corpusID ).jobs.readOrThrow( jobName ) - val taggerSummary: Taggers.Summary? = job.taggers.getSummaryOrNull(job.name, null ).expensiveGet() - val tagset: Tagset? = tagsets.getOrNull(taggerSummary?.tagset) + val taggerTagger: Tagger? = job.taggerStore.getSummaryOrNull(job.name, null ).expensiveGet() + val tagset: Tagset? = tagsets.getOrNull(taggerTagger?.tagset) when (val uploadedFile = InternalFile.from(tempFile, DocumentFormat.Tsv).expensiveGet()) { // Treat TSVFiles separately form SourceLayerableFiles, because calling sourceLayer() on a TSV diff --git a/server/src/main/kotlin/org/ivdnt/galahad/jobs/Job.kt b/server/src/main/kotlin/org/ivdnt/galahad/jobs/Job.kt index 3637f45..4c644c1 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/jobs/Job.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/jobs/Job.kt @@ -1,308 +1,309 @@ -package org.ivdnt.galahad.jobs - -import com.beust.klaxon.JsonObject -import com.beust.klaxon.Parser -import kotlinx.coroutines.launch -import kotlinx.coroutines.runBlocking -import org.apache.logging.log4j.kotlin.Logging -import org.ivdnt.galahad.BaseFileSystemStore -import org.ivdnt.galahad.FileBackedCache -import org.ivdnt.galahad.FileBackedValue -import org.ivdnt.galahad.data.corpus.Corpus -import org.ivdnt.galahad.data.document.SOURCE_LAYER_NAME -import org.ivdnt.galahad.data.layer.LayerPreview -import org.ivdnt.galahad.data.layer.LayerSummary -import org.ivdnt.galahad.data.layer.plus -import org.ivdnt.galahad.evaluation.metrics.* -import org.ivdnt.galahad.jobs.DocumentJob.DocumentProcessingStatus -import org.ivdnt.galahad.taggers.Taggers -import org.springframework.core.io.FileSystemResource -import 
org.springframework.http.* -import org.springframework.util.LinkedMultiValueMap -import org.springframework.web.client.RestTemplate -import org.springframework.web.util.UriComponentsBuilder -import java.io.File -import java.net.URL -import java.util.* - -/** - * A job is saved to disk as a folder under jobs/ (managed by [Jobs]), with the following files: - * - * - documents/: a folder containing all documents in the job. A single document is represented by [DocumentJob]. These can be retrieved with [document]. - * - _isActive: a file that stores whether the job is currently being processed by the tagger. - * - assay.cache: a cache file storing the global [Metrics] of the job. - * - state.cache: a cache file storing the [State] of the job. - */ -class Job( - workDirectory: File, // the name of this directory is the name of the job/tagger - private val corpus: Corpus, -) : BaseFileSystemStore(workDirectory), Logging { - - val taggers = Taggers() - val name: String = workDirectory.name - - private val documentsWorkDirectory = workDirectory.resolve("documents") - private val documentNames - get() = documentsWorkDirectory.list()?.toSet() ?: throw Exception("Error accessing job documents") - - /** - * Note: this init block has to be above [documents]. - * Because this.documents requires the documents dir to exist. 
- */ - init { - // TODO cleaner solution - if (workDirectory.name == "null" || workDirectory.name == "undefined") { - workDirectory.deleteRecursively() - throw Exception("Job name not allowed") - } - documentsWorkDirectory.mkdirs() - if (!taggers.ids.contains(name) && name != SOURCE_LAYER_NAME) { - // A job without a tagger is probably invalid, but we want to be careful, - // so we only delete it if the job is empty - // Otherwise it deserves at least manual inspection - if (documentNames.isEmpty()) workDirectory.deleteRecursively() - throw Exception("Tagger $name unknown.") - } - } - - private val documents: List = documentNames.map { document(it) } - - /** Number of documents at the tagger per job */ - val DOC_PARALLELIZATION_SIZE = 3 - - /** - * Whether the job is currently being processed (i.e. has sent files to the tagger to become tagged at some point). - */ - var _isActive: FileBackedValue = FileBackedValue(workDirectory.resolve("_isActive"), false) - - var isActive: Boolean - get() = _isActive.read() - set(value) { - _isActive.modify { value }; corpus.invalidateCache() - } - - /** - * The sum of the global [Metrics] score of all the documents of the job (as opposed to per PoS). - * Cached in a file, as it is expensive. - */ - val assay = object : FileBackedCache>( - file = workDirectory.resolve("assay.cache"), initValue = mapOf() - ) { - override fun isValid(lastModified: Long): Boolean { - return lastModified >= this@Job.lastModified - } - - override fun set(): Map { - return CorpusMetrics( - corpus = corpus, - settings = METRIC_TYPES, - hypothesis = name, - reference = SOURCE_LAYER_NAME - ).metricTypes.mapValues { it.value.toFlat() } - } - } - - /** - * Progress of the job based on the status of the [DocumentJob]s of this job. 
- */ - val progress: Progress - get() { - val statuses = corpus.documents.allNames.map { document(it).status } - val errors = - documentNames.mapNotNull { name -> document(name).getError?.let { error -> name to error } }.toMap() - return Progress( - pending = statuses.count { it == DocumentProcessingStatus.PENDING }, - processing = statuses.count { it == DocumentProcessingStatus.PROCESSING }, - failed = statuses.count { it == DocumentProcessingStatus.ERROR }, - finished = statuses.count { it == DocumentProcessingStatus.FINISHED }, - errors = errors - ) - } - - private fun deleteInactiveProcesses() { - documents.filter { it.isProcessing }.forEach { documentJob -> - // For each document that claims to be processing, verify if its pid is present at the tagger - // If not, delete pid. - try { - val jsonStr: String? = - taggerRequest(this, "status/${documentJob.getProcessingID}", HttpMethod.GET, String::class.java) - val parser: Parser = Parser.default() - val json: JsonObject = parser.parse(StringBuilder(jsonStr!!)) as JsonObject - if (json.boolean("busy") == false && json.boolean("pending") == false) { - // The doc is either finished, has an error, or does not exist. - documentJob.cancel() - stateFile.delete() - } - } catch (e: Exception) { - // The tagger can't be reached, so no way to tell if the document is still processing. - // If the tagger restarts, it does reprocess documents. Maybe including this one, so we keep it. - } - } - if (documents.count { it.isProcessing } == 0 && isActive) { - // Writing invalidates cache, so only write if isActive would change. - isActive = false - } - } - - /** - * Preview of the resulting terms of this job. - * Show the first preview of the first document that isn't LayerPreview.EMPTY. 
- */ - val preview: LayerPreview - get() = documents.map { it.result.preview }.firstNotNullOfOrNull { it: LayerPreview -> - if (it == LayerPreview.EMPTY) null else it - } ?: LayerPreview.EMPTY - val stateFile: File = workDirectory.resolve("state.cache") - - /** - * The state of the job, which is cached in a file. - * This is a very expensive operation, so we want to cache it. - */ - private val stateCache = object : FileBackedCache( - file = stateFile, initValue = State() - ) { - override fun isValid(lastModified: Long): Boolean { - return lastModified >= this@Job.lastModified - } - - override fun set(): State { - // sum up the number of tokens/lemmas/etc of all documents - // This is very expensive - val resultSummary: LayerSummary = - documents.map { it.result.summary }.reduceOrNull { a, b -> a + b } ?: LayerSummary() - return State( - taggers.getSummaryOrNull(name, corpus.sourceTagger).expensiveGet() ?: Taggers.Summary(), - progress, - preview, - resultSummary, - lastModified = this@Job.lastModified - ) - } - } - - val state: State - get() { - deleteInactiveProcesses() - return stateCache.get() - } - - fun document(name: String): DocumentJob { - return DocumentJob(documentsWorkDirectory.resolve(name)) - } - - fun documentNameForProcessingIDOrNull(id: UUID): String? { - return documents.filter { it.getProcessingID == id }.map { it.name }.firstOrNull() - } - - fun start() { - isActive = true - next() - } - - fun next() { - if (name == SOURCE_LAYER_NAME) return // Nothing to process - if (!isActive) return - // Launch a coroutine so we can quickly return - runBlocking { - launch { - uploadDocs() - } - } - } - - /** - * Upload documents to the tagger where they will be automatically processed. - * Only ever upload as many files such that there are [DOC_PARALLELIZATION_SIZE] number of documents at the tagger. - * Upon upload, a processingID is returned by the tagger, which we store in the respective [DocumentJob]. 
- */ - private fun uploadDocs() { - // Quickly count the documents currently being processed - val numCurrentlyBeingProcessed = documents.count { it.status == DocumentProcessingStatus.PROCESSING } - - // Upload the first documents to the tagger - // Because the tag function might be activated multiple times, - // We correct the number to remain with the defined parallelization - val numberToUpload = 0.coerceAtLeast(DOC_PARALLELIZATION_SIZE - numCurrentlyBeingProcessed) - - // Upload the documents to the tagger - corpus.documents.readAll().filter { - val metadata = it.metadata.expensiveGet() - metadata.valid && document(metadata.name).status == DocumentProcessingStatus.PENDING || document( - metadata.name - ).status == DocumentProcessingStatus.ERROR - }.take(numberToUpload).forEach { - val processingID = postInputToTagger(it.plainTextFile) - // Store the processingID, so we can match it with the incoming file later - document(it.metadata.expensiveGet().name).setProcessingID(processingID) - } - } - - /** Cancel the job by deleting all the currently processing input files at the tagger. */ - fun cancel() { - isActive = false - documents.forEach { documentJob -> - try { - if (documentJob.isProcessing) { - deleteInputAtTagger(documentJob.getProcessingID!!) - } - } catch (e: Exception) { - // Ignore, so we cancel other documents even if one fails. - } finally { - documentJob.cancel() - } - } - } - - fun delete() { - cancel() - workDirectory.deleteRecursively() - } - - /** Upload a single file to the tagger */ - private fun postInputToTagger(file: File): UUID { - // Custom request entity due to file. - val headers = HttpHeaders() - headers.contentType = MediaType.MULTIPART_FORM_DATA - val params = LinkedMultiValueMap() - params.add("file", FileSystemResource(file)) - val requestEntity: HttpEntity> = HttpEntity(params, headers) - - val route = "input" - val result: String? 
= taggerRequest(this, route, HttpMethod.POST, String::class.java, requestEntity) - return UUID.fromString(result) ?: throw Exception("No result received when uploading file") - } - - // Delete input files so that they won't be processed anymore. - // For example because the user cancelled the job. - private fun deleteInputAtTagger(pid: UUID) { - val route = "input/$pid" - taggerRequest(this, route, HttpMethod.DELETE, Void::class.java) - } - - companion object { - private fun taggerRequest( - job: Job, route: String, method: HttpMethod, type: Class, - requestEntity: HttpEntity>? = null, - ): T? { - // Setup request. - val restTemplate = RestTemplate() - val endpoint = URL("${job.taggers.getURL(job.name)}/$route") - val builder = UriComponentsBuilder.fromUri(endpoint.toURI()) - // Send request. - val responseEntity = try { - restTemplate.exchange( - builder.build().encode().toUri(), method, requestEntity, // Allowed to be null - type - ) - } catch (e: Exception) { - throw Exception("Failed to connect to tagger ${job.name} with exception ${e}.") - } - // Handle result. 
- if (responseEntity.statusCode != HttpStatus.OK) { - throw Exception("$method file returned ${responseEntity.statusCode} with response ${responseEntity.body}") - } else { - return responseEntity.body - } - } - } +package org.ivdnt.galahad.jobs + +import com.beust.klaxon.JsonObject +import com.beust.klaxon.Parser +import kotlinx.coroutines.launch +import kotlinx.coroutines.runBlocking +import org.apache.logging.log4j.kotlin.Logging +import org.ivdnt.galahad.BaseFileSystemStore +import org.ivdnt.galahad.FileBackedCache +import org.ivdnt.galahad.FileBackedValue +import org.ivdnt.galahad.data.corpus.Corpus +import org.ivdnt.galahad.data.document.SOURCE_LAYER_NAME +import org.ivdnt.galahad.data.layer.LayerPreview +import org.ivdnt.galahad.data.layer.LayerSummary +import org.ivdnt.galahad.data.layer.plus +import org.ivdnt.galahad.evaluation.metrics.* +import org.ivdnt.galahad.jobs.DocumentJob.DocumentProcessingStatus +import org.ivdnt.galahad.taggers.Tagger +import org.ivdnt.galahad.taggers.TaggerStore +import org.springframework.core.io.FileSystemResource +import org.springframework.http.* +import org.springframework.util.LinkedMultiValueMap +import org.springframework.web.client.RestTemplate +import org.springframework.web.util.UriComponentsBuilder +import java.io.File +import java.net.URL +import java.util.* + +/** + * A job is saved to disk as a folder under jobs/ (managed by [Jobs]), with the following files: + * + * - documents/: a folder containing all documents in the job. A single document is represented by [DocumentJob]. These can be retrieved with [document]. + * - _isActive: a file that stores whether the job is currently being processed by the tagger. + * - assay.cache: a cache file storing the global [Metrics] of the job. + * - state.cache: a cache file storing the [State] of the job. 
+ */ +class Job( + workDirectory: File, // the name of this directory is the name of the job/tagger + private val corpus: Corpus, +) : BaseFileSystemStore(workDirectory), Logging { + + val taggerStore = TaggerStore() + val name: String = workDirectory.name + + private val documentsWorkDirectory = workDirectory.resolve("documents") + private val documentNames + get() = documentsWorkDirectory.list()?.toSet() ?: throw Exception("Error accessing job documents") + + /** + * Note: this init block has to be above [documents]. + * Because this.documents requires the documents dir to exist. + */ + init { + // TODO cleaner solution + if (workDirectory.name == "null" || workDirectory.name == "undefined") { + workDirectory.deleteRecursively() + throw Exception("Job name not allowed") + } + documentsWorkDirectory.mkdirs() + if (!taggerStore.ids.contains(name) && name != SOURCE_LAYER_NAME) { + // A job without a tagger is probably invalid, but we want to be careful, + // so we only delete it if the job is empty + // Otherwise it deserves at least manual inspection + if (documentNames.isEmpty()) workDirectory.deleteRecursively() + throw Exception("Tagger $name unknown.") + } + } + + private val documents: List = documentNames.map { document(it) } + + /** Number of documents at the tagger per job */ + val DOC_PARALLELIZATION_SIZE = 3 + + /** + * Whether the job is currently being processed (i.e. has sent files to the tagger to become tagged at some point). + */ + var _isActive: FileBackedValue = FileBackedValue(workDirectory.resolve("_isActive"), false) + + var isActive: Boolean + get() = _isActive.read() + set(value) { + _isActive.modify { value }; corpus.invalidateCache() + } + + /** + * The sum of the global [Metrics] score of all the documents of the job (as opposed to per PoS). + * Cached in a file, as it is expensive. 
+ */ + val assay = object : FileBackedCache>( + file = workDirectory.resolve("assay.cache"), initValue = mapOf() + ) { + override fun isValid(lastModified: Long): Boolean { + return lastModified >= this@Job.lastModified + } + + override fun set(): Map { + return CorpusMetrics( + corpus = corpus, + settings = METRIC_TYPES, + hypothesis = name, + reference = SOURCE_LAYER_NAME + ).metricTypes.mapValues { it.value.toFlat() } + } + } + + /** + * Progress of the job based on the status of the [DocumentJob]s of this job. + */ + val progress: Progress + get() { + val statuses = corpus.documents.allNames.map { document(it).status } + val errors = + documentNames.mapNotNull { name -> document(name).getError?.let { error -> name to error } }.toMap() + return Progress( + pending = statuses.count { it == DocumentProcessingStatus.PENDING }, + processing = statuses.count { it == DocumentProcessingStatus.PROCESSING }, + failed = statuses.count { it == DocumentProcessingStatus.ERROR }, + finished = statuses.count { it == DocumentProcessingStatus.FINISHED }, + errors = errors + ) + } + + private fun deleteInactiveProcesses() { + documents.filter { it.isProcessing }.forEach { documentJob -> + // For each document that claims to be processing, verify if its pid is present at the tagger + // If not, delete pid. + try { + val jsonStr: String? = + taggerRequest(this, "status/${documentJob.getProcessingID}", HttpMethod.GET, String::class.java) + val parser: Parser = Parser.default() + val json: JsonObject = parser.parse(StringBuilder(jsonStr!!)) as JsonObject + if (json.boolean("busy") == false && json.boolean("pending") == false) { + // The doc is either finished, has an error, or does not exist. + documentJob.cancel() + stateFile.delete() + } + } catch (e: Exception) { + // The tagger can't be reached, so no way to tell if the document is still processing. + // If the tagger restarts, it does reprocess documents. Maybe including this one, so we keep it. 
+ } + } + if (documents.count { it.isProcessing } == 0 && isActive) { + // Writing invalidates cache, so only write if isActive would change. + isActive = false + } + } + + /** + * Preview of the resulting terms of this job. + * Show the first preview of the first document that isn't LayerPreview.EMPTY. + */ + val preview: LayerPreview + get() = documents.map { it.result.preview }.firstNotNullOfOrNull { it: LayerPreview -> + if (it == LayerPreview.EMPTY) null else it + } ?: LayerPreview.EMPTY + val stateFile: File = workDirectory.resolve("state.cache") + + /** + * The state of the job, which is cached in a file. + * This is a very expensive operation, so we want to cache it. + */ + private val stateCache = object : FileBackedCache( + file = stateFile, initValue = State() + ) { + override fun isValid(lastModified: Long): Boolean { + return lastModified >= this@Job.lastModified + } + + override fun set(): State { + // sum up the number of tokens/lemmas/etc of all documents + // This is very expensive + val resultSummary: LayerSummary = + documents.map { it.result.summary }.reduceOrNull { a, b -> a + b } ?: LayerSummary() + return State( + taggerStore.getSummaryOrNull(name, corpus.sourceTagger).expensiveGet() ?: Tagger(), + progress, + preview, + resultSummary, + lastModified = this@Job.lastModified + ) + } + } + + val state: State + get() { + deleteInactiveProcesses() + return stateCache.get() + } + + fun document(name: String): DocumentJob { + return DocumentJob(documentsWorkDirectory.resolve(name)) + } + + fun documentNameForProcessingIDOrNull(id: UUID): String? 
{ + return documents.filter { it.getProcessingID == id }.map { it.name }.firstOrNull() + } + + fun start() { + isActive = true + next() + } + + fun next() { + if (name == SOURCE_LAYER_NAME) return // Nothing to process + if (!isActive) return + // Launch a coroutine so we can quickly return + runBlocking { + launch { + uploadDocs() + } + } + } + + /** + * Upload documents to the tagger where they will be automatically processed. + * Only ever upload as many files such that there are [DOC_PARALLELIZATION_SIZE] number of documents at the tagger. + * Upon upload, a processingID is returned by the tagger, which we store in the respective [DocumentJob]. + */ + private fun uploadDocs() { + // Quickly count the documents currently being processed + val numCurrentlyBeingProcessed = documents.count { it.status == DocumentProcessingStatus.PROCESSING } + + // Upload the first documents to the tagger + // Because the tag function might be activated multiple times, + // We correct the number to remain with the defined parallelization + val numberToUpload = 0.coerceAtLeast(DOC_PARALLELIZATION_SIZE - numCurrentlyBeingProcessed) + + // Upload the documents to the tagger + corpus.documents.readAll().filter { + val metadata = it.metadata.expensiveGet() + metadata.valid && document(metadata.name).status == DocumentProcessingStatus.PENDING || document( + metadata.name + ).status == DocumentProcessingStatus.ERROR + }.take(numberToUpload).forEach { + val processingID = postInputToTagger(it.plainTextFile) + // Store the processingID, so we can match it with the incoming file later + document(it.metadata.expensiveGet().name).setProcessingID(processingID) + } + } + + /** Cancel the job by deleting all the currently processing input files at the tagger. */ + fun cancel() { + isActive = false + documents.forEach { documentJob -> + try { + if (documentJob.isProcessing) { + deleteInputAtTagger(documentJob.getProcessingID!!) 
+ } + } catch (e: Exception) { + // Ignore, so we cancel other documents even if one fails. + } finally { + documentJob.cancel() + } + } + } + + fun delete() { + cancel() + workDirectory.deleteRecursively() + } + + /** Upload a single file to the tagger */ + private fun postInputToTagger(file: File): UUID { + // Custom request entity due to file. + val headers = HttpHeaders() + headers.contentType = MediaType.MULTIPART_FORM_DATA + val params = LinkedMultiValueMap() + params.add("file", FileSystemResource(file)) + val requestEntity: HttpEntity> = HttpEntity(params, headers) + + val route = "input" + val result: String? = taggerRequest(this, route, HttpMethod.POST, String::class.java, requestEntity) + return UUID.fromString(result) ?: throw Exception("No result received when uploading file") + } + + // Delete input files so that they won't be processed anymore. + // For example because the user cancelled the job. + private fun deleteInputAtTagger(pid: UUID) { + val route = "input/$pid" + taggerRequest(this, route, HttpMethod.DELETE, Void::class.java) + } + + companion object { + private fun taggerRequest( + job: Job, route: String, method: HttpMethod, type: Class, + requestEntity: HttpEntity>? = null, + ): T? { + // Setup request. + val restTemplate = RestTemplate() + val endpoint = URL("${job.taggerStore.getURL(job.name)}/$route") + val builder = UriComponentsBuilder.fromUri(endpoint.toURI()) + // Send request. + val responseEntity = try { + restTemplate.exchange( + builder.build().encode().toUri(), method, requestEntity, // Allowed to be null + type + ) + } catch (e: Exception) { + throw Exception("Failed to connect to tagger ${job.name} with exception ${e}.") + } + // Handle result. 
+ if (responseEntity.statusCode != HttpStatus.OK) { + throw Exception("$method file returned ${responseEntity.statusCode} with response ${responseEntity.body}") + } else { + return responseEntity.body + } + } + } } \ No newline at end of file diff --git a/server/src/main/kotlin/org/ivdnt/galahad/jobs/Jobs.kt b/server/src/main/kotlin/org/ivdnt/galahad/jobs/Jobs.kt index d03e78e..7ee114b 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/jobs/Jobs.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/jobs/Jobs.kt @@ -5,7 +5,7 @@ import org.ivdnt.galahad.app.NamedCRUDSet import org.ivdnt.galahad.data.corpus.Corpus import org.ivdnt.galahad.data.layer.LayerPreview import org.ivdnt.galahad.data.layer.LayerSummary -import org.ivdnt.galahad.taggers.Taggers +import org.ivdnt.galahad.taggers.TaggerStore import java.io.File class Jobs( @@ -13,12 +13,12 @@ class Jobs( private val corpus: Corpus, ) : BaseFileSystemStore(workDirectory), NamedCRUDSet { - private val taggers = Taggers() + private val taggerStore = TaggerStore() // better be verbose than sorry fun readAllJobStatesIncludingPotentialJobs(): Set { val existingJobs = readAll().map { it.state } - val potentialJobs = taggers.summaries.map { it.expensiveGet() }.map { + val potentialJobs = taggerStore.taggers.map { it.expensiveGet() }.map { State( it, Progress(pending = corpus.documents.readAll().size), LayerPreview.EMPTY, LayerSummary(), 0 ) diff --git a/server/src/main/kotlin/org/ivdnt/galahad/jobs/State.kt b/server/src/main/kotlin/org/ivdnt/galahad/jobs/State.kt index e292ba8..e1ed66b 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/jobs/State.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/jobs/State.kt @@ -3,13 +3,13 @@ package org.ivdnt.galahad.jobs import com.fasterxml.jackson.annotation.JsonProperty import org.ivdnt.galahad.data.layer.LayerPreview import org.ivdnt.galahad.data.layer.LayerSummary -import org.ivdnt.galahad.taggers.Taggers +import org.ivdnt.galahad.taggers.Tagger /** * Cache-able job metadata. 
*/ class State( - @JsonProperty("tagger") val tagger: Taggers.Summary = Taggers.Summary(), + @JsonProperty("tagger") val tagger: Tagger = Tagger(), @JsonProperty("progress") val progress: Progress = Progress(), @JsonProperty("preview") val preview: LayerPreview = LayerPreview(), @JsonProperty("resultSummary") val resultSummary: LayerSummary = LayerSummary(), diff --git a/server/src/main/kotlin/org/ivdnt/galahad/port/LayerTransformer.kt b/server/src/main/kotlin/org/ivdnt/galahad/port/LayerTransformer.kt index 577ba51..5198007 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/port/LayerTransformer.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/port/LayerTransformer.kt @@ -1,7 +1,7 @@ package org.ivdnt.galahad.port import org.ivdnt.galahad.data.document.DocumentFormat -import org.ivdnt.galahad.taggers.Taggers +import org.ivdnt.galahad.taggers.TaggerStore import org.ivdnt.galahad.tagset.TagsetStore import java.io.File import java.io.OutputStream @@ -14,8 +14,8 @@ open class LayerTransformer ( ) { private val tagsets = TagsetStore() - private val taggers = Taggers() - val tagger = taggers.getSummaryOrThrow(transformMetadata.job.name, transformMetadata.corpus.sourceTagger ).expensiveGet() + private val taggerStore = TaggerStore() + val tagger = taggerStore.getSummaryOrThrow(transformMetadata.job.name, transformMetadata.corpus.sourceTagger ).expensiveGet() protected val result = transformMetadata.layer protected val document = transformMetadata.document diff --git a/server/src/main/kotlin/org/ivdnt/galahad/taggers/Tagger.kt b/server/src/main/kotlin/org/ivdnt/galahad/taggers/Tagger.kt new file mode 100644 index 0000000..98f0150 --- /dev/null +++ b/server/src/main/kotlin/org/ivdnt/galahad/taggers/Tagger.kt @@ -0,0 +1,34 @@ +package org.ivdnt.galahad.taggers + +import com.fasterxml.jackson.annotation.JsonIgnore +import com.fasterxml.jackson.annotation.JsonProperty +import org.ivdnt.galahad.app.JSONable + +class Tagger ( + // The id should be equal to the filename + // 
i.e. mytagger.yaml should have id 'mytagger' + // This ought te be set when loading from file + // This name will be used as hostname + // So can only contain certain characters + @JsonProperty("id") var id: String = "", + @JsonProperty("description") var description: String = "", + @JsonProperty("tagset") var tagset: String? = null, + @JsonProperty("eraFrom") var eraFrom: Int = 0, + @JsonProperty("eraTo") var eraTo: Int = 0, + @JsonProperty("produces") var produces: Set = setOf(), + @JsonProperty("model") var model: LinkItem = LinkItem(), + @JsonProperty("software") var software: LinkItem = LinkItem(), + @JsonProperty("dataset") var dataset: LinkItem = LinkItem(), + @JsonProperty("trainedBy") var trainedBy: String = "", + @JsonProperty("date") var date: String = "", +) : JSONable { + @JsonIgnore + var version: String = "" + @JsonIgnore + var devport: Int? = 0 + + class LinkItem ( + @JsonProperty("name") var name: String = "", + @JsonProperty("href") var href: String = "" + ) +} \ No newline at end of file diff --git a/server/src/main/kotlin/org/ivdnt/galahad/taggers/TaggerStore.kt b/server/src/main/kotlin/org/ivdnt/galahad/taggers/TaggerStore.kt new file mode 100644 index 0000000..6c0772a --- /dev/null +++ b/server/src/main/kotlin/org/ivdnt/galahad/taggers/TaggerStore.kt @@ -0,0 +1,59 @@ +package org.ivdnt.galahad.taggers + +import org.apache.logging.log4j.kotlin.Logging +import org.ivdnt.galahad.BaseFileSystemStore +import org.ivdnt.galahad.app.ExpensiveGettable +import org.ivdnt.galahad.app.application_profile +import org.ivdnt.galahad.data.document.SOURCE_LAYER_NAME +import org.yaml.snakeyaml.LoaderOptions +import org.yaml.snakeyaml.Yaml +import org.yaml.snakeyaml.constructor.Constructor +import java.io.File +import java.net.URL + +const val TAGGERS_DIR = "data/taggers" +class TaggerStore : BaseFileSystemStore ( + File( TAGGERS_DIR ) +), Logging { + + val ids: List + get() = workDirectory.listFiles() + ?.map { it.nameWithoutExtension } + ?: throw 
Exception("Failed to get tagger ids") + + val taggers: List> + get() = ids.map { getSummaryOrThrow(it, null) } // We can provide null since there is nothing like SOURCE_LAYER.yaml in ids + + fun getSummaryOrThrow(tagger: String, sourceLayerTagger: ExpensiveGettable? ) = object : ExpensiveGettable { + override fun expensiveGet(): Tagger { + return getSummaryOrNull( tagger, sourceLayerTagger ).expensiveGet() ?: throw Exception("Failed to read tagger $tagger") + } + } + + /** + * @param sourceLayerTagger since SOURCE_LAYER_NAME never corresponds to a valid .yaml file, but it may be considered a 'tagger' in the sense that there + * exists a resulting job, the cleanest solution is to require explicit definition of the desired return value in case of SOURCE_LAYER_NAME + */ + fun getSummaryOrNull(tagger: String, sourceLayerTagger: ExpensiveGettable? ) = object : ExpensiveGettable { + override fun expensiveGet(): Tagger? { + if( tagger == SOURCE_LAYER_NAME ) return sourceLayerTagger?.expensiveGet() // throw Exception("Don't use this for sourceLayer") + val file = workDirectory.resolve( "$tagger.yaml" ) + return try { + Yaml(Constructor(Tagger::class.java, LoaderOptions())).load( file.inputStream() ) + } catch ( e:Exception ) { + logger.error("Failed to read tagger ${file.name} from file. 
Exception $e") + return null + } + } + } + + fun getURL( tagger: String ): URL { + return if(application_profile.contains("dev") ) { + val summary = getSummaryOrThrow(tagger, null).expensiveGet() + URL("http://localhost:${summary.devport}") + } else { + URL("http://$tagger:8080") + } + } + +} \ No newline at end of file diff --git a/server/src/main/kotlin/org/ivdnt/galahad/taggers/Taggers.kt b/server/src/main/kotlin/org/ivdnt/galahad/taggers/Taggers.kt deleted file mode 100644 index b8f6f2d..0000000 --- a/server/src/main/kotlin/org/ivdnt/galahad/taggers/Taggers.kt +++ /dev/null @@ -1,89 +0,0 @@ -package org.ivdnt.galahad.taggers - -import com.fasterxml.jackson.annotation.JsonIgnore -import com.fasterxml.jackson.annotation.JsonProperty -import org.apache.logging.log4j.kotlin.Logging -import org.ivdnt.galahad.BaseFileSystemStore -import org.ivdnt.galahad.app.ExpensiveGettable -import org.ivdnt.galahad.app.JSONable -import org.ivdnt.galahad.app.application_profile -import org.ivdnt.galahad.data.document.SOURCE_LAYER_NAME -import org.yaml.snakeyaml.LoaderOptions -import org.yaml.snakeyaml.Yaml -import org.yaml.snakeyaml.constructor.Constructor -import java.io.File -import java.net.URL - -const val TAGGERS_DIR = "data/taggers" -class Taggers : BaseFileSystemStore ( - File( TAGGERS_DIR ) -), Logging { - - val ids: List - get() = workDirectory.listFiles() - ?.map { it.nameWithoutExtension } - ?: throw Exception("Failed to get tagger ids") - - val summaries: List> - get() = ids.map { getSummaryOrThrow(it, null) } // We can provide null since there is nothing like SOURCE_LAYER.yaml in ids - - fun getSummaryOrThrow( tagger: String, sourceLayerSummary: ExpensiveGettable? 
) = object : ExpensiveGettable { - override fun expensiveGet(): Summary { - return getSummaryOrNull( tagger, sourceLayerSummary ).expensiveGet() ?: throw Exception("Failed to read tagger $tagger") - } - } - - /** - * @param sourceLayer since SOURCE_LAYER_NAME never corresponds to a valid .yaml file, but it may be considered a 'tagger' in the sense that there - * exists a resulting job, the cleanest solution is to require explicit definition of the desired return value in case of SOURCE_LAYER_NAME - */ - fun getSummaryOrNull( tagger: String, sourceLayerSummary: ExpensiveGettable? ) = object : ExpensiveGettable { - override fun expensiveGet(): Summary? { - if( tagger == SOURCE_LAYER_NAME ) return sourceLayerSummary?.expensiveGet() // throw Exception("Don't use this for sourceLayer") - val file = workDirectory.resolve( "$tagger.yaml" ) - return try { - Yaml(Constructor(Summary::class.java, LoaderOptions())).load( file.inputStream() ) - } catch ( e:Exception ) { - logger.error("Failed to read tagger ${file.name} from file. Exception $e") - return null - } - } - } - - fun getURL( tagger: String ): URL { - return if(application_profile.contains("dev") ) { - val summary = getSummaryOrThrow(tagger, null).expensiveGet() - URL("http://localhost:${summary.devport}") - } else { - URL("http://$tagger:8080") - } - } - - class Summary ( - // The id should be equal to the filename - // i.e. mytagger.yaml should have id 'mytagger' - // This ought te be set when loading from file - // This name will be used as hostname - // So can only contain certain characters - @JsonProperty("id") var id: String = "", - @JsonProperty("description") var description: String = "", - @JsonProperty("tagset") var tagset: String? 
= null, - @JsonProperty("eraFrom") var eraFrom: Int = 0, - @JsonProperty("eraTo") var eraTo: Int = 0, - @JsonProperty("produces") var produces: Set = setOf(), - @JsonProperty("model") var model: LinkItem = LinkItem(), - @JsonProperty("software") var software: LinkItem = LinkItem(), - @JsonProperty("dataset") var dataset: LinkItem = LinkItem(), - @JsonProperty("trainedBy") var trainedBy: String = "", - @JsonProperty("date") var date: String = "", - ) : JSONable { - @JsonIgnore var version: String = "" - @JsonIgnore var devport: Int? = 0 - - class LinkItem ( - @JsonProperty("name") var name: String = "", - @JsonProperty("href") var href: String = "" - ) - } - -} \ No newline at end of file diff --git a/server/src/main/kotlin/org/ivdnt/galahad/taggers/TaggersController.kt b/server/src/main/kotlin/org/ivdnt/galahad/taggers/TaggersController.kt index 28dbf6a..b97d658 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/taggers/TaggersController.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/taggers/TaggersController.kt @@ -4,13 +4,10 @@ import com.beust.klaxon.JsonObject import com.beust.klaxon.Parser import com.beust.klaxon.Parser.Companion.default import com.fasterxml.jackson.annotation.JsonProperty -import jakarta.servlet.http.HttpServletRequest -import jakarta.servlet.http.HttpServletResponse import org.apache.logging.log4j.kotlin.Logging import org.ivdnt.galahad.app.TAGGERS_URL import org.ivdnt.galahad.app.TAGGER_HEALTH_URL import org.ivdnt.galahad.app.TAGGER_URL -import org.springframework.beans.factory.annotation.Autowired import org.springframework.http.HttpMethod import org.springframework.web.bind.annotation.CrossOrigin import org.springframework.web.bind.annotation.GetMapping @@ -27,16 +24,13 @@ import java.net.http.HttpResponse @RestController class TaggersController : Logging { - @Autowired private val request: HttpServletRequest? = null - @Autowired private val response: HttpServletResponse? 
= null + private val taggerStore = TaggerStore() - private val taggers = Taggers() + @GetMapping( TAGGERS_URL ) @CrossOrigin fun getTaggers(): Set = + taggerStore.taggers.map { it.expensiveGet() }.toSet() - @GetMapping( TAGGERS_URL ) @CrossOrigin fun getTaggers(): Set = - taggers.summaries.map { it.expensiveGet() }.toSet() - - @GetMapping( TAGGER_URL ) @CrossOrigin fun getTagger( @PathVariable tagger: String ): Taggers.Summary? = - taggers.getSummaryOrNull( tagger, null ).expensiveGet() // Note: sourceLayer is not a tagger here + @GetMapping( TAGGER_URL ) @CrossOrigin fun getTagger( @PathVariable tagger: String ): Tagger? = + taggerStore.getSummaryOrNull( tagger, null ).expensiveGet() // Note: sourceLayer is not a tagger here @GetMapping( TAGGER_HEALTH_URL ) @CrossOrigin fun getTaggerHealth( @PathVariable tagger: String ): TaggerHealth = expensiveGetHealthFor( tagger ) @@ -46,7 +40,7 @@ class TaggersController : Logging { // However, we still think it is representative/informative val client = HttpClient.newBuilder().build() val request = HttpRequest.newBuilder() - .uri(URI.create("${taggers.getURL(tagger)}/health")) + .uri(URI.create("${taggerStore.getURL(tagger)}/health")) .build() return try { @@ -67,7 +61,7 @@ class TaggersController : Logging { message = "Can connect to tagger. Taggers health response: ${response.body()}" ) } catch ( e: Exception ) { - logger.error("Failed to connect to tagger $tagger on url ${taggers.getURL(tagger)}. Error: $e") + logger.error("Failed to connect to tagger $tagger on url ${taggerStore.getURL(tagger)}. 
Error: $e") // If we cannot connect, there is no use in tagging, so just return return TaggerHealth( status = TaggerHealthStatus.ERROR, message = "Cannot connect to tagger" ) } @@ -81,11 +75,11 @@ class TaggersController : Logging { @CrossOrigin fun getActiveDocsAtTaggers(): Int { var count = 0 - for (tagger in taggers.summaries) { + for (tagger in taggerStore.taggers) { val name = tagger.expensiveGet().id val restTemplate = RestTemplate() - val endpoint = URL("${taggers.getURL(name)}/status") + val endpoint = URL("${taggerStore.getURL(name)}/status") val builder = UriComponentsBuilder.fromUri(endpoint.toURI()) try { val res = restTemplate.exchange( diff --git a/server/src/test/kotlin/org/ivdnt/galahad/jobs/JobsTest.kt b/server/src/test/kotlin/org/ivdnt/galahad/jobs/JobsTest.kt index 10922b9..6ee35d1 100644 --- a/server/src/test/kotlin/org/ivdnt/galahad/jobs/JobsTest.kt +++ b/server/src/test/kotlin/org/ivdnt/galahad/jobs/JobsTest.kt @@ -3,7 +3,7 @@ package org.ivdnt.galahad.jobs import org.ivdnt.galahad.TestConfig import org.ivdnt.galahad.data.corpus.Corpus import org.ivdnt.galahad.port.createCorpus -import org.ivdnt.galahad.taggers.Taggers +import org.ivdnt.galahad.taggers.TaggerStore import org.junit.jupiter.api.Assertions.* import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test @@ -21,7 +21,7 @@ class JobsTest { val name = TestConfig.TAGGER_NAME // Check if empty assertEquals(0, corpus.jobs.readAll().size) - val numTaggers = Taggers().summaries.size + 1 // +1 for source layer + val numTaggers = TaggerStore().taggers.size + 1 // +1 for source layer assertEquals(numTaggers, corpus.jobs.readAllJobStatesIncludingPotentialJobs().size) assertNull(corpus.jobs.readOrNull(name)) assertThrows(Exception::class.java) { corpus.jobs.readOrThrow(name) } diff --git a/server/src/test/kotlin/org/ivdnt/galahad/taggers/TaggersControllerTest.kt b/server/src/test/kotlin/org/ivdnt/galahad/taggers/TaggerStoreControllerTest.kt similarity index 97% rename from 
server/src/test/kotlin/org/ivdnt/galahad/taggers/TaggersControllerTest.kt rename to server/src/test/kotlin/org/ivdnt/galahad/taggers/TaggerStoreControllerTest.kt index f9ea071..a5e3442 100644 --- a/server/src/test/kotlin/org/ivdnt/galahad/taggers/TaggersControllerTest.kt +++ b/server/src/test/kotlin/org/ivdnt/galahad/taggers/TaggerStoreControllerTest.kt @@ -12,7 +12,7 @@ import org.springframework.test.context.ContextConfiguration @WebMvcTest(properties = ["spring.main.allow-bean-definition-overriding=true"]) @ContextConfiguration(classes = [GalahadApplication::class]) -class TaggersControllerTest( +class TaggerStoreControllerTest( @Autowired val ctrl: TaggersController ) { From 62080c0ed27627d7dd0b92deef1c673ac3be4149 Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Fri, 28 Jun 2024 09:20:31 +0200 Subject: [PATCH 2/3] Fix z-index for vue-slider --- .../src/views/annotate/subviews/JobsView.vue | 3 +- server/build.gradle.kts | 153 +++++++++--------- .../ivdnt/galahad/data/document/Documents.kt | 1 - .../port/folia/export/FoliaTextMerger.kt | 2 - 4 files changed, 78 insertions(+), 81 deletions(-) diff --git a/client/src/views/annotate/subviews/JobsView.vue b/client/src/views/annotate/subviews/JobsView.vue index a93110e..3e2de96 100644 --- a/client/src/views/annotate/subviews/JobsView.vue +++ b/client/src/views/annotate/subviews/JobsView.vue @@ -48,7 +48,7 @@ – {{ d.item.tagger.eraTo }}{{ d.item.tagger.eraTo - }} + }} @@ -296,6 +296,7 @@ table button { :deep(.vue-slider) .vue-slider-dot { width: 25px !important; height: 25px !important; + z-index: 1; } :deep(.vue-slider) .vue-slider-dot-handle { diff --git a/server/build.gradle.kts b/server/build.gradle.kts index 7b63c76..daff5f1 100644 --- a/server/build.gradle.kts +++ b/server/build.gradle.kts @@ -1,78 +1,77 @@ -import org.jetbrains.dokka.DokkaConfiguration.Visibility -import org.jetbrains.dokka.gradle.DokkaTask -import org.jetbrains.kotlin.gradle.tasks.KotlinCompile - -plugins { - id("org.springframework.boot") 
version "3.2.3" - id("io.spring.dependency-management") version "1.1.4" - id("org.jetbrains.dokka") version "1.9.10" - kotlin("jvm") version "1.9.22" - kotlin("plugin.spring") version "1.9.22" - kotlin("plugin.serialization") version "1.9.22" -} - -group = "org.ivdnt" -version = "0.0.2-ALPHA-SNAPSHOT" -java.sourceCompatibility = JavaVersion.VERSION_17 -java.targetCompatibility = JavaVersion.VERSION_17 - -repositories { - mavenCentral() - gradlePluginPortal() -} - -dependencies { - // Spring - implementation("org.springframework.boot:spring-boot-starter-web:3.2.4") - // https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-devtools - implementation("org.springframework.boot:spring-boot-devtools:3.2.3") - - // kotlin - implementation("org.jetbrains.kotlin:kotlin-reflect:1.9.22") - // https://mvnrepository.com/artifact/org.jetbrains.kotlin/kotlin-stdlib - implementation("org.jetbrains.kotlin:kotlin-stdlib:1.9.22") - implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.8.0") - implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.3") // JVM dependency - - // swagger - implementation("org.springdoc:springdoc-openapi-starter-webmvc-ui:2.5.0") - - - implementation("com.beust:klaxon:5.6") - implementation("org.apache.logging.log4j:log4j-api-kotlin:1.2.0") - - // yaml - // https://mvnrepository.com/artifact/org.yaml/snakeyaml - implementation("org.yaml:snakeyaml:2.2") - - // Tests - testImplementation ("org.springframework.boot:spring-boot-starter-test:3.2.3") -} - -tasks.test { - environment(mapOf("profile" to "dev")) - useJUnitPlatform() -} - -tasks.withType { - useJUnitPlatform() - // https://stackoverflow.com/questions/52733942/increase-heap-memory-for-gradle-test -// minHeapSize = "4096m" -// maxHeapSize = "4096m" -// jvmArgs = listOf("-XX:MaxPermSize=1024m") // fails on some IDEs -} - -tasks.withType { - kotlinOptions { - freeCompilerArgs = listOf("-Xjsr305=strict") - jvmTarget = "17" - } -} - -tasks.withType().configureEach 
{ - dokkaSourceSets { - configureEach { - documentedVisibilities.set(setOf(Visibility.PUBLIC, Visibility.PROTECTED, Visibility.PRIVATE, Visibility.INTERNAL)) - } - } +import org.jetbrains.dokka.DokkaConfiguration.Visibility +import org.jetbrains.dokka.gradle.DokkaTask +import org.jetbrains.kotlin.gradle.tasks.KotlinCompile + +plugins { + id("org.springframework.boot") version "3.2.3" + id("io.spring.dependency-management") version "1.1.4" + id("org.jetbrains.dokka") version "1.9.10" + kotlin("jvm") version "1.9.22" + kotlin("plugin.spring") version "1.9.22" + kotlin("plugin.serialization") version "1.9.22" +} + +group = "org.ivdnt" +java.sourceCompatibility = JavaVersion.VERSION_17 +java.targetCompatibility = JavaVersion.VERSION_17 + +repositories { + mavenCentral() + gradlePluginPortal() +} + +dependencies { + // Spring + implementation("org.springframework.boot:spring-boot-starter-web:3.2.4") + // https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-devtools + implementation("org.springframework.boot:spring-boot-devtools:3.2.3") + + // kotlin + implementation("org.jetbrains.kotlin:kotlin-reflect:1.9.22") + // https://mvnrepository.com/artifact/org.jetbrains.kotlin/kotlin-stdlib + implementation("org.jetbrains.kotlin:kotlin-stdlib:1.9.22") + implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.8.0") + implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.3") // JVM dependency + + // swagger + implementation("org.springdoc:springdoc-openapi-starter-webmvc-ui:2.5.0") + + + implementation("com.beust:klaxon:5.6") + implementation("org.apache.logging.log4j:log4j-api-kotlin:1.2.0") + + // yaml + // https://mvnrepository.com/artifact/org.yaml/snakeyaml + implementation("org.yaml:snakeyaml:2.2") + + // Tests + testImplementation ("org.springframework.boot:spring-boot-starter-test:3.2.3") +} + +tasks.test { + environment(mapOf("profile" to "dev")) + useJUnitPlatform() +} + +tasks.withType { + useJUnitPlatform() + // 
https://stackoverflow.com/questions/52733942/increase-heap-memory-for-gradle-test +// minHeapSize = "4096m" +// maxHeapSize = "4096m" +// jvmArgs = listOf("-XX:MaxPermSize=1024m") // fails on some IDEs +} + +tasks.withType { + kotlinOptions { + freeCompilerArgs = listOf("-Xjsr305=strict") + jvmTarget = "17" + } +} + +tasks.withType().configureEach { + dokkaSourceSets { + configureEach { + documentedVisibilities.set(setOf(Visibility.PUBLIC, Visibility.PROTECTED, Visibility.PRIVATE, Visibility.INTERNAL)) + } + } } \ No newline at end of file diff --git a/server/src/main/kotlin/org/ivdnt/galahad/data/document/Documents.kt b/server/src/main/kotlin/org/ivdnt/galahad/data/document/Documents.kt index bccfe58..22f43fb 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/data/document/Documents.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/data/document/Documents.kt @@ -28,7 +28,6 @@ class Documents( override fun delete(key: String): Document? { val fullyDeleted: Boolean = workDirectory.resolve(key).deleteRecursively() if (!fullyDeleted) println("Partial deletion of $key") - // TODO remember we also need to delete in associated jobs return readOrNull(key) } diff --git a/server/src/main/kotlin/org/ivdnt/galahad/port/folia/export/FoliaTextMerger.kt b/server/src/main/kotlin/org/ivdnt/galahad/port/folia/export/FoliaTextMerger.kt index 9125da0..025896e 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/port/folia/export/FoliaTextMerger.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/port/folia/export/FoliaTextMerger.kt @@ -41,8 +41,6 @@ class FoliaTextMerger( if (node.nodeType == Node.TEXT_NODE) { child.textContent = text } - // TODO reparseText overwrites embedded t-styles - // never set the offset of more than one space. 
if (endsWithSpace && text.startsWith(" ")) { offset -= 1 From 761ce3f2ad5e96820bc11ea802d26a56097bad8a Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Thu, 11 Jul 2024 15:32:29 +0200 Subject: [PATCH 3/3] Fix typo --- server/data/taggers/hug-tdn-1600-1900.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/data/taggers/hug-tdn-1600-1900.yaml b/server/data/taggers/hug-tdn-1600-1900.yaml index 12a3f29..5c9070e 100644 --- a/server/data/taggers/hug-tdn-1600-1900.yaml +++ b/server/data/taggers/hug-tdn-1600-1900.yaml @@ -8,8 +8,8 @@ produces: - POS - TOK model: - name: hug-tdn-1400-1600 - href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_1400-1600 + name: hug-tdn-1600-1900 + href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_1600-1900 software: name: int-huggingface href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0