From 5c2be32e143f23b5c0af1efe60060119e9eb1330 Mon Sep 17 00:00:00 2001 From: Eshan <60269431+Eshanatnight@users.noreply.github.com> Date: Tue, 2 Apr 2024 09:22:33 +0530 Subject: [PATCH] feat: implement distributed system with ingest and query modes (#730) --- Cargo.lock | 22 + server/Cargo.toml | 1 + server/src/about.rs | 8 +- server/src/analytics.rs | 2 +- server/src/banner.rs | 32 +- server/src/catalog.rs | 72 ++- server/src/cli.rs | 451 ++++++++++++++++ server/src/handlers/http.rs | 418 +++------------ server/src/handlers/http/about.rs | 42 +- server/src/handlers/http/cluster/mod.rs | 403 ++++++++++++++ server/src/handlers/http/cluster/utils.rs | 265 +++++++++ server/src/handlers/http/ingest.rs | 30 +- server/src/handlers/http/logstream.rs | 125 +++-- .../src/handlers/http/modal/ingest_server.rs | 332 ++++++++++++ server/src/handlers/http/modal/mod.rs | 133 +++++ .../src/handlers/http/modal/query_server.rs | 203 +++++++ server/src/handlers/http/modal/server.rs | 487 +++++++++++++++++ .../src/handlers/http/modal/ssl_acceptor.rs | 54 ++ server/src/handlers/http/query.rs | 98 +++- server/src/main.rs | 172 +----- server/src/metadata.rs | 4 +- server/src/metrics/mod.rs | 3 +- server/src/metrics/prom_utils.rs | 87 +++ server/src/migration.rs | 128 ++++- server/src/migration/metadata_migration.rs | 75 ++- server/src/option.rs | 507 ++---------------- server/src/query.rs | 135 ++++- server/src/query/filter_optimizer.rs | 2 +- server/src/rbac/role.rs | 7 + server/src/response.rs | 13 +- server/src/storage.rs | 13 +- server/src/storage/localfs.rs | 211 +++++++- server/src/storage/object_storage.rs | 137 ++++- server/src/storage/s3.rs | 178 +++++- server/src/storage/staging.rs | 20 + server/src/storage/store_metadata.rs | 115 +++- server/src/sync.rs | 112 ++++ server/src/utils.rs | 10 + server/src/utils/arrow/merged_reader.rs | 3 +- 39 files changed, 4012 insertions(+), 1098 deletions(-) create mode 100644 server/src/cli.rs create mode 100644 server/src/handlers/http/cluster/mod.rs create mode 100644 server/src/handlers/http/cluster/utils.rs create mode 100644 server/src/handlers/http/modal/ingest_server.rs create mode 100644 server/src/handlers/http/modal/mod.rs create mode 100644 server/src/handlers/http/modal/query_server.rs create mode 100644 server/src/handlers/http/modal/server.rs create mode 100644 server/src/handlers/http/modal/ssl_acceptor.rs create mode 100644 server/src/metrics/prom_utils.rs create mode 100644 server/src/sync.rs diff --git a/Cargo.lock b/Cargo.lock index 0327f42c..daa85d56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2107,6 +2107,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "0.4.8" @@ -2734,6 +2743,7 @@ dependencies = [ "parquet", "path-clean", "prometheus", + "prometheus-parse", "prost", "prost-build", "rand", @@ -2982,6 +2992,18 @@ dependencies = [ "thiserror", ] +[[package]] +name = "prometheus-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "811031bea65e5a401fb2e1f37d802cca6601e204ac463809a3189352d13b78a5" +dependencies = [ + "chrono", + "itertools 0.12.1", + "once_cell", + "regex", +] + [[package]] name = "prost" version = "0.12.3" diff --git a/server/Cargo.toml b/server/Cargo.toml index 0af2b43c..a456f699 100644 --- a/server/Cargo.toml +++ 
b/server/Cargo.toml @@ -104,6 +104,7 @@ serde_repr = "0.1.17" hashlru = { version = "0.11.0", features = ["serde"] } path-clean = "1.0.1" prost = "0.12.3" +prometheus-parse = "0.2.5" [build-dependencies] cargo_toml = "0.15" diff --git a/server/src/about.rs b/server/src/about.rs index f33ac213..9aea9ff2 100644 --- a/server/src/about.rs +++ b/server/src/about.rs @@ -90,10 +90,10 @@ pub fn print_about( eprint!( " {} - Version: \"v{}\"", + Version:\t\t\t\t\t\"v{}\"", "About:".to_string().bold(), current_version, - ); + ); // " " " " if let Some(latest_release) = latest_release { if latest_release.version > current_version { @@ -103,8 +103,8 @@ pub fn print_about( eprintln!( " - Commit: \"{commit_hash}\" - Docs: \"https://logg.ing/docs\"" + Commit:\t\t\t\t\t\t\"{commit_hash}\" + Docs:\t\t\t\t\t\t\"https://logg.ing/docs\"" ); } diff --git a/server/src/analytics.rs b/server/src/analytics.rs index ca8d172d..e1031146 100644 --- a/server/src/analytics.rs +++ b/server/src/analytics.rs @@ -90,7 +90,7 @@ impl Report { cpu_count, memory_total_bytes: mem_total, platform: platform().to_string(), - mode: CONFIG.mode_string().to_string(), + mode: CONFIG.get_storage_mode_string().to_string(), version: current().released_version.to_string(), commit_hash: current().commit_hash, metrics: build_metrics(), diff --git a/server/src/banner.rs b/server/src/banner.rs index 0f1dc512..d9f3cc60 100644 --- a/server/src/banner.rs +++ b/server/src/banner.rs @@ -35,13 +35,13 @@ pub async fn print(config: &Config, meta: &StorageMetadata) { fn print_ascii_art() { let ascii_name = r#" - `7MM"""Mq. *MM `7MM - MM `MM. MM MM - MM ,M9 ,6"Yb. `7Mb,od8 ,pP"Ybd .gP"Ya ,6"Yb. MM,dMMb. MM .gP"Ya - MMmmdM9 8) MM MM' "' 8I `" ,M' Yb 8) MM MM `Mb MM ,M' Yb - MM ,pm9MM MM `YMMMa. 8M"""""" ,pm9MM MM M8 MM 8M"""""" - MM 8M MM MM L. I8 YM. , 8M MM MM. ,M9 MM YM. , - .JMML. `Moo9^Yo..JMML. M9mmmP' `Mbmmd' `Moo9^Yo. P^YbmdP' .JMML. `Mbmmd' + `7MM"""Mq. *MM `7MM + MM `MM. MM MM + MM ,M9 ,6"Yb. `7Mb,od8 ,pP"Ybd .gP"Ya ,6"Yb. MM,dMMb. MM .gP"Ya + MMmmdM9 8) MM MM' "' 8I `" ,M' Yb 8) MM MM `Mb MM ,M' Yb + MM ,pm9MM MM `YMMMa. 8M"""""" ,pm9MM MM M8 MM 8M"""""" + MM 8M MM MM L. I8 YM. , 8M MM MM. ,M9 MM YM. , + .JMML. `Moo9^Yo..JMML. M9mmmP' `Mbmmd' `Moo9^Yo. P^YbmdP' .JMML. 
`Mbmmd' "#; eprint!("{ascii_name}"); @@ -77,12 +77,14 @@ fn status_info(config: &Config, scheme: &str, id: Uid) { eprintln!( " {} - Address: {} - Credentials: {} - LLM Status: \"{}\"", + Address:\t\t\t\t\t{} + Credentials:\t\t\t\t\t{} + Server Mode:\t\t\t\t\t\"{}\" + LLM Status:\t\t\t\t\t\"{}\"", "Server:".to_string().bold(), address, credentials, + config.parseable.mode.to_str(), llm_status ); } @@ -99,10 +101,10 @@ async fn storage_info(config: &Config) { eprintln!( " {} - Mode: \"{}\" - Staging: \"{}\"", + Storage Mode:\t\t\t\t\t\"{}\" + Staging Path:\t\t\t\t\t\"{}\"", "Storage:".to_string().bold(), - config.mode_string(), + config.get_storage_mode_string(), config.staging_dir().to_string_lossy(), ); @@ -114,7 +116,7 @@ async fn storage_info(config: &Config) { eprintln!( "\ - {:8}Cache: \"{}\", (size: {})", + {:8}Cache:\t\t\t\t\t\"{}\", (size: {})", "", path.display(), size @@ -123,7 +125,7 @@ async fn storage_info(config: &Config) { eprintln!( "\ - {:8}Store: \"{}\", (latency: {:?})", + {:8}Store:\t\t\t\t\t\t\"{}\", (latency: {:?})", "", storage.get_endpoint(), latency diff --git a/server/src/catalog.rs b/server/src/catalog.rs index 2c841b10..0e8716dc 100644 --- a/server/src/catalog.rs +++ b/server/src/catalog.rs @@ -24,7 +24,8 @@ use relative_path::RelativePathBuf; use crate::{ catalog::manifest::Manifest, query::PartialTimeFilter, - storage::{ObjectStorage, ObjectStorageError}, + storage::{ObjectStorage, ObjectStorageError, MANIFEST_FILE}, + utils::get_address, }; use self::{column::Column, snapshot::ManifestItem}; @@ -105,20 +106,67 @@ pub async fn update_snapshot( item.time_lower_bound <= lower_bound && lower_bound < item.time_upper_bound }); + // if the mode in I.S. manifest needs to be created but it is not getting created because + // there is already a pos, to index into stream.json + // We update the manifest referenced by this position // This updates an existing file so there is no need to create a snapshot entry. if let Some(pos) = pos { let info = &mut manifests[pos]; let path = partition_path(stream_name, info.time_lower_bound, info.time_upper_bound); - let Some(mut manifest) = storage.get_manifest(&path).await? else { - return Err(ObjectStorageError::UnhandledError( - "Manifest found in snapshot but not in object-storage" - .to_string() - .into(), - )); - }; - manifest.apply_change(change); - storage.put_manifest(&path, manifest).await?; + + let mut ch = false; + for m in manifests.iter() { + let s = get_address(); + let p = format!("{}.{}.{}", s.0, s.1, MANIFEST_FILE); + if m.manifest_path.contains(&p) { + ch = true; + } + } + if ch { + let Some(mut manifest) = storage.get_manifest(&path).await? 
else { + return Err(ObjectStorageError::UnhandledError( + "Manifest found in snapshot but not in object-storage" + .to_string() + .into(), + )); + }; + manifest.apply_change(change); + storage.put_manifest(&path, manifest).await?; + } else { + let lower_bound = lower_bound.date_naive().and_time(NaiveTime::MIN).and_utc(); + let upper_bound = lower_bound + .date_naive() + .and_time( + NaiveTime::from_num_seconds_from_midnight_opt( + 23 * 3600 + 59 * 60 + 59, + 999_999_999, + ) + .unwrap(), + ) + .and_utc(); + + let manifest = Manifest { + files: vec![change], + ..Manifest::default() + }; + + let addr = get_address(); + let mainfest_file_name = format!("{}.{}.{}", addr.0, addr.1, MANIFEST_FILE); + let path = + partition_path(stream_name, lower_bound, upper_bound).join(&mainfest_file_name); + storage + .put_object(&path, serde_json::to_vec(&manifest).unwrap().into()) + .await?; + let path = storage.absolute_url(&path); + let new_snapshot_entriy = snapshot::ManifestItem { + manifest_path: path.to_string(), + time_lower_bound: lower_bound, + time_upper_bound: upper_bound, + }; + manifests.push(new_snapshot_entriy); + storage.put_snapshot(stream_name, meta.snapshot).await?; + } } else { let lower_bound = lower_bound.date_naive().and_time(NaiveTime::MIN).and_utc(); let upper_bound = lower_bound @@ -137,7 +185,9 @@ pub async fn update_snapshot( ..Manifest::default() }; - let path = partition_path(stream_name, lower_bound, upper_bound).join("manifest.json"); + let addr = get_address(); + let mainfest_file_name = format!("{}.{}.{}", addr.0, addr.1, MANIFEST_FILE); + let path = partition_path(stream_name, lower_bound, upper_bound).join(&mainfest_file_name); storage .put_object(&path, serde_json::to_vec(&manifest).unwrap().into()) .await?; diff --git a/server/src/cli.rs b/server/src/cli.rs new file mode 100644 index 00000000..691547f8 --- /dev/null +++ b/server/src/cli.rs @@ -0,0 +1,451 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use clap::{value_parser, Arg, ArgGroup, Command, FromArgMatches}; +use std::path::PathBuf; + +use url::Url; + +use crate::{ + oidc::{self, OpenidConfig}, + option::{validation, Compression, Mode}, +}; + +#[derive(Debug, Default)] +pub struct Cli { + /// The location of TLS Cert file + pub tls_cert_path: Option, + + /// The location of TLS Private Key file + pub tls_key_path: Option, + + /// The address on which the http server will listen. + pub address: String, + + /// Base domain under which server is hosted. 
+ /// This information is used by OIDC to refer redirects + pub domain_address: Option, + + /// The local staging path is used as a temporary landing point + /// for incoming events and local cache + pub local_staging_path: PathBuf, + + /// The local cache path is used for speeding up query on latest data + pub local_cache_path: Option, + + /// Size for local cache + pub local_cache_size: u64, + + /// Username for the basic authentication on the server + pub username: String, + + /// Password for the basic authentication on the server + pub password: String, + + /// OpenId configuration + pub openid: Option, + + /// Server should check for update or not + pub check_update: bool, + + /// Server should send anonymous analytics or not + pub send_analytics: bool, + + /// Open AI access key + pub open_ai_key: Option, + + /// Livetail port + pub grpc_port: u16, + + /// Livetail channel capacity + pub livetail_channel_capacity: usize, + + /// Rows in Parquet Rowgroup + pub row_group_size: usize, + + /// Query memory limit in bytes + pub query_memory_pool_size: Option, + + /// Parquet compression algorithm + pub parquet_compression: Compression, + + /// Mode of operation + pub mode: Mode, +} + +impl Cli { + // identifiers for arguments + pub const TLS_CERT: &'static str = "tls-cert-path"; + pub const TLS_KEY: &'static str = "tls-key-path"; + pub const ADDRESS: &'static str = "address"; + pub const DOMAIN_URI: &'static str = "origin"; + pub const STAGING: &'static str = "local-staging-path"; + pub const CACHE: &'static str = "cache-path"; + pub const CACHE_SIZE: &'static str = "cache-size"; + pub const USERNAME: &'static str = "username"; + pub const PASSWORD: &'static str = "password"; + pub const CHECK_UPDATE: &'static str = "check-update"; + pub const SEND_ANALYTICS: &'static str = "send-analytics"; + pub const OPEN_AI_KEY: &'static str = "open-ai-key"; + pub const OPENID_CLIENT_ID: &'static str = "oidc-client"; + pub const OPENID_CLIENT_SECRET: &'static str = "oidc-client-secret"; + pub const OPENID_ISSUER: &'static str = "oidc-issuer"; + pub const GRPC_PORT: &'static str = "grpc-port"; + pub const LIVETAIL_CAPACITY: &'static str = "livetail-capacity"; + // todo : what should this flag be + pub const QUERY_MEM_POOL_SIZE: &'static str = "query-mempool-size"; + pub const ROW_GROUP_SIZE: &'static str = "row-group-size"; + pub const PARQUET_COMPRESSION_ALGO: &'static str = "compression-algo"; + pub const MODE: &'static str = "mode"; + pub const DEFAULT_USERNAME: &'static str = "admin"; + pub const DEFAULT_PASSWORD: &'static str = "admin"; + + pub fn local_stream_data_path(&self, stream_name: &str) -> PathBuf { + self.local_staging_path.join(stream_name) + } + + pub fn get_scheme(&self) -> String { + if self.tls_cert_path.is_some() && self.tls_key_path.is_some() { + return "https".to_string(); + } + "http".to_string() + } + + pub fn create_cli_command_with_clap(name: &'static str) -> Command { + Command::new(name).next_line_help(false) + .arg( + Arg::new(Self::TLS_CERT) + .long(Self::TLS_CERT) + .env("P_TLS_CERT_PATH") + .value_name("PATH") + .value_parser(validation::file_path) + .help("Local path on this device where certificate file is located. Required to enable TLS"), + ) + .arg( + Arg::new(Self::TLS_KEY) + .long(Self::TLS_KEY) + .env("P_TLS_KEY_PATH") + .value_name("PATH") + .value_parser(validation::file_path) + .help("Local path on this device where private key file is located. 
Required to enable TLS"), + ) + .arg( + Arg::new(Self::ADDRESS) + .long(Self::ADDRESS) + .env("P_ADDR") + .value_name("ADDR:PORT") + .default_value("0.0.0.0:8000") + .value_parser(validation::socket_addr) + .help("Address and port for Parseable HTTP(s) server"), + ) + .arg( + Arg::new(Self::STAGING) + .long(Self::STAGING) + .env("P_STAGING_DIR") + .value_name("DIR") + .default_value("./staging") + .value_parser(validation::canonicalize_path) + .help("Local path on this device to be used as landing point for incoming events") + .next_line_help(true), + ) + .arg( + Arg::new(Self::CACHE) + .long(Self::CACHE) + .env("P_CACHE_DIR") + .value_name("DIR") + .value_parser(validation::canonicalize_path) + .help("Local path on this device to be used for caching data") + .next_line_help(true), + ) + .arg( + Arg::new(Self::CACHE_SIZE) + .long(Self::CACHE_SIZE) + .env("P_CACHE_SIZE") + .value_name("size") + .default_value("1GiB") + .value_parser(validation::cache_size) + .help("Maximum allowed cache size for all streams combined (In human readable format, e.g 1GiB, 2GiB, 100MB)") + .next_line_help(true), + ) + + .arg( + Arg::new(Self::USERNAME) + .long(Self::USERNAME) + .env("P_USERNAME") + .value_name("STRING") + .required(true) + .help("Admin username to be set for this Parseable server"), + ) + .arg( + Arg::new(Self::PASSWORD) + .long(Self::PASSWORD) + .env("P_PASSWORD") + .value_name("STRING") + .required(true) + .help("Admin password to be set for this Parseable server"), + ) + .arg( + Arg::new(Self::CHECK_UPDATE) + .long(Self::CHECK_UPDATE) + .env("P_CHECK_UPDATE") + .value_name("BOOL") + .required(false) + .default_value("true") + .value_parser(value_parser!(bool)) + .help("Enable/Disable checking for new Parseable release"), + ) + .arg( + Arg::new(Self::SEND_ANALYTICS) + .long(Self::SEND_ANALYTICS) + .env("P_SEND_ANONYMOUS_USAGE_DATA") + .value_name("BOOL") + .required(false) + .default_value("true") + .value_parser(value_parser!(bool)) + .help("Enable/Disable anonymous telemetry data collection"), + ) + .arg( + Arg::new(Self::OPEN_AI_KEY) + .long(Self::OPEN_AI_KEY) + .env("P_OPENAI_API_KEY") + .value_name("STRING") + .required(false) + .help("OpenAI key to enable llm features"), + ) + .arg( + Arg::new(Self::OPENID_CLIENT_ID) + .long(Self::OPENID_CLIENT_ID) + .env("P_OIDC_CLIENT_ID") + .value_name("STRING") + .required(false) + .help("Client id for OIDC provider"), + ) + .arg( + Arg::new(Self::OPENID_CLIENT_SECRET) + .long(Self::OPENID_CLIENT_SECRET) + .env("P_OIDC_CLIENT_SECRET") + .value_name("STRING") + .required(false) + .help("Client secret for OIDC provider"), + ) + .arg( + Arg::new(Self::OPENID_ISSUER) + .long(Self::OPENID_ISSUER) + .env("P_OIDC_ISSUER") + .value_name("URl") + .required(false) + .value_parser(validation::url) + .help("OIDC provider's host address"), + ) + .arg( + Arg::new(Self::DOMAIN_URI) + .long(Self::DOMAIN_URI) + .env("P_ORIGIN_URI") + .value_name("URL") + .required(false) + .value_parser(validation::url) + .help("Parseable server global domain address"), + ) + .arg( + Arg::new(Self::GRPC_PORT) + .long(Self::GRPC_PORT) + .env("P_GRPC_PORT") + .value_name("PORT") + .default_value("8001") + .required(false) + .value_parser(value_parser!(u16)) + .help("Port for gRPC server"), + ) + .arg( + Arg::new(Self::LIVETAIL_CAPACITY) + .long(Self::LIVETAIL_CAPACITY) + .env("P_LIVETAIL_CAPACITY") + .value_name("NUMBER") + .default_value("1000") + .required(false) + .value_parser(value_parser!(usize)) + .help("Number of rows in livetail channel"), + ) + .arg( + 
Arg::new(Self::QUERY_MEM_POOL_SIZE)
+                    .long(Self::QUERY_MEM_POOL_SIZE)
+                    .env("P_QUERY_MEMORY_LIMIT")
+                    .value_name("Gib")
+                    .required(false)
+                    .value_parser(value_parser!(u8))
+                    .help("Set a fixed memory limit for query"),
+            )
+            .arg(
+                Arg::new(Self::ROW_GROUP_SIZE)
+                    .long(Self::ROW_GROUP_SIZE)
+                    .env("P_PARQUET_ROW_GROUP_SIZE")
+                    .value_name("NUMBER")
+                    .required(false)
+                    .default_value("16384")
+                    .value_parser(value_parser!(usize))
+                    .help("Number of rows in a row group"),
+            ).arg(
+                Arg::new(Self::MODE)
+                    .long(Self::MODE)
+                    .env("P_MODE")
+                    .value_name("STRING")
+                    .required(false)
+                    .default_value("all")
+                    .value_parser([
+                        "query",
+                        "ingest",
+                        "all"])
+                    .help("Mode of operation"),
+            )
+            .arg(
+                Arg::new(Self::PARQUET_COMPRESSION_ALGO)
+                    .long(Self::PARQUET_COMPRESSION_ALGO)
+                    .env("P_PARQUET_COMPRESSION_ALGO")
+                    .value_name("[UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD]")
+                    .required(false)
+                    .default_value("lz4")
+                    .value_parser([
+                        "uncompressed",
+                        "snappy",
+                        "gzip",
+                        "lzo",
+                        "brotli",
+                        "lz4",
+                        "zstd"])
+                    .help("Parquet compression algorithm"),
+            ).group(
+                ArgGroup::new("oidc")
+                    .args([Self::OPENID_CLIENT_ID, Self::OPENID_CLIENT_SECRET, Self::OPENID_ISSUER])
+                    .requires_all([Self::OPENID_CLIENT_ID, Self::OPENID_CLIENT_SECRET, Self::OPENID_ISSUER])
+                    .multiple(true)
+            )
+    }
+}
+
+impl FromArgMatches for Cli {
+    fn from_arg_matches(m: &clap::ArgMatches) -> Result<Self, clap::Error> {
+        let mut s: Self = Self::default();
+        s.update_from_arg_matches(m)?;
+        Ok(s)
+    }
+
+    fn update_from_arg_matches(&mut self, m: &clap::ArgMatches) -> Result<(), clap::Error> {
+        self.local_cache_path = m.get_one::<PathBuf>(Self::CACHE).cloned();
+        self.tls_cert_path = m.get_one::<PathBuf>(Self::TLS_CERT).cloned();
+        self.tls_key_path = m.get_one::<PathBuf>(Self::TLS_KEY).cloned();
+        self.domain_address = m.get_one::<Url>(Self::DOMAIN_URI).cloned();
+
+        self.address = m
+            .get_one::<String>(Self::ADDRESS)
+            .cloned()
+            .expect("default value for address");
+        self.local_staging_path = m
+            .get_one::<PathBuf>(Self::STAGING)
+            .cloned()
+            .expect("default value for staging");
+        self.local_cache_size = m
+            .get_one::<u64>(Self::CACHE_SIZE)
+            .cloned()
+            .expect("default value for cache size");
+        self.username = m
+            .get_one::<String>(Self::USERNAME)
+            .cloned()
+            .expect("default for username");
+        self.password = m
+            .get_one::<String>(Self::PASSWORD)
+            .cloned()
+            .expect("default for password");
+        self.check_update = m
+            .get_one::<bool>(Self::CHECK_UPDATE)
+            .cloned()
+            .expect("default for check update");
+        self.send_analytics = m
+            .get_one::<bool>(Self::SEND_ANALYTICS)
+            .cloned()
+            .expect("default for send analytics");
+        self.open_ai_key = m.get_one::<String>(Self::OPEN_AI_KEY).cloned();
+        self.grpc_port = m
+            .get_one::<u16>(Self::GRPC_PORT)
+            .cloned()
+            .expect("default for livetail port");
+        self.livetail_channel_capacity = m
+            .get_one::<usize>(Self::LIVETAIL_CAPACITY)
+            .cloned()
+            .expect("default for livetail capacity");
+        // converts Gib to bytes before assigning
+        self.query_memory_pool_size = m
+            .get_one::<u8>(Self::QUERY_MEM_POOL_SIZE)
+            .cloned()
+            .map(|gib| gib as usize * 1024usize.pow(3));
+        self.row_group_size = m
+            .get_one::<usize>(Self::ROW_GROUP_SIZE)
+            .cloned()
+            .expect("default for row_group size");
+        self.parquet_compression = match m
+            .get_one::<String>(Self::PARQUET_COMPRESSION_ALGO)
+            .expect("default for compression algo")
+            .as_str()
+        {
+            "uncompressed" => Compression::UNCOMPRESSED,
+            "snappy" => Compression::SNAPPY,
+            "gzip" => Compression::GZIP,
+            "lzo" => Compression::LZO,
+            "brotli" => Compression::BROTLI,
+            "lz4" => Compression::LZ4,
+            "zstd" => Compression::ZSTD,
+            _ =>
unreachable!(), + }; + + let openid_client_id = m.get_one::(Self::OPENID_CLIENT_ID).cloned(); + let openid_client_secret = m.get_one::(Self::OPENID_CLIENT_SECRET).cloned(); + let openid_issuer = m.get_one::(Self::OPENID_ISSUER).cloned(); + + self.openid = match (openid_client_id, openid_client_secret, openid_issuer) { + (Some(id), Some(secret), Some(issuer)) => { + let origin = if let Some(url) = self.domain_address.clone() { + oidc::Origin::Production(url) + } else { + oidc::Origin::Local { + socket_addr: self.address.clone(), + https: self.tls_cert_path.is_some() && self.tls_key_path.is_some(), + } + }; + Some(OpenidConfig { + id, + secret, + issuer, + origin, + }) + } + _ => None, + }; + + self.mode = match m + .get_one::(Self::MODE) + .expect("Mode not set") + .as_str() + { + "query" => Mode::Query, + "ingest" => Mode::Ingest, + "all" => Mode::All, + _ => unreachable!(), + }; + + Ok(()) + } +} diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index 771eaac7..959eb2ed 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -16,360 +16,112 @@ * */ -use std::fs::File; -use std::io::BufReader; -use std::sync::Arc; - use actix_cors::Cors; -use actix_web::{ - web::{self, resource}, - App, HttpServer, -}; -use actix_web_prometheus::PrometheusMetrics; -use actix_web_static_files::ResourceFiles; -use log::info; -use openid::Discovered; -use rustls::{Certificate, PrivateKey, ServerConfig}; -use rustls_pemfile::{certs, pkcs8_private_keys}; - -use crate::option::CONFIG; -use crate::rbac::role::Action; +use arrow_schema::Schema; +use serde_json::Value; -use self::middleware::{DisAllowRootUser, ModeFilter, RouteExt}; +use self::{cluster::get_ingester_info, query::Query}; -mod about; -mod health_check; -mod ingest; +pub(crate) mod about; +pub mod cluster; +pub(crate) mod health_check; +pub(crate) mod ingest; mod kinesis; -mod llm; -mod logstream; -mod middleware; -mod oidc; +pub(crate) mod llm; +pub(crate) mod logstream; +pub(crate) mod middleware; +pub mod modal; +pub(crate) mod oidc; mod otel; -mod query; -mod rbac; -mod role; - -include!(concat!(env!("OUT_DIR"), "/generated.rs")); - -const MAX_EVENT_PAYLOAD_SIZE: usize = 10485760; -const API_BASE_PATH: &str = "/api"; -const API_VERSION: &str = "v1"; - -pub async fn run_http( - prometheus: PrometheusMetrics, - oidc_client: Option, -) -> anyhow::Result<()> { - let oidc_client = match oidc_client { - Some(config) => { - let client = config - .connect(&format!("{API_BASE_PATH}/{API_VERSION}/o/code")) - .await?; - Some(Arc::new(client)) - } - None => None, - }; +pub(crate) mod query; +pub(crate) mod rbac; +pub(crate) mod role; - let create_app = move || { - App::new() - .wrap(prometheus.clone()) - .configure(|cfg| configure_routes(cfg, oidc_client.clone())) - .wrap(actix_web::middleware::Logger::default()) - .wrap(actix_web::middleware::Compress::default()) - .wrap(cross_origin_config()) - .wrap(ModeFilter) - }; - - let ssl_acceptor = match ( - &CONFIG.parseable.tls_cert_path, - &CONFIG.parseable.tls_key_path, - ) { - (Some(cert), Some(key)) => { - // init server config builder with safe defaults - let config = ServerConfig::builder() - .with_safe_defaults() - .with_no_client_auth(); - - // load TLS key/cert files - let cert_file = &mut BufReader::new(File::open(cert)?); - let key_file = &mut BufReader::new(File::open(key)?); - - // convert files to key/cert objects - let cert_chain = certs(cert_file)?.into_iter().map(Certificate).collect(); - - let mut keys: Vec = pkcs8_private_keys(key_file)? 
- .into_iter() - .map(PrivateKey) - .collect(); - - // exit if no keys could be parsed - if keys.is_empty() { - anyhow::bail!("Could not locate PKCS 8 private keys."); - } +pub const MAX_EVENT_PAYLOAD_SIZE: usize = 10485760; +pub const API_BASE_PATH: &str = "api"; +pub const API_VERSION: &str = "v1"; - let server_config = config.with_single_cert(cert_chain, keys.remove(0))?; +pub(crate) fn base_path() -> String { + format!("/{API_BASE_PATH}/{API_VERSION}") +} - Some(server_config) - } - (_, _) => None, - }; +pub fn metrics_path() -> String { + format!("{}/metrics", base_path()) +} - // concurrent workers equal to number of cores on the cpu - let http_server = HttpServer::new(create_app).workers(num_cpus::get()); - if let Some(config) = ssl_acceptor { - http_server - .bind_rustls(&CONFIG.parseable.address, config)? - .run() - .await?; +pub(crate) fn cross_origin_config() -> Cors { + if cfg!(feature = "debug") { + Cors::permissive().block_on_origin_mismatch(false) } else { - http_server.bind(&CONFIG.parseable.address)?.run().await?; + Cors::default().block_on_origin_mismatch(false) } - - Ok(()) } -pub fn configure_routes( - cfg: &mut web::ServiceConfig, - oidc_client: Option>>, -) { - let generated = generate(); - - //log stream API - let logstream_api = web::scope("/{logstream}") - .service( - web::resource("") - // PUT "/logstream/{logstream}" ==> Create log stream - .route( - web::put() - .to(logstream::put_stream) - .authorize_for_stream(Action::CreateStream), - ) - // POST "/logstream/{logstream}" ==> Post logs to given log stream - .route( - web::post() - .to(ingest::post_event) - .authorize_for_stream(Action::Ingest), - ) - // DELETE "/logstream/{logstream}" ==> Delete log stream - .route( - web::delete() - .to(logstream::delete) - .authorize_for_stream(Action::DeleteStream), - ) - .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), - ) - .service( - // GET "/logstream/{logstream}/info" ==> Get info for given log stream - web::resource("/info").route( - web::get() - .to(logstream::get_stream_info) - .authorize_for_stream(Action::GetStream), - ), - ) - .service( - web::resource("/alert") - // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream - .route( - web::put() - .to(logstream::put_alert) - .authorize_for_stream(Action::PutAlert), - ) - // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream - .route( - web::get() - .to(logstream::get_alert) - .authorize_for_stream(Action::GetAlert), - ), - ) - .service( - // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream - web::resource("/schema").route( - web::get() - .to(logstream::schema) - .authorize_for_stream(Action::GetSchema), - ), - ) - .service( - // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream - web::resource("/stats").route( - web::get() - .to(logstream::get_stats) - .authorize_for_stream(Action::GetStats), - ), - ) - .service( - web::resource("/retention") - // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_retention) - .authorize_for_stream(Action::PutRetention), - ) - // GET "/logstream/{logstream}/retention" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_retention) - .authorize_for_stream(Action::GetRetention), - ), - ) - .service( - web::resource("/cache") - // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_enable_cache) - 
.authorize_for_stream(Action::PutCacheEnabled), - ) - // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_cache_enabled) - .authorize_for_stream(Action::GetCacheEnabled), - ), - ); - - // User API - let user_api = web::scope("/user") - .service( - web::resource("") - // GET /user => List all users - .route(web::get().to(rbac::list_users).authorize(Action::ListUser)), - ) - .service( - web::resource("/{username}") - // PUT /user/{username} => Create a new user - .route(web::post().to(rbac::post_user).authorize(Action::PutUser)) - // DELETE /user/{username} => Delete a user - .route( - web::delete() - .to(rbac::delete_user) - .authorize(Action::DeleteUser), - ) - .wrap(DisAllowRootUser), - ) - .service( - web::resource("/{username}/role") - // PUT /user/{username}/roles => Put roles for user - .route( - web::put() - .to(rbac::put_role) - .authorize(Action::PutUserRoles) - .wrap(DisAllowRootUser), - ) - .route( - web::get() - .to(rbac::get_role) - .authorize_for_user(Action::GetUserRoles), - ), - ) - .service( - web::resource("/{username}/generate-new-password") - // POST /user/{username}/generate-new-password => reset password for this user - .route( - web::post() - .to(rbac::post_gen_password) - .authorize(Action::PutUser) - .wrap(DisAllowRootUser), - ), - ); +pub fn base_path_without_preceding_slash() -> String { + format!("{API_BASE_PATH}/{API_VERSION}") +} - let llm_query_api = web::scope("/llm").service( - web::resource("").route( - web::post() - .to(llm::make_llm_request) - .authorize(Action::QueryLLM), - ), - ); +pub async fn fetch_schema(stream_name: &str) -> anyhow::Result { + let mut res = vec![]; + let ima = get_ingester_info().await.unwrap(); - let role_api = web::scope("/role") - // GET Role List - .service(resource("").route(web::get().to(role::list).authorize(Action::ListRole))) - .service( - // PUT and GET Default Role - resource("/default") - .route(web::put().to(role::put_default).authorize(Action::PutRole)) - .route(web::get().to(role::get_default).authorize(Action::GetRole)), - ) - .service( - // PUT, GET, DELETE Roles - resource("/{name}") - .route(web::put().to(role::put).authorize(Action::PutRole)) - .route(web::delete().to(role::delete).authorize(Action::DeleteRole)) - .route(web::get().to(role::get).authorize(Action::GetRole)), + for im in ima { + let uri = format!( + "{}{}/logstream/{}/schema", + im.domain_name, + base_path_without_preceding_slash(), + stream_name ); + let reqw = reqwest::Client::new() + .get(uri) + .header(http::header::AUTHORIZATION, im.token.clone()) + .header(http::header::CONTENT_TYPE, "application/json") + .send() + .await?; - let mut oauth_api = web::scope("/o") - .service(resource("/login").route(web::get().to(oidc::login))) - .service(resource("/logout").route(web::get().to(oidc::logout))) - .service(resource("/code").route(web::get().to(oidc::reply_login))); - - if let Some(client) = oidc_client { - info!("Registered oidc client"); - oauth_api = oauth_api.app_data(web::Data::from(client)) + if reqw.status().is_success() { + let v = serde_json::from_slice(&reqw.bytes().await?)?; + res.push(v); + } } - // Deny request if username is same as the env variable P_USERNAME. 
- cfg.service( - // Base path "{url}/api/v1" - web::scope(&base_path()) - // .wrap(PathFilter) - // POST "/query" ==> Get results of the SQL query passed in request body - .service( - web::resource("/query") - .route(web::post().to(query::query).authorize(Action::Query)), - ) - // POST "/ingest" ==> Post logs to given log stream based on header - .service( - web::resource("/ingest") - .route( - web::post() - .to(ingest::ingest) - .authorize_for_stream(Action::Ingest), - ) - .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), - ) - // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command - .service(web::resource("/liveness").route(web::get().to(health_check::liveness))) - // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes - .service(web::resource("/readiness").route(web::get().to(health_check::readiness))) - // GET "/about" ==> Returns information about instance - .service( - web::resource("/about") - .route(web::get().to(about::about).authorize(Action::GetAbout)), - ) - .service( - web::scope("/logstream") - .service( - // GET "/logstream" ==> Get list of all Log Streams on the server - web::resource("") - .route(web::get().to(logstream::list).authorize(Action::ListStream)), - ) - .service( - // logstream API - logstream_api, - ), - ) - .service(user_api) - .service(llm_query_api) - .service(oauth_api) - .service(role_api), - ) - // GET "/" ==> Serve the static frontend directory - .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); -} - -fn base_path() -> String { - format!("{API_BASE_PATH}/{API_VERSION}") -} + let new_schema = Schema::try_merge(res)?; -pub fn metrics_path() -> String { - format!("{}/metrics", base_path()) + Ok(new_schema) } -fn cross_origin_config() -> Cors { - if cfg!(feature = "debug") { - Cors::permissive().block_on_origin_mismatch(false) - } else { - Cors::default().block_on_origin_mismatch(false) +pub async fn send_query_request_to_ingester(query: &Query) -> anyhow::Result> { + // send the query request to the ingester + let mut res = vec![]; + let ima = get_ingester_info().await.unwrap(); + + for im in ima.iter() { + let uri = format!( + "{}{}/{}", + im.domain_name, + base_path_without_preceding_slash(), + "query" + ); + let reqw = reqwest::Client::new() + .post(uri) + .json(query) + .header(http::header::AUTHORIZATION, im.token.clone()) + .header(http::header::CONTENT_TYPE, "application/json") + .send() + .await; + + if let Ok(reqw) = reqw { + // do i need to do a success check?? 
+ let v: Value = serde_json::from_slice(&reqw.bytes().await?)?; + // the value returned is an array of json objects + // so it needs to be flattened + if let Some(arr) = v.as_array() { + for val in arr { + res.push(val.to_owned()) + } + } + } } + + Ok(res) } diff --git a/server/src/handlers/http/about.rs b/server/src/handlers/http/about.rs index 3f42ccc4..347cd0d3 100644 --- a/server/src/handlers/http/about.rs +++ b/server/src/handlers/http/about.rs @@ -20,9 +20,34 @@ use actix_web::web::Json; use human_size::SpecificSize; use serde_json::json; -use crate::{about, option::CONFIG, storage::StorageMetadata, utils::update}; +use crate::{ + about, + option::{Mode, CONFIG}, + storage::StorageMetadata, + utils::update, +}; use std::path::PathBuf; +/// { +/// "version": current_version, +/// "uiVersion": ui_version, +/// "commit": commit, +/// "deploymentId": deployment_id, +/// "updateAvailable": update_available, +/// "latestVersion": latest_release, +/// "llmActive": is_llm_active, +/// "llmProvider": llm_provider, +/// "oidcActive": is_oidc_active, +/// "license": "AGPL-3.0-only", +/// "mode": mode, +/// "staging": staging, +/// "cache": cache_details, +/// "grpcPort": grpc_port, +/// "store": { +/// "type": CONFIG.get_storage_mode_string(), +/// "path": store_endpoint +/// } +/// } pub async fn about() -> Json { let meta = StorageMetadata::global(); @@ -40,11 +65,15 @@ pub async fn about() -> Json { let current_version = format!("v{}", current_release.released_version); let commit = current_release.commit_hash; let deployment_id = meta.deployment_id.to_string(); - let mode = CONFIG.mode_string(); - let staging = CONFIG.staging_dir(); + let mode = CONFIG.parseable.mode.to_str(); + let staging = if CONFIG.parseable.mode == Mode::Query { + "".to_string() + } else { + CONFIG.staging_dir().display().to_string() + }; let grpc_port = CONFIG.parseable.grpc_port; - let store = CONFIG.storage().get_endpoint(); + let store_endpoint = CONFIG.storage().get_endpoint(); let is_llm_active = &CONFIG.parseable.open_ai_key.is_some(); let llm_provider = is_llm_active.then_some("OpenAI"); let is_oidc_active = CONFIG.parseable.openid.is_some(); @@ -80,6 +109,9 @@ pub async fn about() -> Json { "staging": staging, "cache": cache_details, "grpcPort": grpc_port, - "store": store + "store": { + "type": CONFIG.get_storage_mode_string(), + "path": store_endpoint + } })) } diff --git a/server/src/handlers/http/cluster/mod.rs b/server/src/handlers/http/cluster/mod.rs new file mode 100644 index 00000000..a5a8c864 --- /dev/null +++ b/server/src/handlers/http/cluster/mod.rs @@ -0,0 +1,403 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + */ + +pub mod utils; + +use crate::handlers::http::cluster::utils::{ + check_liveness, ingester_meta_filename, to_url_string, +}; +use crate::handlers::http::ingest::PostError; +use crate::handlers::http::logstream::error::StreamError; +use crate::handlers::{STATIC_SCHEMA_FLAG, TIME_PARTITION_KEY}; +use crate::option::CONFIG; + +use crate::metrics::prom_utils::Metrics; +use crate::storage::ObjectStorageError; +use crate::storage::PARSEABLE_ROOT_DIRECTORY; +use actix_web::http::header; +use actix_web::{HttpRequest, Responder}; +use bytes::Bytes; +use http::StatusCode; +use itertools::Itertools; +use relative_path::RelativePathBuf; +use serde_json::Value as JsonValue; +use url::Url; + +type IngesterMetadataArr = Vec; + +use super::base_path_without_preceding_slash; + +use super::modal::IngesterMetadata; + +// forward the request to all ingesters to keep them in sync +pub async fn sync_streams_with_ingesters( + stream_name: &str, + time_partition: &str, + static_schema: &str, + schema: Bytes, +) -> Result<(), StreamError> { + let ingester_infos = get_ingester_info().await.map_err(|err| { + log::error!("Fatal: failed to get ingester info: {:?}", err); + StreamError::Anyhow(err) + })?; + + let mut errored = false; + for ingester in ingester_infos.iter() { + let url = format!( + "{}{}/logstream/{}", + ingester.domain_name, + base_path_without_preceding_slash(), + stream_name + ); + + match send_stream_sync_request( + &url, + ingester.clone(), + time_partition, + static_schema, + schema.clone(), + ) + .await + { + Ok(_) => continue, + Err(_) => { + errored = true; + break; + } + } + } + + if errored { + for ingester in ingester_infos { + let url = format!( + "{}{}/logstream/{}", + ingester.domain_name, + base_path_without_preceding_slash(), + stream_name + ); + + // roll back the stream creation + send_stream_rollback_request(&url, ingester.clone()).await?; + } + + // this might be a bit too much + return Err(StreamError::Custom { + msg: "Failed to sync stream with ingesters".to_string(), + status: StatusCode::INTERNAL_SERVER_ERROR, + }); + } + + Ok(()) +} + +/// get the cumulative stats from all ingesters +pub async fn fetch_stats_from_ingesters( + stream_name: &str, +) -> Result, StreamError> { + let mut stats = Vec::new(); + + let ingester_infos = get_ingester_info().await.map_err(|err| { + log::error!("Fatal: failed to get ingester info: {:?}", err); + StreamError::Anyhow(err) + })?; + + for ingester in ingester_infos { + let url = format!( + "{}{}/logstream/{}/stats", + ingester.domain_name, + base_path_without_preceding_slash(), + stream_name + ); + + match utils::send_stats_request(&url, ingester.clone()).await { + Ok(Some(res)) => { + match serde_json::from_str::(&res.text().await.unwrap()) { + Ok(stat) => stats.push(stat), + Err(err) => { + log::error!( + "Could not parse stats from ingester: {}\n Error: {:?}", + ingester.domain_name, + err + ); + continue; + } + } + } + Ok(None) => { + log::error!("Ingester at {} is not reachable", &ingester.domain_name); + continue; + } + Err(err) => { + log::error!( + "Fatal: failed to fetch stats from ingester: {}\n Error: {:?}", + ingester.domain_name, + err + ); + return Err(err); + } + } + } + + Ok(stats) +} + +async fn send_stream_sync_request( + url: &str, + ingester: IngesterMetadata, + time_partition: &str, + static_schema: &str, + schema: Bytes, +) -> Result<(), StreamError> { + if !utils::check_liveness(&ingester.domain_name).await { + return Ok(()); + } + + let client = reqwest::Client::new(); + let res = client + .put(url) + 
.header(header::CONTENT_TYPE, "application/json") + .header(TIME_PARTITION_KEY, time_partition) + .header(STATIC_SCHEMA_FLAG, static_schema) + .header(header::AUTHORIZATION, ingester.token) + .body(schema) + .send() + .await + .map_err(|err| { + log::error!( + "Fatal: failed to forward create stream request to ingester: {}\n Error: {:?}", + ingester.domain_name, + err + ); + StreamError::Network(err) + })?; + + if !res.status().is_success() { + log::error!( + "failed to forward create stream request to ingester: {}\nResponse Returned: {:?}", + ingester.domain_name, + res + ); + return Err(StreamError::Network(res.error_for_status().unwrap_err())); + } + + Ok(()) +} + +/// send a rollback request to all ingesters +async fn send_stream_rollback_request( + url: &str, + ingester: IngesterMetadata, +) -> Result<(), StreamError> { + if !utils::check_liveness(&ingester.domain_name).await { + return Ok(()); + } + + let client = reqwest::Client::new(); + let resp = client + .delete(url) + .header(header::CONTENT_TYPE, "application/json") + .header(header::AUTHORIZATION, ingester.token) + .send() + .await + .map_err(|err| { + // log the error and return a custom error + log::error!( + "Fatal: failed to rollback stream creation: {}\n Error: {:?}", + ingester.domain_name, + err + ); + StreamError::Network(err) + })?; + + // if the response is not successful, log the error and return a custom error + // this could be a bit too much, but we need to be sure it covers all cases + if !resp.status().is_success() { + log::error!( + "failed to rollback stream creation: {}\nResponse Returned: {:?}", + ingester.domain_name, + resp + ); + return Err(StreamError::Custom { + msg: format!( + "failed to rollback stream creation: {}\nResponse Returned: {:?}", + ingester.domain_name, + resp.text().await.unwrap_or_default() + ), + status: StatusCode::INTERNAL_SERVER_ERROR, + }); + } + + Ok(()) +} + +pub async fn get_cluster_info() -> Result { + let ingester_infos = get_ingester_info().await.map_err(|err| { + log::error!("Fatal: failed to get ingester info: {:?}", err); + StreamError::Anyhow(err) + })?; + + let mut infos = vec![]; + + for ingester in ingester_infos { + let uri = Url::parse(&format!( + "{}{}/about", + ingester.domain_name, + base_path_without_preceding_slash() + )) + .expect("should always be a valid url"); + + let resp = reqwest::Client::new() + .get(uri) + .header(header::AUTHORIZATION, ingester.token.clone()) + .header(header::CONTENT_TYPE, "application/json") + .send() + .await; + + let (reachable, staging_path, error, status) = if let Ok(resp) = resp { + let status = Some(resp.status().to_string()); + + let resp_data = resp.bytes().await.map_err(|err| { + log::error!("Fatal: failed to parse ingester info to bytes: {:?}", err); + StreamError::Network(err) + })?; + + let sp = serde_json::from_slice::(&resp_data) + .map_err(|err| { + log::error!("Fatal: failed to parse ingester info: {:?}", err); + StreamError::SerdeError(err) + })? 
+ .get("staging") + .unwrap() + .as_str() + .unwrap() + .to_string(); + + (true, sp, None, status) + } else { + ( + false, + "".to_owned(), + resp.as_ref().err().map(|e| e.to_string()), + resp.unwrap_err().status().map(|s| s.to_string()), + ) + }; + + infos.push(utils::ClusterInfo::new( + &ingester.domain_name, + reachable, + staging_path, + CONFIG.storage().get_endpoint(), + error, + status, + )); + } + + Ok(actix_web::HttpResponse::Ok().json(infos)) +} + +pub async fn get_cluster_metrics() -> Result { + let ingester_metadata = get_ingester_info().await.map_err(|err| { + log::error!("Fatal: failed to get ingester info: {:?}", err); + PostError::Invalid(err) + })?; + + let mut dresses = vec![]; + + for ingester in ingester_metadata { + let uri = Url::parse(&format!( + "{}{}/metrics", + &ingester.domain_name, + base_path_without_preceding_slash() + )) + .unwrap(); + + let res = reqwest::Client::new() + .get(uri) + .header(header::CONTENT_TYPE, "application/json") + .send() + .await; + + if let Ok(res) = res { + let text = res.text().await.map_err(PostError::NetworkError)?; + let lines: Vec> = + text.lines().map(|line| Ok(line.to_owned())).collect_vec(); + + let sample = prometheus_parse::Scrape::parse(lines.into_iter()) + .map_err(|err| PostError::CustomError(err.to_string()))? + .samples; + + dresses.push(Metrics::from_prometheus_samples( + sample, + ingester.domain_name, + )); + } else { + log::warn!( + "Failed to fetch metrics from ingester: {}\n", + ingester.domain_name, + ); + } + } + + Ok(actix_web::HttpResponse::Ok().json(dresses)) +} + +// update the .query.json file and return the new IngesterMetadataArr +pub async fn get_ingester_info() -> anyhow::Result { + let store = CONFIG.storage().get_object_store(); + + let root_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); + let arr = store + .get_objects(Some(&root_path)) + .await? + .iter() + // this unwrap will most definateley shoot me in the foot later + .map(|x| serde_json::from_slice::(x).unwrap_or_default()) + .collect_vec(); + + Ok(arr) +} + +pub async fn remove_ingester(req: HttpRequest) -> Result { + let domain_name: String = req.match_info().get("ingester").unwrap().parse().unwrap(); + let domain_name = to_url_string(domain_name); + + if check_liveness(&domain_name).await { + return Err(PostError::Invalid(anyhow::anyhow!("Node Online"))); + } + + let ingester_meta_filename = ingester_meta_filename(&domain_name); + let object_store = CONFIG.storage().get_object_store(); + let msg = match object_store + .try_delete_ingester_meta(ingester_meta_filename) + .await + { + Ok(_) => { + format!("Node {} Removed Successfully", domain_name) + } + Err(err) => { + if matches!(err, ObjectStorageError::IoError(_)) { + format!("Node {} Not Found", domain_name) + } else { + format!("Error Removing Node {}\n Reason: {}", domain_name, err) + } + } + }; + + log::info!("{}", &msg); + Ok((msg, StatusCode::OK)) +} diff --git a/server/src/handlers/http/cluster/utils.rs b/server/src/handlers/http/cluster/utils.rs new file mode 100644 index 00000000..1dea1c9e --- /dev/null +++ b/server/src/handlers/http/cluster/utils.rs @@ -0,0 +1,265 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use crate::handlers::http::{logstream::error::StreamError, modal::IngesterMetadata}; +use actix_web::http::header; +use chrono::{DateTime, Utc}; +use http::StatusCode; +use itertools::Itertools; +use reqwest::Response; +use serde::{Deserialize, Serialize}; +use url::Url; + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct QueriedStats { + pub stream: String, + pub time: DateTime, + pub ingestion: IngestionStats, + pub storage: StorageStats, +} + +impl QueriedStats { + pub fn new( + stream: &str, + time: DateTime, + ingestion: IngestionStats, + storage: StorageStats, + ) -> Self { + Self { + stream: stream.to_string(), + time, + ingestion, + storage, + } + } +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct ClusterInfo { + domain_name: String, + reachable: bool, + staging_path: String, + storage_path: String, + error: Option, // error message if the ingester is not reachable + status: Option, // status message if the ingester is reachable +} + +impl ClusterInfo { + pub fn new( + domain_name: &str, + reachable: bool, + staging_path: String, + storage_path: String, + error: Option, + status: Option, + ) -> Self { + Self { + domain_name: domain_name.to_string(), + reachable, + staging_path, + storage_path, + error, + status, + } + } +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct IngestionStats { + pub count: u64, + pub size: String, + pub format: String, +} + +impl IngestionStats { + pub fn new(count: u64, size: String, format: &str) -> Self { + Self { + count, + size, + format: format.to_string(), + } + } +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct StorageStats { + size: String, + format: String, +} + +impl StorageStats { + pub fn new(size: String, format: &str) -> Self { + Self { + size, + format: format.to_string(), + } + } +} + +pub fn merge_quried_stats(stats: Vec) -> QueriedStats { + // get the actual creation time + // let min_creation_time = stats + // .iter() + // .map(|x| x.creation_time.parse::>().unwrap()) + // .min() + // .unwrap(); // should never be None + + // get the stream name + let stream_name = stats[0].stream.clone(); + + // get the first event at + // let min_first_event_at = stats + // .iter() + // .map(|x| match x.first_event_at.as_ref() { + // we can directly unwrap here because + // we are sure that the first_event_at is a valid date + // Some(fea) => fea.parse::>().unwrap(), + // None => Utc::now(), // current time ie the max time + // }) + // .min() + // .unwrap(); // should never be None + + let min_time = stats.iter().map(|x| x.time).min().unwrap_or_else(Utc::now); + + let cumulative_ingestion = + stats + .iter() + .map(|x| &x.ingestion) + .fold(IngestionStats::default(), |acc, x| IngestionStats { + count: acc.count + x.count, + size: format!( + "{} Bytes", + acc.size.split(' ').collect_vec()[0] + .parse::() + .unwrap_or_default() + + x.size.split(' ').collect_vec()[0] + .parse::() + .unwrap_or_default() + ), + format: x.format.clone(), + }); + + let cumulative_storage = + stats + .iter() + .map(|x| &x.storage) + .fold(StorageStats::default(), |acc, x| StorageStats { + size: format!( + "{} Bytes", + 
acc.size.split(' ').collect_vec()[0] + .parse::() + .unwrap_or_default() + + x.size.split(' ').collect_vec()[0] + .parse::() + .unwrap_or_default() + ), + format: x.format.clone(), + }); + + QueriedStats::new( + &stream_name, + min_time, + cumulative_ingestion, + cumulative_storage, + ) +} + +pub async fn check_liveness(domain_name: &str) -> bool { + let uri = match Url::parse(&format!("{}liveness", domain_name)) { + Ok(uri) => uri, + Err(err) => { + log::error!("Node Indentifier Failed To Parse: {}", err); + return false; + } + }; + + let reqw = reqwest::Client::new() + .get(uri) + .header(header::CONTENT_TYPE, "application/json") + .send() + .await; + + reqw.is_ok() +} + +/// send a request to the ingester to fetch its stats +pub async fn send_stats_request( + url: &str, + ingester: IngesterMetadata, +) -> Result, StreamError> { + if !check_liveness(&ingester.domain_name).await { + return Ok(None); + } + + let client = reqwest::Client::new(); + let res = client + .get(url) + .header(header::CONTENT_TYPE, "application/json") + .header(header::AUTHORIZATION, ingester.token) + .send() + .await + .map_err(|err| { + log::error!( + "Fatal: failed to fetch stats from ingester: {}\n Error: {:?}", + ingester.domain_name, + err + ); + + StreamError::Network(err) + })?; + + if !res.status().is_success() { + log::error!( + "failed to forward create stream request to ingester: {}\nResponse Returned: {:?}", + ingester.domain_name, + res + ); + return Err(StreamError::Custom { + msg: format!( + "failed to forward create stream request to ingester: {}\nResponse Returned: {:?}", + ingester.domain_name, + res.text().await.unwrap_or_default() + ), + status: StatusCode::INTERNAL_SERVER_ERROR, + }); + } + + Ok(Some(res)) +} + +/// domain_name needs to be http://ip:port +pub fn ingester_meta_filename(domain_name: &str) -> String { + if domain_name.starts_with("http://") | domain_name.starts_with("https://") { + let url = Url::parse(domain_name).unwrap(); + return format!( + "ingester.{}.{}.json", + url.host_str().unwrap(), + url.port().unwrap() + ); + } + format!("ingester.{}.json", domain_name) +} + +pub fn to_url_string(str: String) -> String { + // if the str is already a url i am guessing that it will end in '/' + if str.starts_with("http://") || str.starts_with("https://") { + return str; + } + + format!("http://{}/", str) +} diff --git a/server/src/handlers/http/ingest.rs b/server/src/handlers/http/ingest.rs index ae147276..59b0c9f4 100644 --- a/server/src/handlers/http/ingest.rs +++ b/server/src/handlers/http/ingest.rs @@ -28,6 +28,8 @@ use crate::handlers::{ STREAM_NAME_HEADER_KEY, }; use crate::metadata::STREAM_INFO; +use crate::option::{Mode, CONFIG}; +use crate::storage::ObjectStorageError; use crate::utils::header_parsing::{collect_labelled_headers, ParseHeaderError}; use actix_web::{http::header::ContentType, HttpRequest, HttpResponse}; use arrow_schema::{Field, Schema}; @@ -152,8 +154,23 @@ pub async fn create_stream_if_not_exists(stream_name: &str) -> Result<(), PostEr if STREAM_INFO.stream_exists(stream_name) { return Ok(()); } - super::logstream::create_stream(stream_name.to_string(), "", "", Arc::new(Schema::empty())) - .await?; + match &CONFIG.parseable.mode { + Mode::All | Mode::Query => { + super::logstream::create_stream( + stream_name.to_string(), + "", + "", + Arc::new(Schema::empty()), + ) + .await?; + } + Mode::Ingest => { + return Err(PostError::Invalid(anyhow::anyhow!( + "Stream {} not found. 
Has it been created?", + stream_name + ))); + } + } Ok(()) } @@ -171,6 +188,12 @@ pub enum PostError { Invalid(#[from] anyhow::Error), #[error("{0}")] CreateStream(#[from] CreateStreamError), + #[error("Error: {0}")] + CustomError(String), + #[error("Error: {0}")] + NetworkError(#[from] reqwest::Error), + #[error("ObjectStorageError: {0}")] + ObjectStorageError(#[from] ObjectStorageError), } impl actix_web::ResponseError for PostError { @@ -185,6 +208,9 @@ impl actix_web::ResponseError for PostError { } PostError::CreateStream(_) => StatusCode::INTERNAL_SERVER_ERROR, PostError::StreamNotFound(_) => StatusCode::NOT_FOUND, + PostError::CustomError(_) => StatusCode::INTERNAL_SERVER_ERROR, + PostError::NetworkError(_) => StatusCode::INTERNAL_SERVER_ERROR, + PostError::ObjectStorageError(_) => StatusCode::INTERNAL_SERVER_ERROR, } } diff --git a/server/src/handlers/http/logstream.rs b/server/src/handlers/http/logstream.rs index f93fc44c..217e3741 100644 --- a/server/src/handlers/http/logstream.rs +++ b/server/src/handlers/http/logstream.rs @@ -20,11 +20,14 @@ use self::error::{CreateStreamError, StreamError}; use crate::alerts::Alerts; use crate::handlers::{STATIC_SCHEMA_FLAG, TIME_PARTITION_KEY}; use crate::metadata::STREAM_INFO; -use crate::option::CONFIG; +use crate::option::{Mode, CONFIG}; use crate::static_schema::{convert_static_schema_to_arrow_schema, StaticSchema}; use crate::storage::{retention::Retention, LogStream, StorageDir, StreamInfo}; use crate::{catalog, event, stats}; use crate::{metadata, validator}; + +use super::cluster::utils::{merge_quried_stats, IngestionStats, QueriedStats, StorageStats}; +use super::cluster::{fetch_stats_from_ingesters, sync_streams_with_ingesters}; use actix_web::http::StatusCode; use actix_web::{web, HttpRequest, Responder}; use arrow_schema::{Field, Schema}; @@ -141,34 +144,32 @@ pub async fn put_stream(req: HttpRequest, body: Bytes) -> Result Result let stats = stats::get_current_stats(&stream_name, "json") .ok_or(StreamError::StreamNotFound(stream_name.clone()))?; + let ingester_stats = if CONFIG.parseable.mode == Mode::Query { + Some(fetch_stats_from_ingesters(&stream_name).await?) 
+ } else { + None + }; + + let hash_map = STREAM_INFO.read().unwrap(); + let stream_meta = &hash_map + .get(&stream_name) + .ok_or(StreamError::StreamNotFound(stream_name.clone()))?; + let time = Utc::now(); - let stats = serde_json::json!({ - "stream": stream_name, - "time": time, - "ingestion": { - "count": stats.events, - "size": format!("{} {}", stats.ingestion, "Bytes"), - "format": "json" - }, - "storage": { - "size": format!("{} {}", stats.storage, "Bytes"), - "format": "parquet" + let stats = match &stream_meta.first_event_at { + Some(_) => { + let ingestion_stats = IngestionStats::new( + stats.events, + format!("{} {}", stats.ingestion, "Bytes"), + "json", + ); + let storage_stats = + StorageStats::new(format!("{} {}", stats.storage, "Bytes"), "parquet"); + + QueriedStats::new(&stream_name, time, ingestion_stats, storage_stats) } - }); + + None => { + let ingestion_stats = IngestionStats::new( + stats.events, + format!("{} {}", stats.ingestion, "Bytes"), + "json", + ); + let storage_stats = + StorageStats::new(format!("{} {}", stats.storage, "Bytes"), "parquet"); + + QueriedStats::new(&stream_name, time, ingestion_stats, storage_stats) + } + }; + let stats = if let Some(mut ingester_stats) = ingester_stats { + ingester_stats.push(stats); + merge_quried_stats(ingester_stats) + } else { + stats + }; + + let stats = serde_json::to_value(stats).unwrap(); Ok((web::Json(stats), StatusCode::OK)) } @@ -437,9 +469,21 @@ pub async fn get_stream_info(req: HttpRequest) -> Result StatusCode { + match kind { + serde_json::error::Category::Io => StatusCode::INTERNAL_SERVER_ERROR, + serde_json::error::Category::Syntax => StatusCode::BAD_REQUEST, + serde_json::error::Category::Data => StatusCode::INTERNAL_SERVER_ERROR, + serde_json::error::Category::Eof => StatusCode::BAD_REQUEST, + } +} + pub mod error { use actix_web::http::header::ContentType; @@ -451,6 +495,9 @@ pub mod error { validator::error::{AlertValidationError, StreamNameValidationError}, }; + #[allow(unused)] + use super::classify_json_error; + #[derive(Debug, thiserror::Error)] pub enum CreateStreamError { #[error("Stream name validation failed due to {0}")] @@ -495,6 +542,10 @@ pub mod error { InvalidRetentionConfig(serde_json::Error), #[error("{msg}")] Custom { msg: String, status: StatusCode }, + #[error("Error: {0}")] + Anyhow(#[from] anyhow::Error), + #[error("Network Error: {0}")] + Network(#[from] reqwest::Error), #[error("Could not deserialize into JSON object, {0}")] SerdeError(#[from] serde_json::Error), } @@ -520,6 +571,10 @@ pub mod error { StreamError::InvalidAlertMessage(_, _) => StatusCode::BAD_REQUEST, StreamError::InvalidRetentionConfig(_) => StatusCode::BAD_REQUEST, StreamError::SerdeError(_) => StatusCode::BAD_REQUEST, + StreamError::Anyhow(_) => StatusCode::INTERNAL_SERVER_ERROR, + StreamError::Network(err) => { + err.status().unwrap_or(StatusCode::INTERNAL_SERVER_ERROR) + } } } @@ -534,6 +589,10 @@ pub mod error { fn from(value: MetadataError) -> Self { match value { MetadataError::StreamMetaNotFound(s) => StreamError::StreamNotFound(s), + MetadataError::StandaloneWithDistributed(s) => StreamError::Custom { + msg: s, + status: StatusCode::INTERNAL_SERVER_ERROR, + }, } } } diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs new file mode 100644 index 00000000..63665e11 --- /dev/null +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -0,0 +1,332 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use crate::analytics; +use crate::banner; +use crate::handlers::http::logstream; +use crate::handlers::http::middleware::RouteExt; +use crate::handlers::http::MAX_EVENT_PAYLOAD_SIZE; +use crate::localcache::LocalCacheManager; +use crate::metadata; +use crate::metrics; +use crate::rbac; +use crate::rbac::role::Action; +use crate::storage; +use crate::storage::object_storage::ingester_metadata_path; +use crate::storage::object_storage::parseable_json_path; +use crate::storage::ObjectStorageError; +use crate::sync; + +use super::server::Server; +use super::ssl_acceptor::get_ssl_acceptor; +use super::IngesterMetadata; +use super::OpenIdClient; +use super::ParseableServer; +use super::DEFAULT_VERSION; + +use actix_web::body::MessageBody; +use actix_web::Scope; +use actix_web::{web, App, HttpServer}; +use actix_web_prometheus::PrometheusMetrics; +use async_trait::async_trait; +use base64::Engine; +use itertools::Itertools; +use relative_path::RelativePathBuf; +use url::Url; + +use crate::{ + handlers::http::{base_path, cross_origin_config}, + option::CONFIG, +}; + +#[derive(Default)] +pub struct IngestServer; + +#[async_trait(?Send)] +impl ParseableServer for IngestServer { + // we dont need oidc client here its just here to satisfy the trait + async fn start( + &self, + prometheus: PrometheusMetrics, + _oidc_client: Option, + ) -> anyhow::Result<()> { + // set the ingester metadata + self.set_ingester_metadata().await?; + + // get the ssl stuff + let ssl = get_ssl_acceptor( + &CONFIG.parseable.tls_cert_path, + &CONFIG.parseable.tls_key_path, + )?; + + // fn that creates the app + let create_app_fn = move || { + App::new() + .wrap(prometheus.clone()) + .configure(|config| IngestServer::configure_routes(config, None)) + .wrap(actix_web::middleware::Logger::default()) + .wrap(actix_web::middleware::Compress::default()) + .wrap(cross_origin_config()) + }; + + // concurrent workers equal to number of logical cores + let http_server = HttpServer::new(create_app_fn).workers(num_cpus::get()); + + if let Some(config) = ssl { + http_server + .bind_rustls(&CONFIG.parseable.address, config)? + .run() + .await?; + } else { + http_server.bind(&CONFIG.parseable.address)?.run().await?; + } + + Ok(()) + } + + /// implement the init method will just invoke the initialize method + async fn init(&self) -> anyhow::Result<()> { + self.validate()?; + // check for querier state. 
Is it there, or was it there in the past + self.check_querier_state().await?; + // to get the .parseable.json file in staging + self.validate_credentials().await?; + + let metadata = storage::resolve_parseable_metadata().await?; + banner::print(&CONFIG, &metadata).await; + rbac::map::init(&metadata); + // set the info in the global metadata + metadata.set_global(); + + self.initialize().await + } + + fn validate(&self) -> anyhow::Result<()> { + if CONFIG.get_storage_mode_string() == "Local drive" { + return Err(anyhow::Error::msg( + // Error Message can be better + "Ingest Server cannot be started in local storage mode. Please start the server in a supported storage mode.", + )); + } + + Ok(()) + } +} + +impl IngestServer { + // configure the api routes + fn configure_routes(config: &mut web::ServiceConfig, _oidc_client: Option) { + config + .service( + // Base path "{url}/api/v1" + web::scope(&base_path()) + .service(Server::get_query_factory()) + .service(Server::get_ingest_factory()) + .service(Self::logstream_api()) + .service(Server::get_about_factory()), + ) + .service(Server::get_liveness_factory()) + .service(Server::get_readiness_factory()); + } + + fn logstream_api() -> Scope { + web::scope("/logstream") + .service( + // GET "/logstream" ==> Get list of all Log Streams on the server + web::resource("") + .route(web::get().to(logstream::list).authorize(Action::ListStream)), + ) + .service( + web::scope("/{logstream}") + .service( + web::resource("") + // PUT "/logstream/{logstream}" ==> Create log stream + .route( + web::put() + .to(logstream::put_stream) + .authorize_for_stream(Action::CreateStream), + ) + // DELETE "/logstream/{logstream}" ==> Delete log stream + .route( + web::delete() + .to(logstream::delete) + .authorize_for_stream(Action::DeleteStream), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), + ) + .service( + // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream + web::resource("/schema").route( + web::get() + .to(logstream::schema) + .authorize_for_stream(Action::GetSchema), + ), + ) + .service( + // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream + web::resource("/stats").route( + web::get() + .to(logstream::get_stats) + .authorize_for_stream(Action::GetStats), + ), + ), + ) + } + + // create the ingester metadata and put the .ingester.json file in the object store + async fn set_ingester_metadata(&self) -> anyhow::Result<()> { + let store = CONFIG.storage().get_object_store(); + + let sock = Server::get_server_address(); + let path = ingester_metadata_path(sock.ip().to_string(), sock.port().to_string()); + + if store.get_object(&path).await.is_ok() { + println!("Ingester metadata already exists"); + return Ok(()); + }; + + let scheme = CONFIG.parseable.get_scheme(); + let resource = IngesterMetadata::new( + sock.port().to_string(), + CONFIG + .parseable + .domain_address + .clone() + .unwrap_or_else(|| { + Url::parse(&format!("{}://{}:{}", scheme, sock.ip(), sock.port())).unwrap() + }) + .to_string(), + DEFAULT_VERSION.to_string(), + store.get_bucket_name(), + &CONFIG.parseable.username, + &CONFIG.parseable.password, + ); + + let resource = serde_json::to_string(&resource) + .unwrap() + .try_into_bytes() + .unwrap(); + + store.put_object(&path, resource).await?; + + Ok(()) + } + + // check for querier state. 
Is it there, or was it there in the past + // this should happen before the set the ingester metadata + async fn check_querier_state(&self) -> anyhow::Result<(), ObjectStorageError> { + // how do we check for querier state? + // based on the work flow of the system, the querier will always need to start first + // i.e the querier will create the `.parseable.json` file + + let store = CONFIG.storage().get_object_store(); + let path = parseable_json_path(); + + match store.get_object(&path).await { + Ok(_) => Ok(()), + Err(_) => Err(ObjectStorageError::Custom( + "Query Server has not been started yet. Please start the querier server first." + .to_string(), + )), + } + } + + async fn validate_credentials(&self) -> anyhow::Result<()> { + // check if your creds match with others + let store = CONFIG.storage().get_object_store(); + let base_path = RelativePathBuf::from(""); + let ingester_metadata = store + .get_objects(Some(&base_path)) + .await? + .iter() + // this unwrap will most definateley shoot me in the foot later + .map(|x| serde_json::from_slice::(x).unwrap_or_default()) + .collect_vec(); + + if !ingester_metadata.is_empty() { + let check = ingester_metadata[0].token.clone(); + + let token = base64::prelude::BASE64_STANDARD.encode(format!( + "{}:{}", + CONFIG.parseable.username, CONFIG.parseable.password + )); + + let token = format!("Basic {}", token); + + if check != token { + log::error!("Credentials do not match with other ingesters. Please check your credentials and try again."); + return Err(anyhow::anyhow!("Credentials do not match with other ingesters. Please check your credentials and try again.")); + } + } + + Ok(()) + } + + async fn initialize(&self) -> anyhow::Result<()> { + if let Some(cache_manager) = LocalCacheManager::global() { + cache_manager + .validate(CONFIG.parseable.local_cache_size) + .await?; + }; + + let prometheus = metrics::build_metrics_handler(); + CONFIG.storage().register_store_metrics(&prometheus); + + let storage = CONFIG.storage().get_object_store(); + if let Err(err) = metadata::STREAM_INFO.load(&*storage).await { + log::warn!("could not populate local metadata. {:?}", err); + } + + metrics::fetch_stats_from_storage().await; + + let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync(); + let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = + sync::object_store_sync(); + + // all internal data structures populated now. + // start the analytics scheduler if enabled + if CONFIG.parseable.send_analytics { + analytics::init_analytics_scheduler(); + } + let app = self.start(prometheus, CONFIG.parseable.openid.clone()); + tokio::pin!(app); + loop { + tokio::select! { + e = &mut app => { + // actix server finished .. stop other threads and stop the server + remote_sync_inbox.send(()).unwrap_or(()); + localsync_inbox.send(()).unwrap_or(()); + localsync_handler.join().unwrap_or(()); + remote_sync_handler.join().unwrap_or(()); + return e + }, + _ = &mut localsync_outbox => { + // crash the server if localsync fails for any reason + // panic!("Local Sync thread died. Server will fail now!") + return Err(anyhow::Error::msg("Failed to sync local data to drive. 
Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable")) + }, + _ = &mut remote_sync_outbox => { + // remote_sync failed, this is recoverable by just starting remote_sync thread again + remote_sync_handler.join().unwrap_or(()); + (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync(); + } + + }; + } + } +} diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs new file mode 100644 index 00000000..5881b3bf --- /dev/null +++ b/server/src/handlers/http/modal/mod.rs @@ -0,0 +1,133 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +pub mod ingest_server; +pub mod query_server; +pub mod server; +pub mod ssl_acceptor; + +use std::sync::Arc; + +use actix_web_prometheus::PrometheusMetrics; +use async_trait::async_trait; +use openid::Discovered; + +use crate::oidc; +use base64::Engine; +use serde::Deserialize; +use serde::Serialize; +pub type OpenIdClient = Arc>; + +// to be decided on what the Default version should be +pub const DEFAULT_VERSION: &str = "v3"; + +include!(concat!(env!("OUT_DIR"), "/generated.rs")); + +#[async_trait(?Send)] +pub trait ParseableServer { + // async fn validate(&self) -> Result<(), ObjectStorageError>; + + /// configure the server + async fn start( + &self, + prometheus: PrometheusMetrics, + oidc_client: Option, + ) -> anyhow::Result<()>; + + async fn init(&self) -> anyhow::Result<()>; + + fn validate(&self) -> anyhow::Result<()>; +} + +#[derive(Serialize, Debug, Deserialize, Default, Clone, Eq, PartialEq)] +pub struct IngesterMetadata { + pub version: String, + pub port: String, + pub domain_name: String, + pub bucket_name: String, + pub token: String, +} + +impl IngesterMetadata { + pub fn new( + port: String, + domain_name: String, + version: String, + bucket_name: String, + username: &str, + password: &str, + ) -> Self { + let token = base64::prelude::BASE64_STANDARD.encode(format!("{}:{}", username, password)); + + let token = format!("Basic {}", token); + + Self { + port, + domain_name, + version, + bucket_name, + token, + } + } +} + +#[cfg(test)] +mod test { + use actix_web::body::MessageBody; + use rstest::rstest; + + use super::{IngesterMetadata, DEFAULT_VERSION}; + + #[rstest] + fn test_deserialize_resource() { + let lhs: IngesterMetadata = IngesterMetadata::new( + "8000".to_string(), + "https://localhost:8000".to_string(), + DEFAULT_VERSION.to_string(), + "somebucket".to_string(), + "admin", + "admin", + ); + + let rhs = serde_json::from_slice::(br#"{"version":"v3","port":"8000","domain_name":"https://localhost:8000","bucket_name":"somebucket","token":"Basic YWRtaW46YWRtaW4="}"#).unwrap(); + + assert_eq!(rhs, lhs); + } + + #[rstest] + fn test_serialize_resource() { + let im = IngesterMetadata::new( + "8000".to_string(), + 
"https://localhost:8000".to_string(), + DEFAULT_VERSION.to_string(), + "somebucket".to_string(), + "admin", + "admin", + ); + + let lhs = serde_json::to_string(&im) + .unwrap() + .try_into_bytes() + .unwrap(); + let rhs = br#"{"version":"v3","port":"8000","domain_name":"https://localhost:8000","bucket_name":"somebucket","token":"Basic YWRtaW46YWRtaW4="}"# + .try_into_bytes() + .unwrap(); + + assert_eq!(lhs, rhs); + } +} diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs new file mode 100644 index 00000000..ca554dbf --- /dev/null +++ b/server/src/handlers/http/modal/query_server.rs @@ -0,0 +1,203 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use crate::handlers::http::cluster::utils::check_liveness; +use crate::handlers::http::cluster::{self, get_ingester_info}; +use crate::handlers::http::middleware::RouteExt; +use crate::handlers::http::{base_path, cross_origin_config, API_BASE_PATH, API_VERSION}; + +use crate::rbac::role::Action; +use crate::{analytics, banner, metadata, metrics, migration, rbac, storage}; +use actix_web::web; +use actix_web::web::ServiceConfig; +use actix_web::{App, HttpServer}; +use async_trait::async_trait; +use std::sync::Arc; + +use crate::option::CONFIG; + +use super::server::Server; +use super::ssl_acceptor::get_ssl_acceptor; +use super::{OpenIdClient, ParseableServer}; + +#[derive(Default, Debug)] +pub struct QueryServer; + +#[async_trait(?Send)] +impl ParseableServer for QueryServer { + async fn start( + &self, + prometheus: actix_web_prometheus::PrometheusMetrics, + oidc_client: Option, + ) -> anyhow::Result<()> { + let data = get_ingester_info().await?; + + // on subsequent runs, the qurier should check if the ingester is up and running or not + for ingester in data.iter() { + if !check_liveness(&ingester.domain_name).await { + eprintln!("Ingester at {} is not reachable", &ingester.domain_name); + } else { + println!("Ingester at {} is up and running", &ingester.domain_name); + } + } + + let oidc_client = match oidc_client { + Some(config) => { + let client = config + .connect(&format!("{API_BASE_PATH}/{API_VERSION}/o/code")) + .await?; + Some(Arc::new(client)) + } + + None => None, + }; + + let ssl = get_ssl_acceptor( + &CONFIG.parseable.tls_cert_path, + &CONFIG.parseable.tls_key_path, + )?; + + let create_app_fn = move || { + App::new() + .wrap(prometheus.clone()) + .configure(|config| QueryServer::configure_routes(config, oidc_client.clone())) + .wrap(actix_web::middleware::Logger::default()) + .wrap(actix_web::middleware::Compress::default()) + .wrap(cross_origin_config()) + }; + + // concurrent workers equal to number of cores on the cpu + let http_server = HttpServer::new(create_app_fn).workers(num_cpus::get()); + if let Some(config) = ssl { + http_server + .bind_rustls(&CONFIG.parseable.address, config)? 
+ .run() + .await?; + } else { + http_server.bind(&CONFIG.parseable.address)?.run().await?; + } + + Ok(()) + } + + /// implementation of init should just invoke a call to initialize + async fn init(&self) -> anyhow::Result<()> { + self.validate()?; + migration::run_file_migration(&CONFIG).await?; + CONFIG.validate_storage().await?; + migration::run_metadata_migration(&CONFIG).await?; + let metadata = storage::resolve_parseable_metadata().await?; + banner::print(&CONFIG, &metadata).await; + // initialize the rbac map + rbac::map::init(&metadata); + // keep metadata info in mem + metadata.set_global(); + self.initialize().await + } + + fn validate(&self) -> anyhow::Result<()> { + if CONFIG.get_storage_mode_string() == "Local drive" { + return Err(anyhow::anyhow!( + "Query Server cannot be started in local storage mode. Please start the server in a supported storage mode.", + )); + } + + Ok(()) + } +} + +impl QueryServer { + // configure the api routes + fn configure_routes(config: &mut ServiceConfig, oidc_client: Option) { + config + .service( + web::scope(&base_path()) + // POST "/query" ==> Get results of the SQL query passed in request body + .service(Server::get_query_factory()) + .service(Server::get_liveness_factory()) + .service(Server::get_readiness_factory()) + .service(Server::get_about_factory()) + .service(Server::get_logstream_webscope()) + .service(Server::get_user_webscope()) + .service(Server::get_llm_webscope()) + .service(Server::get_oauth_webscope(oidc_client)) + .service(Server::get_user_role_webscope()) + .service(Self::get_cluster_info_web_scope()), + ) + .service(Server::get_generated()); + } + + fn get_cluster_info_web_scope() -> actix_web::Scope { + web::scope("/cluster") + .service( + // GET "/cluster/info" ==> Get info of the cluster + web::resource("/info").route( + web::get() + .to(cluster::get_cluster_info) + .authorize(Action::ListCluster), + ), + ) + // GET "/cluster/metrics" ==> Get metrics of the cluster + .service( + web::resource("/metrics").route( + web::get() + .to(cluster::get_cluster_metrics) + .authorize(Action::ListClusterMetrics), + ), + ) + // DELETE "/cluster/{ingester_domain:port}" ==> Delete an ingester from the cluster + .service( + web::scope("/{ingester}").service( + web::resource("").route( + web::delete() + .to(cluster::remove_ingester) + .authorize(Action::DeleteIngester), + ), + ), + ) + } + + /// initialize the server, run migrations as needed and start the server + async fn initialize(&self) -> anyhow::Result<()> { + let prometheus = metrics::build_metrics_handler(); + CONFIG.storage().register_store_metrics(&prometheus); + + migration::run_migration(&CONFIG).await?; + + let storage = CONFIG.storage().get_object_store(); + if let Err(e) = metadata::STREAM_INFO.load(&*storage).await { + log::warn!("could not populate local metadata. {:?}", e); + } + + // track all parquet files already in the data directory + storage::retention::load_retention_from_global(); + // load data from stats back to prometheus metrics + metrics::fetch_stats_from_storage().await; + + // all internal data structures populated now. 
+ // start the analytics scheduler if enabled + if CONFIG.parseable.send_analytics { + analytics::init_analytics_scheduler(); + } + + self.start(prometheus, CONFIG.parseable.openid.clone()) + .await?; + + Ok(()) + } +} diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs new file mode 100644 index 00000000..01ed5553 --- /dev/null +++ b/server/src/handlers/http/modal/server.rs @@ -0,0 +1,487 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use crate::analytics; +use crate::banner; +use crate::handlers; +use crate::handlers::http::about; +use crate::handlers::http::base_path; +use crate::handlers::http::health_check; +use crate::handlers::http::query; +use crate::handlers::http::API_BASE_PATH; +use crate::handlers::http::API_VERSION; +use crate::localcache::LocalCacheManager; +use crate::metadata; +use crate::metrics; +use crate::migration; +use crate::rbac; +use crate::storage; +use crate::sync; +use std::net::SocketAddr; +use std::{fs::File, io::BufReader, sync::Arc}; + +use actix_web::web::resource; +use actix_web::Resource; +use actix_web::Scope; +use actix_web::{web, App, HttpServer}; +use actix_web_prometheus::PrometheusMetrics; +use actix_web_static_files::ResourceFiles; +use async_trait::async_trait; + +use rustls::{Certificate, PrivateKey, ServerConfig}; +use rustls_pemfile::{certs, pkcs8_private_keys}; + +use crate::{ + handlers::http::{ + self, cross_origin_config, ingest, llm, logstream, + middleware::{DisAllowRootUser, RouteExt}, + oidc, role, MAX_EVENT_PAYLOAD_SIZE, + }, + option::CONFIG, + rbac::role::Action, +}; + +// use super::generate; +use super::generate; +use super::OpenIdClient; +use super::ParseableServer; + +#[derive(Default)] +pub struct Server; + +#[async_trait(?Send)] +impl ParseableServer for Server { + async fn start( + &self, + prometheus: PrometheusMetrics, + oidc_client: Option, + ) -> anyhow::Result<()> { + let oidc_client = match oidc_client { + Some(config) => { + let client = config + .connect(&format!("{API_BASE_PATH}/{API_VERSION}/o/code")) + .await?; + Some(Arc::new(client)) + } + None => None, + }; + + let create_app_fn = move || { + App::new() + .wrap(prometheus.clone()) + .configure(|cfg| Server::configure_routes(cfg, oidc_client.clone())) + .wrap(actix_web::middleware::Logger::default()) + .wrap(actix_web::middleware::Compress::default()) + .wrap(cross_origin_config()) + }; + + let ssl_acceptor = match ( + &CONFIG.parseable.tls_cert_path, + &CONFIG.parseable.tls_key_path, + ) { + (Some(cert), Some(key)) => { + // init server config builder with safe defaults + let config = ServerConfig::builder() + .with_safe_defaults() + .with_no_client_auth(); + + // load TLS key/cert files + let cert_file = &mut BufReader::new(File::open(cert)?); + let key_file = &mut BufReader::new(File::open(key)?); + + // convert files to key/cert objects + let cert_chain 
= certs(cert_file)?.into_iter().map(Certificate).collect(); + + let mut keys: Vec = pkcs8_private_keys(key_file)? + .into_iter() + .map(PrivateKey) + .collect(); + + // exit if no keys could be parsed + if keys.is_empty() { + anyhow::bail!("Could not locate PKCS 8 private keys."); + } + + let server_config = config.with_single_cert(cert_chain, keys.remove(0))?; + + Some(server_config) + } + (_, _) => None, + }; + + // concurrent workers equal to number of cores on the cpu + let http_server = HttpServer::new(create_app_fn).workers(num_cpus::get()); + if let Some(config) = ssl_acceptor { + http_server + .bind_rustls(&CONFIG.parseable.address, config)? + .run() + .await?; + } else { + http_server.bind(&CONFIG.parseable.address)?.run().await?; + } + + Ok(()) + } + + /// implementation of init should just invoke a call to initialize + async fn init(&self) -> anyhow::Result<()> { + self.validate()?; + migration::run_file_migration(&CONFIG).await?; + CONFIG.validate_storage().await?; + migration::run_metadata_migration(&CONFIG).await?; + let metadata = storage::resolve_parseable_metadata().await?; + banner::print(&CONFIG, &metadata).await; + rbac::map::init(&metadata); + metadata.set_global(); + self.initialize().await + } + + fn validate(&self) -> anyhow::Result<()> { + Ok(()) + } +} + +impl Server { + fn configure_routes(config: &mut web::ServiceConfig, oidc_client: Option) { + // there might be a bug in the configure routes method + config + .service( + web::scope(&base_path()) + // POST "/query" ==> Get results of the SQL query passed in request body + .service(Self::get_query_factory()) + .service(Self::get_ingest_factory()) + .service(Self::get_liveness_factory()) + .service(Self::get_readiness_factory()) + .service(Self::get_about_factory()) + .service(Self::get_logstream_webscope()) + .service(Self::get_user_webscope()) + .service(Self::get_llm_webscope()) + .service(Self::get_oauth_webscope(oidc_client)) + .service(Self::get_user_role_webscope()), + ) + .service(Self::get_generated()); + } + + // get the query factory + pub fn get_query_factory() -> Resource { + web::resource("/query").route(web::post().to(query::query).authorize(Action::Query)) + } + + // get the logstream web scope + pub fn get_logstream_webscope() -> Scope { + web::scope("/logstream") + .service( + // GET "/logstream" ==> Get list of all Log Streams on the server + web::resource("") + .route(web::get().to(logstream::list).authorize(Action::ListStream)), + ) + .service( + web::scope("/{logstream}") + .service( + web::resource("") + // PUT "/logstream/{logstream}" ==> Create log stream + .route( + web::put() + .to(logstream::put_stream) + .authorize_for_stream(Action::CreateStream), + ) + // POST "/logstream/{logstream}" ==> Post logs to given log stream + .route( + web::post() + .to(ingest::post_event) + .authorize_for_stream(Action::Ingest), + ) + // DELETE "/logstream/{logstream}" ==> Delete log stream + .route( + web::delete() + .to(logstream::delete) + .authorize_for_stream(Action::DeleteStream), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), + ) + .service( + // GET "/logstream/{logstream}/info" ==> Get info for given log stream + web::resource("/info").route( + web::get() + .to(logstream::get_stream_info) + .authorize_for_stream(Action::GetStream), + ), + ) + .service( + web::resource("/alert") + // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream + .route( + web::put() + .to(logstream::put_alert) + .authorize_for_stream(Action::PutAlert), + ) + // GET 
"/logstream/{logstream}/alert" ==> Get alert for given log stream + .route( + web::get() + .to(logstream::get_alert) + .authorize_for_stream(Action::GetAlert), + ), + ) + .service( + // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream + web::resource("/schema").route( + web::get() + .to(logstream::schema) + .authorize_for_stream(Action::GetSchema), + ), + ) + .service( + // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream + web::resource("/stats").route( + web::get() + .to(logstream::get_stats) + .authorize_for_stream(Action::GetStats), + ), + ) + .service( + web::resource("/retention") + // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_retention) + .authorize_for_stream(Action::PutRetention), + ) + // GET "/logstream/{logstream}/retention" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_retention) + .authorize_for_stream(Action::GetRetention), + ), + ) + .service( + web::resource("/cache") + // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_enable_cache) + .authorize_for_stream(Action::PutCacheEnabled), + ) + // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_cache_enabled) + .authorize_for_stream(Action::GetCacheEnabled), + ), + ), + ) + } + + // get the factory for the ingest route + pub fn get_ingest_factory() -> Resource { + web::resource("/ingest") + .route( + web::post() + .to(ingest::ingest) + .authorize_for_stream(Action::Ingest), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)) + } + + // get the oauth webscope + pub fn get_oauth_webscope(oidc_client: Option) -> Scope { + let oauth = web::scope("/o") + .service(resource("/login").route(web::get().to(oidc::login))) + .service(resource("/logout").route(web::get().to(oidc::logout))) + .service(resource("/code").route(web::get().to(oidc::reply_login))); + + if let Some(client) = oidc_client { + oauth.app_data(web::Data::from(client)) + } else { + oauth + } + } + + // get the role webscope + pub fn get_user_role_webscope() -> Scope { + web::scope("/role") + // GET Role List + .service(resource("").route(web::get().to(role::list).authorize(Action::ListRole))) + .service( + // PUT and GET Default Role + resource("/default") + .route(web::put().to(role::put_default).authorize(Action::PutRole)) + .route(web::get().to(role::get_default).authorize(Action::GetRole)), + ) + .service( + // PUT, GET, DELETE Roles + resource("/{name}") + .route(web::put().to(role::put).authorize(Action::PutRole)) + .route(web::delete().to(role::delete).authorize(Action::DeleteRole)) + .route(web::get().to(role::get).authorize(Action::GetRole)), + ) + } + + // get the user webscope + pub fn get_user_webscope() -> Scope { + web::scope("/user") + .service( + web::resource("") + // GET /user => List all users + .route( + web::get() + .to(http::rbac::list_users) + .authorize(Action::ListUser), + ), + ) + .service( + web::resource("/{username}") + // PUT /user/{username} => Create a new user + .route( + web::post() + .to(http::rbac::post_user) + .authorize(Action::PutUser), + ) + // DELETE /user/{username} => Delete a user + .route( + web::delete() + .to(http::rbac::delete_user) + .authorize(Action::DeleteUser), + ) + .wrap(DisAllowRootUser), + ) + .service( + web::resource("/{username}/role") + // PUT /user/{username}/roles => Put roles for user + 
.route( + web::put() + .to(http::rbac::put_role) + .authorize(Action::PutUserRoles) + .wrap(DisAllowRootUser), + ) + .route( + web::get() + .to(http::rbac::get_role) + .authorize_for_user(Action::GetUserRoles), + ), + ) + .service( + web::resource("/{username}/generate-new-password") + // POST /user/{username}/generate-new-password => reset password for this user + .route( + web::post() + .to(http::rbac::post_gen_password) + .authorize(Action::PutUser) + .wrap(DisAllowRootUser), + ), + ) + } + + // get the llm webscope + pub fn get_llm_webscope() -> Scope { + web::scope("/llm").service( + web::resource("").route( + web::post() + .to(llm::make_llm_request) + .authorize(Action::QueryLLM), + ), + ) + } + + // get the live check + // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command + pub fn get_liveness_factory() -> Resource { + web::resource("/liveness").route(web::get().to(health_check::liveness)) + } + + // get the readiness check + // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes + pub fn get_readiness_factory() -> Resource { + web::resource("/readiness").route(web::get().to(health_check::readiness)) + } + + // get the about factory + pub fn get_about_factory() -> Resource { + web::resource("/about").route(web::get().to(about::about).authorize(Action::GetAbout)) + } + + // GET "/" ==> Serve the static frontend directory + pub fn get_generated() -> ResourceFiles { + ResourceFiles::new("/", generate()).resolve_not_found_to_root() + } + + async fn initialize(&self) -> anyhow::Result<()> { + if let Some(cache_manager) = LocalCacheManager::global() { + cache_manager + .validate(CONFIG.parseable.local_cache_size) + .await?; + }; + + let prometheus = metrics::build_metrics_handler(); + CONFIG.storage().register_store_metrics(&prometheus); + + migration::run_migration(&CONFIG).await?; + + let storage = CONFIG.storage().get_object_store(); + if let Err(err) = metadata::STREAM_INFO.load(&*storage).await { + log::warn!("could not populate local metadata. {:?}", err); + } + + storage::retention::load_retention_from_global(); + metrics::fetch_stats_from_storage().await; + + let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync(); + let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = + sync::object_store_sync(); + + if CONFIG.parseable.send_analytics { + analytics::init_analytics_scheduler(); + } + + tokio::spawn(handlers::livetail::server()); + + let app = self.start(prometheus, CONFIG.parseable.openid.clone()); + + tokio::pin!(app); + loop { + tokio::select! { + e = &mut app => { + // actix server finished .. stop other threads and stop the server + remote_sync_inbox.send(()).unwrap_or(()); + localsync_inbox.send(()).unwrap_or(()); + localsync_handler.join().unwrap_or(()); + remote_sync_handler.join().unwrap_or(()); + return e + }, + _ = &mut localsync_outbox => { + // crash the server if localsync fails for any reason + // panic!("Local Sync thread died. Server will fail now!") + return Err(anyhow::Error::msg("Failed to sync local data to drive. 
Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable")) + }, + _ = &mut remote_sync_outbox => { + // remote_sync failed, this is recoverable by just starting remote_sync thread again + remote_sync_handler.join().unwrap_or(()); + (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync(); + } + }; + } + } + + #[inline(always)] + pub fn get_server_address() -> SocketAddr { + // this might cause an issue down the line + // best is to make the Cli Struct better, but thats a chore + (CONFIG.parseable.address.clone()) + .parse::() + .unwrap() + } +} diff --git a/server/src/handlers/http/modal/ssl_acceptor.rs b/server/src/handlers/http/modal/ssl_acceptor.rs new file mode 100644 index 00000000..6b51113b --- /dev/null +++ b/server/src/handlers/http/modal/ssl_acceptor.rs @@ -0,0 +1,54 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use std::{fs::File, io::BufReader, path::PathBuf}; + +use itertools::Itertools; +use rustls::{Certificate, PrivateKey, ServerConfig}; +use rustls_pemfile::{certs, pkcs8_private_keys}; + +pub fn get_ssl_acceptor( + tls_cert: &Option, + tls_key: &Option, +) -> anyhow::Result> { + match (tls_cert, tls_key) { + (Some(cert), Some(key)) => { + let server_config = ServerConfig::builder() + .with_safe_defaults() + .with_no_client_auth(); + + let cert_file = &mut BufReader::new(File::open(cert)?); + let key_file = &mut BufReader::new(File::open(key)?); + let cert_chain = certs(cert_file)?.into_iter().map(Certificate).collect_vec(); + + let mut keys = pkcs8_private_keys(key_file)? 
+ .into_iter() + .map(PrivateKey) + .collect_vec(); + + if keys.is_empty() { + anyhow::bail!("Could not locate PKCS 8 private keys."); + } + + Ok(Some( + server_config.with_single_cert(cert_chain, keys.remove(0))?, + )) + } + (_, _) => Ok(None), + } +} diff --git a/server/src/handlers/http/query.rs b/server/src/handlers/http/query.rs index d7896b94..62b7b649 100644 --- a/server/src/handlers/http/query.rs +++ b/server/src/handlers/http/query.rs @@ -20,24 +20,35 @@ use actix_web::http::header::ContentType; use actix_web::web::{self, Json}; use actix_web::{FromRequest, HttpRequest, Responder}; use chrono::{DateTime, Utc}; +use datafusion::common::tree_node::TreeNode; use datafusion::error::DataFusionError; use datafusion::execution::context::SessionState; use futures_util::Future; use http::StatusCode; use std::collections::HashMap; use std::pin::Pin; +use std::sync::Arc; use std::time::Instant; +use crate::event::error::EventError; +use crate::handlers::http::fetch_schema; + +use crate::event::commit_schema; use crate::metrics::QUERY_EXECUTE_TIME; +use crate::option::{Mode, CONFIG}; use crate::query::error::ExecuteError; -use crate::query::QUERY_SESSION; +use crate::query::{TableScanVisitor, QUERY_SESSION}; use crate::rbac::role::{Action, Permission}; use crate::rbac::Users; use crate::response::QueryResponse; +use crate::storage::object_storage::commit_schema_to_storage; +use crate::storage::ObjectStorageError; use crate::utils::actix::extract_session_key_from_req; +use super::send_query_request_to_ingester; + /// Query Request through http endpoint. -#[derive(Debug, serde::Deserialize)] +#[derive(Debug, serde::Deserialize, serde::Serialize)] #[serde(rename_all = "camelCase")] pub struct Query { query: String, @@ -52,10 +63,46 @@ pub struct Query { } pub async fn query(req: HttpRequest, query_request: Query) -> Result { - let creds = extract_session_key_from_req(&req).expect("expects basic auth"); - let permissions = Users.get_permissions(&creds); let session_state = QUERY_SESSION.state(); + + // get the logical plan and extract the table name + let raw_logical_plan = session_state + .create_logical_plan(&query_request.query) + .await?; + // create a visitor to extract the table name + let mut visitor = TableScanVisitor::default(); + let _ = raw_logical_plan.visit(&mut visitor); + let table_name = visitor.into_inner().pop().unwrap(); + + if CONFIG.parseable.mode == Mode::Query { + if let Ok(new_schema) = fetch_schema(&table_name).await { + commit_schema_to_storage(&table_name, new_schema.clone()) + .await + .map_err(QueryError::ObjectStorage)?; + commit_schema(&table_name, Arc::new(new_schema)).map_err(QueryError::EventError)?; + } + } + let mut query = into_query(&query_request, &session_state).await?; + + // ? 
run this code only if the query start time and now is less than 1 minute + margin + let mmem = if CONFIG.parseable.mode == Mode::Query { + // create a new query to send to the ingesters + if let Some(que) = transform_query_for_ingester(&query_request) { + let vals = send_query_request_to_ingester(&que) + .await + .map_err(|err| QueryError::Custom(err.to_string()))?; + Some(vals) + } else { + None + } + } else { + None + }; + + let creds = extract_session_key_from_req(&req).expect("expects basic auth"); + let permissions = Users.get_permissions(&creds); + // check authorization of this query if it references physical table; let table_name = query.table_name(); if let Some(ref table) = table_name { @@ -100,7 +147,7 @@ pub async fn query(req: HttpRequest, query_request: Query) -> Result Option { + if query.query.is_empty() { + return None; + } + + if query.start_time.is_empty() { + return None; + } + + if query.end_time.is_empty() { + return None; + } + + let end_time: DateTime = if query.end_time == "now" { + Utc::now() + } else { + DateTime::parse_from_rfc3339(&query.end_time) + .ok()? + .with_timezone(&Utc) + }; + + let start_time = end_time - chrono::Duration::minutes(1); + // when transforming the query, the ingesters are forced to return an array of values + let q = Query { + query: query.query.clone(), + fields: false, + filter_tags: query.filter_tags.clone(), + send_null: query.send_null, + start_time: start_time.to_rfc3339(), + end_time: end_time.to_rfc3339(), + }; + + Some(q) +} + #[derive(Debug, thiserror::Error)] pub enum QueryError { #[error("Query cannot be empty")] @@ -206,6 +288,12 @@ pub enum QueryError { Datafusion(#[from] DataFusionError), #[error("Execution Error: {0}")] Execute(#[from] ExecuteError), + #[error("Error: {0}")] + Custom(String), + #[error("ObjectStorage Error: {0}")] + ObjectStorage(#[from] ObjectStorageError), + #[error("Evern Error: {0}")] + EventError(#[from] EventError), } impl actix_web::ResponseError for QueryError { diff --git a/server/src/main.rs b/server/src/main.rs index 6f3e30d3..04d6ed8b 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -16,20 +16,12 @@ * */ -use clokwerk::{AsyncScheduler, Job, Scheduler, TimeUnits}; -use thread_priority::{ThreadBuilder, ThreadPriority}; -use tokio::sync::oneshot; -use tokio::sync::oneshot::error::TryRecvError; - -use std::panic::{catch_unwind, AssertUnwindSafe}; -use std::thread::{self, JoinHandle}; -use std::time::Duration; - mod about; mod alerts; mod analytics; mod banner; mod catalog; +mod cli; mod event; mod handlers; mod livetail; @@ -45,161 +37,37 @@ mod response; mod static_schema; mod stats; mod storage; +mod sync; mod utils; mod validator; -use option::CONFIG; +use std::sync::Arc; + +use handlers::http::modal::ParseableServer; +use option::{Mode, CONFIG}; -use crate::localcache::LocalCacheManager; +use crate::{ + handlers::http::modal::{ + ingest_server::IngestServer, query_server::QueryServer, server::Server, + }, + // localcache::LocalCacheManager, +}; pub const STORAGE_UPLOAD_INTERVAL: u32 = 60; #[actix_web::main] async fn main() -> anyhow::Result<()> { env_logger::init(); - let storage = CONFIG.storage().get_object_store(); - CONFIG.validate().await?; - migration::run_metadata_migration(&CONFIG).await?; - let metadata = storage::resolve_parseable_metadata().await?; - banner::print(&CONFIG, &metadata).await; - rbac::map::init(&metadata); - metadata.set_global(); - if let Some(cache_manager) = LocalCacheManager::global() { - cache_manager - .validate(CONFIG.parseable.local_cache_size) - 
.await?; - }; - let prometheus = metrics::build_metrics_handler(); - CONFIG.storage().register_store_metrics(&prometheus); - - migration::run_migration(&CONFIG).await?; - if let Err(e) = metadata::STREAM_INFO.load(&*storage).await { - log::warn!("could not populate local metadata. {:?}", e); - } + // these are empty ptrs so mem footprint should be minimal + let server: Arc = match CONFIG.parseable.mode { + Mode::Query => Arc::new(QueryServer), - // track all parquet files already in the data directory - storage::retention::load_retention_from_global(); - // load data from stats back to prometheus metrics - metrics::load_from_stats_from_storage().await; + Mode::Ingest => Arc::new(IngestServer), - let (localsync_handler, mut localsync_outbox, localsync_inbox) = run_local_sync(); - let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = - object_store_sync(); - - // all internal data structures populated now. - // start the analytics scheduler if enabled - if CONFIG.parseable.send_analytics { - analytics::init_analytics_scheduler(); - } - - tokio::spawn(handlers::livetail::server()); - - let app = handlers::http::run_http(prometheus, CONFIG.parseable.openid.clone()); - tokio::pin!(app); - loop { - tokio::select! { - e = &mut app => { - // actix server finished .. stop other threads and stop the server - remote_sync_inbox.send(()).unwrap_or(()); - localsync_inbox.send(()).unwrap_or(()); - localsync_handler.join().unwrap_or(()); - remote_sync_handler.join().unwrap_or(()); - return e - }, - _ = &mut localsync_outbox => { - // crash the server if localsync fails for any reason - // panic!("Local Sync thread died. Server will fail now!") - return Err(anyhow::Error::msg("Failed to sync local data to drive. Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable")) - }, - _ = &mut remote_sync_outbox => { - // remote_sync failed, this is recoverable by just starting remote_sync thread again - remote_sync_handler.join().unwrap_or(()); - (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = object_store_sync(); - } - - }; - } -} - -fn object_store_sync() -> (JoinHandle<()>, oneshot::Receiver<()>, oneshot::Sender<()>) { - let (outbox_tx, outbox_rx) = oneshot::channel::<()>(); - let (inbox_tx, inbox_rx) = oneshot::channel::<()>(); - let mut inbox_rx = AssertUnwindSafe(inbox_rx); - let handle = thread::spawn(move || { - let res = catch_unwind(move || { - let rt = actix_web::rt::System::new(); - rt.block_on(async { - let mut scheduler = AsyncScheduler::new(); - scheduler - .every(STORAGE_UPLOAD_INTERVAL.seconds()) - // Extra time interval is added so that this schedular does not race with local sync. - .plus(5u32.seconds()) - .run(|| async { - if let Err(e) = CONFIG.storage().get_object_store().sync().await { - log::warn!("failed to sync local data with object store. 
{:?}", e); - } - }); - - loop { - tokio::time::sleep(Duration::from_secs(1)).await; - scheduler.run_pending().await; - match AssertUnwindSafe(|| inbox_rx.try_recv())() { - Ok(_) => break, - Err(TryRecvError::Empty) => continue, - Err(TryRecvError::Closed) => { - // should be unreachable but breaking anyways - break; - } - } - } - }) - }); - - if res.is_err() { - outbox_tx.send(()).unwrap(); - } - }); - - (handle, outbox_rx, inbox_tx) -} - -fn run_local_sync() -> (JoinHandle<()>, oneshot::Receiver<()>, oneshot::Sender<()>) { - let (outbox_tx, outbox_rx) = oneshot::channel::<()>(); - let (inbox_tx, inbox_rx) = oneshot::channel::<()>(); - let mut inbox_rx = AssertUnwindSafe(inbox_rx); - - let handle = ThreadBuilder::default() - .name("local-sync") - .priority(ThreadPriority::Max) - .spawn(move |priority_result| { - if priority_result.is_err() { - log::warn!("Max priority cannot be set for sync thread. Make sure that user/program is allowed to set thread priority.") - } - let res = catch_unwind(move || { - let mut scheduler = Scheduler::new(); - scheduler - .every((storage::LOCAL_SYNC_INTERVAL as u32).seconds()) - .run(move || crate::event::STREAM_WRITERS.unset_all()); - - loop { - thread::sleep(Duration::from_millis(50)); - scheduler.run_pending(); - match AssertUnwindSafe(|| inbox_rx.try_recv())() { - Ok(_) => break, - Err(TryRecvError::Empty) => continue, - Err(TryRecvError::Closed) => { - // should be unreachable but breaking anyways - break; - } - } - } - }); + Mode::All => Arc::new(Server), + }; - if res.is_err() { - outbox_tx.send(()).unwrap(); - } - }) - .unwrap(); + server.init().await?; - (handle, outbox_rx, inbox_tx) + Ok(()) } diff --git a/server/src/metadata.rs b/server/src/metadata.rs index e4c0c9b2..7bb38e62 100644 --- a/server/src/metadata.rs +++ b/server/src/metadata.rs @@ -209,7 +209,7 @@ impl StreamInfo { for stream in storage.list_streams().await? { let alerts = storage.get_alerts(&stream.name).await?; - let schema = storage.get_schema(&stream.name).await?; + let schema = storage.get_schema_on_server_start(&stream.name).await?; let meta = storage.get_stream_metadata(&stream.name).await?; let schema = update_schema_from_staging(&stream.name, schema); @@ -288,6 +288,8 @@ pub mod error { pub enum MetadataError { #[error("Metadata for stream {0} not found. 
Please create the stream and try again")] StreamMetaNotFound(String), + #[error("Metadata Error: {0}")] + StandaloneWithDistributed(String), } #[derive(Debug, thiserror::Error)] diff --git a/server/src/metrics/mod.rs b/server/src/metrics/mod.rs index 05e6baf8..3e337123 100644 --- a/server/src/metrics/mod.rs +++ b/server/src/metrics/mod.rs @@ -16,6 +16,7 @@ * */ +pub mod prom_utils; pub mod storage; use actix_web_prometheus::{PrometheusMetrics, PrometheusMetricsBuilder}; @@ -133,7 +134,7 @@ fn prom_process_metrics(metrics: &PrometheusMetrics) { #[cfg(not(target_os = "linux"))] fn prom_process_metrics(_metrics: &PrometheusMetrics) {} -pub async fn load_from_stats_from_storage() { +pub async fn fetch_stats_from_storage() { for stream_name in STREAM_INFO.list_streams() { let stats = CONFIG .storage() diff --git a/server/src/metrics/prom_utils.rs b/server/src/metrics/prom_utils.rs new file mode 100644 index 00000000..72442a96 --- /dev/null +++ b/server/src/metrics/prom_utils.rs @@ -0,0 +1,87 @@ +use prometheus_parse::Sample as PromSample; +use prometheus_parse::Value as PromValue; +use serde::Serialize; +use serde_json::Error as JsonError; +use serde_json::Value as JsonValue; + +use crate::handlers::http::modal::server::Server; + +#[derive(Debug, Serialize, Clone)] +pub struct Metrics { + address: String, + parseable_events_ingested: f64, // all streams + parseable_staging_files: f64, + process_resident_memory_bytes: f64, + parseable_storage_size: StorageMetrics, +} + +#[derive(Debug, Serialize, Default, Clone)] +struct StorageMetrics { + staging: f64, + data: f64, +} + +impl Default for Metrics { + fn default() -> Self { + let socket = Server::get_server_address(); + let address = format!("http://{}:{}", socket.ip(), socket.port()); + Metrics { + address, + parseable_events_ingested: 0.0, + parseable_staging_files: 0.0, + process_resident_memory_bytes: 0.0, + parseable_storage_size: StorageMetrics::default(), + } + } +} + +impl Metrics { + fn new(address: String) -> Self { + Metrics { + address, + parseable_events_ingested: 0.0, + parseable_staging_files: 0.0, + process_resident_memory_bytes: 0.0, + parseable_storage_size: StorageMetrics::default(), + } + } +} + +impl Metrics { + pub fn from_prometheus_samples(samples: Vec, address: String) -> Self { + let mut prom_dress = Metrics::new(address); + + for sample in samples { + if &sample.metric == "parseable_events_ingested" { + if let PromValue::Counter(val) = sample.value { + prom_dress.parseable_events_ingested += val; + } + } else if sample.metric == "parseable_staging_files" { + if let PromValue::Gauge(val) = sample.value { + prom_dress.parseable_staging_files += val; + } + } else if sample.metric == "process_resident_memory_bytes" { + if let PromValue::Gauge(val) = sample.value { + prom_dress.process_resident_memory_bytes += val; + } + } else if sample.metric == "parseable_storage_size" { + if sample.labels.get("type").unwrap() == "data" { + if let PromValue::Gauge(val) = sample.value { + prom_dress.parseable_storage_size.data += val; + } + } else if sample.labels.get("type").unwrap() == "staging" { + if let PromValue::Gauge(val) = sample.value { + prom_dress.parseable_storage_size.staging += val; + } + } + } + } + + prom_dress + } + + #[allow(unused)] + pub fn to_json(&self) -> Result { + serde_json::to_value(self) + } +} diff --git a/server/src/migration.rs b/server/src/migration.rs index 5484e84c..1207bc30 100644 --- a/server/src/migration.rs +++ b/server/src/migration.rs @@ -21,15 +21,20 @@ mod metadata_migration; mod schema_migration; 
mod stream_metadata_migration; -use std::fs::OpenOptions; +use std::{fs::OpenOptions, sync::Arc}; use bytes::Bytes; +use itertools::Itertools; use relative_path::RelativePathBuf; use serde::Serialize; use crate::{ option::Config, - storage::{ObjectStorage, ObjectStorageError}, + storage::{ + object_storage::{parseable_json_path, stream_json_path}, + ObjectStorage, ObjectStorageError, PARSEABLE_METADATA_FILE_NAME, PARSEABLE_ROOT_DIRECTORY, + SCHEMA_FILE_NAME, STREAM_ROOT_DIRECTORY, + }, }; /// Migrate the metdata from v1 or v2 to v3 @@ -46,6 +51,7 @@ pub async fn run_metadata_migration(config: &Config) -> anyhow::Result<()> { .and_then(|version| version.as_str()) } + // if storage metadata is none do nothing if let Some(storage_metadata) = storage_metadata { match get_version(&storage_metadata) { Some("v1") => { @@ -56,10 +62,15 @@ pub async fn run_metadata_migration(config: &Config) -> anyhow::Result<()> { let metadata = metadata_migration::v2_v3(storage_metadata); put_remote_metadata(&*object_store, &metadata).await?; } + Some("v3") => { + let mdata = metadata_migration::update_v3(storage_metadata); + put_remote_metadata(&*object_store, &mdata).await?; + } _ => (), } } + // if staging metadata is none do nothing if let Some(staging_metadata) = staging_metadata { match get_version(&staging_metadata) { Some("v1") => { @@ -70,6 +81,10 @@ pub async fn run_metadata_migration(config: &Config) -> anyhow::Result<()> { let metadata = metadata_migration::v2_v3(staging_metadata); put_staging_metadata(config, &metadata)?; } + Some("v3") => { + let mdata = metadata_migration::update_v3(staging_metadata); + put_staging_metadata(config, &mdata)?; + } _ => (), } } @@ -77,6 +92,7 @@ pub async fn run_metadata_migration(config: &Config) -> anyhow::Result<()> { Ok(()) } +/// run the migration for all streams pub async fn run_migration(config: &Config) -> anyhow::Result<()> { let storage = config.storage().get_object_store(); let streams = storage.list_streams().await?; @@ -89,7 +105,8 @@ pub async fn run_migration(config: &Config) -> anyhow::Result<()> { } async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow::Result<()> { - let path = RelativePathBuf::from_iter([stream, ".stream.json"]); + let path = stream_json_path(stream); + let stream_metadata = storage.get_object(&path).await?; let stream_metadata: serde_json::Value = serde_json::from_slice(&stream_metadata).expect("stream.json is valid json"); @@ -106,7 +123,8 @@ async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow:: .put_object(&path, to_bytes(&new_stream_metadata)) .await?; - let schema_path = RelativePathBuf::from_iter([stream, ".schema"]); + let schema_path = + RelativePathBuf::from_iter([stream, STREAM_ROOT_DIRECTORY, SCHEMA_FILE_NAME]); let schema = storage.get_object(&schema_path).await?; let schema = serde_json::from_slice(&schema).ok(); let map = schema_migration::v1_v3(schema)?; @@ -118,7 +136,8 @@ async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow:: .put_object(&path, to_bytes(&new_stream_metadata)) .await?; - let schema_path = RelativePathBuf::from_iter([stream, ".schema"]); + let schema_path = + RelativePathBuf::from_iter([stream, STREAM_ROOT_DIRECTORY, SCHEMA_FILE_NAME]); let schema = storage.get_object(&schema_path).await?; let schema = serde_json::from_slice(&schema)?; let map = schema_migration::v2_v3(schema)?; @@ -138,7 +157,8 @@ fn to_bytes(any: &(impl ?Sized + Serialize)) -> Bytes { } pub fn get_staging_metadata(config: &Config) -> anyhow::Result> { - 
let path = config.staging_dir().join(".parseable.json"); + let path = parseable_json_path().to_path(config.staging_dir()); + let bytes = match std::fs::read(path) { Ok(bytes) => bytes, Err(err) => match err.kind() { @@ -147,13 +167,14 @@ pub fn get_staging_metadata(config: &Config) -> anyhow::Result anyhow::Result> { - let path = RelativePathBuf::from_iter([".parseable.json"]); + let path = parseable_json_path(); match storage.get_object(&path).await { Ok(bytes) => Ok(Some( serde_json::from_slice(&bytes).expect("parseable config is valid json"), @@ -172,13 +193,13 @@ pub async fn put_remote_metadata( storage: &dyn ObjectStorage, metadata: &serde_json::Value, ) -> anyhow::Result<()> { - let path = RelativePathBuf::from_iter([".parseable.json"]); + let path = parseable_json_path(); let metadata = serde_json::to_vec(metadata)?.into(); Ok(storage.put_object(&path, metadata).await?) } pub fn put_staging_metadata(config: &Config, metadata: &serde_json::Value) -> anyhow::Result<()> { - let path = config.staging_dir().join(".parseable.json"); + let path = parseable_json_path().to_path(config.staging_dir()); let mut file = OpenOptions::new() .create(true) .truncate(true) @@ -187,3 +208,92 @@ pub fn put_staging_metadata(config: &Config, metadata: &serde_json::Value) -> an serde_json::to_writer(&mut file, metadata)?; Ok(()) } + +pub async fn run_file_migration(config: &Config) -> anyhow::Result<()> { + let object_store = config.storage().get_object_store(); + + let old_meta_file_path = RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME); + + // if this errors that means migrations is already done + if let Err(err) = object_store.get_object(&old_meta_file_path).await { + if matches!(err, ObjectStorageError::NoSuchKey(_)) { + return Ok(()); + } + return Err(err.into()); + } + + run_meta_file_migration(&object_store, old_meta_file_path).await?; + run_stream_files_migration(object_store).await?; + + Ok(()) +} + +async fn run_meta_file_migration( + object_store: &Arc, + old_meta_file_path: RelativePathBuf, +) -> anyhow::Result<()> { + log::info!("Migrating metadata files to new location"); + + // get the list of all meta files + let mut meta_files = object_store.get_ingester_meta_file_paths().await?; + meta_files.push(old_meta_file_path); + + for file in meta_files { + match object_store.get_object(&file).await { + Ok(bytes) => { + // we can unwrap here because we know the file exists + let new_path = RelativePathBuf::from_iter([ + PARSEABLE_ROOT_DIRECTORY, + file.file_name().unwrap(), + ]); + object_store.put_object(&new_path, bytes).await?; + object_store.delete_object(&file).await?; + } + Err(err) => { + // if error is not a no such key error, something weird happened + // so return the error + if !matches!(err, ObjectStorageError::NoSuchKey(_)) { + return Err(err.into()); + } + } + } + } + + Ok(()) +} + +async fn run_stream_files_migration( + object_store: Arc, +) -> anyhow::Result<()> { + let streams = object_store + .list_old_streams() + .await? 
+ .into_iter() + .map(|stream| stream.name) + .collect_vec(); + + for stream in streams { + let paths = object_store.get_stream_file_paths(&stream).await?; + + for path in paths { + match object_store.get_object(&path).await { + Ok(bytes) => { + let new_path = RelativePathBuf::from_iter([ + stream.as_str(), + STREAM_ROOT_DIRECTORY, + path.file_name().unwrap(), + ]); + object_store.put_object(&new_path, bytes).await?; + object_store.delete_object(&path).await?; + } + Err(err) => { + if !matches!(err, ObjectStorageError::NoSuchKey(_)) { + return Err(err.into()); + } + } + } + } + } + + Ok(()) +} diff --git a/server/src/migration/metadata_migration.rs b/server/src/migration/metadata_migration.rs index 36507a28..cbeee200 100644 --- a/server/src/migration/metadata_migration.rs +++ b/server/src/migration/metadata_migration.rs @@ -17,22 +17,54 @@ */ use rand::distributions::DistString; -use serde_json::{Map, Value}; +use serde_json::{Map, Value as JsonValue}; -pub fn v1_v3(mut storage_metadata: serde_json::Value) -> Value { +use crate::option::CONFIG; + +/* +v1 +{ + "version": "v1", + "mode": "drive" + "user": string, + "staging": "string", + "storage": "string", + "deployment_id": "string" + "stream": string, + "default_role": null +} +*/ +pub fn v1_v3(mut storage_metadata: JsonValue) -> JsonValue { let metadata = storage_metadata.as_object_mut().unwrap(); - *metadata.get_mut("version").unwrap() = Value::String("v3".to_string()); + *metadata.get_mut("version").unwrap() = JsonValue::String("v3".to_string()); metadata.remove("user"); metadata.remove("stream"); - metadata.insert("users".to_string(), Value::Array(vec![])); - metadata.insert("streams".to_string(), Value::Array(vec![])); - metadata.insert("roles".to_string(), Value::Array(vec![])); + metadata.insert("users".to_string(), JsonValue::Array(vec![])); + metadata.insert("streams".to_string(), JsonValue::Array(vec![])); + metadata.insert("roles".to_string(), JsonValue::Array(vec![])); + metadata.insert( + "server_mode".to_string(), + JsonValue::String(CONFIG.parseable.mode.to_string()), + ); storage_metadata } -pub fn v2_v3(mut storage_metadata: serde_json::Value) -> Value { +/* +v2 +{ + "version": "v2", + "users": [ + { + "role": ["privilege1", "privilege2", ...] + }, + ... + ] + ... 
+} +*/ +pub fn v2_v3(mut storage_metadata: JsonValue) -> JsonValue { let metadata = storage_metadata.as_object_mut().unwrap(); - *metadata.get_mut("version").unwrap() = Value::String("v3".to_string()); + *metadata.get_mut("version").unwrap() = JsonValue::String("v3".to_string()); let users = metadata .get_mut("users") .expect("users field is present") @@ -46,7 +78,7 @@ pub fn v2_v3(mut storage_metadata: serde_json::Value) -> Value { // user is an object let user = user.as_object_mut().unwrap(); // take out privileges - let Value::Array(privileges) = user.remove("role").expect("role exists for v2") else { + let JsonValue::Array(privileges) = user.remove("role").expect("role exists for v2") else { panic!("privileges is an arrray") }; @@ -55,15 +87,34 @@ pub fn v2_v3(mut storage_metadata: serde_json::Value) -> Value { if !privileges.is_empty() { let role_name = rand::distributions::Alphanumeric.sample_string(&mut rand::thread_rng(), 8); - privileges_map.push((role_name.clone(), Value::Array(privileges))); - roles.push(Value::from(role_name)); + privileges_map.push((role_name.clone(), JsonValue::Array(privileges))); + roles.push(JsonValue::from(role_name)); } user.insert("roles".to_string(), roles.into()); } metadata.insert( "roles".to_string(), - Value::Object(Map::from_iter(privileges_map)), + JsonValue::Object(Map::from_iter(privileges_map)), + ); + metadata.insert( + "server_mode".to_string(), + JsonValue::String(CONFIG.parseable.mode.to_string()), ); storage_metadata } + +// maybe rename +pub fn update_v3(mut storage_metadata: JsonValue) -> JsonValue { + let metadata = storage_metadata.as_object_mut().unwrap(); + let sm = metadata.get("server_mode"); + + if sm.is_none() || sm.unwrap().as_str().unwrap() == "All" { + metadata.insert( + "server_mode".to_string(), + JsonValue::String(CONFIG.parseable.mode.to_string()), + ); + } + + storage_metadata +} diff --git a/server/src/option.rs b/server/src/option.rs index 5d713f28..43bed851 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -17,18 +17,17 @@ */ use clap::error::ErrorKind; -use clap::{command, value_parser, Arg, ArgGroup, Args, Command, FromArgMatches}; +use clap::{command, Args, Command, FromArgMatches}; use once_cell::sync::Lazy; use parquet::basic::{BrotliLevel, GzipLevel, ZstdLevel}; use std::env; use std::path::PathBuf; use std::sync::Arc; -use url::Url; -use crate::oidc::{self, OpenidConfig}; +use crate::cli::Cli; +use crate::storage::object_storage::parseable_json_path; use crate::storage::{FSConfig, ObjectStorageError, ObjectStorageProvider, S3Config}; - pub const MIN_CACHE_SIZE_BYTES: u64 = 1000u64.pow(3); // 1 GiB pub const JOIN_COMMUNITY: &str = "Join us on Parseable Slack community for questions : https://logg.ing/community"; @@ -36,18 +35,18 @@ pub static CONFIG: Lazy> = Lazy::new(|| Arc::new(Config::new())); #[derive(Debug)] pub struct Config { - pub parseable: Server, + pub parseable: Cli, storage: Arc, pub storage_name: &'static str, } impl Config { fn new() -> Self { - let cli = parseable_cli_command().get_matches(); + let cli = create_parseable_cli_command().get_matches(); match cli.subcommand() { Some(("local-store", m)) => { - let server = match Server::from_arg_matches(m) { - Ok(server) => server, + let cli = match Cli::from_arg_matches(m) { + Ok(cli) => cli, Err(err) => err.exit(), }; let storage = match FSConfig::from_arg_matches(m) { @@ -55,8 +54,8 @@ impl Config { Err(err) => err.exit(), }; - if server.local_staging_path == storage.root { - parseable_cli_command() + if cli.local_staging_path == 
storage.root { + create_parseable_cli_command() .error( ErrorKind::ValueValidation, "Cannot use same path for storage and staging", @@ -64,8 +63,8 @@ impl Config { .exit() } - if server.local_cache_path.is_some() { - parseable_cli_command() + if cli.local_cache_path.is_some() { + create_parseable_cli_command() .error( ErrorKind::ValueValidation, "Cannot use cache with local-store subcommand.", @@ -74,14 +73,14 @@ impl Config { } Config { - parseable: server, + parseable: cli, storage: Arc::new(storage), storage_name: "drive", } } Some(("s3-store", m)) => { - let server = match Server::from_arg_matches(m) { - Ok(server) => server, + let cli = match Cli::from_arg_matches(m) { + Ok(cli) => cli, Err(err) => err.exit(), }; let storage = match S3Config::from_arg_matches(m) { @@ -90,7 +89,7 @@ impl Config { }; Config { - parseable: server, + parseable: cli, storage: Arc::new(storage), storage_name: "s3", } @@ -99,9 +98,11 @@ impl Config { } } - pub async fn validate(&self) -> Result<(), ObjectStorageError> { + // validate the storage, if the proper path for staging directory is provided + // if the proper data directory is provided, or s3 bucket is provided etc + pub async fn validate_storage(&self) -> Result<(), ObjectStorageError> { let obj_store = self.storage.get_object_store(); - let rel_path = relative_path::RelativePathBuf::from(".parseable.json"); + let rel_path = parseable_json_path(); let has_parseable_json = obj_store.get_object(&rel_path).await.is_ok(); @@ -118,7 +119,7 @@ impl Config { return Ok(()); } - if self.mode_string() == "Local drive" { + if self.get_storage_mode_string() == "Local drive" { return Err(ObjectStorageError::Custom(format!("Could not start the server because directory '{}' contains stale data, please use an empty directory, and restart the server.\n{}", self.storage.get_endpoint(), JOIN_COMMUNITY))); } @@ -143,34 +144,33 @@ impl Config { } pub fn is_default_creds(&self) -> bool { - self.parseable.username == Server::DEFAULT_USERNAME - && self.parseable.password == Server::DEFAULT_PASSWORD + self.parseable.username == Cli::DEFAULT_USERNAME + && self.parseable.password == Cli::DEFAULT_PASSWORD } // returns the string representation of the storage mode // drive --> Local drive // s3 --> S3 bucket - pub fn mode_string(&self) -> &str { - let mut mode = "S3 bucket"; + pub fn get_storage_mode_string(&self) -> &str { if self.storage_name == "drive" { - mode = "Local drive"; + return "Local drive"; } - mode + "S3 bucket" } } -fn parseable_cli_command() -> Command { - let local = Server::get_clap_command("local-store"); +fn create_parseable_cli_command() -> Command { + let local = Cli::create_cli_command_with_clap("local-store"); let local = ::augment_args_for_update(local); let local = local - .mut_arg(Server::USERNAME, |arg| { - arg.required(false).default_value(Server::DEFAULT_USERNAME) + .mut_arg(Cli::USERNAME, |arg| { + arg.required(false).default_value(Cli::DEFAULT_USERNAME) }) - .mut_arg(Server::PASSWORD, |arg| { - arg.required(false).default_value(Server::DEFAULT_PASSWORD) + .mut_arg(Cli::PASSWORD, |arg| { + arg.required(false).default_value(Cli::DEFAULT_PASSWORD) }); - let s3 = Server::get_clap_command("s3-store"); + let s3 = Cli::create_cli_command_with_clap("s3-store"); let s3 = ::augment_args_for_update(s3); command!() @@ -190,434 +190,37 @@ fn parseable_cli_command() -> Command { .subcommands([local, s3]) } -#[derive(Debug, Default)] -pub struct Server { - /// The location of TLS Cert file - pub tls_cert_path: Option, - - /// The location of TLS Private Key 
file - pub tls_key_path: Option, - - /// The address on which the http server will listen. - pub address: String, - - /// Base domain under which server is hosted. - /// This information is used by OIDC to refer redirects - pub domain_address: Option, - - /// The local staging path is used as a temporary landing point - /// for incoming events and local cache - pub local_staging_path: PathBuf, - - /// The local cache path is used for speeding up query on latest data - pub local_cache_path: Option, - - /// Size for local cache - pub local_cache_size: u64, - - /// Username for the basic authentication on the server - pub username: String, - - /// Password for the basic authentication on the server - pub password: String, - - /// OpenId configuration - pub openid: Option, - - /// Server should check for update or not - pub check_update: bool, - - /// Server should send anonymous analytics or not - pub send_analytics: bool, - - /// Open AI access key - pub open_ai_key: Option, - - /// Livetail port - pub grpc_port: u16, - - /// Livetail channel capacity - pub livetail_channel_capacity: usize, - - /// Rows in Parquet Rowgroup - pub row_group_size: usize, - - /// Query memory limit in bytes - pub query_memory_pool_size: Option, - - /// Parquet compression algorithm - pub parquet_compression: Compression, - - /// Mode of operation - pub mode: Mode, -} - -impl FromArgMatches for Server { - fn from_arg_matches(m: &clap::ArgMatches) -> Result { - let mut s: Self = Self::default(); - s.update_from_arg_matches(m)?; - Ok(s) - } - - fn update_from_arg_matches(&mut self, m: &clap::ArgMatches) -> Result<(), clap::Error> { - self.local_cache_path = m.get_one::(Self::CACHE).cloned(); - self.tls_cert_path = m.get_one::(Self::TLS_CERT).cloned(); - self.tls_key_path = m.get_one::(Self::TLS_KEY).cloned(); - self.domain_address = m.get_one::(Self::DOMAIN_URI).cloned(); - let openid_client_id = m.get_one::(Self::OPENID_CLIENT_ID).cloned(); - let openid_client_secret = m.get_one::(Self::OPENID_CLIENT_SECRET).cloned(); - let openid_issuer = m.get_one::(Self::OPENID_ISSUER).cloned(); - - self.address = m - .get_one::(Self::ADDRESS) - .cloned() - .expect("default value for address"); - self.local_staging_path = m - .get_one::(Self::STAGING) - .cloned() - .expect("default value for staging"); - self.local_cache_size = m - .get_one::(Self::CACHE_SIZE) - .cloned() - .expect("default value for cache size"); - self.username = m - .get_one::(Self::USERNAME) - .cloned() - .expect("default for username"); - self.password = m - .get_one::(Self::PASSWORD) - .cloned() - .expect("default for password"); - self.check_update = m - .get_one::(Self::CHECK_UPDATE) - .cloned() - .expect("default for check update"); - self.send_analytics = m - .get_one::(Self::SEND_ANALYTICS) - .cloned() - .expect("default for send analytics"); - self.open_ai_key = m.get_one::(Self::OPEN_AI_KEY).cloned(); - self.grpc_port = m - .get_one::(Self::GRPC_PORT) - .cloned() - .expect("default for livetail port"); - self.livetail_channel_capacity = m - .get_one::(Self::LIVETAIL_CAPACITY) - .cloned() - .expect("default for livetail capacity"); - // converts Gib to bytes before assigning - self.query_memory_pool_size = m - .get_one::(Self::QUERY_MEM_POOL_SIZE) - .cloned() - .map(|gib| gib as usize * 1024usize.pow(3)); - self.row_group_size = m - .get_one::(Self::ROW_GROUP_SIZE) - .cloned() - .expect("default for row_group size"); - self.parquet_compression = match m - .get_one::(Self::PARQUET_COMPRESSION_ALGO) - .expect("default for compression algo") - 
.as_str() - { - "uncompressed" => Compression::UNCOMPRESSED, - "snappy" => Compression::SNAPPY, - "gzip" => Compression::GZIP, - "lzo" => Compression::LZO, - "brotli" => Compression::BROTLI, - "lz4" => Compression::LZ4, - "zstd" => Compression::ZSTD, - _ => unreachable!(), - }; - - self.openid = match (openid_client_id, openid_client_secret, openid_issuer) { - (Some(id), Some(secret), Some(issuer)) => { - let origin = if let Some(url) = self.domain_address.clone() { - oidc::Origin::Production(url) - } else { - oidc::Origin::Local { - socket_addr: self.address.clone(), - https: self.tls_cert_path.is_some() && self.tls_key_path.is_some(), - } - }; - Some(OpenidConfig { - id, - secret, - issuer, - origin, - }) - } - _ => None, - }; - - self.mode = match m - .get_one::(Self::MODE) - .expect("Mode not set") - .as_str() - { - "query" => Mode::Query, - "ingest" => Mode::Ingest, - "all" => Mode::All, - _ => unreachable!(), - }; - - Ok(()) - } +#[derive(Debug, Default, Eq, PartialEq)] +pub enum Mode { + Query, + Ingest, + #[default] + All, } -impl Server { - // identifiers for arguments - pub const TLS_CERT: &'static str = "tls-cert-path"; - pub const TLS_KEY: &'static str = "tls-key-path"; - pub const ADDRESS: &'static str = "address"; - pub const DOMAIN_URI: &'static str = "origin"; - pub const STAGING: &'static str = "local-staging-path"; - pub const CACHE: &'static str = "cache-path"; - pub const CACHE_SIZE: &'static str = "cache-size"; - pub const USERNAME: &'static str = "username"; - pub const PASSWORD: &'static str = "password"; - pub const CHECK_UPDATE: &'static str = "check-update"; - pub const SEND_ANALYTICS: &'static str = "send-analytics"; - pub const OPEN_AI_KEY: &'static str = "open-ai-key"; - pub const OPENID_CLIENT_ID: &'static str = "oidc-client"; - pub const OPENID_CLIENT_SECRET: &'static str = "oidc-client-secret"; - pub const OPENID_ISSUER: &'static str = "oidc-issuer"; - pub const GRPC_PORT: &'static str = "grpc-port"; - pub const LIVETAIL_CAPACITY: &'static str = "livetail-capacity"; - // todo : what should this flag be - pub const QUERY_MEM_POOL_SIZE: &'static str = "query-mempool-size"; - pub const ROW_GROUP_SIZE: &'static str = "row-group-size"; - pub const PARQUET_COMPRESSION_ALGO: &'static str = "compression-algo"; - pub const MODE: &'static str = "mode"; - pub const DEFAULT_USERNAME: &'static str = "admin"; - pub const DEFAULT_PASSWORD: &'static str = "admin"; - - pub fn local_stream_data_path(&self, stream_name: &str) -> PathBuf { - self.local_staging_path.join(stream_name) - } - - pub fn get_scheme(&self) -> String { - if self.tls_cert_path.is_some() && self.tls_key_path.is_some() { - return "https".to_string(); +impl Mode { + pub fn to_str(&self) -> &str { + match self { + Mode::Query => "Query", + Mode::Ingest => "Ingest", + Mode::All => "All", } - "http".to_string() } - pub fn get_clap_command(name: &'static str) -> Command { - Command::new(name).next_line_help(false) - .arg( - Arg::new(Self::TLS_CERT) - .long(Self::TLS_CERT) - .env("P_TLS_CERT_PATH") - .value_name("PATH") - .value_parser(validation::file_path) - .help("Local path on this device where certificate file is located. Required to enable TLS"), - ) - .arg( - Arg::new(Self::TLS_KEY) - .long(Self::TLS_KEY) - .env("P_TLS_KEY_PATH") - .value_name("PATH") - .value_parser(validation::file_path) - .help("Local path on this device where private key file is located. 
Required to enable TLS"), - ) - .arg( - Arg::new(Self::ADDRESS) - .long(Self::ADDRESS) - .env("P_ADDR") - .value_name("ADDR:PORT") - .default_value("0.0.0.0:8000") - .value_parser(validation::socket_addr) - .help("Address and port for Parseable HTTP(s) server"), - ) - .arg( - Arg::new(Self::STAGING) - .long(Self::STAGING) - .env("P_STAGING_DIR") - .value_name("DIR") - .default_value("./staging") - .value_parser(validation::canonicalize_path) - .help("Local path on this device to be used as landing point for incoming events") - .next_line_help(true), - ) - .arg( - Arg::new(Self::CACHE) - .long(Self::CACHE) - .env("P_CACHE_DIR") - .value_name("DIR") - .value_parser(validation::canonicalize_path) - .help("Local path on this device to be used for caching data") - .next_line_help(true), - ) - .arg( - Arg::new(Self::CACHE_SIZE) - .long(Self::CACHE_SIZE) - .env("P_CACHE_SIZE") - .value_name("size") - .default_value("1GiB") - .value_parser(validation::cache_size) - .help("Maximum allowed cache size for all streams combined (In human readable format, e.g 1GiB, 2GiB, 100MB)") - .next_line_help(true), - ) - .arg( - Arg::new(Self::USERNAME) - .long(Self::USERNAME) - .env("P_USERNAME") - .value_name("STRING") - .required(true) - .help("Admin username to be set for this Parseable server"), - ) - .arg( - Arg::new(Self::PASSWORD) - .long(Self::PASSWORD) - .env("P_PASSWORD") - .value_name("STRING") - .required(true) - .help("Admin password to be set for this Parseable server"), - ) - .arg( - Arg::new(Self::CHECK_UPDATE) - .long(Self::CHECK_UPDATE) - .env("P_CHECK_UPDATE") - .value_name("BOOL") - .required(false) - .default_value("true") - .value_parser(value_parser!(bool)) - .help("Enable/Disable checking for new Parseable release"), - ) - .arg( - Arg::new(Self::SEND_ANALYTICS) - .long(Self::SEND_ANALYTICS) - .env("P_SEND_ANONYMOUS_USAGE_DATA") - .value_name("BOOL") - .required(false) - .default_value("true") - .value_parser(value_parser!(bool)) - .help("Enable/Disable anonymous telemetry data collection"), - ) - .arg( - Arg::new(Self::OPEN_AI_KEY) - .long(Self::OPEN_AI_KEY) - .env("P_OPENAI_API_KEY") - .value_name("STRING") - .required(false) - .help("OpenAI key to enable llm features"), - ) - .arg( - Arg::new(Self::OPENID_CLIENT_ID) - .long(Self::OPENID_CLIENT_ID) - .env("P_OIDC_CLIENT_ID") - .value_name("STRING") - .required(false) - .help("Client id for OIDC provider"), - ) - .arg( - Arg::new(Self::OPENID_CLIENT_SECRET) - .long(Self::OPENID_CLIENT_SECRET) - .env("P_OIDC_CLIENT_SECRET") - .value_name("STRING") - .required(false) - .help("Client secret for OIDC provider"), - ) - .arg( - Arg::new(Self::OPENID_ISSUER) - .long(Self::OPENID_ISSUER) - .env("P_OIDC_ISSUER") - .value_name("URl") - .required(false) - .value_parser(validation::url) - .help("OIDC provider's host address"), - ) - .arg( - Arg::new(Self::DOMAIN_URI) - .long(Self::DOMAIN_URI) - .env("P_ORIGIN_URI") - .value_name("URL") - .required(false) - .value_parser(validation::url) - .help("Parseable server global domain address"), - ) - .arg( - Arg::new(Self::GRPC_PORT) - .long(Self::GRPC_PORT) - .env("P_GRPC_PORT") - .value_name("PORT") - .default_value("8001") - .required(false) - .value_parser(value_parser!(u16)) - .help("Port for gRPC server"), - ) - .arg( - Arg::new(Self::LIVETAIL_CAPACITY) - .long(Self::LIVETAIL_CAPACITY) - .env("P_LIVETAIL_CAPACITY") - .value_name("NUMBER") - .default_value("1000") - .required(false) - .value_parser(value_parser!(usize)) - .help("Number of rows in livetail channel"), - ) - .arg( - 
Arg::new(Self::QUERY_MEM_POOL_SIZE) - .long(Self::QUERY_MEM_POOL_SIZE) - .env("P_QUERY_MEMORY_LIMIT") - .value_name("Gib") - .required(false) - .value_parser(value_parser!(u8)) - .help("Set a fixed memory limit for query"), - ) - .arg( - Arg::new(Self::ROW_GROUP_SIZE) - .long(Self::ROW_GROUP_SIZE) - .env("P_PARQUET_ROW_GROUP_SIZE") - .value_name("NUMBER") - .required(false) - .default_value("16384") - .value_parser(value_parser!(usize)) - .help("Number of rows in a row group"), - ).arg( - Arg::new(Self::MODE) - .long(Self::MODE) - .env("P_MODE") - .value_name("STRING") - .required(false) - .default_value("all") - .value_parser([ - "query", - "ingest", - "all"]) - .help("Mode of operation"), - ) - .arg( - Arg::new(Self::PARQUET_COMPRESSION_ALGO) - .long(Self::PARQUET_COMPRESSION_ALGO) - .env("P_PARQUET_COMPRESSION_ALGO") - .value_name("[UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD]") - .required(false) - .default_value("lz4") - .value_parser([ - "uncompressed", - "snappy", - "gzip", - "lzo", - "brotli", - "lz4", - "zstd"]) - .help("Parquet compression algorithm"), - ).group( - ArgGroup::new("oidc") - .args([Self::OPENID_CLIENT_ID, Self::OPENID_CLIENT_SECRET, Self::OPENID_ISSUER]) - .requires_all([Self::OPENID_CLIENT_ID, Self::OPENID_CLIENT_SECRET, Self::OPENID_ISSUER]) - .multiple(true) - ) + pub fn from_string(mode: &str) -> Result { + match mode { + "Query" => Ok(Mode::Query), + "Ingest" => Ok(Mode::Ingest), + "All" => Ok(Mode::All), + x => Err(format!("Invalid mode: {}", x)), + } } } -#[derive(Debug, Default, Eq, PartialEq)] -pub enum Mode { - Query, - Ingest, - #[default] - All, +impl ToString for Mode { + fn to_string(&self) -> String { + self.to_str().to_string() + } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] diff --git a/server/src/query.rs b/server/src/query.rs index ef168c72..c16c18f2 100644 --- a/server/src/query.rs +++ b/server/src/query.rs @@ -33,6 +33,7 @@ use datafusion::logical_expr::{Explain, Filter, LogicalPlan, PlanType, ToStringi use datafusion::prelude::*; use itertools::Itertools; use once_cell::sync::Lazy; +use serde_json::{json, Value}; use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -121,6 +122,10 @@ impl Query { .cloned() .collect_vec(); + if fields.is_empty() { + return Ok((vec![], fields)); + } + let results = df.collect().await?; Ok((results, fields)) } @@ -170,12 +175,12 @@ impl Query { } #[derive(Debug, Default)] -struct TableScanVisitor { +pub(crate) struct TableScanVisitor { tables: Vec, } impl TableScanVisitor { - fn into_inner(self) -> Vec { + pub fn into_inner(self) -> Vec { self.tables } } @@ -331,6 +336,50 @@ fn time_from_path(path: &Path) -> DateTime { .unwrap() } +pub fn flatten_objects_for_count(objects: Vec) -> Vec { + if objects.is_empty() { + return objects; + } + + // check if all the keys start with "COUNT" + let flag = objects.iter().all(|obj| { + obj.as_object() + .unwrap() + .keys() + .all(|key| key.starts_with("COUNT")) + }) && objects.iter().all(|obj| { + obj.as_object() + .unwrap() + .keys() + .all(|key| key == objects[0].as_object().unwrap().keys().next().unwrap()) + }); + + if flag { + let mut accum = 0u64; + let key = objects[0] + .as_object() + .unwrap() + .keys() + .next() + .unwrap() + .clone(); + + for obj in objects { + let count = obj.as_object().unwrap().keys().fold(0, |acc, key| { + let value = obj.as_object().unwrap().get(key).unwrap().as_u64().unwrap(); + acc + value + }); + accum += count; + } + + vec![json!({ + key: accum + })] + } else { + objects + } +} + pub mod 
error { use crate::storage::ObjectStorageError; use datafusion::error::DataFusionError; @@ -346,6 +395,10 @@ pub mod error { #[cfg(test)] mod tests { + use serde_json::json; + + use crate::query::flatten_objects_for_count; + use super::time_from_path; use std::path::PathBuf; @@ -355,4 +408,82 @@ mod tests { let time = time_from_path(path.as_path()); assert_eq!(time.timestamp(), 1640995200); } + + #[test] + fn test_flat_simple() { + let val = vec![ + json!({ + "COUNT(*)": 1 + }), + json!({ + "COUNT(*)": 2 + }), + json!({ + "COUNT(*)": 3 + }), + ]; + + let out = flatten_objects_for_count(val); + assert_eq!(out, vec![json!({"COUNT(*)": 6})]); + } + + #[test] + fn test_flat_empty() { + let val = vec![]; + let out = flatten_objects_for_count(val.clone()); + assert_eq!(val, out); + } + + #[test] + fn test_flat_same_multi() { + let val = vec![json!({"COUNT(ALPHA)": 1}), json!({"COUNT(ALPHA)": 2})]; + let out = flatten_objects_for_count(val.clone()); + assert_eq!(vec![json!({"COUNT(ALPHA)": 3})], out); + } + + #[test] + fn test_flat_diff_multi() { + let val = vec![json!({"COUNT(ALPHA)": 1}), json!({"COUNT(BETA)": 2})]; + let out = flatten_objects_for_count(val.clone()); + assert_eq!(out, val); + } + + #[test] + fn test_flat_fail() { + let val = vec![ + json!({ + "Num": 1 + }), + json!({ + "Num": 2 + }), + json!({ + "Num": 3 + }), + ]; + + let out = flatten_objects_for_count(val.clone()); + assert_eq!(val, out); + } + + #[test] + fn test_flat_multi_key() { + let val = vec![ + json!({ + "Num": 1, + "COUNT(*)": 1 + }), + json!({ + "Num": 2, + "COUNT(*)": 2 + }), + json!({ + "Num": 3, + "COUNT(*)": 3 + }), + ]; + + let out = flatten_objects_for_count(val.clone()); + assert_eq!(val, out); + } } diff --git a/server/src/query/filter_optimizer.rs b/server/src/query/filter_optimizer.rs index fc087a2e..d78d0fce 100644 --- a/server/src/query/filter_optimizer.rs +++ b/server/src/query/filter_optimizer.rs @@ -26,7 +26,7 @@ use datafusion::{ scalar::ScalarValue, }; -/// Rewrites logical plan for source using projection and filter +/// Rewrites logical plan for source using projection and filter pub struct FilterOptimizerRule { pub column: String, pub literals: Vec, diff --git a/server/src/rbac/role.rs b/server/src/rbac/role.rs index b53e0cde..47b34ad4 100644 --- a/server/src/rbac/role.rs +++ b/server/src/rbac/role.rs @@ -45,6 +45,9 @@ pub enum Action { ListRole, GetAbout, QueryLLM, + ListCluster, + ListClusterMetrics, + DeleteIngester, All, } @@ -110,6 +113,9 @@ impl RoleBuilder { | Action::PutAlert | Action::GetAlert | Action::All => Permission::Stream(action, self.stream.clone().unwrap()), + Action::ListCluster => Permission::Unit(action), + Action::ListClusterMetrics => Permission::Unit(action), + Action::DeleteIngester => Permission::Unit(action), }; perms.push(perm); } @@ -220,6 +226,7 @@ pub mod model { Action::GetAlert, Action::GetAbout, Action::QueryLLM, + Action::ListCluster, ], stream: None, tag: None, diff --git a/server/src/response.rs b/server/src/response.rs index 18b86d78..6275864b 100644 --- a/server/src/response.rs +++ b/server/src/response.rs @@ -22,6 +22,8 @@ use datafusion::arrow::record_batch::RecordBatch; use itertools::Itertools; use serde_json::{json, Value}; +use crate::query::flatten_objects_for_count; + pub struct QueryResponse { pub records: Vec, pub fields: Vec, @@ -30,7 +32,7 @@ pub struct QueryResponse { } impl QueryResponse { - pub fn to_http(&self) -> impl Responder { + pub fn to_http(&self, imem: Option>) -> impl Responder { log::info!("{}", "Returning query results"); let 
records: Vec<&RecordBatch> = self.records.iter().collect(); let mut json_records = record_batches_to_json_rows(&records).unwrap(); @@ -43,7 +45,14 @@ impl QueryResponse { } } } - let values = json_records.into_iter().map(Value::Object).collect_vec(); + let mut values = json_records.into_iter().map(Value::Object).collect_vec(); + + if let Some(mut imem) = imem { + values.append(&mut imem); + } + + let values = flatten_objects_for_count(values); + let response = if self.with_fields { json!({ "fields": self.fields, diff --git a/server/src/storage.rs b/server/src/storage.rs index cb619424..b1f61902 100644 --- a/server/src/storage.rs +++ b/server/src/storage.rs @@ -24,7 +24,7 @@ use std::fmt::Debug; mod localfs; mod metrics_layer; -mod object_storage; +pub(crate) mod object_storage; pub mod retention; mod s3; pub mod staging; @@ -40,6 +40,15 @@ pub use store_metadata::{ use self::retention::Retention; pub use self::staging::StorageDir; +// metadata file names in a Stream prefix +pub const STREAM_METADATA_FILE_NAME: &str = ".stream.json"; +pub const PARSEABLE_METADATA_FILE_NAME: &str = ".parseable.json"; +pub const STREAM_ROOT_DIRECTORY: &str = ".stream"; +pub const PARSEABLE_ROOT_DIRECTORY: &str = ".parseable"; +pub const SCHEMA_FILE_NAME: &str = ".schema"; +pub const ALERT_FILE_NAME: &str = ".alert.json"; +pub const MANIFEST_FILE: &str = "manifest.json"; + /// local sync interval to move data.records to /tmp dir of that stream. /// 60 sec is a reasonable value. pub const LOCAL_SYNC_INTERVAL: u64 = 60; @@ -187,6 +196,8 @@ pub enum ObjectStorageError { #[error("Unhandled Error: {0}")] UnhandledError(Box), + #[error("Error: {0}")] + PathError(relative_path::FromPathError), #[allow(dead_code)] #[error("Authentication Error: {0}")] diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index e0880cff..73f75d5b 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -27,14 +27,17 @@ use bytes::Bytes; use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeConfig}; use fs_extra::file::CopyOptions; use futures::{stream::FuturesUnordered, TryStreamExt}; -use relative_path::RelativePath; +use relative_path::{RelativePath, RelativePathBuf}; use tokio::fs::{self, DirEntry}; use tokio_stream::wrappers::ReadDirStream; use crate::metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::option::validation; -use super::{object_storage, LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider}; +use super::{ + LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, + SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, +}; #[derive(Debug, Clone, clap::Args)] #[command( @@ -74,6 +77,7 @@ impl ObjectStorageProvider for FSConfig { } pub struct LocalFS { + // absolute path of the data directory root: PathBuf, } @@ -110,6 +114,122 @@ impl ObjectStorage for LocalFS { res } + async fn get_ingester_meta_file_paths( + &self, + ) -> Result, ObjectStorageError> { + let time = Instant::now(); + + let mut path_arr = vec![]; + let mut entries = fs::read_dir(&self.root).await?; + + while let Some(entry) = entries.next_entry().await? 
{ + let flag = entry + .path() + .file_name() + .unwrap_or_default() + .to_str() + .unwrap_or_default() + .contains("ingester"); + + if flag { + path_arr.push( + RelativePathBuf::from_path(entry.path().file_name().unwrap()) + .map_err(ObjectStorageError::PathError)?, + ); + } + } + + let time = time.elapsed().as_secs_f64(); + REQUEST_RESPONSE_TIME + .with_label_values(&["GET", "200"]) // this might not be the right status code + .observe(time); + + Ok(path_arr) + } + + async fn get_stream_file_paths( + &self, + stream_name: &str, + ) -> Result, ObjectStorageError> { + let time = Instant::now(); + let mut path_arr = vec![]; + + // = data/stream_name + let stream_dir_path = self.path_in_root(&RelativePathBuf::from(stream_name)); + let mut entries = fs::read_dir(&stream_dir_path).await?; + + while let Some(entry) = entries.next_entry().await? { + let flag = entry + .path() + .file_name() + .unwrap_or_default() + .to_str() + .unwrap_or_default() + .contains("ingester"); + + if flag { + path_arr.push(RelativePathBuf::from_iter([ + stream_name, + entry.path().file_name().unwrap().to_str().unwrap(), + ])); + } + } + + path_arr.push(RelativePathBuf::from_iter([ + stream_name, + STREAM_METADATA_FILE_NAME, + ])); + path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME])); + + let time = time.elapsed().as_secs_f64(); + REQUEST_RESPONSE_TIME + .with_label_values(&["GET", "200"]) // this might not be the right status code + .observe(time); + + Ok(path_arr) + } + + async fn get_objects( + &self, + base_path: Option<&RelativePath>, + ) -> Result, ObjectStorageError> { + let time = Instant::now(); + + let prefix = if let Some(path) = base_path { + path.to_path(&self.root) + } else { + self.root.clone() + }; + + let mut entries = fs::read_dir(&prefix).await?; + let mut res = Vec::new(); + while let Some(entry) = entries.next_entry().await? { + let ingester_file = entry + .path() + .file_name() + .unwrap_or_default() + .to_str() + .unwrap_or_default() + .contains("ingester"); + + if !ingester_file { + continue; + } + + let file = fs::read(entry.path()).await?; + res.push(file.into()); + } + + // maybe change the return code + let status = if res.is_empty() { "200" } else { "400" }; + let time = time.elapsed().as_secs_f64(); + REQUEST_RESPONSE_TIME + .with_label_values(&["GET", status]) + .observe(time); + + Ok(res) + } + async fn put_object( &self, path: &RelativePath, @@ -138,6 +258,12 @@ impl ObjectStorage for LocalFS { Ok(()) } + async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { + let path = self.path_in_root(path); + tokio::fs::remove_file(path).await?; + Ok(()) + } + async fn check(&self) -> Result<(), ObjectStorageError> { fs::create_dir_all(&self.root) .await @@ -149,8 +275,16 @@ impl ObjectStorage for LocalFS { Ok(fs::remove_dir_all(path).await?) } + async fn try_delete_ingester_meta( + &self, + ingester_filename: String, + ) -> Result<(), ObjectStorageError> { + let path = self.root.join(ingester_filename); + Ok(fs::remove_file(path).await?) 
+ } + async fn list_streams(&self) -> Result, ObjectStorageError> { - let ignore_dir = &["lost+found"]; + let ignore_dir = &["lost+found", PARSEABLE_ROOT_DIRECTORY]; let directories = ReadDirStream::new(fs::read_dir(&self.root).await?); let entries: Vec = directories.try_collect().await?; let entries = entries @@ -169,6 +303,26 @@ impl ObjectStorage for LocalFS { Ok(logstreams) } + async fn list_old_streams(&self) -> Result, ObjectStorageError> { + let ignore_dir = &["lost+found", PARSEABLE_ROOT_DIRECTORY]; + let directories = ReadDirStream::new(fs::read_dir(&self.root).await?); + let entries: Vec = directories.try_collect().await?; + let entries = entries + .into_iter() + .map(|entry| dir_with_old_stream(entry, ignore_dir)); + + let logstream_dirs: Vec> = + FuturesUnordered::from_iter(entries).try_collect().await?; + + let logstreams = logstream_dirs + .into_iter() + .flatten() + .map(|name| LogStream { name }) + .collect(); + + Ok(logstreams) + } + async fn list_dirs(&self) -> Result, ObjectStorageError> { let dirs = ReadDirStream::new(fs::read_dir(&self.root).await?) .try_collect::>() @@ -228,6 +382,50 @@ impl ObjectStorage for LocalFS { fn store_url(&self) -> url::Url { url::Url::parse("file:///").unwrap() } + + fn get_bucket_name(&self) -> String { + self.root + .iter() + .last() + .unwrap() + .to_str() + .unwrap() + .to_string() + } +} + +async fn dir_with_old_stream( + entry: DirEntry, + ignore_dirs: &[&str], +) -> Result, ObjectStorageError> { + let dir_name = entry + .path() + .file_name() + .expect("valid path") + .to_str() + .expect("valid unicode") + .to_owned(); + + if ignore_dirs.contains(&dir_name.as_str()) { + return Ok(None); + } + + if entry.file_type().await?.is_dir() { + let path = entry.path(); + + // even in ingest mode, we should only look for the global stream metadata file + let stream_json_path = path.join(STREAM_METADATA_FILE_NAME); + + if stream_json_path.exists() { + Ok(Some(dir_name)) + } else { + let err: Box = + format!("found {}", entry.path().display()).into(); + Err(ObjectStorageError::UnhandledError(err)) + } + } else { + Ok(None) + } } async fn dir_with_stream( @@ -248,7 +446,12 @@ async fn dir_with_stream( if entry.file_type().await?.is_dir() { let path = entry.path(); - let stream_json_path = path.join(object_storage::STREAM_METADATA_FILE_NAME); + + // even in ingest mode, we should only look for the global stream metadata file + let stream_json_path = path + .join(STREAM_ROOT_DIRECTORY) + .join(STREAM_METADATA_FILE_NAME); + if stream_json_path.exists() { Ok(Some(dir_name)) } else { diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index 85765661..efc10c54 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -20,7 +20,13 @@ use super::{ retention::Retention, staging::convert_disk_files_to_parquet, LogStream, ObjectStorageError, ObjectStoreFormat, Permisssion, StorageDir, StorageMetadata, }; +use super::{ + ALERT_FILE_NAME, MANIFEST_FILE, PARSEABLE_METADATA_FILE_NAME, PARSEABLE_ROOT_DIRECTORY, + SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, +}; +use crate::option::Mode; +use crate::utils::get_address; use crate::{ alerts::Alerts, catalog::{self, manifest::Manifest, snapshot::Snapshot}, @@ -49,13 +55,6 @@ use std::{ time::{Duration, Instant}, }; -// metadata file names in a Stream prefix -pub(super) const STREAM_METADATA_FILE_NAME: &str = ".stream.json"; -pub(super) const PARSEABLE_METADATA_FILE_NAME: &str = ".parseable.json"; -const SCHEMA_FILE_NAME: 
&str = ".schema"; -const ALERT_FILE_NAME: &str = ".alert.json"; -const MANIFEST_FILE: &str = "manifest.json"; - pub trait ObjectStorageProvider: StorageMetrics + std::fmt::Debug { fn get_datafusion_runtime(&self) -> RuntimeConfig; fn get_object_store(&self) -> Arc; @@ -66,6 +65,11 @@ pub trait ObjectStorageProvider: StorageMetrics + std::fmt::Debug { #[async_trait] pub trait ObjectStorage: Sync + 'static { async fn get_object(&self, path: &RelativePath) -> Result; + // want to make it more generic with a filter function + async fn get_objects( + &self, + base_path: Option<&RelativePath>, + ) -> Result, ObjectStorageError>; async fn put_object( &self, path: &RelativePath, @@ -75,16 +79,28 @@ pub trait ObjectStorage: Sync + 'static { async fn check(&self) -> Result<(), ObjectStorageError>; async fn delete_stream(&self, stream_name: &str) -> Result<(), ObjectStorageError>; async fn list_streams(&self) -> Result, ObjectStorageError>; + async fn list_old_streams(&self) -> Result, ObjectStorageError>; async fn list_dirs(&self) -> Result, ObjectStorageError>; async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError>; async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError>; - + async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError>; + async fn get_ingester_meta_file_paths( + &self, + ) -> Result, ObjectStorageError>; + async fn get_stream_file_paths( + &self, + stream_name: &str, + ) -> Result, ObjectStorageError>; + async fn try_delete_ingester_meta( + &self, + ingester_filename: String, + ) -> Result<(), ObjectStorageError>; /// Returns the amount of time taken by the `ObjectStore` to perform a get /// call. async fn get_latency(&self) -> Duration { // It's Ok to `unwrap` here. The hardcoded value will always Result in // an `Ok`. - let path = RelativePathBuf::from_path(".parseable.json").unwrap(); + let path = RelativePathBuf::from_path(PARSEABLE_METADATA_FILE_NAME).unwrap(); let start = Instant::now(); let _ = self.get_object(&path).await; @@ -183,6 +199,16 @@ pub trait ObjectStorage: Sync + 'static { .await } + async fn get_schema_on_server_start( + &self, + stream_name: &str, + ) -> Result { + let schema_path = + RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY, SCHEMA_FILE_NAME]); + let schema_map = self.get_object(&schema_path).await?; + Ok(serde_json::from_slice(&schema_map)?) + } + async fn get_schema(&self, stream_name: &str) -> Result { let schema_map = self.get_object(&schema_path(stream_name)).await?; Ok(serde_json::from_slice(&schema_map)?) @@ -209,7 +235,31 @@ pub trait ObjectStorage: Sync + 'static { &self, stream_name: &str, ) -> Result { - let stream_metadata = self.get_object(&stream_json_path(stream_name)).await?; + let stream_metadata = match self.get_object(&stream_json_path(stream_name)).await { + Ok(data) => data, + Err(_) => { + // ! 
this is hard coded for now + let bytes = self + .get_object(&RelativePathBuf::from_iter([ + stream_name, + STREAM_ROOT_DIRECTORY, + STREAM_METADATA_FILE_NAME, + ])) + .await?; + + let mut config = serde_json::from_slice::(&bytes) + .expect("parseable config is valid json"); + + if CONFIG.parseable.mode == Mode::Ingest { + config.stats = Stats::default(); + config.snapshot.manifest_list = vec![]; + } + + self.put_stream_manifest(stream_name, &config).await?; + bytes + } + }; + Ok(serde_json::from_slice(&stream_metadata).expect("parseable config is valid json")) } @@ -222,6 +272,22 @@ pub trait ObjectStorage: Sync + 'static { self.put_object(&path, to_bytes(manifest)).await } + /// for future use + async fn get_stats_for_first_time( + &self, + stream_name: &str, + ) -> Result { + let path = RelativePathBuf::from_iter([stream_name, STREAM_METADATA_FILE_NAME]); + let stream_metadata = self.get_object(&path).await?; + let stream_metadata: Value = + serde_json::from_slice(&stream_metadata).expect("parseable config is valid json"); + let stats = &stream_metadata["stats"]; + + let stats = serde_json::from_value(stats.clone()).unwrap_or_default(); + + Ok(stats) + } + async fn get_stats(&self, stream_name: &str) -> Result { let stream_metadata = self.get_object(&stream_json_path(stream_name)).await?; let stream_metadata: Value = @@ -278,6 +344,7 @@ pub trait ObjectStorage: Sync + 'static { } } + // get the manifest info async fn get_manifest( &self, path: &RelativePath, @@ -306,6 +373,7 @@ pub trait ObjectStorage: Sync + 'static { self.put_object(&path, to_bytes(&manifest)).await } + // gets the snapshot of the stream async fn get_object_store_format( &self, stream: &str, @@ -425,9 +493,12 @@ pub trait ObjectStorage: Sync + 'static { Ok(()) } + + // pick a better name + fn get_bucket_name(&self) -> String; } -async fn commit_schema_to_storage( +pub async fn commit_schema_to_storage( stream_name: &str, schema: Schema, ) -> Result<(), ObjectStorageError> { @@ -446,17 +517,39 @@ fn to_bytes(any: &(impl ?Sized + serde::Serialize)) -> Bytes { #[inline(always)] fn schema_path(stream_name: &str) -> RelativePathBuf { - RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME]) + match CONFIG.parseable.mode { + Mode::Ingest => { + let (ip, port) = get_address(); + let file_name = format!(".ingester.{}.{}{}", ip, port, SCHEMA_FILE_NAME); + + RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY, &file_name]) + } + Mode::All | Mode::Query => { + RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY, SCHEMA_FILE_NAME]) + } + } } #[inline(always)] -fn stream_json_path(stream_name: &str) -> RelativePathBuf { - RelativePathBuf::from_iter([stream_name, STREAM_METADATA_FILE_NAME]) +pub fn stream_json_path(stream_name: &str) -> RelativePathBuf { + match &CONFIG.parseable.mode { + Mode::Ingest => { + let (ip, port) = get_address(); + let file_name = format!(".ingester.{}.{}{}", ip, port, STREAM_METADATA_FILE_NAME); + RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY, &file_name]) + } + Mode::Query | Mode::All => RelativePathBuf::from_iter([ + stream_name, + STREAM_ROOT_DIRECTORY, + STREAM_METADATA_FILE_NAME, + ]), + } } +/// path will be ".parseable/.parsable.json" #[inline(always)] -fn parseable_json_path() -> RelativePathBuf { - RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME) +pub fn parseable_json_path() -> RelativePathBuf { + RelativePathBuf::from_iter([PARSEABLE_ROOT_DIRECTORY, PARSEABLE_METADATA_FILE_NAME]) } #[inline(always)] @@ -466,5 +559,15 @@ fn 
alert_json_path(stream_name: &str) -> RelativePathBuf { #[inline(always)] fn manifest_path(prefix: &str) -> RelativePathBuf { - RelativePathBuf::from_iter([prefix, MANIFEST_FILE]) + let addr = get_address(); + let mainfest_file_name = format!("{}.{}.{}", addr.0, addr.1, MANIFEST_FILE); + RelativePathBuf::from_iter([prefix, &mainfest_file_name]) +} + +#[inline(always)] +pub fn ingester_metadata_path(ip: String, port: String) -> RelativePathBuf { + RelativePathBuf::from_iter([ + PARSEABLE_ROOT_DIRECTORY, + &format!("ingester.{}.{}.json", ip, port), + ]) } diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index ef1144f1..27ec949f 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -29,7 +29,7 @@ use object_store::aws::{AmazonS3, AmazonS3Builder, AmazonS3ConfigKey, Checksum}; use object_store::limit::LimitStore; use object_store::path::Path as StorePath; use object_store::{ClientOptions, ObjectStore}; -use relative_path::RelativePath; +use relative_path::{RelativePath, RelativePathBuf}; use tokio::fs::OpenOptions; use tokio::io::{AsyncReadExt, AsyncWriteExt}; @@ -39,10 +39,13 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use crate::metrics::storage::{s3::REQUEST_RESPONSE_TIME, StorageMetrics}; -use crate::storage::{LogStream, ObjectStorage, ObjectStorageError}; +use crate::storage::{LogStream, ObjectStorage, ObjectStorageError, PARSEABLE_ROOT_DIRECTORY}; use super::metrics_layer::MetricLayer; -use super::{object_storage, ObjectStorageProvider}; +use super::{ + ObjectStorageProvider, PARSEABLE_METADATA_FILE_NAME, SCHEMA_FILE_NAME, + STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, +}; // in bytes const MULTIPART_UPLOAD_SIZE: usize = 1024 * 1024 * 100; @@ -197,6 +200,7 @@ impl ObjectStorageProvider for S3Config { Arc::new(S3 { client: s3, bucket: self.bucket_name.clone(), + root: StorePath::from(""), }) } @@ -209,20 +213,21 @@ impl ObjectStorageProvider for S3Config { } } -fn to_path(path: &RelativePath) -> StorePath { +fn to_object_store_path(path: &RelativePath) -> StorePath { StorePath::from(path.as_str()) } pub struct S3 { client: LimitStore, bucket: String, + root: StorePath, } impl S3 { async fn _get_object(&self, path: &RelativePath) -> Result { let instant = Instant::now(); - let resp = self.client.get(&to_path(path)).await; + let resp = self.client.get(&to_object_store_path(path)).await; match resp { Ok(resp) => { @@ -249,7 +254,7 @@ impl S3 { resource: Bytes, ) -> Result<(), ObjectStorageError> { let time = Instant::now(); - let resp = self.client.put(&to_path(path), resource).await; + let resp = self.client.put(&to_object_store_path(path), resource).await; let status = if resp.is_ok() { "200" } else { "400" }; let time = time.elapsed().as_secs_f64(); REQUEST_RESPONSE_TIME @@ -292,19 +297,23 @@ impl S3 { async fn _list_streams(&self) -> Result, ObjectStorageError> { let resp = self.client.list_with_delimiter(None).await?; - let common_prefixes = resp.common_prefixes; + let common_prefixes = resp.common_prefixes; // get all dirs // return prefixes at the root level let dirs: Vec<_> = common_prefixes .iter() .filter_map(|path| path.parts().next()) .map(|name| name.as_ref().to_string()) + .filter(|x| x != PARSEABLE_ROOT_DIRECTORY) .collect(); let stream_json_check = FuturesUnordered::new(); for dir in &dirs { - let key = format!("{}/{}", dir, object_storage::STREAM_METADATA_FILE_NAME); + let key = format!( + "{}/{}/{}", + dir, STREAM_ROOT_DIRECTORY, STREAM_METADATA_FILE_NAME + ); let task = async move { 
self.client.head(&StorePath::from(key)).await.map(|_| ()) }; stream_json_check.push(task); } @@ -403,6 +412,102 @@ impl ObjectStorage for S3 { Ok(self._get_object(path).await?) } + // TBD is this the right way or the api calls are too many? + async fn get_objects( + &self, + base_path: Option<&RelativePath>, + ) -> Result, ObjectStorageError> { + let instant = Instant::now(); + + let prefix = if let Some(base_path) = base_path { + to_object_store_path(base_path) + } else { + self.root.clone() + }; + + let mut list_stream = self.client.list(Some(&prefix)).await?; + + let mut res = vec![]; + + while let Some(meta) = list_stream.next().await.transpose()? { + let ingester_file = meta.location.filename().unwrap().starts_with("ingester"); + + if !ingester_file { + continue; + } + + let byts = self + .get_object( + RelativePath::from_path(meta.location.as_ref()) + .map_err(ObjectStorageError::PathError)?, + ) + .await?; + + res.push(byts); + } + + let instant = instant.elapsed().as_secs_f64(); + REQUEST_RESPONSE_TIME + .with_label_values(&["GET", "200"]) + .observe(instant); + + Ok(res) + } + + async fn get_ingester_meta_file_paths( + &self, + ) -> Result, ObjectStorageError> { + let time = Instant::now(); + let mut path_arr = vec![]; + let mut object_stream = self.client.list(Some(&self.root)).await?; + + while let Some(meta) = object_stream.next().await.transpose()? { + let flag = meta.location.filename().unwrap().starts_with("ingester"); + + if flag { + path_arr.push(RelativePathBuf::from(meta.location.as_ref())); + } + } + + let time = time.elapsed().as_secs_f64(); + REQUEST_RESPONSE_TIME + .with_label_values(&["GET", "200"]) + .observe(time); + + Ok(path_arr) + } + + async fn get_stream_file_paths( + &self, + stream_name: &str, + ) -> Result, ObjectStorageError> { + let time = Instant::now(); + let mut path_arr = vec![]; + let path = to_object_store_path(&RelativePathBuf::from(stream_name)); + let mut object_stream = self.client.list(Some(&path)).await?; + + while let Some(meta) = object_stream.next().await.transpose()? { + let flag = meta.location.filename().unwrap().starts_with(".ingester"); + + if flag { + path_arr.push(RelativePathBuf::from(meta.location.as_ref())); + } + } + + path_arr.push(RelativePathBuf::from_iter([ + stream_name, + STREAM_METADATA_FILE_NAME, + ])); + path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME])); + + let time = time.elapsed().as_secs_f64(); + REQUEST_RESPONSE_TIME + .with_label_values(&["GET", "200"]) + .observe(time); + + Ok(path_arr) + } + async fn put_object( &self, path: &RelativePath, @@ -421,10 +526,14 @@ impl ObjectStorage for S3 { Ok(()) } + async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { + Ok(self.client.delete(&to_object_store_path(path)).await?) + } + async fn check(&self) -> Result<(), ObjectStorageError> { Ok(self .client - .head(&object_storage::PARSEABLE_METADATA_FILE_NAME.into()) + .head(&PARSEABLE_METADATA_FILE_NAME.into()) .await .map(|_| ())?) } @@ -435,12 +544,59 @@ impl ObjectStorage for S3 { Ok(()) } + async fn try_delete_ingester_meta( + &self, + ingester_filename: String, + ) -> Result<(), ObjectStorageError> { + let file = RelativePathBuf::from(&ingester_filename); + match self.client.delete(&to_object_store_path(&file)).await { + Ok(_) => Ok(()), + Err(err) => { + // if the object is not found, it is not an error + // the given url path was incorrect + if matches!(err, object_store::Error::NotFound { .. 
}) { + log::error!("Node does not exist"); + Err(err.into()) + } else { + log::error!("Error deleting ingester meta file: {:?}", err); + Err(err.into()) + } + } + } + } + async fn list_streams(&self) -> Result, ObjectStorageError> { let streams = self._list_streams().await?; Ok(streams) } + async fn list_old_streams(&self) -> Result, ObjectStorageError> { + let resp = self.client.list_with_delimiter(None).await?; + + let common_prefixes = resp.common_prefixes; // get all dirs + + // return prefixes at the root level + let dirs: Vec<_> = common_prefixes + .iter() + .filter_map(|path| path.parts().next()) + .map(|name| name.as_ref().to_string()) + .filter(|x| x != PARSEABLE_ROOT_DIRECTORY) + .collect(); + + let stream_json_check = FuturesUnordered::new(); + + for dir in &dirs { + let key = format!("{}/{}", dir, STREAM_METADATA_FILE_NAME); + let task = async move { self.client.head(&StorePath::from(key)).await.map(|_| ()) }; + stream_json_check.push(task); + } + + stream_json_check.try_collect().await?; + + Ok(dirs.into_iter().map(|name| LogStream { name }).collect()) + } + async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError> { let streams = self._list_dates(stream_name).await?; @@ -482,6 +638,10 @@ impl ObjectStorage for S3 { .map(|name| name.as_ref().to_string()) .collect::>()) } + + fn get_bucket_name(&self) -> String { + self.bucket.clone() + } } impl From for ObjectStorageError { diff --git a/server/src/storage/staging.rs b/server/src/storage/staging.rs index 65c016ef..2dfc11b0 100644 --- a/server/src/storage/staging.rs +++ b/server/src/storage/staging.rs @@ -20,6 +20,7 @@ use std::{ collections::HashMap, fs, + net::SocketAddr, path::{Path, PathBuf}, process, sync::Arc, @@ -43,6 +44,7 @@ use crate::{ storage::OBJECT_STORE_DATA_GRANULARITY, utils::{self, arrow::merged_reader::MergedReverseRecordReader}, }; + const ARROW_FILE_EXTENSION: &str = "data.arrows"; const PARQUET_FILE_EXTENSION: &str = "data.parquet"; @@ -158,6 +160,24 @@ impl StorageDir { fn arrow_path_to_parquet(path: &Path) -> PathBuf { let filename = path.file_name().unwrap().to_str().unwrap(); let (_, filename) = filename.split_once('.').unwrap(); + + let port = CONFIG + .parseable + .address + .clone() + .parse::() + .unwrap() + .port(); + let filename = filename.rsplit_once('.').unwrap(); + let filename = format!("{}.{}.{}", filename.0, port, filename.1); + /* + let file_stem = path.file_stem().unwrap().to_str().unwrap(); + let random_string = + rand::distributions::Alphanumeric.sample_string(&mut rand::thread_rng(), 20); + let (_, filename) = file_stem.split_once('.').unwrap(); + let filename_with_random_number = format!("{}.{}.{}", filename, random_number, "arrows"); + */ + let mut parquet_path = path.to_owned(); parquet_path.set_file_name(filename); parquet_path.set_extension("parquet"); diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index b7d3a52f..18a3efae 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -26,13 +26,14 @@ use once_cell::sync::OnceCell; use std::io; use crate::{ - option::{CONFIG, JOIN_COMMUNITY}, + metadata::error::stream_info::MetadataError, + option::{Mode, CONFIG, JOIN_COMMUNITY}, rbac::{role::model::DefaultPrivilege, user::User}, storage::ObjectStorageError, utils::uid, }; -use super::object_storage::PARSEABLE_METADATA_FILE_NAME; +use super::{object_storage::parseable_json_path, PARSEABLE_METADATA_FILE_NAME}; // Expose some static variables for internal usage pub static STORAGE_METADATA: 
OnceCell = OnceCell::new(); @@ -55,6 +56,7 @@ pub struct StorageMetadata { pub deployment_id: uid::Uid, pub users: Vec, pub streams: Vec, + pub server_mode: String, #[serde(default)] pub roles: HashMap>, #[serde(default)] @@ -69,6 +71,7 @@ impl StorageMetadata { staging: CONFIG.staging_dir().to_path_buf(), storage: CONFIG.storage().get_endpoint(), deployment_id: uid::gen(), + server_mode: CONFIG.parseable.mode.to_string(), users: Vec::new(), streams: Vec::new(), roles: HashMap::default(), @@ -92,6 +95,7 @@ impl StorageMetadata { } } +/// deals with the staging directory creation and metadata resolution /// always returns remote metadata as it is source of truth /// overwrites staging metadata while updating storage info pub async fn resolve_parseable_metadata() -> Result { @@ -99,18 +103,8 @@ pub async fn resolve_parseable_metadata() -> Result { - if staging.deployment_id == remote.deployment_id { - EnvChange::None(remote) - } else { - EnvChange::NewRemote - } - } - (None, Some(remote)) => EnvChange::NewStaging(remote), - (Some(_), None) => EnvChange::NewRemote, - (None, None) => EnvChange::CreateBoth, - }; + // Env Change needs to be updated + let check = determine_environment(staging_metadata, remote_metadata); // flags for if metadata needs to be synced let mut overwrite_staging = false; @@ -120,25 +114,62 @@ pub async fn resolve_parseable_metadata() -> Result { // overwrite staging anyways so that it matches remote in case of any divergence overwrite_staging = true; + if CONFIG.parseable.mode == Mode::All { + standalone_when_distributed(Mode::from_string(&metadata.server_mode).expect("mode should be valid at here")) + .map_err(|err| { + ObjectStorageError::Custom(err.to_string()) + })?; + } Ok(metadata) }, EnvChange::NewRemote => { Err("Could not start the server because staging directory indicates stale data from previous deployment, please choose an empty staging directory and restart the server") } EnvChange::NewStaging(mut metadata) => { - create_dir_all(CONFIG.staging_dir())?; - metadata.staging = CONFIG.staging_dir().canonicalize()?; - // this flag is set to true so that metadata is copied to staging - overwrite_staging = true; - // overwrite remote because staging dir has changed. - overwrite_remote = true; - Ok(metadata) + // if server is started in ingest mode,we need to make sure that query mode has been started + // i.e the metadata is updated to reflect the server mode = Query + if Mode::from_string(&metadata.server_mode).unwrap() == Mode::All && CONFIG.parseable.mode == Mode::Ingest { + Err("Starting Ingest Mode is not allowed, Since Query Server has not been started yet") + } else { + create_dir_all(CONFIG.staging_dir())?; + metadata.staging = CONFIG.staging_dir().canonicalize()?; + // this flag is set to true so that metadata is copied to staging + overwrite_staging = true; + // overwrite remote in all and query mode + // because staging dir has changed. 
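// Summary of what the match below does per mode (description only, no new behaviour):
//   Mode::All    -> refuse to start if the remote metadata was written by a
//                   distributed (Query) deployment, then overwrite remote
//   Mode::Query  -> overwrite remote and stamp server_mode and staging locally
//   Mode::Ingest -> refresh only the local copy; remote metadata is left untouched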
diff --git a/server/src/sync.rs b/server/src/sync.rs
new file mode 100644
index 00000000..d7eb5d2d
--- /dev/null
+++ b/server/src/sync.rs
@@ -0,0 +1,112 @@
+/*
+ * Parseable Server (C) 2022 - 2024 Parseable, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+use clokwerk::{AsyncScheduler, Job, Scheduler, TimeUnits};
+use thread_priority::{ThreadBuilder, ThreadPriority};
+use tokio::sync::oneshot;
+use tokio::sync::oneshot::error::TryRecvError;
+
+use std::panic::{catch_unwind, AssertUnwindSafe};
+use std::thread::{self, JoinHandle};
+use std::time::Duration;
+
+use crate::option::CONFIG;
+use crate::{storage, STORAGE_UPLOAD_INTERVAL};
+
+pub(crate) fn object_store_sync() -> (JoinHandle<()>, oneshot::Receiver<()>, oneshot::Sender<()>) {
+    let (outbox_tx, outbox_rx) = oneshot::channel::<()>();
+    let (inbox_tx, inbox_rx) = oneshot::channel::<()>();
+    let mut inbox_rx = AssertUnwindSafe(inbox_rx);
+    let handle = thread::spawn(move || {
+        let res = catch_unwind(move || {
+            let rt = actix_web::rt::System::new();
+            rt.block_on(async {
+                let mut scheduler = AsyncScheduler::new();
+                scheduler
+                    .every(STORAGE_UPLOAD_INTERVAL.seconds())
+                    // Extra time interval is added so that this scheduler does not race with local sync.
+                    .plus(5u32.seconds())
+                    .run(|| async {
+                        if let Err(e) = CONFIG.storage().get_object_store().sync().await {
+                            log::warn!("failed to sync local data with object store. {:?}", e);
+                        }
+                    });
+
+                loop {
+                    tokio::time::sleep(Duration::from_secs(1)).await;
+                    scheduler.run_pending().await;
+                    match AssertUnwindSafe(|| inbox_rx.try_recv())() {
+                        Ok(_) => break,
+                        Err(TryRecvError::Empty) => continue,
+                        Err(TryRecvError::Closed) => {
+                            // should be unreachable but breaking anyways
+                            break;
+                        }
+                    }
+                }
+            })
+        });
+
+        if res.is_err() {
+            outbox_tx.send(()).unwrap();
+        }
+    });
+
+    (handle, outbox_rx, inbox_tx)
+}
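The `(JoinHandle, Receiver, Sender)` triple returned by `object_store_sync` (and by `run_local_sync` below) is a small supervision handshake: the caller requests shutdown through the inbox sender, and the worker reports a panic by firing the outbox. Here is the same handshake stripped of the scheduler details, using std channels instead of `tokio::sync::oneshot` so it stands alone; the names are illustrative:

use std::panic::{catch_unwind, AssertUnwindSafe};
use std::sync::mpsc::{channel, Receiver, Sender, TryRecvError};
use std::thread::{self, JoinHandle};
use std::time::Duration;

// Outbox fires if the worker body panics; inbox asks the worker to stop.
fn supervised_worker() -> (JoinHandle<()>, Receiver<()>, Sender<()>) {
    let (outbox_tx, outbox_rx) = channel::<()>();
    let (inbox_tx, inbox_rx) = channel::<()>();
    let handle = thread::spawn(move || {
        let result = catch_unwind(AssertUnwindSafe(|| loop {
            // periodic work would run here
            thread::sleep(Duration::from_millis(50));
            match inbox_rx.try_recv() {
                Ok(_) | Err(TryRecvError::Disconnected) => break,
                Err(TryRecvError::Empty) => continue,
            }
        }));
        if result.is_err() {
            // the work loop panicked; let the supervisor know so it can respawn
            let _ = outbox_tx.send(());
        }
    });
    (handle, outbox_rx, inbox_tx)
}

fn main() {
    let (handle, _outbox, inbox) = supervised_worker();
    inbox.send(()).unwrap(); // request shutdown
    handle.join().unwrap();  // worker exits its loop and the thread ends
}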
+
+pub(crate) fn run_local_sync() -> (JoinHandle<()>, oneshot::Receiver<()>, oneshot::Sender<()>) {
+    let (outbox_tx, outbox_rx) = oneshot::channel::<()>();
+    let (inbox_tx, inbox_rx) = oneshot::channel::<()>();
+    let mut inbox_rx = AssertUnwindSafe(inbox_rx);
+
+    let handle = ThreadBuilder::default()
+        .name("local-sync")
+        .priority(ThreadPriority::Max)
+        .spawn(move |priority_result| {
+            if priority_result.is_err() {
+                log::warn!("Max priority cannot be set for sync thread. Make sure that user/program is allowed to set thread priority.")
+            }
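On the consuming side, the server entry points elsewhere in this patch start these two workers and watch their outbox receivers so a panicked sync thread can be replaced without restarting the process. A rough sketch of that wiring, assuming it runs inside the same crate; the loop structure and names here are illustrative rather than the actual server code:

use std::time::Duration;

use crate::sync::{object_store_sync, run_local_sync};

// Hypothetical supervisor: respawn either sync worker if its outbox fires.
fn supervise_sync_tasks() {
    let (mut local_handle, mut local_outbox, mut _local_inbox) = run_local_sync();
    let (mut store_handle, mut store_outbox, mut _store_inbox) = object_store_sync();

    loop {
        std::thread::sleep(Duration::from_secs(1));
        // a message on an outbox means that worker panicked: join it and start a fresh one
        if local_outbox.try_recv().is_ok() {
            let _ = local_handle.join();
            (local_handle, local_outbox, _local_inbox) = run_local_sync();
        }
        if store_outbox.try_recv().is_ok() {
            let _ = store_handle.join();
            (store_handle, store_outbox, _store_inbox) = object_store_sync();
        }
    }
}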
+            let res = catch_unwind(move || {
+                let mut scheduler = Scheduler::new();
+                scheduler
+                    .every((storage::LOCAL_SYNC_INTERVAL as u32).seconds())
+                    .run(move || crate::event::STREAM_WRITERS.unset_all());
+
+                loop {
+                    thread::sleep(Duration::from_millis(50));
+                    scheduler.run_pending();
+                    match AssertUnwindSafe(|| inbox_rx.try_recv())() {
+                        Ok(_) => break,
+                        Err(TryRecvError::Empty) => continue,
+                        Err(TryRecvError::Closed) => {
+                            // should be unreachable but breaking anyways
+                            break;
+                        }
+                    }
+                }
+            });
+
+            if res.is_err() {
+                outbox_tx.send(()).unwrap();
+            }
+        })
+        .unwrap();
+
+    (handle, outbox_rx, inbox_tx)
+}
diff --git a/server/src/utils.rs b/server/src/utils.rs
index 83af01cc..530f2b21 100644
--- a/server/src/utils.rs
+++ b/server/src/utils.rs
@@ -23,8 +23,12 @@ pub mod json;
 pub mod uid;
 pub mod update;
 
+use std::net::{IpAddr, SocketAddr};
+
 use chrono::{DateTime, NaiveDate, Timelike, Utc};
 
+use crate::option::CONFIG;
+
 #[allow(dead_code)]
 pub fn hostname() -> Option<String> {
     hostname::get()
@@ -222,6 +226,12 @@ impl TimePeriod {
     }
 }
 
+#[inline(always)]
+pub fn get_address() -> (IpAddr, u16) {
+    let addr = CONFIG.parseable.address.parse::<SocketAddr>().unwrap();
+    (addr.ip(), addr.port())
+}
+
 #[cfg(test)]
 mod tests {
     use chrono::DateTime;
diff --git a/server/src/utils/arrow/merged_reader.rs b/server/src/utils/arrow/merged_reader.rs
index 8a31ae20..ef76ddf3 100644
--- a/server/src/utils/arrow/merged_reader.rs
+++ b/server/src/utils/arrow/merged_reader.rs
@@ -17,12 +17,11 @@
  *
  */
 
-use std::{fs::File, io::BufReader, path::PathBuf, sync::Arc};
-
 use arrow_array::{RecordBatch, TimestampMillisecondArray};
 use arrow_ipc::reader::StreamReader;
 use arrow_schema::Schema;
 use itertools::kmerge_by;
+use std::{fs::File, io::BufReader, path::PathBuf, sync::Arc};
 
 use super::{
     adapt_batch,