{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":649955694,"defaultBranch":"main","name":"LLMSys-PaperList","ownerLogin":"AmberLJC","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-06-06T02:34:49.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/42296458?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1686018889.764097","currentOid":""},"activityList":{"items":[{"before":"a7897acdda1dc1b089e1affda7dcc327a7f1a330","after":"97e88cf5e86dac27152abc35b1896799744a7210","ref":"refs/heads/main","pushedAt":"2024-08-30T20:46:35.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"update OSDI' 24","shortMessageHtmlLink":"update OSDI' 24"}},{"before":"f3b7b89132784291f1bf88ce6172ee8533e2cc71","after":"a7897acdda1dc1b089e1affda7dcc327a7f1a330","ref":"refs/heads/main","pushedAt":"2024-08-30T19:43:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"NanoFlow: Towards Optimal Large Language Model Serving Throughput","shortMessageHtmlLink":"NanoFlow: Towards Optimal Large Language Model Serving Throughput"}},{"before":"550998f39e7720056c298e334c4908a4037a0a03","after":"f3b7b89132784291f1bf88ce6172ee8533e2cc71","ref":"refs/heads/main","pushedAt":"2024-08-30T18:26:40.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Update genai framework","shortMessageHtmlLink":"Update genai framework"}},{"before":"11edc9c89bbf274e931007425e99bb4140df8c40","after":"550998f39e7720056c298e334c4908a4037a0a03","ref":"refs/heads/main","pushedAt":"2024-08-26T20:48:57.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Responsive ML inference in multi-tenanted environments using AQUA","shortMessageHtmlLink":"Responsive ML inference in multi-tenanted environments using AQUA"}},{"before":"a60aa011d10a4d9cd6a0e123aa140ebab8bce540","after":"11edc9c89bbf274e931007425e99bb4140df8c40","ref":"refs/heads/main","pushedAt":"2024-08-26T20:42:08.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Helix: Distributed Serving of Large Language Models via Max-Flow on Heterogeneous GPUs","shortMessageHtmlLink":"Helix: Distributed Serving of Large Language Models via Max-Flow on H…"}},{"before":"c8399ea9a62291fa44991290e37db0528eb78f19","after":"a60aa011d10a4d9cd6a0e123aa140ebab8bce540","ref":"refs/heads/main","pushedAt":"2024-08-26T20:39:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"NanoFlow: Towards Optimal Large Language Model Serving Throughput","shortMessageHtmlLink":"NanoFlow: Towards Optimal Large Language Model Serving Throughput"}},{"before":"910583c166df6694a9cd520c7ec1151f3276986a","after":"c8399ea9a62291fa44991290e37db0528eb78f19","ref":"refs/heads/main","pushedAt":"2024-08-12T22:08:01.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Addressing Model and Data Heterogeneity in Multimodal Large Language Model Training","shortMessageHtmlLink":"Addressing Model and Data Heterogeneity in Multimodal Large Language …"}},{"before":"c5f38fe74c2be8d8dd37d3e501683b9ec012f034","after":"910583c166df6694a9cd520c7ec1151f3276986a","ref":"refs/heads/main","pushedAt":"2024-08-12T22:06:39.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Prepacking: A Simple Method for Fast Prefilling and Increased Throughput in Large Language Models","shortMessageHtmlLink":"Prepacking: A Simple Method for Fast Prefilling and Increased Through…"}},{"before":"641a99f0287c652018d4c1853470cdcff4d70d76","after":"c5f38fe74c2be8d8dd37d3e501683b9ec012f034","ref":"refs/heads/main","pushedAt":"2024-08-08T18:56:05.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"100,000 H100 Clusters: Power, Network Topology, Ethernet vs InfiniBand, Reliability, Failures, Checkpointing","shortMessageHtmlLink":"100,000 H100 Clusters: Power, Network Topology, Ethernet vs InfiniBan…"}},{"before":"7971a5d1d486a261bba32df1896d10b12d69ae22","after":"641a99f0287c652018d4c1853470cdcff4d70d76","ref":"refs/heads/main","pushedAt":"2024-08-08T18:33:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Optimus: Accelerating Large-Scale Multi-Modal LLM Training by Bubble Exploitation","shortMessageHtmlLink":"Optimus: Accelerating Large-Scale Multi-Modal LLM Training by Bubble …"}},{"before":"f057104c56c26d0ed3260ad0c03fc22302c154a4","after":"7971a5d1d486a261bba32df1896d10b12d69ae22","ref":"refs/heads/main","pushedAt":"2024-08-05T19:17:42.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"ByteCheckpoint, llama3","shortMessageHtmlLink":"ByteCheckpoint, llama3"}},{"before":"004f25a6814608bb9d3197b10abb4d1c681de95b","after":"f057104c56c26d0ed3260ad0c03fc22302c154a4","ref":"refs/heads/main","pushedAt":"2024-07-25T23:13:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"ALTO: An Efficient Network Orchestrator for Compound AI Systems","shortMessageHtmlLink":"ALTO: An Efficient Network Orchestrator for Compound AI Systems"}},{"before":"ad2492596fd7de5385b2907a771122ffd73d7790","after":"004f25a6814608bb9d3197b10abb4d1c681de95b","ref":"refs/heads/main","pushedAt":"2024-07-25T22:17:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Eloquent: A More Robust Transmission Scheme for LLM Token Streaming","shortMessageHtmlLink":"Eloquent: A More Robust Transmission Scheme for LLM Token Streaming"}},{"before":"af6d7dc51a6aae7b1e61d145211d3cca0e3f6033","after":"ad2492596fd7de5385b2907a771122ffd73d7790","ref":"refs/heads/main","pushedAt":"2024-07-15T18:27:17.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Enabling Elastic Model Serving with MultiWorld","shortMessageHtmlLink":"Enabling Elastic Model Serving with MultiWorld"}},{"before":"9524b3fef2749b0d32b4f6565ca819809679c200","after":"af6d7dc51a6aae7b1e61d145211d3cca0e3f6033","ref":"refs/heads/main","pushedAt":"2024-07-12T02:57:28.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"FlashAttention-3: Fast and Accurate Attention with Asynchrony and Low-precision","shortMessageHtmlLink":"FlashAttention-3: Fast and Accurate Attention with Asynchrony and Low…"}},{"before":"8cc8ea3152a2362faf8a2d5597af3d4fce00699a","after":"9524b3fef2749b0d32b4f6565ca819809679c200","ref":"refs/heads/main","pushedAt":"2024-07-12T02:55:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Alibaba HPN: A Data Center Network for Large Language Model Training","shortMessageHtmlLink":"Alibaba HPN: A Data Center Network for Large Language Model Training"}},{"before":"4c161b7a0f9bd1577a76629313df49ae354e8b54","after":"8cc8ea3152a2362faf8a2d5597af3d4fce00699a","ref":"refs/heads/main","pushedAt":"2024-07-10T18:17:04.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Lazarus: Resilient and Elastic Training of Mixture-of-Experts Models with Adaptive Expert Placement","shortMessageHtmlLink":"Lazarus: Resilient and Elastic Training of Mixture-of-Experts Models …"}},{"before":"3b211f69a0965b4a28fb5ef466be85027ec883f8","after":"4c161b7a0f9bd1577a76629313df49ae354e8b54","ref":"refs/heads/main","pushedAt":"2024-06-21T23:24:21.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Optimizing Speculative Decoding for Serving Large Language Models Using Goodput","shortMessageHtmlLink":"Optimizing Speculative Decoding for Serving Large Language Models Usi…"}},{"before":"16db8741fd39351b0d0607d65594e8a54e8b0136","after":"3b211f69a0965b4a28fb5ef466be85027ec883f8","ref":"refs/heads/main","pushedAt":"2024-06-21T23:23:19.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Scaling Beyond the GPU Memory Limit for Large Mixture-of-Experts Model Training","shortMessageHtmlLink":"Scaling Beyond the GPU Memory Limit for Large Mixture-of-Experts Mode…"}},{"before":"9fe3564615760963fe37994090a23de90ea794ee","after":"16db8741fd39351b0d0607d65594e8a54e8b0136","ref":"refs/heads/main","pushedAt":"2024-06-10T20:02:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Helix: Distributed Serving of Large Language Models via Max-Flow on Heterogeneous GPUs","shortMessageHtmlLink":"Helix: Distributed Serving of Large Language Models via Max-Flow on H…"}},{"before":"2ef5be3348b6d9b2d82c68f438241ee404c383f0","after":"9fe3564615760963fe37994090a23de90ea794ee","ref":"refs/heads/main","pushedAt":"2024-06-10T20:01:14.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Boosting Large-scale Parallel Training Efficiency with C4","shortMessageHtmlLink":"Boosting Large-scale Parallel Training Efficiency with C4"}},{"before":"8fe93af86d0c74ca2e560d23d2184054f69491fa","after":"2ef5be3348b6d9b2d82c68f438241ee404c383f0","ref":"refs/heads/main","pushedAt":"2024-05-31T19:01:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Pipeline Parallelism with Controllable Memory","shortMessageHtmlLink":"Pipeline Parallelism with Controllable Memory"}},{"before":"738aac53c32979682ea86ed8d8bf31861785b8f1","after":"8fe93af86d0c74ca2e560d23d2184054f69491fa","ref":"refs/heads/main","pushedAt":"2024-05-13T17:21:41.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"add MLSys Courses","shortMessageHtmlLink":"add MLSys Courses"}},{"before":"9b92cc262f7f747af86627c896ee1ce7cd2fdbb3","after":"738aac53c32979682ea86ed8d8bf31861785b8f1","ref":"refs/heads/main","pushedAt":"2024-05-12T02:11:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Oobleck, Gemini, Perseus","shortMessageHtmlLink":"Oobleck, Gemini, Perseus"}},{"before":"45d282816b1f419e4ba6db898e70f79fb6e85709","after":"9b92cc262f7f747af86627c896ee1ce7cd2fdbb3","ref":"refs/heads/main","pushedAt":"2024-05-09T17:28:33.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"89ad425180df16fdac46834200073aa90f52d56c","after":"45d282816b1f419e4ba6db898e70f79fb6e85709","ref":"refs/heads/main","pushedAt":"2024-05-06T20:23:24.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"BlockLLM: Multi-tenant Finer-grained Serving for Large Language Models","shortMessageHtmlLink":"BlockLLM: Multi-tenant Finer-grained Serving for Large Language Models"}},{"before":"c7dd7da0ae080e0bbfbec35441f1571fc9573c21","after":"89ad425180df16fdac46834200073aa90f52d56c","ref":"refs/heads/main","pushedAt":"2024-05-06T20:21:39.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models","shortMessageHtmlLink":"DistriFusion: Distributed Parallel Inference for High-Resolution Diff…"}},{"before":"01175291baaa12e9f05565f01cc02b9342bdbcc4","after":"c7dd7da0ae080e0bbfbec35441f1571fc9573c21","ref":"refs/heads/main","pushedAt":"2024-05-06T20:19:31.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"2462828f7262c6038e3801c36979ee588ec0580a","after":"01175291baaa12e9f05565f01cc02b9342bdbcc4","ref":"refs/heads/main","pushedAt":"2024-04-29T16:04:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"214abed4a56897a6e01067437da8b453de5c9d34","after":"2462828f7262c6038e3801c36979ee588ec0580a","ref":"refs/heads/main","pushedAt":"2024-04-27T03:33:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"AmberLJC","name":"Jiachen LIU","path":"/AmberLJC","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/42296458?s=80&v=4"},"commit":{"message":"Andes: Defining and Enhancing Quality-of-Experience in LLM-Based Text Streaming Services","shortMessageHtmlLink":"Andes: Defining and Enhancing Quality-of-Experience in LLM-Based Text…"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOC0zMFQyMDo0NjozNS4wMDAwMDBazwAAAASpCAH-","startCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOC0zMFQyMDo0NjozNS4wMDAwMDBazwAAAASpCAH-","endCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wNC0yN1QwMzozMzozMC4wMDAwMDBazwAAAAQ7qjZ4"}},"title":"Activity · AmberLJC/LLMSys-PaperList"}