shardz

- Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.acid.vegas/-c.git
Log | Files | Refs | Archive | README | LICENSE

commit f4eb446fef0bd54cec421ae4fa892c5ce7dfea3e
Author: acidvegas <acid.vegas@acid.vegas>
Date: Fri, 6 Dec 2024 23:18:44 -0500

Initial commit

Diffstat:
A.screens/shardz.jpg | 0
ALICENSE | 15+++++++++++++++
AREADME.md | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ashardz.c | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

4 files changed, 128 insertions(+), 0 deletions(-)

diff --git a/.screens/shardz.jpg b/.screens/shardz.jpg
Binary files differ.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2025, acidvegas <acid.vegas@acid.vegas>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,55 @@
+<h1 align="center">Shardz</h1>
+<p align="center">
+    <img src="./.screens/shardz.jpg">
+</p>
+
+Shardz is a lightweight C utility that shards *(splits)* the output of any process for distributed processing. It allows you to easily distribute workloads across multiple processes or machines by splitting input streams into evenly distributed chunks.
+
+## Use Cases
+- Distributing large datasets across multiple workers
+- Parallel processing of log files
+- Load balancing input streams
+- Splitting any line-based input for distributed processing
+
+## Building
+```bash
+gcc -o shardz shardz.c
+```
+
+## Usage
+```bash
+some_command | shardz INDEX/TOTAL
+```
+
+Where:
+- `INDEX` is the shard number (starting from 1)
+- `TOTAL` is the total number of shards
+
+### Examples
+- Machine number 1 would run:
+```bash
+curl https://example.com/large_file.txt | shardz 1/3
+```
+
+- Machine number 2 would run:
+```bash
+curl https://example.com/large_file.txt | shardz 2/3
+```
+
+- Machine number 3 would run:
+```bash
+curl https://example.com/large_file.txt | shardz 3/3
+```
+
+## How It Works
+
+Shardz uses a modulo operation to determine which lines should be processed by each shard. For example, with `3` total shards:
+- Shard 1 processes lines 1, 4, 7, 10, ...
+- Shard 2 processes lines 2, 5, 8, 11, ...
+- Shard 3 processes lines 3, 6, 9, 12, ...
+
+This ensures an even distribution of the workload across all shards.
+
+---
+
+###### Mirrors: [acid.vegas](https://git.acid.vegas/shardz) • [SuperNETs](https://git.supernets.org/acidvegas/shardz) • [GitHub](https://github.com/acidvegas/shardz) • [GitLab](https://gitlab.com/acidvegas/shardz) • [Codeberg](https://codeberg.org/acidvegas/shardz)
diff --git a/shardz.c b/shardz.c
@@ -0,0 +1,57 @@
+// SHARDZ - Shard the output of any process for distributed processing - Developed by acidvegas in C (https://github.com/acidvegas/shardz)
+// shardz.c
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Print the one-line usage synopsis to stderr and terminate the process
+// with exit status 1. This function does not return to its caller.
+//
+// program_name: text substituted into the synopsis (main passes argv[0]).
+void print_usage(const char *program_name)
+{
+    fprintf(stderr, "Usage: %s INDEX/TOTAL\n", program_name);
+    exit(1);
+}
+
+// Entry point: copy to stdout every line of stdin that belongs to one shard.
+//
+// Expects exactly one argument of the form INDEX/TOTAL, both base-10
+// integers with 1 <= INDEX <= TOTAL. Line numbering starts at 1, and a line
+// is emitted when (line_number - INDEX) is a multiple of TOTAL, i.e. shard
+// INDEX receives lines INDEX, INDEX + TOTAL, INDEX + 2*TOTAL, ...
+//
+// Returns 0 on success. Exits with status 1 on a malformed argument, and
+// returns 1 if reading stdin or writing stdout fails.
+int main(int argc, char *argv[]) {
+    if (argc != 2) {
+        print_usage(argv[0]);
+    }
+
+    char *slash = strchr(argv[1], '/');
+    if (!slash) {
+        print_usage(argv[0]);
+    }
+
+    // Split the argument in place: "INDEX/TOTAL" -> "INDEX\0TOTAL".
+    *slash = '\0';
+    char *index_str = argv[1];
+    char *total_str = slash + 1;
+
+    // An empty field parses as 0 with *endptr == '\0', so the "< 1" checks
+    // below reject it together with zero and negative values.
+    char *endptr;
+    long index = strtol(index_str, &endptr, 10);
+    if (*endptr != '\0' || index < 1) {
+        print_usage(argv[0]);
+    }
+
+    long total = strtol(total_str, &endptr, 10);
+    if (*endptr != '\0' || total < 1) {
+        print_usage(argv[0]);
+    }
+
+    if (index > total) {
+        fprintf(stderr, "Error: INDEX cannot be greater than TOTAL\n");
+        exit(1);
+    }
+
+    char *line = NULL;      // buffer owned by getline(); freed below
+    size_t len = 0;         // capacity of that buffer
+    ssize_t nread;          // length of the line just read, or -1
+    long current_line = 1;  // 1-based number of the line just read
+    int status = 0;
+
+    while ((nread = getline(&line, &len, stdin)) != -1) {
+        // The difference is negative for the first lines when index > 1,
+        // but the remainder is zero exactly when it is a multiple of total,
+        // so the sign does not matter.
+        if (((current_line - index) % total) == 0) {
+            // Write exactly the bytes getline() returned. printf("%s")
+            // would stop at the first embedded NUL byte and drop the rest
+            // of the line.
+            if (fwrite(line, 1, (size_t)nread, stdout) != (size_t)nread) {
+                perror("shardz: write error");
+                status = 1;
+                break;
+            }
+        }
+        current_line++;
+    }
+
+    // getline() returns -1 for both end-of-file and failure; tell them apart.
+    if (status == 0 && ferror(stdin)) {
+        perror("shardz: read error");
+        status = 1;
+    }
+
+    free(line);
+
+    // Buffered output can still fail at flush time (e.g. a full disk).
+    if (status == 0 && fflush(stdout) != 0) {
+        perror("shardz: write error");
+        status = 1;
+    }
+
+    return status;
+}
+\ No newline at end of file