czds

- ICANN Centralized Zone Data Service Tool
git clone git://git.acid.vegas/czds.git
Log | Files | Refs | Archive | README | LICENSE

commit 08ccea270d921475a211c67a650e155afe152fe1
parent 6047e0aed73fb318e23c5ea32b9e6344bd30aaf2
Author: acidvegas <acid.vegas@acid.vegas>
Date: Fri, 21 Mar 2025 20:46:08 -0400

fixed progress

Diffstat:
Mczds/__init__.py | 2+-
Mczds/client.py | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
Msetup.py | 2+-

3 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/czds/__init__.py b/czds/__init__.py
@@ -5,7 +5,7 @@
 from .client import CZDS
 
 
-__version__ = '1.2.6'
+__version__ = '1.2.7'
 __author__  = 'acidvegas'
 __email__   = 'acid.vegas@acid.vegas'
 __github__  = 'https://github.com/acidvegas/czds'
 \ No newline at end of file
diff --git a/czds/client.py b/czds/client.py
@@ -141,7 +141,7 @@ class CZDS:
         Decompress a gzip file in place
         
         :param filepath: Path to the gzip file
-        :param cleanup: Whether to remove the original gzip file after decompression
+        :param cleanup: Whether to remove the original gzip file after decompressions
         '''
 
         logging.debug(f'Decompressing {filepath}')
@@ -173,38 +173,71 @@ class CZDS:
             tld = url.split('/')[-1].split('.')[0]  # Extract TLD from URL
             logging.info(f'Starting download of {tld} zone file')
             
-            async with self.session.get(url, headers=self.headers) as response:
-                if response.status != 200:
-                    error_msg = f'Failed to download {tld}: {response.status} {await response.text()}'
-                    logging.error(error_msg)
-                    raise Exception(error_msg)
-
-                if not (content_disposition := response.headers.get('Content-Disposition')):
-                    error_msg = f'Missing Content-Disposition header for {tld}'
-                    logging.error(error_msg)
-                    raise ValueError(error_msg)
-
-                filename = content_disposition.split('filename=')[-1].strip('"')
-                filepath = os.path.join(output_directory, filename)
-
-                async with aiofiles.open(filepath, 'wb') as file:
-                    total_size = 0
-                    while True:
-                        chunk = await response.content.read(8192)
-                        if not chunk:
-                            break
-                        await file.write(chunk)
-                        total_size += len(chunk)
-                    
-                    size_mb = total_size / (1024 * 1024)
-                    logging.info(f'Successfully downloaded {tld} zone file ({size_mb:.2f} MB)')
-
-                if decompress:
-                    await self.gzip_decompress(filepath, cleanup)
-                    filepath = filepath[:-3]  # Remove .gz extension
-                    logging.info(f'Decompressed {tld} zone file')
-
-                return filepath
+            try:
+                async with self.session.get(url, headers=self.headers) as response:
+                    if response.status != 200:
+                        error_msg = f'Failed to download {tld}: {response.status} {await response.text()}'
+                        logging.error(error_msg)
+                        raise Exception(error_msg)
+
+                    # Get expected file size from headers
+                    expected_size = int(response.headers.get('Content-Length', 0))
+                    if not expected_size:
+                        logging.warning(f'No Content-Length header for {tld}')
+
+                    if not (content_disposition := response.headers.get('Content-Disposition')):
+                        error_msg = f'Missing Content-Disposition header for {tld}'
+                        logging.error(error_msg)
+                        raise ValueError(error_msg)
+
+                    filename = content_disposition.split('filename=')[-1].strip('"')
+                    filepath = os.path.join(output_directory, filename)
+
+                    async with aiofiles.open(filepath, 'wb') as file:
+                        total_size = 0
+                        last_progress = 0
+                        async for chunk in response.content.iter_chunked(8192):
+                            await file.write(chunk)
+                            total_size += len(chunk)
+                            if expected_size:
+                                progress = int((total_size / expected_size) * 100)
+                                if progress >= last_progress + 5:  # Log every 5% increase
+                                    logging.info(f'Downloading {tld}: {progress}% ({total_size:,}/{expected_size:,} bytes)')
+                                    last_progress = progress
+
+                        # Verify file size
+                        if expected_size and total_size != expected_size:
+                            error_msg = f'Incomplete download for {tld}: Got {total_size} bytes, expected {expected_size} bytes'
+                            logging.error(error_msg)
+                            os.remove(filepath)  # Clean up incomplete file
+                            raise Exception(error_msg)
+                        
+                        size_mb = total_size / (1024 * 1024)
+                        logging.info(f'Successfully downloaded {tld} zone file ({size_mb:.2f} MB)')
+
+                    if decompress:
+                        try:
+                            # Verify gzip integrity before decompressing
+                            with gzip.open(filepath, 'rb') as test_gzip:
+                                test_gzip.read(1)  # Try reading first byte to verify gzip integrity
+                            
+                            await self.gzip_decompress(filepath, cleanup)
+                            filepath = filepath[:-3]  # Remove .gz extension
+                            logging.info(f'Decompressed {tld} zone file')
+                        except (gzip.BadGzipFile, OSError) as e:
+                            error_msg = f'Failed to decompress {tld}: {str(e)}'
+                            logging.error(error_msg)
+                            os.remove(filepath)  # Clean up corrupted file
+                            raise Exception(error_msg)
+
+                    return filepath
+
+            except Exception as e:
+                logging.error(f'Error downloading {tld}: {str(e)}')
+                # Clean up any partial downloads
+                if 'filepath' in locals() and os.path.exists(filepath):
+                    os.remove(filepath)
+                raise
 
         if semaphore:
             async with semaphore:
diff --git a/setup.py b/setup.py
@@ -11,7 +11,7 @@ with open('README.md', 'r', encoding='utf-8') as fh:
 
 setup(
 	name='czds-api',
-	version='1.2.6',
+	version='1.2.7',
 	author='acidvegas',
 	author_email='acid.vegas@acid.vegas',
 	description='ICANN API for the Centralized Zones Data Service',