Skip to content

Commit

Permalink
Code clean up.
Browse files Browse the repository at this point in the history
  • Loading branch information
eli64s committed Mar 2, 2024
1 parent 96a9e17 commit a502167
Show file tree
Hide file tree
Showing 11 changed files with 168 additions and 254 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "readmeai"
version = "0.5.073"
description = "👾 Automated README file generator, powered by LLM APIs."
version = "0.5.074"
description = "👾 Automated README file generator, powered by large language model APIs."
authors = ["Eli <[email protected]>"]
license = "MIT"
readme = "README.md"
Expand Down
25 changes: 10 additions & 15 deletions readmeai/_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from readmeai.core.utils import get_environment
from readmeai.generators.builder import MarkdownBuilder
from readmeai.models.factory import model_handler
from readmeai.services.git import GitHost, clone_repository
from readmeai.services.git import clone_repository
from readmeai.utils.file_handler import FileHandler

_logger = Logger(__name__)
Expand Down Expand Up @@ -49,13 +49,14 @@ def readme_agent(
try:
conf = ConfigLoader()
conf.config.api.rate_limit = rate_limit
llm_api, llm_model = get_environment(api, model)
conf.config.git = GitSettings(repository=repository)
api, model = get_environment(api, model)
conf.config.llm = conf.config.llm.copy(
update={
"api": llm_api,
"api": api,
"base_url": base_url,
"context_window": context_window,
"model": llm_model,
"model": model,
"temperature": temperature,
"top_p": top_p,
}
Expand All @@ -73,12 +74,6 @@ def readme_agent(
else prompt_for_image(image),
}
)
try:
conf.config.git = GitSettings(repository=repository)
conf.config.git.host_domain = conf.config.git.repository.host
except Exception:
conf.config.git.host_domain = GitHost.LOCAL.name

asyncio.run(readme_generator(conf, output_file))

except Exception as exc:
Expand All @@ -87,18 +82,18 @@ def readme_agent(

async def readme_generator(conf: ConfigLoader, output_file: Path) -> None:
"""Orchestrates the README.md file generation process."""
repo = conf.config.git.repository
_logger.info(f"Repository settings validated: {conf.config.git}")
_logger.info(f"LLM API settings validated: {conf.config.llm}")

with tempfile.TemporaryDirectory() as temp_dir:
await clone_repository(repo, temp_dir)
await clone_repository(conf.config.git.repository, temp_dir)
(
dependencies,
raw_files,
) = preprocessor(conf, temp_dir)
_logger.info(f"Dependencies extracted: {dependencies}")
_logger.info(f"Total files analyzed: {len(raw_files)}")

_logger.info(f"Total files preprocessed: {len(raw_files)}")
_logger.info(f"Dependencies found: {dependencies}")

async with model_handler(conf).use_api() as llm:
responses = await llm.batch_request(dependencies, raw_files)
Expand All @@ -115,8 +110,8 @@ async def readme_generator(conf: ConfigLoader, output_file: Path) -> None:
readme_md = MarkdownBuilder(
conf, dependencies, summaries, temp_dir
).build()
FileHandler().write(output_file, readme_md)

FileHandler().write(output_file, readme_md)
_logger.info("README generation process completed successfully!")
_logger.info(f"README.md file saved to: {output_file}")
_logger.info("Share it @ github.com/eli64s/readme-ai/discussions")
Expand Down
24 changes: 9 additions & 15 deletions readmeai/config/settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""Data models and functions for configuring the readme-ai CLI tool."""
"""
Core data models and configuration settings for the readme-ai package.
"""

from __future__ import annotations

Expand All @@ -7,14 +9,10 @@

from pydantic import BaseModel, DirectoryPath, HttpUrl, validator

from readmeai._exceptions import FileReadError
from readmeai.config.validators import GitValidator
from readmeai.core.logger import Logger
from readmeai.utils.file_handler import FileHandler
from readmeai.utils.resource_loader import get_resource_path

_logger = Logger(__name__)


class APISettings(BaseModel):
"""
Expand Down Expand Up @@ -43,8 +41,8 @@ class GitSettings(BaseModel):

repository: Union[HttpUrl, DirectoryPath]
full_name: Optional[str]
host: Optional[str]
host_domain: Optional[str]
host: Optional[str]
name: Optional[str]

_validate_repository = validator("repository", pre=True, always=True)(
Expand All @@ -53,6 +51,9 @@ class GitSettings(BaseModel):
_validate_full_name = validator("full_name", pre=True, always=True)(
GitValidator.validate_full_name
)
_set_host_domain = validator("host_domain", pre=True, always=True)(
GitValidator.set_host_domain
)
_set_host = validator("host", pre=True, always=True)(GitValidator.set_host)
_set_name = validator("name", pre=True, always=True)(GitValidator.set_name)

Expand Down Expand Up @@ -137,12 +138,5 @@ def _load_all_configs(self) -> None:
) in self.config.files.dict().items():
if not file_name.endswith(".toml"):
continue

try:
config_data = get_resource_path(self.file_handler, file_name)
setattr(self, key, config_data)
_logger.debug(f"Loaded config file: {file_name}")

except FileReadError as exc:
setattr(self, key, None)
_logger.warning(f"Config file not found: {file_name} - {exc}")
data_dict = get_resource_path(self.file_handler, file_name)
setattr(self, key, data_dict)
15 changes: 13 additions & 2 deletions readmeai/config/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def validate_full_name(cls, value: Optional[str], values: dict) -> str:
raise GitValidationError(url_or_path)

@classmethod
def set_host(cls, value: Optional[str], values: dict) -> str:
def set_host_domain(cls, value: Optional[str], values: dict) -> str:
"""Sets the Git service host from the repository provided."""
repo = values.get("repository")
if isinstance(repo, Path) or (
Expand All @@ -82,10 +82,21 @@ def set_host(cls, value: Optional[str], values: dict) -> str:
parsed_url = urlparse(str(repo))
for service in GitHost:
if service in parsed_url.netloc:
return service.split(".")[0]
return service

return GitHost.LOCAL

@classmethod
def set_host(cls, value: Optional[str], values: dict) -> str:
"""Set the host based on the repository URL."""
repo = values.get("repository")
if isinstance(repo, Path) or (
isinstance(repo, str) and Path(repo).is_dir()
):
return GitHost.LOCAL.value.lower()
parsed_url = urlsplit(repo)
return parsed_url.netloc.split(".")[0]

@classmethod
def set_name(cls, value: Optional[str], values: dict) -> str:
"""Sets the repository name from the repository provided."""
Expand Down
82 changes: 28 additions & 54 deletions readmeai/core/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
from pathlib import Path
from typing import Generator, List, Tuple

from readmeai.cli.options import ModelOptions as Models
from readmeai.config.settings import ConfigLoader
from readmeai.core.logger import Logger
from readmeai.generators.builder import MarkdownBuilder
from readmeai.models.tokens import count_tokens
from readmeai.parsers.factory import parser_handler

_github_actions_path = ".github/workflows"
Expand All @@ -24,7 +22,6 @@ class FileContext:
file_name: str
file_ext: str
content: str
tokens: int = 0
language: str = field(init=False)
dependencies: List[str] = field(default_factory=list)

Expand All @@ -43,7 +40,6 @@ def __init__(self, config_loader: ConfigLoader):
"""Initializes the RepositoryProcessor class."""
self._logger = Logger(__name__)
self.config_loader = config_loader
self.config = config_loader.config
self.blacklist = config_loader.blacklist.get("blacklist")
self.commands = config_loader.commands
self.languages = config_loader.languages.get("language_names")
Expand All @@ -64,24 +60,22 @@ def create_file_data(
def extract_dependencies(self, file_data: FileContext) -> List[str]:
"""Extracts the dependency file contents using the factory pattern."""
parsers = parser_handler()

if file_data.file_name not in parsers:
return []

parser = parsers.get(file_data.file_name)
dependency_names = parser.parse(content=file_data.content)
dependencies = parser.parse(content=file_data.content)

self._logger.info(
f"Dependency file found: {file_data.file_name}:\n{dependency_names}"
f"Dependency file found: {file_data.file_name}:\n{dependencies}"
)

return dependency_names
return dependencies

def generate_contents(self, repo_path: str) -> List[FileContext]:
    """Collect a ``FileContext`` for every file under *repo_path*.

    Accepts either a ``str`` or a ``Path``; string inputs are
    normalized to ``Path`` before traversal.
    """
    if isinstance(repo_path, str):
        repo_path = Path(repo_path)
    # list() materializes the generator directly; the previous
    # [x for x in gen] comprehension was a redundant element-wise copy.
    return list(self.generate_file_info(repo_path))

def generate_file_info(
Expand Down Expand Up @@ -128,15 +122,28 @@ def _filter_file(self, file_path: Path) -> bool:
"""
Determines if a file should be ignored based on configurations.
"""
if (
is_file_ignored(self.config_loader, file_path)
and str(file_path.name) in self.parser_files
):
blacklist = self.config_loader.blacklist["blacklist"]
is_file_ignored = any(
[
file_path.name in blacklist["files"],
file_path.suffix.lstrip(".") in blacklist["extensions"],
any(
dir in file_path.parts for dir in blacklist["directories"]
),
]
)
if is_file_ignored and str(file_path.name) in self.parser_files:
return False

return not file_path.is_file() or is_file_ignored(
self.config_loader, file_path
)
return not file_path.is_file() or is_file_ignored

def _language_mapper(
    self, contents: List[FileContext]
) -> List[FileContext]:
    """Fill in the ``language`` field for each file context.

    Looks each file extension up in the configured extension-to-language
    map; unknown extensions resolve to an empty string. Mutates the
    contexts in place and returns the same list.
    """
    lookup = self.languages.get
    for file_ctx in contents:
        file_ctx.language = lookup(file_ctx.file_ext, "").lower()
    return contents

def _process_file_path(
self, file_path: Path, repo_path: Path
Expand All @@ -154,6 +161,9 @@ def _process_file_path(
)

try:
if file_path.is_dir():
return

with file_path.open(encoding="utf-8") as file:
content = file.read()

Expand All @@ -178,38 +188,14 @@ def _process_file_path(
except (OSError, UnicodeDecodeError) as exc:
self._logger.warning(f"Error reading file {file_path}: {exc}")

def language_mapper(
    self, contents: List[FileContext]
) -> List[FileContext]:
    """Maps file extensions to their programming languages.

    Sets each context's ``language`` to the lowercased name found in
    the configured extension map, or "" when the extension is unknown.
    Mutates the contexts in place and returns the same list.
    """
    for content in contents:
        # Unknown extensions fall back to "" rather than raising.
        content.language = self.languages.get(content.file_ext, "").lower()
    return contents

def tokenize_content(
    self, contents: List[FileContext]
) -> List[FileContext]:
    """Attach a token count to every file context.

    Returns the input unchanged when the configured API is the offline
    model, since no encoder is used in that mode. Otherwise counts the
    tokens of each file's content with the configured encoder, mutating
    the contexts in place.
    """
    if self.config.llm.api == Models.OFFLINE.name:
        return contents

    encoder = self.config.llm.encoder
    for file_ctx in contents:
        file_ctx.tokens = count_tokens(file_ctx.content, encoder)
    return contents


def preprocessor(
config_loader: ConfigLoader, temp_dir: str
) -> Tuple[List[FileContext], List[str], List[Tuple[str, str]], str]:
"""Processes the repository files and returns the context."""
config = config_loader.config
repo_processor = RepositoryProcessor(config_loader)
repo_context = repo_processor.generate_contents(temp_dir)
repo_context = repo_processor.language_mapper(repo_context)
# repo_context = repo_processor.tokenize_content(repo_context)

repo_context = repo_processor._language_mapper(repo_context)
dependencies, dependency_dict = repo_processor.get_dependencies(
repo_context
)
Expand All @@ -218,20 +204,8 @@ def preprocessor(
(str(context.file_path), context.content) for context in repo_context
]

config.md.tree = MarkdownBuilder(
config_loader.config.md.tree = MarkdownBuilder(
config_loader, dependencies, raw_files, temp_dir
).md_tree

return dependencies, raw_files


def is_file_ignored(config: ConfigLoader, file_path: Path) -> bool:
    """Return True when *file_path* matches the configured blacklist.

    A file is ignored when its name, its extension (without the dot),
    or any directory component of its path appears in the blacklist
    section of the configuration.
    """
    blacklist = config.blacklist["blacklist"]
    # `or` short-circuits, unlike the previous any([...]) which built a
    # list and evaluated all three checks eagerly; this also stops the
    # generator variable from shadowing the builtin `dir`.
    return (
        file_path.name in blacklist["files"]
        or file_path.suffix.lstrip(".") in blacklist["extensions"]
        or any(part in blacklist["directories"] for part in file_path.parts)
    )
Loading

0 comments on commit a502167

Please sign in to comment.