Skriptet dhe dokumentat

This commit is contained in:
2022-09-12 12:56:12 +02:00
parent 43b340a916
commit 2b15225b73
17 changed files with 171 additions and 0 deletions

49
convert.py Normal file
View File

@@ -0,0 +1,49 @@
import pandas as pd
from os import listdir
from os.path import isfile, join
from docx.api import Document
from enum import Enum
class Klinika(Enum):
    """Clinics whose Word reports can be converted.

    Each member's value is the string handed to ``convert()``, which uses it
    both to select input files by name and to build the output file name.
    """

    # Surgery (Albanian: "kirurgji").
    KIRURGJI = "kirurgji"
    # Obstetrics and gynaecology.
    OBS = "OBS-GYN"
    # Emergency department (Albanian: "urgjenca").
    URGJENCA = "urgjenca"
def convert(klinika: str) -> None:
    """Merge the Word tables of one clinic into a single Excel workbook.

    Every regular file in ``./Word/`` whose name contains ``klinika`` and
    does not contain ``"lock"`` is opened as a .docx document. For each table
    in those documents the first row is read as the header and all remaining
    rows are collected as data. The collected rows are written to
    ``./Excel/raw_{klinika}_2016-2019.xlsx``, using the distinct header texts
    (in first-seen order, across all tables) as column names.

    Args:
        klinika: Substring identifying the clinic in the Word file names;
            it is also embedded in the output file name.

    Raises:
        ValueError: Propagated from pandas when the number of distinct
            header texts differs from the width of the collected data rows
            (e.g. when the tables do not all share the same header).
    """
    print(klinika)
    docx_path = './Word/'
    xlsx_path = './Excel/'

    # Names containing "lock" are skipped -- presumably the temporary lock
    # files an editor leaves next to an open document (TODO confirm).
    list_paths = [
        join(docx_path, name)
        for name in listdir(docx_path)
        if isfile(join(docx_path, name))
        and klinika in name
        and "lock" not in name
    ]

    columns = []
    rows = []
    for path in list_paths:
        document = Document(path)
        for table in document.tables:
            if len(table.rows) == 0:
                # Nothing to read; indexing rows[0] would raise IndexError.
                continue
            header_row = table.rows[0]
            columns.extend(
                cell.text.strip().replace('\n', ' ')
                for cell in header_row.cells
            )
            # Detach the header row from the in-memory table so the loop
            # below only sees data rows. The document is never saved, so the
            # file on disk is not modified.
            table._tbl.remove(header_row._tr)
            rows.extend([cell.text for cell in row.cells] for row in table.rows)
        print(f'{path} Done')

    # De-duplicate header texts while keeping first-seen order.
    filtered_columns = list(dict.fromkeys(columns))
    print(f'{len(filtered_columns)} Columns found')
    print('writing to excel...')

    # Build the frame once from the accumulated rows: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    if rows:
        df = pd.DataFrame(rows)
        df.columns = filtered_columns
    else:
        # No data rows at all: still emit a sheet carrying the headers.
        df = pd.DataFrame(columns=filtered_columns)

    excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
    df.to_excel(excel_path, index=False)
# Runs the conversion for the OBS-GYN clinic as soon as this module is loaded.
convert(klinika=Klinika.OBS.value)