# Source code for PAMI.extras.generateDatabase.generateTransactionalDatabase

# generateTransactionalDatabase generates a synthetic transactional database from a given number of lines, an average number of items per line, and a total number of items.
#
#  **Importing this algorithm into a python program**
#  --------------------------------------------------------
#     from PAMI.extras.generateDatabase import generateTransactionalDatabase as db
#     obj = db.generateTransactionalDatabase(10, 5, 10)
#     obj.create()
#     obj.save('\t', 'db.txt')
#     print(obj.getTransactions())  # returns the transactional database as a pandas DataFrame

# **Running the code from the command line**
# --------------------------------------------------------
#     python generateTransactionalDatabase.py 10 5 10 db.txt ","
#     cat db.txt
#




__copyright__ = """
Copyright (C)  2021 Rage Uday Kiran

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation, either version 3 of the License, or
     (at your option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""


import numpy as np
import pandas as pd
import sys


class generateTransactionalDatabase:
    """
    :Description: Generate a synthetic transactional database with the given
        number of lines, average number of items per line, and total number
        of distinct items.

    :Attributes:

        numLines: int
            number of lines (transactions) to generate
        avgItemsPerLine: int
            average number of items per line
        numItems: int
            total number of distinct items; items are labelled 1..numItems
        db: list
            generated transactions, one numpy array of item ids per line

    :Methods:

        create:
            Generate the transactional database
        save:
            Save the transactional database to a file
        getTransactions:
            Get the transactional database as a pandas DataFrame
    """

    def __init__(self, numLines, avgItemsPerLine, numItems) -> None:
        """
        Initialize the generator with the given parameters

        :param numLines: number of lines
        :type numLines: int
        :param avgItemsPerLine: average number of items per line
        :type avgItemsPerLine: int
        :param numItems: total number of items
        :type numItems: int
        """
        self.numLines = numLines
        self.avgItemsPerLine = avgItemsPerLine
        self.numItems = numItems
        self.db = []

    def tuning(self, array, sumRes) -> list:
        """
        Tune the array in place so that the sum of its values equals sumRes.

        Randomly chosen entries are moved by one unit at a time towards the
        target sum. No bounds are enforced on the individual values.

        :param array: list (or numpy array) of integer values, modified in place
        :type array: list
        :param sumRes: target sum; rounded to the nearest integer because
            integer values can never add up to a fractional target
        :type sumRes: int
        :return: the same array object, whose values now sum to sumRes
        :rtype: list
        :raises ValueError: if the array is empty and the target sum is not 0
        """
        difference = int(round(sumRes)) - int(np.sum(array))
        if difference == 0:
            return array
        if len(array) == 0:
            raise ValueError("cannot tune an empty array to a non-zero sum")
        # The sum moves monotonically towards the target, so exactly
        # |difference| unit steps are needed; no need to re-sum every time.
        step = 1 if difference > 0 else -1
        for _ in range(abs(difference)):
            array[np.random.randint(0, len(array))] += step
        return array

    def _boundedTune(self, values, target, low, high):
        """
        Adjust values in place until they sum to target while every entry
        stays inside [low, high].

        The caller must guarantee that all entries already lie in
        [low, high] and that len(values) * low <= target <= len(values) * high.
        """
        difference = target - int(values.sum())
        while difference != 0:
            step = 1 if difference > 0 else -1
            # how far each entry may still move in the required direction
            room = (high - values) if step > 0 else (values - low)
            candidates = np.flatnonzero(room > 0)
            # draw at most len(values) adjustments per round to bound memory
            batch = min(abs(difference), len(values))
            picks = np.random.choice(candidates, size=batch)
            # cap each entry's adjustment by its remaining room
            delta = np.minimum(np.bincount(picks, minlength=len(values)), room)
            values += step * delta
            difference -= step * int(delta.sum())
        return values

    def generateArray(self, nums, avg, maxItems) -> np.ndarray:
        """
        Generate a random integer array of length nums whose values lie in
        [1, maxItems] and average to avg.

        :param nums: number of values
        :type nums: int
        :param avg: average value
        :type avg: float
        :param maxItems: maximum value (inclusive)
        :type maxItems: int
        :return: random array of nums integers summing to round(nums * avg)
        :rtype: np.ndarray
        :raises ValueError: if maxItems < 1 or avg is outside [1, maxItems]
        """
        if nums < 1:
            return np.array([], dtype=np.int64)
        if maxItems < 1:
            raise ValueError("maxItems must be at least 1")
        target = int(round(nums * avg))
        if not nums <= target <= nums * maxItems:
            raise ValueError("avg must lie between 1 and maxItems")

        # randint's upper bound is exclusive, hence maxItems + 1
        values = np.random.randint(1, maxItems + 1, nums, dtype=np.int64)
        self._boundedTune(values, target, 1, maxItems)

        # If every value is identical, shift one unit between two distinct
        # entries to add variety. This is only possible when the common
        # value is strictly inside the bounds; otherwise the uniform array
        # is the only valid solution and is returned as is.
        if nums >= 2 and np.all(values == values[0]) and 1 < values[0] < maxItems:
            up, down = np.random.choice(nums, 2, replace=False)
            values[up] += 1
            values[down] -= 1
        return values

    def create(self) -> None:
        """
        Generate the transactional database and store it in self.db.

        Any previously generated transactions are discarded so that the
        database always holds exactly numLines lines.

        :return: None
        :raises ValueError: if avgItemsPerLine is outside [1, numItems]
        """
        lengths = self.generateArray(self.numLines, self.avgItemsPerLine, self.numItems)
        items = np.arange(1, self.numItems + 1)
        # each line is a sample of distinct items, so its length can never
        # exceed numItems (guaranteed by generateArray)
        self.db = [np.random.choice(items, length, replace=False) for length in lengths]

    def save(self, sep, filename) -> None:
        """
        Save the transactional database to a file, one transaction per line

        :param sep: separator written between the items of a line
        :type sep: str
        :param filename: name of the file
        :type filename: str
        :return: None
        """
        with open(filename, 'w') as f:
            for line in self.db:
                f.write(sep.join(map(str, line)) + '\n')

    def getTransactions(self) -> pd.DataFrame:
        """
        Get the transactional database

        :return: the transactional database, one row per transaction
        :rtype: pd.DataFrame
        """
        df = pd.DataFrame(self.db)
        return df
if __name__ == "__main__":
    # Command-line entry point:
    #   python generateTransactionalDatabase.py numLines avgItemsPerLine numItems outputFile [separator]
    # The separator is optional and defaults to a tab, so the four-argument
    # form no longer fails with an IndexError.
    if len(sys.argv) < 5:
        print("Usage: python generateTransactionalDatabase.py "
              "numLines avgItemsPerLine numItems outputFile [separator]",
              file=sys.stderr)
        sys.exit(1)
    separator = sys.argv[5] if len(sys.argv) > 5 else '\t'
    obj = generateTransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]))
    obj.create()
    obj.save(separator, sys.argv[4])