Source code for PAMI.extras.topKPatterns

# topKPatterns finds the top k patterns, ranked by length, in an input file.
#
# **Importing this algorithm into a python program**
# --------------------------------------------------------
#
#     from PAMI.extras import topKPatterns as tK
#
#     obj = tK.topKPatterns("inputPatterns.txt", 10, "\t")
#
#     obj.save("outputFile.txt")
#




__copyright__ = """
Copyright (C)  2021 Rage Uday Kiran

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation, either version 3 of the License, or
     (at your option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""


class topKPatterns:
    """
    :Description: Find the top k patterns, ranked by length (number of items), in a pattern file.

    Each input line is read as ``item<sep>item<sep>...:value``. Only the text before the
    first ``:`` is treated as the pattern; it is split on ``sep`` and empty tokens are dropped.

    :Attributes:

        inputFile : str
            input file name or path
        k : int
            number of longest patterns to keep. default is 10
        sep : str
            separator between the items of a pattern in the input file. default is tab

    :Methods:

        getTopKPatterns()
            return the top k patterns as a dict
        save(outputFile)
            store the top k patterns into an output file

    **Importing this algorithm into a python program**
    --------------------------------------------------------
    .. code-block:: python

            from PAMI.extras import topKPatterns as tK

            obj = tK.topKPatterns("inputPatterns.txt", 10, "\\t")

            obj.save("outputFile.txt")
    """

    def __init__(self, inputFile: str, k: int = 10, sep: str = '\t') -> None:
        """
        :param inputFile: input file name or path
        :type inputFile: str
        :param k: number of longest patterns to keep
        :type k: int
        :param sep: separator between the items of a pattern in the input file
        :type sep: str
        """
        self.inputFile = inputFile
        self.k = k
        self.sep = sep

    def _parseLine(self, line: str) -> list:
        """
        Extract the list of items from one line of the input file.

        :param line: raw line read from the input file
        :type line: str
        :return: items of the pattern; empty list when the line holds no pattern
        :rtype: list
        """
        # Everything after the first ':' (e.g. a support value) is not part of the pattern.
        patternText = line.strip().partition(':')[0]
        # Dropping empty tokens handles a trailing separator before ':' without
        # discarding the last real item when no trailing separator is present.
        return [item for item in patternText.split(self.sep) if item]

    def getTopKPatterns(self) -> dict:
        """
        Get the k longest patterns, where length is the number of items in a pattern.

        Patterns of equal length keep their relative order from the input file.
        Fewer than k entries are returned when the file holds fewer than k patterns.

        :return: top k patterns as a dictionary. top k patterns = {patternId: pattern},
                 with patternId starting at 1 for the longest pattern
        :rtype: dict
        """
        # A non-positive k yields an empty result rather than a negative slice.
        limit = max(int(self.k), 0)
        with open(self.inputFile, 'r') as f:
            # Blank lines and lines without any item produce no pattern and are skipped.
            patterns = [pattern for pattern in map(self._parseLine, f) if pattern]
        # Stable sort: ties stay in file order.
        patterns.sort(key=len, reverse=True)
        return {patternId: pattern for patternId, pattern in enumerate(patterns[:limit], start=1)}

    def save(self, outputFile: str) -> None:
        """
        Store the k longest patterns into a file, one ``patternId<TAB>item`` row per item.

        :param outputFile: output file name or path
        :type outputFile: str
        """
        # Read the input first so a failure there does not leave a truncated output file.
        topPatterns = self.getTopKPatterns()
        with open(outputFile, 'w') as f:
            f.writelines(
                f'{patternId}\t{item}\n'
                for patternId, pattern in topPatterns.items()
                for item in pattern
            )